Chapter 14.5  _ENV and load

This article takes a closer look at the role of the _ENV environment variable in Lua, showing through examples how the loadfile function and debug.setupvalue can be used to change the environment in which a chunk runs. It also presents an alternative approach: setting _ENV by prefixing the chunk with a line of code before loading it.

When load loads a chunk, it normally uses the global environment to initialize the chunk's _ENV upvalue.

However, an optional fourth argument to load supplies a different value to be used for _ENV. (The loadfile function has a similar parameter.)
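
As a minimal sketch of that fourth argument (the chunk string and variable names here are only illustrative):

local env = {}
local f = load("x = 10", "chunk", "t", env)   -- env becomes the chunk's _ENV
f()
print(env.x)    --> 10
print(x)        --> nil: nothing leaked into the real global environment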

As an example of this kind of initialization, suppose we have a typical configuration file that defines several constants and functions to be used by a program:

-- file 'config.lua'
width = 200
height = 300
...

We then load it with the following code:

env = { }
f = loadfile"config.lua","t",env)
f()

All the code in the configuration file then runs inside the empty environment env, so its definitions go into that table rather than into the global environment.
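
After the call, the values defined by the file can be read back as fields of env; a quick sketch:

print(env.width, env.height)   --> 200   300
print(width)                   --> nil: the real global environment is untouched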

Sometimes, however, we need to run the same chunk several times, each time with a different environment table.

In that case, the extra argument to load is no longer enough. There are two options that satisfy this need:

The first is to use debug.setupvalue, from the debug library, which allows us to change any upvalue of a given function:

f = loadfile(name)
...
env = { }
debug.setupvalue(f, 1, env)

The first argument to setupvalue is the function, the second is the index of the upvalue, and the third is the new value for that upvalue.

For the purpose of changing the environment, the first argument is a function returned by load or loadfile; Lua guarantees that such a function has exactly one upvalue, _ENV, so the second argument to setupvalue is always 1.
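
A minimal sketch of running the same chunk with two different environments this way (it assumes the config.lua file from earlier):

local f = assert(loadfile("config.lua", "t"))

local env1, env2 = {}, {}
debug.setupvalue(f, 1, env1)   -- _ENV is upvalue 1 for a loaded chunk
f()
debug.setupvalue(f, 1, env2)
f()
print(env1.width, env2.width)  --> 200   200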

A drawback of this option is its dependence on the debug library.

The second option is to tamper slightly with the chunk before loading it, for instance by prepending the following line of code:

_ENV = ...

As mentioned before, Lua compiles any chunk as a variadic function, so this extra line assigns the chunk's first argument to the _ENV variable.

That assignment is what sets the environment. After loading the chunk, we call the resulting function, passing the target environment as its first argument. For example:

f = loadwithprefix("local _ENV = ...;", io.lines(filename, "*L"))
...
env = {}
f(env)
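
loadwithprefix is not part of the standard library; the sketch below shows one way such a helper could be written on top of load with a reader function (the name and exact behavior are assumptions taken from the example above):

-- hypothetical loadwithprefix: hands 'prefix' to the compiler first,
-- then the pieces produced by the iterator 'lines'
-- (for instance, io.lines(filename, "*L"))
local function loadwithprefix (prefix, lines)
  local sent = false
  return load(function ()
    if not sent then
      sent = true
      return prefix            -- first piece: the injected line of code
    end
    return lines()             -- remaining pieces; nil ends the chunk
  end)
end

With such a helper, f(env) in the snippet above runs the file's code with env as its _ENV.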

Reposted from: https://www.cnblogs.com/daiker/p/5862195.html
