What happens when [object selector:para] is executed?

This article walks through the method-call process in Objective-C, covering the roles of the SEL and IMP types and how the objc_msgSend function works.

I originally only meant to write about what the SEL and IMP types are actually for, but there turned out to be plenty of material online, so I decided to trace through the whole process.

A method call in Objective-C is really a call to the function below, which sends a message to an object.

id objc_msgSend(id self, SEL op, ...)

Here self is the object receiving the message and ... is the argument list; the part worth a closer look is the SEL type.
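For a method that takes one argument, the compiler essentially rewrites the call into a cast-and-call of objc_msgSend. A minimal sketch (obj, doSomething: and arg are hypothetical names, and the cast has to match the method's real signature):

#import <objc/message.h>

// [obj doSomething:arg] is lowered into roughly this:
((id (*)(id, SEL, id))objc_msgSend)(obj, sel_registerName("doSomething:"), arg);

Casting objc_msgSend to the method's actual signature is the safe way to call it by hand; when you write normal Objective-C, the compiler knows the signature and emits this call for you.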

From Apple's documentation, we can see:

 typedef struct objc_selector *SEL;

which explains that SEL represents a method selector, and a method selector is the name of a method at runtime.

The documentation alone may not make this very clear, so let's dig into the details step by step.
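One quick way to see that a SEL is nothing more than a registered method name: a selector obtained from @selector and one obtained from sel_registerName for the same name compare equal as pointers. A small sketch (a code fragment, assuming Foundation is available):

#import <Foundation/Foundation.h>
#import <objc/runtime.h>

SEL a = @selector(description);
SEL b = sel_registerName("description");
NSLog(@"%d", a == b);                   // 1: same name, same selector
NSLog(@"%@", NSStringFromSelector(a));  // "description"

Note that a SEL carries only the name; it says nothing about which class implements the method or what the argument types are.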

Besides SEL, the documentation also shows the IMP type:

typedef id (*IMP)(id, SEL, ...);

This is simply a function pointer. Where does it point? To the entry address of the method's implementation function.
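The runtime also lets us fetch that pointer ourselves. A hedged sketch using class_getMethodImplementation (someObject stands in for any NSObject instance; it is a hypothetical name here):

#import <Foundation/Foundation.h>
#import <objc/runtime.h>

SEL sel = @selector(description);
IMP imp = class_getMethodImplementation([NSObject class], sel);
// Cast the IMP to the method's real signature before calling it directly:
id (*callDescription)(id, SEL) = (id (*)(id, SEL))imp;
id result = callDescription(someObject, sel);

Calling through the IMP like this skips the normal message-sending machinery, which is exactly what objc_msgSend does once it has finished the lookup.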

Since Apple hides the implementation details from us, I can only piece together from the material I've collected what happens when [object selector:para] is called:

(1) Call sel_registerName("selector:") to obtain the SEL.

(2) Call objc_msgSend(object, SEL, para).

(3) Inside objc_msgSend, look up the IMP that corresponds to the SEL.

(4) Call the function that the IMP points to.
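Putting the four steps together, a rough hand-written equivalent of [object selector:para] might look like the sketch below (object, selector: and para are the hypothetical names from above; in real code the compiler emits only the objc_msgSend call):

#import <objc/message.h>
#import <objc/runtime.h>

SEL sel = sel_registerName("selector:");                  // step (1): get the SEL
((id (*)(id, SEL, id))objc_msgSend)(object, sel, para);   // step (2): send the message
// Steps (3) and (4) happen inside objc_msgSend: it resolves the SEL to an IMP
// (consulting the class's method cache and method lists) and jumps to it.
// We can imitate that final lookup and call by hand:
IMP imp = class_getMethodImplementation(object_getClass(object), sel);
((id (*)(id, SEL, id))imp)(object, sel, para);

The real objc_msgSend also maintains a method cache and falls back to message forwarding when no IMP is found, so this sketch only covers the happy path.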
