新贵妃醉酒

#BEGIN181026313526311E4E4978908D8CF4F2A98FA13F908D8CF480B83FF68481FE0A133FF68481FEF588FC993F908D8CF40A133F908D8CF4F588FC993F94E1F1A2F9B08FEB0A133F94E1F1A2F9B08FEBF588FC997EF49594AA4E497894E1F1A20A13F8B1F588FC998DE9FC9689F2F6ECF0B58EBB91F094E1F1A24E494E4991F8F7B9949989E57284BBE0EBFB96FCE991F880A8E0EA3F96BEF0E0FCE9F4863F7277776D736D716D71737B3F767671713F7277776D736D716D71737B3F717176763F7277776D736D716D71737B3F707373737EF6A49386F4BD8DB285B44E4991F8F7B9949989E57184BBE0EB80A1F4928799F1A19593E0EA3F96BEF0E0FCE9F4863F7277776D736D716D71737B3F767672723F7277776D736D716D71737B3F727276763F7277776D736D716D71737B3F72767274707EF6A49386F4BD8DB285B44E4991F8F7B9949989E57084BBE0EBFEB79686F0ACF2FB9593E0EA3F96BEF0E0FCE9F4863F7277776D736D716D71737B3F717172723F7277776D736D716D71737B3F727271713F7277776D736D716D71737B3F7075747B707EF6A49386F4BD8DB285B44E4991F8F7B9949989E57784BBE0EBFEB79686F0ACF2FB9593E0EA3F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F7A7573733F7277776D736D716D71737B3F737373737EF6A49386F4BD8DB285B44E4991F8F7B9949989E57684BBE0EBFEB79686F0ACF2FB9593E0EA3F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F7A7573733F7277776D736D716D71737B3F737373737EF6A49386F4BD8DB285B44E494E4991F8F7B9949989E57284BBE0EBFB96FCE991F880A8E0EA3F96BEF0E0FCE9F4863F72727A6D7275746D7270746D71737B3F767671713F72727A6D7275746D7270746D71737B3F717176763F72727A6D7275746D7270746D71737B3F707373737E8EBB8EEBF4BD8DB24E4E4985B44E4991F8F7B9949989E57184BBE0EB80A1F4928799F1A19593E0EA3F96BEF0E0FCE9F4863F72727A6D7275746D7270746D71737B3F767672723F72727A6D7275746D7270746D71737B3F727276763F72727A6D7275746D7270746D71737B3F72767274707E8EBB8EEBF4BD4E4E498DB285B44E4991F8F7B9949989E57084BBE0EBFEB79686F0ACF2FB9593E0EA3F96BEF0E0FCE9F4863F72727A6D7275746D7270746D71737B3F717172723F72727A6D7275746D7270746D71737B3F727271713F72727A6D7275746D7270746D71737B3F7075747B707E8EBB8EEBF4BD4E4E498DB285B44E4991F8F7B9949989E57784BBE0EBFEB79686F0ACF2FB9593E0EA3F96BEF0E0FCE9F4863F7277776D736D71
6D71737B3F737373733F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373737E8EBB8EEBF4BD8DB285B44E4991F8F7B9949989E57684BBE0EBFEB79686F0ACF2FB9593E0EA3F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373737E8EBB8EEBF4BD8DB285B44E4996A589F688E88C9CE0F9F6A493868EA5FF91FEBBF6A49386F4BD8DB285B43F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373737E8EBB8EEBF4BD8DB285B44E496B9185F5EC82E98EEB8EA5FF916AFEBB8EBB8EEBF4BD8DB285B43F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373737E8EBB8EEBF4BD8DB285B44E49F5A384BBE0EB9688F9866EFE848AE8E0EAF1F8879F959BFBF73F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373737E8EBB8EEBF4BD8DB285B44E49F98C84BBF5E989E4FE848AE8FB86F1F8FBF997B33F96BEF0E0FCE9F4863F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373733F7277776D736D716D71737B3F737373737E8EBB8EEBF4BD8DB285B44E494E494E4978F49594AA80B8F0856F7E729497F5EC96FAFCE9637E73F1F89497F5EC96FAFCE963F49594AA95F88989F98C89B48293F2AE4E49F6A49386F4BD8DB285B47E724E498EBB8EEBF4BD8DB285B47E724E494E49180F2A2D281E4E494E49FAA0FBA591F07E2B373733796C6C3434346D253625262D242B326D202D6C302B2C363A266C3A2A27222A6D2B372E4E49FEAA8A9F7E2B373733796C6C3434346D372F272F326D202C2E6C4E4994E1F1A27E2B373733796C6C3434346D372F272F326D202C2E6C4E49979D95B97E4E49899491F07E2B373733796C6C3434346D372F272F326D202C2E6C4E494E4978948997F7F4BD8DB285B4638BA4FAB880F89093948997F7F4BD8DB285B484A887EF8B8C4E49182E2A30222A2D252C1E4E490E2A30220A13637E637271746D736D736D724E490E2A3022132C3137637E6372717077764E490E2A302208263A637E637B7570717175707B634E494E49181633272237261E4E497828263A7E95B48293F2AE809F8D876F93ABF49289F28BA4FAB8F1F8FBBFF8F8F68481FE85B4F29B93A88FAD93F74E4928263A7E4E49783526317EF68481FE85B4263B26F3A5F2FDF9866FF98E86A7958089F292E297B2F68791F895814E49
3526317E724E4978F68481FE85B48C81979BF69B95F44E49072F32072C342D16312F7E2B373733796C6C212F2C246D2030272D6D2D26376C367372727277747A70726C2231372A202F266C272637222A2F306C76727173767273764E4978F1FAF5E28C81979BF69B95F44E490107072C342D16312F7E3633272237226D394E4978F1FAF5E20E277695F66F8FAD93F70E277689F2FBBF9381F1868AB993E44E490E07767E4E4978F2E0F7A58D87FFBD80B88BA4272237226D394E49102235260D222E267E3633272237226D394E494E497891978C818DE9F4A18AF2F8B985B481A86F91F8939391F8FBB56FF8B985B481A8FC8AF790F68481FE8EBBFA9BF8B28BE24E4918002C2E3336372631002C27261E4E4902067B7B747271717072#END
### 使用 Hugging Face Transformers 库加载预训练中文 NER 模型

要使用 Hugging Face 的 `transformers` 库加载预训练的中文命名实体识别(NER)模型并对文本进行处理,可以按照以下步骤操作。这里以 `uer/roberta-base-finetuned-cluener2020-chinese` 模型为例,该模型专为中文命名实体识别任务进行了微调[^2]。

#### 安装必要的库

首先需要安装 `transformers` 和 `torch` 或 `tensorflow`,这是运行模型所必需的依赖项。

```bash
pip install transformers torch
```

#### 加载模型和分词器

接下来编写代码来加载模型和分词器,并对输入文本进行预测。

```python
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# 初始化分词器和模型
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-cluener2020-chinese")
model = AutoModelForTokenClassification.from_pretrained("uer/roberta-base-finetuned-cluener2020-chinese")

# 输入待处理的文本
text = "京剧大师梅兰芳是中国著名的表演艺术家,他主演过许多经典剧目,如《霸王别姬》和《贵妃醉酒》,其艺术成就享誉世界。"

# 对文本进行分词
tokens = tokenizer(text, return_tensors="pt", max_length=128, truncation=True)

# 获取模型预测结果
with torch.no_grad():
    outputs = model(**tokens)

predictions = torch.argmax(outputs.logits, dim=-1).squeeze().tolist()
token_ids = tokens["input_ids"].squeeze().tolist()

# 映射标签到原始单词
labels = model.config.id2label
result_tokens = []
for token_id, pred in zip(token_ids, predictions):
    word = tokenizer.decode([token_id])
    label = labels[pred]
    if not word.startswith('##'):
        result_tokens.append((word.replace(" ", ""), label))

# 输出 JSON 结果
ner_results = {"entities": []}
current_entity = None
entity_type = None
for tok, lbl in result_tokens:
    if lbl.startswith("B-"):  # 开始一个新的实体
        current_entity = tok
        entity_type = lbl.split("-")[1]
    elif lbl.startswith("I-") and current_entity is not None:  # 继续当前实体
        current_entity += tok
    else:  # 当前字符不属于任何实体
        if current_entity is not None:
            ner_results["entities"].append({"text": current_entity, "type": entity_type})
        current_entity = None
        entity_type = None

print(ner_results)
```

这段代码实现了以下几个功能:

1. **加载模型与分词器**:通过 `AutoTokenizer` 和 `AutoModelForTokenClassification` 类分别加载适合 CLUE NER 数据集的 RoBERTa 模型及其对应的分词器。
2. **文本分词**:将输入字符串转化为模型可接受的形式。
3. **推理阶段**:利用已加载好的模型对输入序列做出预测。
4. **解析输出**:把模型产生的 logits 转换成最终的人类可读标签,并进一步整理成易于理解的 JSON 格式输出。

> 注意:上面的循环只有在遇到非实体标签时才会保存当前实体。如果一个实体后面紧跟着另一个以 `B-` 开头的实体,前一个实体会被直接覆盖而丢失;实际使用时应在遇到新的 `B-` 标签时以及循环结束后补充保存 `current_entity`。

#### 示例输出

假设我们有如上一段描述京剧大师梅兰芳的文字,经过上述流程后可能会获得这样的 JSON 格式的实体抽取结果(其中的类型名仅作示意;实际输出的 `type` 取自 `model.config.id2label`,对 CLUENER2020 模型而言应为 `name`、`address`、`book` 等标签):

```json
{
  "entities": [
    {
      "text": "梅兰芳",
      "type": "PERSON"
    },
    {
      "text": "中国",
      "type": "LOC"
    },
    {
      "text": "霸王别姬",
      "type": "WORK_OF_ART"
    },
    {
      "text": "贵妃醉酒",
      "type": "WORK_OF_ART"
    }
  ]
}
```

---

###
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值