BERT for Joint Intent Classification and Slot Filling
论文代码解读(二)
data_loader.py
import os
import copy
import json
import logging
import torch
from torch.utils.data import TensorDataset
from utils import get_intent_labels, get_slot_labels
logger = logging.getLogger(__name__)
class InputExample(object):
    """A single training/test example: a word sequence with optional labels.

    Args:
        guid: Unique id for the example.
        words: list. The words of the sequence.
        intent_label: (Optional) string. The intent label of the example.
        slot_labels: (Optional) list. The slot labels of the example.
    """

    def __init__(self, guid, words, intent_label=None, slot_labels=None):
        self.guid = guid
        self.words = words
        self.intent_label = intent_label
        self.slot_labels = slot_labels

    def __repr__(self):
        """Return the JSON form, so printing an instance shows its fields."""
        # to_json_string() already returns a str; no extra conversion needed.
        return self.to_json_string()

    def to_dict(self):
        """Serializes this instance to a Python dictionary.

        Returns:
            A deep copy of the instance attributes, so mutating the result
            never affects this instance.
        """
        return copy.deepcopy(self.__dict__)

    def to_json_string(self):
        """Serializes this instance to a JSON string.

        Returns:
            The attributes as JSON, pretty-printed with a 2-space indent and
            sorted keys, terminated by a newline.
        """
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
class InputFeatures(object):
    """A single set of features of data.

    Stores the five values passed to the constructor unchanged:
    ``input_ids``, ``attention_mask``, ``token_type_ids``,
    ``intent_label_id`` and ``slot_labels_ids``.
    """

    def __init__(self, input_ids, attention_mask, token_type_ids, intent_label_id, slot_labels_ids):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.intent_label_id = intent_label_id
        self.slot_labels_ids = slot_labels_ids

    def __repr__(self):
        """Return the JSON form, so printing an instance shows its fields."""
        # to_json_string() already returns a str; no extra conversion needed.
        return self.to_json_string()

    def to_dict(self):
        """Serializes this instance to a Python dictionary.

        Returns:
            A deep copy of the instance attributes, so mutating the result
            never affects this instance.
        """
        return copy.deepcopy(self.__dict__)

    def to_json_string(self):
        """Serializes this instance to a JSON string.

        Returns:
            The attributes as JSON, pretty-printed with a 2-space indent and
            sorted keys, terminated by a newline.
        """
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
class JointProcessor(object):
    """Processor for the JointBERT data set."""

    def __init__(self, args):
        """Store the run arguments, load the label sets, and set the file names.

        Args:
            args: Configuration object; it is stored and forwarded as-is to
                ``get_intent_labels`` and ``get_slot_labels``.
        """
        self.args = args
        # Label sets are loaded by the helpers imported from ``utils``.
        self.intent_labels = get_intent_labels(args)
        self.slot_labels = get_slot_labels(args)

        # File names of the three parallel data files.
        self.input_text_file = 'seq.in'  # input sentences
        self.intent_label_file = 'label'  # per-sentence intent label
        self.slot_labels_file = 'seq.out'  # per-sentence slot labels
@classmethod#不需要实例化

本文介绍BERT模型在联合意图分类和槽位填充任务中的应用,详细解析了数据加载器的实现,包括如何从文件中读取数据、创建训练示例、将示例转换为特征以及缓存和加载数据集。
最低0.47元/天 解锁文章
1797

被折叠的 条评论
为什么被折叠?



