JointBert代码解读(二)

最新推荐文章于 2025-09-19 17:04:41 发布

原创

最新推荐文章于 2025-09-19 17:04:41 发布 · 1k 阅读

4 ·

CC 4.0 BY-SA版权

本文介绍BERT模型在联合意图分类和槽位填充任务中的应用，详细解析了数据加载器的实现，包括如何从文件中读取数据、创建训练示例、将示例转换为特征以及缓存和加载数据集。

BERT for Joint Intent Classification and Slot Filling
论文代码解读(二)

data_loader.py

import os
import copy
import json
import logging

import torch
from torch.utils.data import TensorDataset

from utils import get_intent_labels, get_slot_labels

logger = logging.getLogger(__name__)


class InputExample(object):
    """
    One raw example for joint intent classification and slot filling.

    Args:
        guid: Unique id for the example.
        words: list. The words of the sequence.
        intent_label: (Optional) string. The intent label of the example.
        slot_labels: (Optional) list. The slot labels of the example.
    """

    def __init__(self, guid, words, intent_label=None, slot_labels=None):
        self.guid = guid
        self.words = words
        self.intent_label = intent_label
        self.slot_labels = slot_labels

    def to_dict(self):
        """Return a deep copy of the instance attributes as a new dictionary."""
        # Deep copy so that callers mutating the result never touch this instance.
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Return the attributes as indented, key-sorted JSON followed by a newline."""
        attributes = self.to_dict()
        # indent=2 pretty-prints; sort_keys keeps the output order stable.
        rendered = json.dumps(attributes, sort_keys=True, indent=2)
        return rendered + "\n"

    def __repr__(self):
        # Printing or inspecting the object shows its JSON form.
        return self.to_json_string()


class InputFeatures(object):
    """Container for the feature values built from one example."""

    def __init__(self, input_ids, attention_mask, token_type_ids, intent_label_id, slot_labels_ids):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.intent_label_id = intent_label_id
        self.slot_labels_ids = slot_labels_ids

    def to_dict(self):
        """Return an independent (deep-copied) dictionary of the instance attributes."""
        snapshot = copy.deepcopy(vars(self))
        return snapshot

    def to_json_string(self):
        """Return the attributes as 2-space-indented, key-sorted JSON plus a trailing newline."""
        body = json.dumps(self.to_dict(), sort_keys=True, indent=2)
        return "".join((body, "\n"))

    def __repr__(self):
        # The textual representation is the JSON serialization.
        return self.to_json_string()


class JointProcessor(object):
    """Processor for the JointBERT data set.处理器 """

    def __init__(self, args):
        """Keep the run arguments, set the data file names, and load the label lists.

        Args:
            args: Run configuration; stored on the instance and passed to
                ``get_intent_labels`` and ``get_slot_labels``.
        """
        self.args = args

        # File names of the three data files.
        self.input_text_file = 'seq.in'      # input sentences
        self.intent_label_file = 'label'     # intent label of each sentence
        self.slot_labels_file = 'seq.out'    # slot labels of each sentence

        # Label lists obtained from the label files via the utils helpers.
        self.intent_labels = get_intent_labels(args)
        self.slot_labels = get_slot_labels(args)

    @classmethod#不需要实例化