Let's start, as before, from SoftVerbalizer's initialization method:
def __init__(self,
             tokenizer: Optional[PreTrainedTokenizer],
             model: Optional[PreTrainedModel],
             classes: Optional[List] = None,
             num_classes: Optional[int] = None,
             label_words: Optional[Union[Sequence[str], Mapping[str, str]]] = None,
             prefix: Optional[str] = " ",
             multi_token_handler: Optional[str] = "first",
            ):
    super().__init__(tokenizer=tokenizer, num_classes=num_classes, classes=classes)
    self.prefix = prefix
    self.multi_token_handler = multi_token_handler  # strategy for handling a word that the tokenizer splits into multiple tokens
    head_name = [n for n, c in model.named_children()][-1]  # the last child of the PLM is its LM head
    logger.info(f"The LM head named {head_name} was retrieved.")
    self.head = copy.deepcopy(getattr(model, head_name))
    max_loop = 5  # the main job below is to locate the final linear (vocab projection) layer of the MLM head; descend at most five levels, otherwise the model is treated as having no such layer
    if not isinstance(self.head, torch.nn.Linear):
        module = self.head
        found = False
        last_layer_full_name = []
        for i in range(max_loop):
            last_layer_name = [n for n, c in module.named_children()][-1]
            last_layer_full_name.append(last_layer_name)
            parent_module = module
            module = getattr(module, last_layer_name)
            if isinstance(module, torch.nn.Linear):
                found = True
                break
        if not found:
            raise RuntimeError(f"Can't retrieve a linear layer in {max_loop} loops from the plm.")
        self.original_head_last_layer = module.weight.data
        self.hidden_dims = self.original_head_last_layer.shape[-1]
        self.head_last_layer_full_name = ".".join(last_layer_full_name)
        # Once the linear layer is found, replace it with a fresh linear layer that projects to the classes instead of the vocabulary
        self.head_last_layer = torch.nn.Linear(self.hidden_dims, self.num_classes, bias=False)
        setattr(parent_module, last_layer_name, self.head_last_layer)
    else:
        self.hidden_dims = self.head.weight.shape[-1]
        self.original_head_last_layer = getattr(model, head_name).weight.data
        self.head = torch.nn.Linear(self.hidden_dims, self.num_classes, bias=False)
    if label_words is not None:  # use label words as an initialization
        self.label_words = label_words
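To see this head surgery concretely, here is a minimal sketch of the same descent performed by hand. The model choice bert-base-cased and the resulting layer names are assumptions; other PLMs expose differently structured heads, which is exactly why the loop above probes level by level:

# Sketch: what SoftVerbalizer's loop does to a deep-copied MLM head (assumes bert-base-cased).
import copy
import torch
from transformers import BertForMaskedLM

model = BertForMaskedLM.from_pretrained("bert-base-cased")
head = copy.deepcopy(model.cls)  # the last named child of BertForMaskedLM is "cls", the MLM head

# Walk down the last child at each level until a Linear is hit:
# cls -> predictions -> decoder, where decoder is Linear(hidden_size, vocab_size)
module, names = head, []
while not isinstance(module, torch.nn.Linear):
    parent = module
    name = [n for n, _ in module.named_children()][-1]
    names.append(name)
    module = getattr(module, name)

hidden_dims = module.weight.shape[-1]  # 768 for bert-base
# Swap the vocab-sized decoder for a small, randomly initialized classification layer:
setattr(parent, name, torch.nn.Linear(hidden_dims, 2, bias=False))
print(".".join(names))  # "predictions.decoder"

The sketch uses an unbounded while loop for brevity; the real code bounds it with max_loop so a head without any linear layer fails loudly instead of looping forever.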
Next we move on to the model itself. Taking a classification task as the example, the model is PromptForClassification; its forward method is shown below (part of it was already covered in the previous post):
def forward(self, batch: Union[Dict, InputFeatures]) -> torch.Tensor:
    r"""
    Get the logits of label words.
    Args:
        batch (:obj:`Union[Dict, InputFeatures]`): The original batch
    Returns:
        :obj:`torch.Tensor`: The logits of the label words (obtained by the current verbalizer).
    """
    outputs = self.prompt_model(batch)
    outputs = self.verbalizer.gather_outputs(outputs)
    if isinstance(outputs, tuple):
        outputs_at_mask = [self.extract_at_mask(output, batch) for output in outputs]
    else:
        outputs_at_mask = self.extract_at_mask(outputs, batch)
    label_words_logits = self.verbalizer.process_outputs(outputs_at_mask, batch=batch)  # map the <mask> outputs to label logits
    return label_words_logits
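For orientation, here is a hedged sketch of how these pieces are typically wired together. The class and argument names follow the OpenPrompt tutorials; the template text and dataset are placeholders, not the author's setup:

# Sketch: assembling a PromptForClassification pipeline around SoftVerbalizer.
from openprompt import PromptForClassification
from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate, SoftVerbalizer

plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-cased")
template = ManualTemplate(tokenizer=tokenizer,
                          text='{"placeholder":"text_a"} It was {"mask"}.')
verbalizer = SoftVerbalizer(tokenizer, plm, num_classes=2)
prompt_model = PromptForClassification(plm=plm, template=template,
                                       verbalizer=verbalizer)
# logits = prompt_model(batch)  # batch comes from a PromptDataLoader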
Let's go through the verbalizer-related methods one by one:
1. self.verbalizer.gather_outputs(outputs)
def gather_outputs(self, outputs: ModelOutput):
    if isinstance(outputs, Seq2SeqLMOutput):
        ret = outputs.decoder_hidden_states[-1]
    elif isinstance(outputs, MaskedLMOutput) or isinstance(outputs, CausalLMOutputWithCrossAttentions):
        ret = outputs.hidden_states[-1]
    else:
        try:
            ret = outputs.hidden_states[-1]  # clearly this takes the last hidden layer of the pretrained model
        except AttributeError:
            raise NotImplementedError(f"Gather outputs method for outputs' type {type(outputs)} not implemented")
    return ret
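Note that hidden_states is only populated when the PLM is called with output_hidden_states=True, which the prompt model takes care of. A minimal sketch, again assuming bert-base-cased, showing what gather_outputs ends up returning:

# Sketch: the tensor gather_outputs extracts from a MaskedLMOutput.
import torch
from transformers import BertForMaskedLM, BertTokenizer

tok = BertTokenizer.from_pretrained("bert-base-cased")
mlm = BertForMaskedLM.from_pretrained("bert-base-cased")
enc = tok("It was [MASK].", return_tensors="pt")
out = mlm(**enc, output_hidden_states=True)  # MaskedLMOutput

last_hidden = out.hidden_states[-1]  # last encoder layer, what gather_outputs returns
print(last_hidden.shape)             # (1, seq_len, 768)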
2. self.extract_at_mask(output, batch)
def extract_at_mask(self,
                    outputs: torch.Tensor,
                    batch: Union[Dict, InputFeatures]):
    r"""Get outputs at all <mask> tokens.
    E.g., project the logits of shape
    (``batch_size``, ``max_seq_length``, ``vocab_size``)
    into logits of shape (if num_mask_token > 1)
    (``batch_size``, ``num_mask_token``, ``vocab_size``)
    or into logits of shape (if ``num_mask_token`` = 1)
    (``batch_size``, ``vocab_size``).
    Args:
        outputs (:obj:`torch.Tensor`): The original outputs of the whole sequence
            (possibly processed by the verbalizer's `gather_outputs` beforehand).
        batch (:obj:`Union[Dict, InputFeatures]`): The original batch
    Returns:
        :obj:`torch.Tensor`: The extracted outputs of the ``<mask>`` tokens.
    """
    outputs = outputs[torch.where(batch['loss_ids'] > 0)]  # pick the rows at the <mask> positions
    outputs = outputs.view(batch['loss_ids'].shape[0], -1, outputs.shape[1])  # (batch_size, num_mask_token, dim)
    if outputs.shape[1] == 1:
        outputs = outputs.view(outputs.shape[0], outputs.shape[2])  # squeeze when there is a single <mask>
    return outputs
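The indexing here is easy to misread, so here is a toy sketch of the same three steps on small tensors (the shapes are invented for illustration):

# Sketch: extract_at_mask on toy tensors, one <mask> per example.
import torch

batch_size, seq_len, hidden = 2, 5, 4
outputs = torch.randn(batch_size, seq_len, hidden)
# loss_ids marks the <mask> positions with 1:
loss_ids = torch.tensor([[0, 0, 1, 0, 0],
                         [0, 1, 0, 0, 0]])

picked = outputs[torch.where(loss_ids > 0)]  # (2, 4): one row per <mask>
picked = picked.view(loss_ids.shape[0], -1, picked.shape[1])  # (2, 1, 4)
if picked.shape[1] == 1:
    picked = picked.view(picked.shape[0], picked.shape[2])  # (2, 4)
print(picked.shape)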
3. self.verbalizer.process_outputs(outputs_at_mask, batch=batch)
def process_hiddens(self, hiddens: torch.Tensor, **kwargs):
    r"""Run the re-initialized head over the hidden states at the <mask> position to get the label logits.
    """
    label_logits = self.head(hiddens)  # the linear layer swapped in during __init__, now used for classification
    return label_logits
def process_outputs(self, outputs: torch.Tensor, batch: Union[Dict, InputFeatures], **kwargs):
    return self.process_hiddens(outputs)
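Putting it together: process_hiddens is just a forward pass through the re-initialized head, so the label logits can be fed straight into a standard classification loss. A toy sketch (shapes invented for illustration):

# Sketch: the classification step performed by process_hiddens, plus a training step.
import torch

hidden_dims, num_classes, batch_size = 768, 2, 4
head = torch.nn.Linear(hidden_dims, num_classes, bias=False)  # stands in for self.head

hiddens = torch.randn(batch_size, hidden_dims)  # stands in for outputs_at_mask
label_logits = head(hiddens)                    # (4, 2)

labels = torch.tensor([0, 1, 1, 0])
loss = torch.nn.functional.cross_entropy(label_logits, labels)
loss.backward()  # gradients flow into the soft verbalizer head

This is the essence of the "soft" verbalizer: instead of mapping vocabulary logits through hand-picked label words, it learns the class projection directly from the <mask> hidden states.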