Let's start, as before, from SoftVerbalizer's initialization method:
def __init__(self,
             tokenizer: Optional[PreTrainedTokenizer],
             model: Optional[PreTrainedModel],
             classes: Optional[List] = None,
             num_classes: Optional[int] = None,
             label_words: Optional[Union[Sequence[str], Mapping[str, str]]] = None,
             prefix: Optional[str] = " ",
             multi_token_handler: Optional[str] = "first",
            ):
    super().__init__(tokenizer=tokenizer, num_classes=num_classes, classes=classes)
    self.prefix = prefix
    self.multi_token_handler = multi_token_handler  # strategy for handling a word that the tokenizer splits into multiple tokens
    head_name = [n for n, c in model.named_children()][-1]  # the last child of the PLM is its LM head
    logger.info(f"The LM head named {head_name} was retrieved.")
    self.head = copy.deepcopy(getattr(model, head_name))
    max_loop = 5  # the main job below is to locate the final linear (vocab projection) layer of the MLM head; descend at most five levels, otherwise the model is treated as having no such layer
    if not isinstance(self.head, torch.nn.Linear):
        module = self.head
        found = False
        last_layer_full_name = []
        for i in range(max_loop):
            last_layer_name = [n for n, c in module.named_children()][-1]
            last_layer_full_name.append(last_layer_name)
            parent_module = module
            module = getattr(module, last_layer_name)
            if isinstance(module, torch.nn.Linear):
                found = True
                break
        if not found:
            raise RuntimeError(f"Can't retrieve a linear layer in {max_loop} loops from the plm.")
        self.original_head_last_layer = module.weight.data
        self.hidden_dims = self.original_head_last_layer.shape[-1]
        self.head_last_layer_full_name = ".".join(last_layer_full_name)
        # Once the linear layer is found, replace it with a fresh linear layer that projects to the classes instead of the vocabulary
        self.head_last_layer = torch.nn.Linear(self.hidden_dims, self.num_classes, bias=False)
        setattr(parent_module, last_layer_name, self.head_last_layer)
    else:
        self.hidden_dims = self.head.weight.shape[-1]
        self.original_head_last_layer = getattr(model, head_name).weight.data
        self.head = torch.nn.Linear(self.hidden_dims, self.num_classes, bias=False)
    if label_words is not None:  # use label words as an initialization
        self.label_words = label_words
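To see this head surgery concretely, here is a minimal sketch of the same descent performed by hand. The model choice bert-base-cased and the resulting layer names are assumptions; other PLMs expose differently structured heads, which is exactly why the loop above probes level by level:

# Sketch: what SoftVerbalizer's loop does to a deep-copied MLM head (assumes bert-base-cased).
import copy
import torch
from transformers import BertForMaskedLM

model = BertForMaskedLM.from_pretrained("bert-base-cased")
head = copy.deepcopy(model.cls)  # the last named child of BertForMaskedLM is "cls", the MLM head

# Walk down the last child at each level until a Linear is hit:
# cls -> predictions -> decoder, where decoder is Linear(hidden_size, vocab_size)
module, names = head, []
while not isinstance(module, torch.nn.Linear):
    parent = module
    name = [n for n, _ in module.named_children()][-1]
    names.append(name)
    module = getattr(module, name)

hidden_dims = module.weight.shape[-1]  # 768 for bert-base
# Swap the vocab-sized decoder for a small, randomly initialized classification layer:
setattr(parent, name, torch.nn.Linear(hidden_dims, 2, bias=False))
print(".".join(names))  # "predictions.decoder"

The sketch uses an unbounded while loop for brevity; the real code bounds it with max_loop so a head without any linear layer fails loudly instead of looping forever.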
Next we move on to the model itself. Taking a classification task as the example, the model is PromptForClassification; its forward method is shown below (part of it was already covered in the previous post):
def forward(self, batch: Union[Dict, InputFeatures]) -> torch.Tensor:
    r"""
    Get the logits of label words.
    Args:
        batch (:obj:`Union[Dict, InputFeatures]`): The original batch
    Returns:
        :obj:`torch.Tensor`: The logits of the label words (obtained by the current verbalizer).
    """
    outputs = self.prompt_model(batch)
    outputs = self.verbalizer.gather_outputs(outputs)
    if isinstance(outputs, tuple):
        outputs_at_mask = [self.extract_at_mask(output, batch) for output in outputs]
    else:
        outputs_at_mask = self.extract_at_mask(outputs, batch)
    label_words_logits = self.verbalizer.process_outputs(outputs_at_mask, batch=batch)  # map the <mask> outputs to label logits
    return label_words_logits
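For orientation, here is a hedged sketch of how these pieces are typically wired together. The class and argument names follow the OpenPrompt tutorials; the template text and dataset are placeholders, not the author's setup:

# Sketch: assembling a PromptForClassification pipeline around SoftVerbalizer.
from openprompt import PromptForClassification
from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate, SoftVerbalizer

plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-cased")
template = ManualTemplate(tokenizer=tokenizer,
                          text='{"placeholder":"text_a"} It was {"mask"}.')
verbalizer = SoftVerbalizer(tokenizer, plm, num_classes=2)
prompt_model = PromptForClassification(plm=plm, template=template,
                                       verbalizer=verbalizer)
# logits = prompt_model(batch)  # batch comes from a PromptDataLoader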
Let's go through the verbalizer-related methods one by one:
1. self.verbalizer.gather_outputs(outputs)
def gather_outputs(self, outputs: ModelOutput):
    if isinstance(outputs, Seq2SeqLMOutput):
        ret = outputs.decoder_hidden_states[-1]
    elif isinstance(outputs, MaskedLMOutput) or isinstance(outputs, CausalLMOutputWithCrossAttentions):
        ret = outputs.hidden_states[-1]
    else:
        try:
            ret = outputs.hidden_states[-1]  # clearly this takes the last hidden layer of the pretrained model
        except AttributeError:
            raise NotImplementedError(f"Gather outputs method for outputs' type {type(outputs)} not implemented")
    return ret
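Note that hidden_states is only populated when the PLM is called with output_hidden_states=True, which the prompt model takes care of. A minimal sketch, again assuming bert-base-cased, showing what gather_outputs ends up returning:

# Sketch: the tensor gather_outputs extracts from a MaskedLMOutput.
import torch
from transformers import BertForMaskedLM, BertTokenizer

tok = BertTokenizer.from_pretrained("bert-base-cased")
mlm = BertForMaskedLM.from_pretrained("bert-base-cased")
enc = tok("It was [MASK].", return_tensors="pt")
out = mlm(**enc, output_hidden_states=True)  # MaskedLMOutput

last_hidden = out.hidden_states[-1]  # last encoder layer, what gather_outputs returns
print(last_hidden.shape)             # (1, seq_len, 768)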
2. self.extract_at_mask(output, batch)
def extract_at_mask(self,
                    outputs: torch.Tensor,
                    batch: Union[Dict, InputFeatures]):
    r"""Get outputs at all <mask> tokens.
    E.g., project the logits of shape
    (``batch_size``, ``max_seq_length``, ``vocab_size``)
    into logits of shape (if num_mask_token > 1)
    (``batch_size``, ``num_mask_token``, ``vocab_size``)
    or into logits of shape (if ``num_mask_token`` = 1)
    (``batch_size``, ``vocab_size``).
    Args:
        outputs (:obj:`torch.Tensor`): The original outputs of the whole sequence
            (possibly processed by the verbalizer's `gather_outputs` beforehand).
        batch (:obj:`Union[Dict, InputFeatures]`): The original batch
    Returns:
        :obj:`torch.Tensor`: The extracted outputs of the ``<mask>`` tokens.
    """
    outputs = outputs[torch.where(batch['loss_ids'] > 0)]  # pick the rows at the <mask> positions
    outputs = outputs.view(batch['loss_ids'].shape[0], -1, outputs.shape[1])  # (batch_size, num_mask_token, dim)
    if outputs.shape[1] == 1:
        outputs = outputs.view(outputs.shape[0], outputs.shape[2])  # squeeze when there is a single <mask>
    return outputs
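The indexing here is easy to misread, so here is a toy sketch of the same three steps on small tensors (the shapes are invented for illustration):

# Sketch: extract_at_mask on toy tensors, one <mask> per example.
import torch

batch_size, seq_len, hidden = 2, 5, 4
outputs = torch.randn(batch_size, seq_len, hidden)
# loss_ids marks the <mask> positions with 1:
loss_ids = torch.tensor([[0, 0, 1, 0, 0],
                         [0, 1, 0, 0, 0]])

picked = outputs[torch.where(loss_ids > 0)]  # (2, 4): one row per <mask>
picked = picked.view(loss_ids.shape[0], -1, picked.shape[1])  # (2, 1, 4)
if picked.shape[1] == 1:
    picked = picked.view(picked.shape[0], picked.shape[2])  # (2, 4)
print(picked.shape)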
3. self.verbalizer.process_outputs(outputs_at_mask, batch=batch)
def process_hiddens(self, hiddens: torch.Tensor, **kwargs):
    r"""Run the re-initialized head over the hidden states at the <mask> position to get the label logits.
    """
    label_logits = self.head(hiddens)  # the linear layer swapped in during __init__, now used for classification
    return label_logits
def process_outputs(self, outputs: torch.Tensor, batch: Union[Dict, InputFeatures], **kwargs):
    return self.process_hiddens(outputs)
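Putting it together: process_hiddens is just a forward pass through the re-initialized head, so the label logits can be fed straight into a standard classification loss. A toy sketch (shapes invented for illustration):

# Sketch: the classification step performed by process_hiddens, plus a training step.
import torch

hidden_dims, num_classes, batch_size = 768, 2, 4
head = torch.nn.Linear(hidden_dims, num_classes, bias=False)  # stands in for self.head

hiddens = torch.randn(batch_size, hidden_dims)  # stands in for outputs_at_mask
label_logits = head(hiddens)                    # (4, 2)

labels = torch.tensor([0, 1, 1, 0])
loss = torch.nn.functional.cross_entropy(label_logits, labels)
loss.backward()  # gradients flow into the soft verbalizer head

This is the essence of the "soft" verbalizer: instead of mapping vocabulary logits through hand-picked label words, it learns the class projection directly from the <mask> hidden states.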