Comparing str with repr, and input with raw_input

This article compares Python's str and repr in detail, including how they handle different kinds of data, and contrasts the behavior of input and raw_input when reading user input. Concrete code examples show how each is used. (The examples are Python 2: raw_input and long literals such as 1000L do not exist in Python 3.)


I. Comparing str and repr:

1. What they have in common: both str and repr take a value and give back a string representation of it (strictly speaking, str is a built-in type and repr a built-in function, but both are called the same way).

2. How they differ:

   str: converts its argument into a string in a readable form;

   repr: builds a string that represents the value as a legal Python expression.

3. Examples with str:

>>> print str("hello world")
hello world
>>> print str(1000L)                 # convert a long argument to a string
1000

4. Examples with repr:

>>> print repr("hello world")
'hello world'
>>> print repr(1000L)                # a string that reads as a legal Python expression
1000L

>>> temp = 42L
>>> print "the temperature is " + repr(temp)
the temperature is 42L
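
Because repr produces a legal Python expression, for simple built-in values the result can usually be fed back to eval to recover an equal object. A minimal sketch (Python 2, and only reliable for basic types such as strings and numbers):

>>> s = "hello world"
>>> eval(repr(s)) == s           # repr output parses back to an equal value
True
>>> eval(repr(1000L)) == 1000L
True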

II. Comparing input and raw_input:

1. The input function: what you enter must be a legal Python expression.

For example:

>>> name = input ("what is your name ?")
what is your name ?

When a numeric value such as 3 is entered, it works; when a bare string such as lucy is entered, an error is raised:

Traceback (most recent call last):
  File "<pyshell#17>", line 1, in <module>
    name = input ("what is your name ?")
  File "<string>", line 1, in <module>
NameError: name 'lucy' is not defined

This is because the legal expression form of a string is 'lucy'; entering 'lucy' or "lucy" (with quotes) works.
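
In Python 2, input() effectively behaves like eval(raw_input()), which is why bare text is looked up as a variable name. A minimal sketch of that equivalence (Python 2 only):

>>> expr = raw_input("what is your name ?")   # read the typed text as-is
what is your name ?'lucy'
>>> eval(expr)                                # evaluate it, just as input() would
'lucy'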

2. The raw_input function: treats everything you enter as raw data (it always returns a string).

For example:

>>> name = raw_input ("what is your name ?")
what is your name ?lucy
>>>

Here, typing lucy directly does not raise an error.
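
Since raw_input always returns a string, numeric input has to be converted explicitly when a number is needed. A minimal sketch (Python 2):

>>> age = raw_input("how old are you ?")
how old are you ?18
>>> age                          # raw_input hands back the text '18'
'18'
>>> int(age) + 1                 # convert explicitly to do arithmetic
19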
