Tokens on the Segments

https://vjudge.net/problem/ZOJ-4120

Greedy: if two segments end at the same point, let the one that starts earlier pick first; otherwise let the one that ends first pick first. Each segment then takes the smallest unused integer point it covers.

For example, with 3 segments:

1 3
1 3
2 2

Sorted by the rule above, [2, 2] comes first and takes point 2, then the two [1, 3] segments take points 1 and 3, so all three get a token (answer 3). Processing them in input order instead would spend points 1 and 2 on the [1, 3] segments and leave nothing for [2, 2].

When working on a problem as a team, it's best if one person doesn't know the other two's algorithm (so the solution can be checked independently).

Keep the set outside main() as a global, and remember to clear it for each test case.

#include <bits/stdc++.h>
using namespace std;
const int N = 1e5 + 7;

struct node {
	int l, r;
} se[N];

// Sort by right endpoint; break ties by left endpoint.
bool cmp(node a, node b) {
	if (a.r == b.r) return a.l < b.l;
	return a.r < b.r;
}

set<int> si;  // points already occupied by a token (global, cleared per test case)

int main() {
	int t; scanf("%d", &t);
	while (t--) {
		si.clear();  // reset state between test cases
		int n; scanf("%d", &n);
		for (int i = 0; i < n; i++) scanf("%d%d", &se[i].l, &se[i].r);
		sort(se, se + n, cmp);
		int num = 0;
		for (int i = 0; i < n; i++) {
			// Give this segment the smallest free integer point inside it, if any.
			for (int j = se[i].l; j <= se[i].r; j++) {
				if (si.find(j) == si.end()) {
					si.insert(j);
					num++;
					break;
				}
			}
		}
		printf("%d\n", num);
	}
	return 0;
}
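
The inner loop above walks forward point by point, so a segment that overlaps many already-used points re-scans all of them. A common way to skip occupied runs is a path-compressed "next free point" lookup. Below is a minimal sketch of that idea (same greedy and same ordering rule; the names nxt and findFree are introduced here, and a map is used so the sketch does not assume anything about how large the coordinates can be, since the post does not state the constraints):

#include <bits/stdc++.h>
using namespace std;
const int N = 1e5 + 7;

struct node { int l, r; } se[N];

map<int, int> nxt;  // nxt[x] = candidate for the smallest free point >= x; a missing key means x itself is free

int findFree(int x) {  // path-compressed "next free point" lookup
	auto it = nxt.find(x);
	if (it == nxt.end()) return x;  // x has never been taken
	return it->second = findFree(it->second);
}

int main() {
	int t; scanf("%d", &t);
	while (t--) {
		nxt.clear();  // reset between test cases, like si.clear() above
		int n; scanf("%d", &n);
		for (int i = 0; i < n; i++) scanf("%d%d", &se[i].l, &se[i].r);
		sort(se, se + n, [](const node &a, const node &b) {
			return a.r == b.r ? a.l < b.l : a.r < b.r;  // same ordering rule as cmp above
		});
		int num = 0;
		for (int i = 0; i < n; i++) {
			int p = findFree(se[i].l);  // smallest unused point >= l
			if (p <= se[i].r) {  // it lies inside the segment: place the token there
				nxt[p] = p + 1;
				num++;
			}
		}
		printf("%d\n", num);
	}
	return 0;
}

On the example above this also prints 3; each segment now costs roughly one amortized map lookup instead of a scan over every point it covers.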

 
