Fixing the "Max line length exceeded." error in IdPop3

This post resolves a "Max line length exceeded." exception thrown while a mail program was loading a message: raising the MaxLineLength property of the IdSSLIOHandlerSocketOpenSSL1 component from 16384 to 65536 fixed the subsequent "Message cannot load" failure.


The mail program threw a "Max line length exceeded." exception and then reported "Message cannot load". Tracing into the Indy source revealed the following code:
  if AMaxLineLength < 0 then begin
    AMaxLineLength := MaxLineLength;
  end;
  // User may pass '' if they need to pass arguments beyond the first.
  if ATerminator = '' then begin
    ATerminator := LF;
  end;


The MaxLineLength used here was only 16384. I tried setting MaxLineLength to 65536 on the IdSSLIOHandlerSocketOpenSSL1 component attached to IdPop3, ran the program again, and the message loaded successfully.
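
The same change can also be made in code before connecting, instead of in the Object Inspector. A minimal sketch, assuming the form carries a POP3 client named IdPOP31 wired to IdSSLIOHandlerSocketOpenSSL1 (the component names are illustrative):

  // Raise the per-line limit so ReadLn no longer aborts on long lines
  IdSSLIOHandlerSocketOpenSSL1.MaxLineLength := 65536;
  IdPOP31.IOHandler := IdSSLIOHandlerSocketOpenSSL1;

Keep in mind that 65536 is still a fixed cap: a message containing even longer lines would trigger the same exception, so choose the limit with the expected message sizes in mind.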
