将图纸中所有的块的名称改为大写

本文介绍了一种使用LISP程序将AutoCAD图纸中的图块名称批量改为大写的方法(匿名块、外部参照依赖块以及程序中指定名称的图块除外)。通过编写脚本实现图块名称的批量修改,提高工作效率,适用于需要大量编辑CAD文件的工作场景。
;;By LUCAS
;; C:KDX - rename every block definition in the active drawing to its
;; upper-case name.
;;
;; Block definitions whose upper-cased name matches one of the exclusion
;; patterns below are left untouched:
;;   *`**   names containing a literal "*" (the backquote is the wcmatch
;;          escape character), e.g. *Model_Space, *Paper_Space and
;;          anonymous *U / *D blocks
;;   *|*    names containing "|" (xref-dependent blocks)
;;   GTJH, DUANT, ACFGW, HHK   names explicitly excluded by the author
;;
;; Locals: BLOCKS is the Blocks collection of the active document,
;; NAME is the upper-cased name of the block currently being visited.
(defun C:KDX (/ BLOCKS NAME)
  ;; Load the ActiveX support so the vla-/vlax- functions are available.
  (vl-load-com)
  (setq BLOCKS (vla-get-blocks
                 (vla-get-activedocument (vlax-get-acad-object))
               )
  )
  (vlax-for BLK BLOCKS
    (if (not (wcmatch (setq NAME (strcase (vla-get-name BLK)))
                      "*`**,*|*,GTJH,DUANT,ACFGW,HHK"
             )
        )
      (vla-put-name BLK NAME)
    )
  )
  ;; Exit quietly (suppress the return value echo on the command line).
  (princ)
)
"""Extract material lists from ISO piping-drawing PDFs and export them to Excel.

Each PDF page is converted to text, the material table is located, and every
numbered row is parsed into a record with the keys '材料代码' (ident code),
'材料规格' (size), '材料介绍' (description), '数量' (quantity),
'单线图文件' (source file) and '管线号' (pipeline number).
"""

import os
import re

import pandas as pd

# Section headings that introduce a group of rows in the material table.
CATEGORIES = (
    'PIPES', 'FITTINGS', 'FLANGES', 'GASKETS', 'BOLTS', 'VALVES',
    'INSTRUMENTS', 'PIPE SUPPORTS', 'VALVES / IN-LINE ITEMS',
)
# Longest first so 'VALVES / IN-LINE ITEMS' is not shadowed by 'VALVES'.
_CATEGORIES_LONGEST_FIRST = tuple(sorted(CATEGORIES, key=len, reverse=True))
_CATEGORY_PREFIXES = tuple(f"{cat}:" for cat in CATEGORIES)

# A table row starts with its item number followed by whitespace.
_ITEM_ROW_RE = re.compile(r'^\d+\s')
_ITEM_NO_RE = re.compile(r'^(\d+)\s+')
_TABLE_HEADER_RE = re.compile(
    r'PT\s*NO|COMPONENT\s*DESCRIPTION|N\.S\.\(MM\)|IDENT\s*CODE|QTY',
    re.IGNORECASE,
)
_CUT_TABLE_RE = re.compile(
    r'CUT LENGTH TABLE|SPOOL N|ɍɁɅȺ|LENGTH\(MM\)|ȾɅɂɇȺ\(MM\)',
    re.IGNORECASE,
)
# Title-block / coordinate lines. The coordinate letters are matched
# case-sensitively and on a word boundary; otherwise "PIPE 200" or
# "FLANGE 100" would look like an "E <number>" coordinate and the whole
# material row would be skipped.
_DRAWING_INFO_RE = re.compile(
    r'(?i:ISSUED FOR|N-PLANT NORTH|EL\. \+\d+)|\b[EN] \d+'
)
_TABLE_END_RE = re.compile(
    r'CUT LENGTH TABLE|CONT\. ON|CONT\. FROM', re.IGNORECASE
)
_QTY_RE = re.compile(r'(\d+\.?\d*\s*M?)$')
_INT_QTY_RE = re.compile(r'(\d+)$')
_CODE_RE = re.compile(r'([A-Z0-9][A-Z0-9_\-]{5,})\s*$')
# A size is a separate trailing token: it must be preceded by whitespace (or
# be the whole string) so the digits ending a designation such as
# 'J(S1)-8"-CS1-100' are not mistaken for a size column.
_SIZE_RE = re.compile(r'(?:^|\s)(\d+)\s*(?:MM|mm|")?$')
_SIZE_CROSS_RE = re.compile(
    r'(?:^|\s)(\d+(?:\s?[x×]\s?\d+)?)\s*(?:MM|mm|")?$'
)
# The bolt data line ends with a stand-alone quantity or carries a PLLS code.
_BOLT_DATA_RE = re.compile(r'(?:^|\s)\d+$|PLLS\d+')
_HAS_LETTER_RE = re.compile(r'[A-Za-z]')


def _split_trailing_size(text, pattern=_SIZE_RE):
    """Split a trailing size token off *text*; return (rest, size)."""
    match = pattern.search(text)
    if not match:
        return text, ""
    return text[:match.start()].rstrip(), match.group(1)


def _strip_suffix(text, token, allow_trailing_space=False):
    """Remove *token* (and the whitespace before it) from the end of *text*."""
    tail = r'\s*$' if allow_trailing_space else r'$'
    return re.sub(r'\s*' + re.escape(token) + tail, '', text)


def extract_materials_from_pdf(pdf_path, filename):
    """Extract material records from one PDF (ISO drawing format).

    Returns the list of records parsed from all pages. If a page fails, the
    error is printed and the records collected so far are returned.
    """
    # Imported here so the pure-text parsers can be used without the PDF
    # backend installed; a missing package still fails loudly on first use.
    import pdfplumber

    materials = []
    try:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    materials.extend(parse_iso_materials(text, filename))
    except Exception as e:  # best-effort per file: report and keep going
        print(f"处理文件 {filename} 时出错: {e}")
    return materials


def parse_iso_materials(text, filename):
    """Parse the material table found in the text of one drawing page.

    The table starts at the first line that looks like the column header and
    ends at the cut-length table or a continuation marker. Category headings
    set the category applied to the rows that follow them.
    """
    materials = []
    lines = text.split('\n')
    current_category = ""
    pipeline_number = get_pipeline_number(text)

    # Locate the start of the material table.
    table_start = next(
        (idx for idx, line in enumerate(lines)
         if _TABLE_HEADER_RE.search(line)),
        -1,
    )
    if table_start == -1:
        print(f" 在文件 {filename} 中未找到材料表格")
        return materials

    print(f" 找到材料表格,开始位置: {table_start}")

    i = table_start
    while i < len(lines):
        line = lines[i].strip()

        # Category heading.
        if is_category_line(line):
            current_category = get_category(line)
            print(f" 发现类别: {current_category}")
            i += 1
            continue

        # Blank line.
        if not line:
            i += 1
            continue

        # The cut-length table follows the material table: stop here.
        if _CUT_TABLE_RE.search(line):
            print(" 遇到切割长度表,停止解析")
            break

        # Title-block / coordinate information.
        if _DRAWING_INFO_RE.search(line):
            i += 1
            continue

        material = parse_material_line(lines, i, current_category)
        if material and material['材料代码'] and material['材料介绍']:
            material['单线图文件'] = filename
            material['管线号'] = pipeline_number
            materials.append(material)
            print(f" 提取材料: {material['材料代码']} - 规格: {material['材料规格']} - 数量: {material['数量']}")
            # Skip the lines consumed by a multi-line item.
            i += material.get('lines_processed', 1)
        else:
            i += 1

        # Leave early on an explicit end-of-table marker.
        if i < len(lines) and _TABLE_END_RE.search(lines[i]):
            print(" 遇到表格结束标记,停止解析")
            break

    return materials


def parse_material_line(lines, start_index, category):
    """Parse the table row at lines[start_index].

    Pipe supports, bolts and gaskets are delegated to their dedicated
    parsers. Returns a record dict, or None if the line is not a material row.
    """
    line = lines[start_index].strip()

    # Material rows start with an item number.
    item_no_match = _ITEM_NO_RE.match(line)
    if not item_no_match:
        return None

    if category == "PIPE SUPPORTS":
        return parse_pipe_support_line(lines, start_index, category)
    if category == "BOLTS":
        return parse_bolt_line(lines, start_index, category)
    if category == "GASKETS":
        return parse_gasket_line(lines, start_index, category)

    item_no = item_no_match.group(1)
    remaining_line = line[len(item_no):].strip()
    has_next = start_index + 1 < len(lines)
    next_line = lines[start_index + 1].strip() if has_next else None

    # Quantity: trailing number (optionally followed by M); it may have
    # wrapped onto the next line.
    qty_match = _QTY_RE.search(remaining_line)
    if not qty_match and next_line is not None:
        qty_match = _QTY_RE.search(next_line)
    if not qty_match:
        return None
    quantity = qty_match.group(1)
    remaining_line = _strip_suffix(remaining_line, quantity)

    # Ident code: trailing run of upper-case letters, digits, '_' and '-';
    # it may also have wrapped onto the next line.
    code_match = _CODE_RE.search(remaining_line)
    if not code_match and next_line is not None:
        code_match = _CODE_RE.search(next_line)

    if code_match:
        ident_code = code_match.group(1)
        remaining_line = _strip_suffix(
            remaining_line, ident_code, allow_trailing_space=True
        )
    else:
        # No explicit code: only accept rows with a textual description and
        # derive a code from the item number and the description.
        if not _HAS_LETTER_RE.search(remaining_line):
            return None
        compact = re.sub(r'[^A-Z0-9]', '', remaining_line.upper())
        ident_code = f"MAT-{item_no}-{compact[:10]}"

    # Size: a trailing number or "number x number" token.
    remaining_line, size = _split_trailing_size(remaining_line, _SIZE_CROSS_RE)

    # Whatever is left is the description.
    description = remaining_line.strip()
    if not _HAS_LETTER_RE.search(description):
        return None
    description = re.sub(r'\s+', ' ', description).strip()

    return {
        '材料代码': ident_code,
        '材料规格': size,
        '材料介绍': f"{category}: {description}" if category else description,
        '数量': quantity,
        'lines_processed': 1
    }


def parse_pipe_support_line(lines, start_index, category):
    """Parse a pipe-support row.

    Expected layout: item number, designation, optional size, quantity,
    e.g. '6 J(S1)-8"-CS1-100 200 4'. Supports carry no ident code in the
    table, so one is generated from the item number and the designation.
    """
    line = lines[start_index].strip()

    item_no_match = _ITEM_NO_RE.match(line)
    if not item_no_match:
        return None
    item_no = item_no_match.group(1)
    remaining_line = line[len(item_no):].strip()

    # Quantity: trailing integer.
    qty_match = _INT_QTY_RE.search(remaining_line)
    if not qty_match:
        return None
    quantity = qty_match.group(1)
    remaining_line = _strip_suffix(remaining_line, quantity)

    # Size: only a separate trailing token counts, so a row without a size
    # column keeps its full designation.
    remaining_line, size = _split_trailing_size(remaining_line)

    description = remaining_line.strip()
    if not _HAS_LETTER_RE.search(description):
        return None

    clean_desc = re.sub(r'[^A-Z0-9]', '', description.upper())
    ident_code = f"PS-{item_no}-{clean_desc[:10]}" if clean_desc else f"PS-{item_no}"

    return {
        '材料代码': ident_code,
        '材料规格': size,
        '材料介绍': f"{category}: {description}",
        '数量': quantity,
        'lines_processed': 1
    }


def parse_bolt_line(lines, start_index, category):
    """Parse a (usually multi-line) bolt item.

    Example:
        15 95 mm BOLTS/NUTS,-,A320 Gr.L7/A194 GR.7 S3,FT
        S.BOLT/2 HHN,ASME B18.31.2
        5/8 PLLS60NZZ-95 4
    The first lines form the description; the last line holds size, ident
    code and quantity.
    """
    line = lines[start_index].strip()
    item_no_match = _ITEM_NO_RE.match(line)
    if not item_no_match:
        return None
    item_no = item_no_match.group(1)
    remaining_line = line[len(item_no):].strip()

    # Collect description continuation lines.
    description_lines = [remaining_line]
    lines_processed = 1
    while start_index + lines_processed < len(lines):
        next_line = lines[start_index + lines_processed].strip()
        # A line starting with an item number may be the next item.
        if _ITEM_ROW_RE.match(next_line):
            break
        # The data line ends with a stand-alone quantity or has a PLLS code.
        # A continuation that merely ends in a digit (e.g. '...B18.31.2')
        # is still description.
        if _BOLT_DATA_RE.search(next_line):
            break
        description_lines.append(next_line)
        lines_processed += 1

    # The line holding quantity, size and code.
    if start_index + lines_processed >= len(lines):
        return None
    data_line = lines[start_index + lines_processed].strip()
    lines_processed += 1

    qty_match = _INT_QTY_RE.search(data_line)
    if not qty_match:
        return None
    quantity = qty_match.group(1)
    data_line = _strip_suffix(data_line, quantity)

    code_match = _CODE_RE.search(data_line)
    if code_match:
        ident_code = code_match.group(1)
        data_line = _strip_suffix(
            data_line, ident_code, allow_trailing_space=True
        )
    else:
        # No explicit code: generate one from the item number.
        ident_code = f"BOLT-{item_no}"

    # What remains of the data line is taken as the size.
    size = data_line.strip()

    description = ' '.join(description_lines).strip()
    if not _HAS_LETTER_RE.search(description):
        return None

    return {
        '材料代码': ident_code,
        '材料规格': size,
        '材料介绍': f"{category}: {description}",
        '数量': quantity,
        'lines_processed': lines_processed
    }


def parse_gasket_line(lines, start_index, category):
    """Parse a gasket row whose description may continue on the next line."""
    line = lines[start_index].strip()

    item_no_match = _ITEM_NO_RE.match(line)
    if not item_no_match:
        return None
    item_no = item_no_match.group(1)
    remaining_line = line[len(item_no):].strip()

    qty_match = _INT_QTY_RE.search(remaining_line)
    if not qty_match:
        return None
    quantity = qty_match.group(1)
    remaining_line = _strip_suffix(remaining_line, quantity)

    code_match = _CODE_RE.search(remaining_line)
    if not code_match:
        return None
    ident_code = code_match.group(1)
    remaining_line = _strip_suffix(
        remaining_line, ident_code, allow_trailing_space=True
    )

    remaining_line, size = _split_trailing_size(remaining_line)

    description = remaining_line.strip()

    # Append the next line when it is neither a new item nor a heading.
    if start_index + 1 < len(lines):
        next_line = lines[start_index + 1].strip()
        if not _ITEM_ROW_RE.match(next_line) and not is_category_line(next_line):
            description += " " + next_line

    if not _HAS_LETTER_RE.search(description):
        return None

    return {
        '材料代码': ident_code,
        '材料规格': size,
        '材料介绍': f"{category}: {description}",
        '数量': quantity,
        'lines_processed': 1
    }


def is_category_line(line):
    """Return True if *line* is a category heading.

    A numbered item row is never a heading, even when its description
    contains a category word (e.g. '15 95 mm BOLTS/NUTS,...').
    """
    if _ITEM_ROW_RE.match(line.lstrip()):
        return False
    upper = line.upper()
    return any(cat in upper for cat in CATEGORIES)


def get_category(line):
    """Return the most specific category named in *line*, or ''."""
    upper = line.upper()
    for cat in _CATEGORIES_LONGEST_FIRST:
        if cat in upper:
            return cat
    return ""


def get_pipeline_number(text):
    """Extract the pipeline number from the page text, or '未识别'."""
    # Known pipeline-number patterns, most specific first.
    patterns = [
        r'9170-\d{4}-[A-Z0-9]+-[A-Z]{2,4}',
        r'9170-\d{4}-[A-Z0-9]+',
        r'CA911[EF]',
        r'91-E-\d+-T\d+',
    ]

    for pattern in patterns:
        matches = re.findall(pattern, text)
        if matches:
            # The longest match is usually the most complete one.
            return max(matches, key=len)

    # Fallback: a looser pattern ending in -PRP / -LN.
    filename_match = re.search(r'9170[^\s]*-(?:PRP|LN)', text)
    if filename_match:
        return filename_match.group(0)

    return "未识别"


def batch_process_pdfs(folder_path):
    """Process every PDF in *folder_path*.

    Returns the combined list of material records, or None when the folder
    contains no PDF files.
    """
    all_materials = []
    # Sorted so the output order does not depend on the directory listing.
    pdf_files = sorted(
        f for f in os.listdir(folder_path) if f.lower().endswith('.pdf')
    )

    if not pdf_files:
        print("文件夹中没有找到PDF文件")
        return None

    print(f"找到 {len(pdf_files)} 个PDF文件,开始处理...")

    for pdf_file in pdf_files:
        pdf_path = os.path.join(folder_path, pdf_file)
        print(f"\n处理文件: {pdf_file}")
        materials = extract_materials_from_pdf(pdf_path, pdf_file)

        if materials:
            print(f" 提取到 {len(materials)} 条材料记录")
            # Preview the first few records.
            for j, material in enumerate(materials[:3]):
                print(f" {j+1}. {material['材料代码']}: {material['材料介绍']} - {material['数量']}")
        else:
            print(" 未提取到材料记录")

        all_materials.extend(materials)

    return all_materials


def ensure_category_prefix(description):
    """Return *description* with a category prefix, inferring one if absent.

    Descriptions that already start with a single-word prefix or with one of
    the known categories (including multi-word ones such as
    'PIPE SUPPORTS:') are returned unchanged.
    """
    if re.match(r'^[A-Z]+:', description) or description.startswith(_CATEGORY_PREFIXES):
        return description

    upper = description.upper()
    if 'PIPE' in upper and 'SUPPORT' not in upper:
        return f"PIPES: {description}"
    if 'SUPPORT' in upper or description.startswith('J('):
        return f"PIPE SUPPORTS: {description}"
    if 'FITTING' in upper or 'ELBOW' in upper or 'TEE' in upper:
        return f"FITTINGS: {description}"
    if 'FLANGE' in upper:
        return f"FLANGES: {description}"
    if 'GASKET' in upper:
        return f"GASKETS: {description}"
    if 'BOLT' in upper or 'NUT' in upper:
        return f"BOLTS: {description}"
    if 'VALVE' in upper:
        return f"VALVES: {description}"
    if 'INSTRUMENT' in upper:
        return f"INSTRUMENTS: {description}"
    return description


def create_excel(materials_data, output_path):
    """Write the material records to an Excel file.

    Returns True on success, False when there is nothing to write.
    """
    if not materials_data:
        print("没有提取到材料数据")
        return False

    df = pd.DataFrame(materials_data)

    # Fixed column order.
    columns = ['单线图文件', '材料代码', '材料规格', '材料介绍', '数量', '管线号']
    df = df[columns]

    # Drop exact repeats (keep the first). Size and description are part of
    # the key so distinct items that share a generated code and quantity
    # are not lost.
    df = df.drop_duplicates(
        subset=['单线图文件', '材料代码', '材料规格', '材料介绍', '数量'],
        keep='first',
    )

    df['材料介绍'] = df['材料介绍'].apply(ensure_category_prefix)

    df.to_excel(output_path, index=False, engine='openpyxl')
    return True


def main():
    """Process the configured PDF folder and write the summary workbook."""
    pdf_folder = r"C:\Users\10196\Desktop\PDF文件夹"
    output_excel = r"C:\Users\10196\Desktop\材料清单汇总_精确版.xlsx"

    print("开始处理PDF文件...")
    materials_data = batch_process_pdfs(pdf_folder)

    if materials_data:
        if create_excel(materials_data, output_excel):
            print(f"\n✅ Excel文件已生成: {output_excel}")
            print(f"✅ 共提取 {len(materials_data)} 条材料记录")

            # Summary statistics.
            files = set(m['单线图文件'] for m in materials_data)
            print(f"✅ 处理了 {len(files)} 个文件")

            categories = {}
            for m in materials_data:
                cat = m['材料介绍'].split(':')[0] if ':' in m['材料介绍'] else '其他'
                categories[cat] = categories.get(cat, 0) + 1

            print("✅ 材料类别统计:")
            for cat, count in categories.items():
                print(f" {cat}: {count}条")
        else:
            print("❌ 生成Excel文件失败")
    else:
        print("❌ 没有提取到任何材料数据")


if __name__ == "__main__":
    main()

# Question from the original poster: this code keeps extracting the pipe
# support data from the PDFs incompletely - what can be done about it?
最新发布
08-25
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值