Items not in any BOM / BOM list

-- Items that have not been added to any BOM (neither as assembly nor as component)

select
    msi.segment1 item_no,
    msi.description,
    msi.primary_uom_code uom,
    (select flv.meaning
       from fnd_lookup_values_vl flv
      where flv.lookup_type = 'ITEM_TYPE'
        and flv.lookup_code = msi.item_type) item_type,
    nvl((select sum(moq.primary_transaction_quantity)
           from mtl_onhand_quantities_detail moq
          where moq.inventory_item_id = msi.inventory_item_id
            and moq.organization_id = msi.organization_id), 0) on_hand_qty
  from mtl_system_items_b msi
 where 1 = 1
   and msi.eng_item_flag = 'N'                -- exclude engineering items
   and msi.organization_id = 190              -- target inventory organization
   --and msi.inventory_item_id = 15395
   --and msi.segment1 = '9SL4000030AFDIF0'
   and not exists (
       select 'x'
         from bom_bill_of_materials bom,
              bom_inventory_components bic
        where bom.bill_sequence_id = bic.bill_sequence_id
          and bom.organization_id = msi.organization_id
          -- the item is neither the assembly of a BOM nor a component on one
          and (   bom.assembly_item_id  = msi.inventory_item_id
               or bic.component_item_id = msi.inventory_item_id))
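
-- As a quick gauge of cleanup scope, the same NOT EXISTS predicate can be
-- reused to count the orphan items per item type. A minimal sketch, assuming
-- the same organization 190 and the same tables as above:
select msi.item_type, count(*) orphan_count
  from mtl_system_items_b msi
 where msi.eng_item_flag = 'N'
   and msi.organization_id = 190
   and not exists (
       select 'x'
         from bom_bill_of_materials bom,
              bom_inventory_components bic
        where bom.bill_sequence_id = bic.bill_sequence_id
          and bom.organization_id = msi.organization_id
          and (   bom.assembly_item_id  = msi.inventory_item_id
               or bic.component_item_id = msi.inventory_item_id))
 group by msi.item_type
 order by orphan_count desc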

---------------------------------------------------------------------------------------------------------------------------------------------------------

-- BOM list: top-down explosion starting from a given assembly (sample values: org 123, item 15395)
select level,
       lst.assembly_item_id,
       lst.component_item_id,
       lst.organization_id,
       lst.parent_item,
       lst.component_item
  from (select bom.assembly_item_id,
               bic.component_item_id,
               bom.organization_id,
               (select msi.segment1
                  from inv.mtl_system_items_b msi
                 where msi.organization_id = 123
                   and msi.inventory_item_id = bom.assembly_item_id) parent_item,
               (select msi.segment1
                  from inv.mtl_system_items_b msi
                 where msi.organization_id = 123
                   and msi.inventory_item_id = bic.component_item_id) component_item
          from bom_bill_of_materials bom,
               bom_inventory_components bic
         where bom.bill_sequence_id = bic.bill_sequence_id
           and bom.organization_id = 123) lst
 start with lst.assembly_item_id = 15395     -- top assembly to explode
connect by lst.assembly_item_id = prior lst.component_item_id
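
-- If a BOM accidentally loops (a component that is also one of its own
-- ancestors), the plain CONNECT BY above fails with ORA-01436. A hedged
-- variant sketch, reusing the sample org 123 and assembly 15395 from above:
-- NOCYCLE stops at the loop, and LPAD indents each component by its level
-- so the result reads as a tree.
select lpad(' ', 2 * (level - 1)) || lst.component_item tree_view,
       level,
       lst.component_item_id
  from (select bom.assembly_item_id,
               bic.component_item_id,
               (select msi.segment1
                  from inv.mtl_system_items_b msi
                 where msi.organization_id = 123
                   and msi.inventory_item_id = bic.component_item_id) component_item
          from bom_bill_of_materials bom,
               bom_inventory_components bic
         where bom.bill_sequence_id = bic.bill_sequence_id
           and bom.organization_id = 123) lst
 start with lst.assembly_item_id = 15395
connect by nocycle lst.assembly_item_id = prior lst.component_item_id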
