PHP parse_url、pathinfo、basename 函数

 

 

import os
import json
import requests
from flask import Flask, jsonify, request, render_template
from datetime import datetime
import logging
import glob
import time
import argparse
import re

app = Flask(__name__)

# Logging configuration
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Base paths (hard-coded to one workstation; adjust for other machines)
BASE_PATH = r"C:\Users\l30078648\Desktop\250730"
DOWNLOAD_PATH = os.path.join(BASE_PATH, "downloads")

# S3 OBS API configuration
S3_OBS_API_URL = "https://adsci.api.ias.huawei.com/icollect/get_s3_obs_file_download_url/"
S3_OBS_HEADERS = {
    "Accept-Language": "zh-CN",
    "Content-Type": "application/json"
}
USER_ID = "h00512356"

# Chip model -> chip name used as the prefix of the remote profiling file
CHIP_FILENAME_MAP = {
    "BS9SX1A": "BS9SX1AA",  # remote file name carries an extra trailing 'A'
    "Ascend610Lite": "Ascend610Lite"
}

# Chip -> model group -> directory holding the .track JSON files
TRACK_PATHS = {
    "Ascend610Lite": {
        "rl_nn": os.path.join(BASE_PATH, ".track", "Ascend610Lite", "rl_nn"),
        "rsc_nn": os.path.join(BASE_PATH, ".track", "Ascend610Lite", "rsc_nn"),
        "prediction_nn": os.path.join(BASE_PATH, ".track", "Ascend610Lite", "prediction_nn")
    },
    "BS9SX1A": {
        "rl_nn": os.path.join(BASE_PATH, ".track", "BS9SX1A", "rl_nn"),
        "rsc_nn": os.path.join(BASE_PATH, ".track", "BS9SX1A", "rsc_nn"),
        "prediction_nn": os.path.join(BASE_PATH, ".track", "BS9SX1A", "prediction_nn")
    }
}

# Chip -> model group -> directory where downloaded profiling JSON files are stored
JSON_PATHS = {
    "Ascend610Lite": {
        "rl_nn": os.path.join(DOWNLOAD_PATH, "Ascend610Lite", "rl_nn"),
        "rsc_nn": os.path.join(DOWNLOAD_PATH, "Ascend610Lite", "rsc_nn"),
        "prediction_nn": os.path.join(DOWNLOAD_PATH, "Ascend610Lite", "prediction_nn")
    },
    "BS9SX1A": {
        "rl_nn": os.path.join(DOWNLOAD_PATH, "BS9SX1A", "rl_nn"),
        "rsc_nn": os.path.join(DOWNLOAD_PATH, "BS9SX1A", "rsc_nn"),
        "prediction_nn": os.path.join(DOWNLOAD_PATH, "BS9SX1A", "prediction_nn")
    }
}


def send_request(method, url, headers=None, data=None, timeout=30, max_try_times=1, **kwargs):
    """Send an HTTP request, retrying on failure.

    Args:
        method: HTTP verb passed to ``requests.request``.
        url: Target URL.
        headers: Optional request headers.
        data: Request payload. ``None`` becomes ``{}``; a ``str`` is sent as
            UTF-8 bytes; anything else is JSON-encoded. When ``files`` is in
            ``kwargs`` the payload is passed through unchanged.
        timeout: Per-attempt timeout in seconds.
        max_try_times: Maximum number of attempts.
        **kwargs: Extra keyword arguments forwarded to ``requests.request``.

    Returns:
        The last ``requests.Response`` received, or ``None`` if no attempt
        produced a response. A status of 200 or 201 ends the retry loop early.
    """
    if data is None:
        data = {}
    if 'files' in kwargs:
        request_body = data
    elif isinstance(data, str):
        request_body = data.encode("utf-8")
    else:
        request_body = json.dumps(data)

    # NOTE(security): TLS certificate verification is disabled by default, as in
    # the original code. setdefault lets a caller pass verify=True (or a CA
    # bundle) without triggering a duplicate-keyword TypeError.
    kwargs.setdefault("verify", False)

    response = None
    for attempt in range(1, max_try_times + 1):
        try:
            response = requests.request(
                method=method,
                url=url,
                headers=headers,
                data=request_body,
                timeout=timeout,
                **kwargs
            )
            if response.status_code in (200, 201):
                break
        except requests.exceptions.Timeout:
            logger.warning(f"请求超时: url={url}")
        except Exception as e:
            logger.exception(f"请求失败: url={url}, 错误: {str(e)}")
            # Response.__bool__ is False for 4xx/5xx, so compare against None
            # explicitly; otherwise failed responses are never logged.
            if response is not None:
                logger.error(f"响应内容: {response.text}")

        if attempt < max_try_times:
            logger.warning(f"请求失败, url={url}, 重试中 ({attempt}/{max_try_times})")
            time.sleep(5)
        else:
            # Final attempt: nothing follows, so do not sleep or claim a retry.
            logger.warning(f"请求失败, url={url}, 已达最大尝试次数 ({attempt}/{max_try_times})")

    return response


def extract_build_ids_from_track(chip, group):
    """Read every ``*.json`` file in the chip/group .track directory.

    For each file the model name is the file name without extension and the
    build id is the file's top-level ``build_id`` field (integers are converted
    to strings). An OBS browse link is generated when a build id is present.

    Returns:
        ``(build_data, build_missing)`` where ``build_data`` maps model name to
        ``{"build_id", "model_full_name", "obs_url"}`` and ``build_missing``
        lists models without a build id plus the base names of files that could
        not be parsed. Both are empty for an unknown chip/group or a missing
        directory.
    """
    build_data = {}
    build_missing = []

    if chip not in TRACK_PATHS or group not in TRACK_PATHS[chip]:
        logger.error(f"无效路径: {chip}/{group}")
        return build_data, build_missing

    group_path = TRACK_PATHS[chip][group]
    if not os.path.exists(group_path):
        logger.error(f"原始路径不存在: {group_path}")
        return build_data, build_missing

    json_files = glob.glob(os.path.join(group_path, "*.json"))
    logger.info(f"在路径 {group_path} 中找到 {len(json_files)} 个JSON文件")

    for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Model name keeps any suffix that is part of the file name
            model_full_name = os.path.splitext(os.path.basename(json_file))[0]

            build_id = data.get('build_id')
            if isinstance(build_id, int):
                build_id = str(build_id)

            obs_url = None
            if build_id:
                obs_url = f"https://adsci.ias.huawei.com/icollect/show_obs_objects/?s3_obs_url=hpct/nn_model_prebuild/{build_id}_PREBUILD_OUTPUTS"

            build_data[model_full_name] = {
                "build_id": build_id,
                "model_full_name": model_full_name,
                "obs_url": obs_url
            }

            if build_id:
                logger.debug(f"提取成功: {model_full_name} -> build_id: {build_id}, OBS链接: {obs_url}")
            else:
                logger.warning(f"文件 {json_file} 中没有找到build_id字段")
                build_missing.append(model_full_name)
        except Exception as e:
            # Best effort per file: record the failure and keep scanning.
            logger.error(f"解析原始文件 {json_file} 时出错: {str(e)}")
            build_missing.append(os.path.basename(json_file))
            continue

    # Count entries that really carry a build id. Unparseable files are listed
    # in build_missing but never enter build_data, so subtracting the two
    # lengths under-counts (and can even go negative).
    extracted_count = sum(1 for info in build_data.values() if info["build_id"])
    logger.info(f"成功提取 {extracted_count} 个build_id")
    return build_data, build_missing


def get_chip_filename(chip):
    """Return the file-name prefix for ``chip``; unknown chips map to ``str(chip)``."""
    return CHIP_FILENAME_MAP.get(chip, f"{chip}")


def download_file_with_build_id(model_full_name, chip, group, build_id):
    """Download the profiling record JSON that belongs to one build id.

    Returns:
        ``(save_path, file_data)`` from ``download_and_save``, or
        ``(None, None)`` when the build id is empty or no download URL could be
        obtained.
    """
    if not build_id:
        logger.warning(f"跳过缺少build_id的文件: {model_full_name}")
        return None, None

    chip_filename = get_chip_filename(chip)

    # Remote location of the profiling summary for this build
    s3_obs_url = f"hpct/nn_model_prebuild/{build_id}_PREBUILD_OUTPUTS/profiling_summary/"
    file_name = f"{chip_filename}_prof_record.json"

    download_url, error = get_download_url(build_id, s3_obs_url, file_name)
    if not download_url:
        logger.error(f"获取下载URL失败: {error}")
        return None, None

    return download_and_save(download_url, model_full_name, chip, group, file_name)


def get_download_url(build_id, s3_obs_url, file_name):
    """Ask the S3 OBS API for a download URL.

    ``build_id`` is accepted for interface compatibility but is not used here;
    the build is already encoded in ``s3_obs_url``.

    Returns:
        ``(url, None)`` on success, with the port rewritten by
        ``fix_download_url_port``; ``(None, error_message)`` otherwise.
    """
    payload = {
        "user_id": USER_ID,
        "s3_obs_url": s3_obs_url,
        "file_name": file_name
    }
    logger.info(f"请求下载URL: {payload}")

    response = send_request(
        "POST",
        S3_OBS_API_URL,
        headers=S3_OBS_HEADERS,
        data=payload
    )

    if response is None or response.status_code != 200:
        logger.error(f"获取下载URL失败: 状态码={getattr(response, 'status_code', '无响应')}")
        return None, "下载URL获取失败"

    try:
        result = response.json()
        download_url = result.get("download_url")
        if not download_url:
            logger.error(f"响应中缺少download_url字段: {result}")
            return None, "响应中缺少download_url字段"

        # The API may return a URL with a different port; force port 10001.
        return fix_download_url_port(download_url), None
    except Exception as e:
        logger.error(f"解析响应失败: {str(e)}")
        return None, f"解析响应失败: {str(e)}"


def fix_download_url_port(url):
    """Rewrite the explicit port of a ``*.huawei.com`` host in ``url`` to 10001.

    URLs without an explicit port are returned unchanged.
    """
    pattern = r"(\w+\.huawei\.com):\d+"
    replacement = r"\1:10001"
    fixed_url = re.sub(pattern, replacement, url)
    if fixed_url != url:
        logger.info(f"URL端口已修正: {url} -> {fixed_url}")
    return fixed_url


def download_and_save(download_url, model_full_name, chip, group, file_name):
    """Download ``download_url`` and store it under the chip/group JSON directory.

    The file is saved as ``<model_full_name>_<file_name>``.

    Returns:
        ``(save_path, parsed_json)`` on success, ``(save_path, None)`` when the
        file was saved but is not valid JSON, and ``(None, None)`` when the
        target directory is unknown, the download fails, or the file cannot be
        written.
    """
    # Resolve the destination first so an unknown chip/group is reported
    # cleanly (instead of a KeyError) and no download is wasted.
    try:
        save_dir = JSON_PATHS[chip][group]
    except KeyError:
        logger.error(f"无效路径: {chip}/{group}")
        return None, None

    logger.info(f"下载文件: {download_url}")
    file_response = send_request("GET", download_url, timeout=60)

    if file_response is None or file_response.status_code != 200:
        logger.error(f"文件下载失败: 状态码={getattr(file_response, 'status_code', '无响应')}")
        return None, None

    save_path = os.path.join(save_dir, f"{model_full_name}_{file_name}")

    try:
        os.makedirs(save_dir, exist_ok=True)
        with open(save_path, 'wb') as f:
            f.write(file_response.content)
    except Exception as e:
        logger.error(f"保存文件失败: {save_path}, 错误: {str(e)}")
        return None, None
    logger.info(f"文件保存成功: {save_path}")

    # Re-read the saved file so the caller gets exactly what is on disk.
    try:
        with open(save_path, 'r', encoding='utf-8') as f:
            return save_path, json.load(f)
    except Exception as e:
        logger.error(f"解析下载的JSON文件失败: {save_path}, 错误: {str(e)}")
        return save_path, None


def download_files_with_build_ids(build_id_map, chip, group):
    """Download the profiling file for every entry of ``build_id_map``.

    Returns:
        ``(downloaded_files, download_errors)``. ``downloaded_files`` holds one
        dict per model with a ``status`` of ``"success"``, ``"partial_success"``
        (saved but not parseable), ``"failed"`` or ``"skipped"`` (no build id).
        ``download_errors`` holds ``{"model", "error"}`` dicts for the
        partial and failed cases.
    """
    downloaded_files = []
    download_errors = []

    for model_full_name, build_info in build_id_map.items():
        build_id = build_info["build_id"]

        if not build_id:
            logger.warning(f"跳过缺少build_id的文件: {model_full_name}")
            downloaded_files.append({
                "model": model_full_name,
                "status": "skipped",
                "reason": "missing_build_id"
            })
            continue

        logger.info(f"下载文件: {model_full_name} (build_id={build_id})")
        save_path, file_data = download_file_with_build_id(model_full_name, chip, group, build_id)

        if not save_path:
            logger.warning(f"下载失败: {model_full_name}")
            download_errors.append({"model": model_full_name, "error": "下载失败"})
            downloaded_files.append({"model": model_full_name, "status": "failed"})
            continue

        if file_data:
            status = "success"
        else:
            status = "partial_success"
            download_errors.append({"model": model_full_name, "error": "文件解析失败"})
        downloaded_files.append({"model": model_full_name, "status": status})

    return downloaded_files, download_errors
def extract_om_and_mean_ddr(data):
    """Extract the ``.om`` value, ``mean_ddr`` and the derived bandwidth.

    Args:
        data: Parsed profiling JSON. Anything that is not a dict yields
            ``(None, None, None)``.

    Returns:
        ``(om_value, mean_ddr, bandwidth)``. ``om_value`` is the value of the
        first key ending in ``.om``. ``mean_ddr`` is taken from the top level
        or, failing that, from the first nested dict containing it.
        ``bandwidth`` is ``om_value * mean_ddr * 10 / 1000`` rounded to four
        decimals, or ``None`` when either input is missing or not numeric.
    """
    om_value = None
    mean_ddr = None
    bandwidth = None

    # A JSON document may legally be a list or scalar; there is nothing to
    # extract from those, and calling .keys() on them would raise.
    if not isinstance(data, dict):
        return om_value, mean_ddr, bandwidth

    om_keys = [key for key in data if key.endswith('.om')]
    if om_keys:
        om_value = data[om_keys[0]]

    if 'mean_ddr' in data:
        mean_ddr = data['mean_ddr']
    else:
        # Fall back to a one-level nested lookup
        for value in data.values():
            if isinstance(value, dict) and 'mean_ddr' in value:
                mean_ddr = value['mean_ddr']
                break

    if om_value is not None and mean_ddr is not None:
        try:
            bandwidth = round((float(om_value) * float(mean_ddr) * 10) / 1000, 4)
        except (TypeError, ValueError):
            logger.error(f"带宽计算失败: om_value={om_value}, mean_ddr={mean_ddr}")

    return om_value, mean_ddr, bandwidth


def _model_name_from_prof_file(file_name):
    """Recover the model name from ``<model>_<chip>_prof_record.json``."""
    for chip_filename in CHIP_FILENAME_MAP.values():
        suffix = f"_{chip_filename}_prof_record.json"
        if file_name.endswith(suffix):
            return file_name[:-len(suffix)]
    # Unknown chip prefix: drop the last three '_'-separated parts
    # (chip, "prof", "record.json").
    return file_name.rsplit('_', 3)[0]


def parse_json_file(file_path, file_data=None):
    """Build the metrics record for one downloaded profiling file.

    Args:
        file_path: Path of the profiling JSON file.
        file_data: Already-parsed content; when ``None`` the file is read.

    Returns:
        A dict with ``model_full_name``, ``file_name``, ``file_path``,
        ``file_size``, ``last_modified``, ``om_value``, ``mean_ddr`` and
        ``bandwidth``, or ``None`` when the file cannot be read or parsed.
    """
    if file_data is None:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                file_data = json.load(f)
        except Exception as e:
            logger.error(f"读取文件出错 {file_path}: {e}")
            return None

    file_name = os.path.basename(file_path)

    # Files are saved as "<model>_<chip>_prof_record.json". The previous
    # rsplit('_', 2) left the chip suffix attached, so the name never matched
    # the .track model names used as build_id_map keys.
    model_full_name = _model_name_from_prof_file(file_name)

    om_value, mean_ddr, bandwidth = extract_om_and_mean_ddr(file_data)

    file_size = os.path.getsize(file_path)
    last_modified = datetime.fromtimestamp(os.path.getmtime(file_path)).isoformat()

    return {
        "model_full_name": model_full_name,
        "file_name": file_name,
        "file_path": file_path,
        "file_size": file_size,
        "last_modified": last_modified,
        "om_value": om_value,
        "mean_ddr": mean_ddr,
        "bandwidth": bandwidth
    }


def _format_metric(value):
    """Format a metric with four decimals; ``None`` -> "N/A", non-numeric -> ``str``."""
    if value is None:
        return "N/A"
    try:
        return f"{float(value):.4f}"
    except (TypeError, ValueError):
        return str(value)


def print_all_om_and_mean_ddr():
    """Print latency, DDR and bandwidth of every downloaded file to the terminal.

    Walks ``DOWNLOAD_PATH/<device>/<group>/*.json`` and prints one line per
    file, followed by a summary of processed and valid files.
    """
    print("\n" + "=" * 100)
    print("JSON性能数据提取报告 - 时延(ms)、DDR和带宽(Mbps)")
    print("=" * 100)

    devices = []
    if os.path.isdir(DOWNLOAD_PATH):
        devices = [d for d in os.listdir(DOWNLOAD_PATH)
                   if os.path.isdir(os.path.join(DOWNLOAD_PATH, d))]

    if not devices:
        print(f"在 {DOWNLOAD_PATH} 中未找到设备目录")
        return

    total_files = 0
    valid_files = 0

    for device in devices:
        device_path = os.path.join(DOWNLOAD_PATH, device)
        groups = [g for g in os.listdir(device_path)
                  if os.path.isdir(os.path.join(device_path, g))]

        if not groups:
            print(f"\n设备 {device} 中没有模型组")
            continue

        print(f"\n设备: {device} ({len(groups)}个模型组)")
        print("-" * 90)

        for group in groups:
            group_path = os.path.join(device_path, group)
            json_files = glob.glob(os.path.join(group_path, "*.json"))

            if not json_files:
                print(f" ├── 组 {group}: 没有JSON文件")
                continue

            print(f" ├── 组 {group}: {len(json_files)}个模型")

            for json_file in json_files:
                total_files += 1
                metrics = parse_json_file(json_file)
                if metrics is None:
                    # Unreadable file: parse_json_file already logged the
                    # cause. Count it as processed but not valid.
                    print(f" │ ├── {os.path.basename(json_file)} | N/A")
                    continue

                om_str = _format_metric(metrics['om_value'])
                ddr_str = _format_metric(metrics['mean_ddr'])
                bw_str = _format_metric(metrics['bandwidth'])

                print(f" │ ├── {metrics['model_full_name']} | 时延: {om_str:<8} | DDR: {ddr_str:<8} | 带宽: {bw_str:<8}")

                if metrics['om_value'] is not None and metrics['mean_ddr'] is not None:
                    valid_files += 1

    # Guard the percentage: device/group directories may contain no JSON files.
    valid_pct = valid_files / total_files * 100 if total_files else 0.0
    print("\n" + "=" * 100)
    print(f"扫描完成: 共处理 {total_files} 个文件, 有效数据 {valid_files} 个 ({valid_pct:.1f}%)")
    print("=" * 100 + "\n")


def get_performance_data(chip, group, refresh_data=False):
    """Collect the performance metrics of one chip/group for the front end.

    Args:
        chip: Chip key of ``JSON_PATHS`` / ``TRACK_PATHS``.
        group: Model group key.
        refresh_data: When true, download all profiling files before reading
            the local directory.

    Returns:
        A dict whose ``status`` is ``"success"`` or ``"error"`` (with an
        ``error`` message). On success ``models`` holds one entry per local
        JSON file that could be parsed.
    """
    performance_data = {
        "status": "success",
        "models": [],
        "timestamp": datetime.now().isoformat(),
        "chip_type": chip,
        "group": group,
        "json_path": JSON_PATHS[chip].get(group, "") if chip in JSON_PATHS else "",
        "track_path": TRACK_PATHS[chip].get(group, "") if chip in TRACK_PATHS else "",
        "file_count": 0,
        "build_id_count": 0,
        "refresh_performed": refresh_data,
        "download_errors": [],
        "downloaded_files": [],
        "build_missing": []
    }

    # 1. Extract build ids and model names from the .track directory
    build_id_map, build_missing = extract_build_ids_from_track(chip, group)
    # Count entries that actually have a build id. build_missing also lists
    # unparseable files that are not in build_id_map, so subtracting the two
    # lengths gives a wrong (possibly negative) number.
    performance_data["build_id_count"] = sum(
        1 for info in build_id_map.values() if info.get("build_id")
    )
    performance_data["build_missing"] = build_missing

    # 2. Validate the chip/group combination
    if chip not in JSON_PATHS or group not in JSON_PATHS[chip]:
        performance_data["status"] = "error"
        performance_data["error"] = "无效芯片或模型组"
        return performance_data

    group_path = JSON_PATHS[chip][group]
    if refresh_data:
        # A refresh is about to write here, so create the directory if needed.
        os.makedirs(group_path, exist_ok=True)
    elif not os.path.exists(group_path):
        performance_data["status"] = "error"
        performance_data["error"] = "JSON路径不存在"
        return performance_data

    # 3. Download everything when a refresh was requested
    if refresh_data:
        logger.info(f"开始刷新下载,共有{len(build_id_map)}个文件需要处理")
        downloaded_files, download_errors = download_files_with_build_ids(build_id_map, chip, group)
        performance_data["downloaded_files"] = downloaded_files
        performance_data["download_errors"] = download_errors
        performance_data["refresh_status"] = "executed"
    else:
        performance_data["refresh_status"] = "skipped"
        logger.info("跳过文件下载,仅使用本地现有数据")

    # 4. Parse every local JSON file and collect the metrics
    json_files = glob.glob(os.path.join(group_path, "*.json"))
    performance_data["file_count"] = len(json_files)

    for json_file in json_files:
        model_data_parsed = parse_json_file(json_file)
        if not model_data_parsed:
            continue

        model_full_name = model_data_parsed["model_full_name"]
        build_info = build_id_map.get(model_full_name, {})

        performance_data["models"].append({
            "model_full_name": model_full_name,
            "prebuild_id": build_info.get("build_id", "NA"),
            "obs_url": build_info.get("obs_url", None),
            "latency": model_data_parsed["om_value"],
            "mean_ddr": model_data_parsed["mean_ddr"],
            "bandwidth": model_data_parsed["bandwidth"],
            "timestamp": model_data_parsed["last_modified"]
        })

    return performance_data


@app.route('/api/refresh', methods=['POST'])
def refresh_data_api():
    """POST /api/refresh: re-download all profiling files of one chip/group.

    Reads optional ``chip`` and ``group`` from the JSON body (defaults:
    ``Ascend610Lite`` / ``rl_nn``) and returns the download report as JSON.
    Unexpected errors produce a 500 response.
    """
    start_time = time.time()

    try:
        # silent=True returns None instead of raising on a missing or invalid
        # JSON body; fall back to the defaults in that case.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        chip = data.get('chip', 'Ascend610Lite')
        group = data.get('group', 'rl_nn')

        logger.info(f"手动刷新请求 - 芯片: {chip}, 组: {group}")

        build_id_map, build_missing = extract_build_ids_from_track(chip, group)
        downloaded_files, download_errors = download_files_with_build_ids(build_id_map, chip, group)

        response = {
            "status": "success",
            "chip": chip,
            "group": group,
            "downloaded_files": downloaded_files,
            "download_errors": download_errors,
            "build_missing": build_missing,
            "process_time": round(time.time() - start_time, 4)
        }
        return jsonify(response)

    except Exception as e:
        logger.exception("刷新数据时出错")
        return jsonify({
            "status": "error",
            "error": "服务器内部错误",
            "details": str(e),
            "process_time": round(time.time() - start_time, 4)
        }), 500


@app.route('/api/performance', methods=['GET'])
def performance_api():
    """GET /api/performance: return the metrics of one device/group.

    Query parameters: ``device`` (default ``Ascend610Lite``), ``type``
    (model group, default ``rl_nn``) and ``refresh`` (``true`` triggers a
    download first).
    """
    start_time = time.time()

    try:
        device = request.args.get('device', 'Ascend610Lite')
        group = request.args.get('type', 'rl_nn')
        refresh = request.args.get('refresh', 'false').lower() == 'true'

        logger.info(f"性能API请求 - 设备: {device}, 组: {group}, 刷新: {refresh}")

        performance_data = get_performance_data(device, group, refresh_data=refresh)

        process_time = time.time() - start_time
        response = jsonify({
            **performance_data,
            "process_time": round(process_time, 4)
        })
        # A refresh has side effects; a cached copy would silently skip the
        # download on the next refresh request, so only cache plain reads.
        response.headers['Cache-Control'] = 'no-store' if refresh else 'public, max-age=300'
        return response

    except Exception as e:
        logger.exception("处理请求时出错")
        return jsonify({
            "status": "error",
            "error": "服务器内部错误",
            "details": str(e),
            "process_time": round(time.time() - start_time, 4)
        }), 500


@app.route('/')
def home():
    """Serve the front-end page."""
    return render_template('index.html')


if __name__ == '__main__':
    # Make sure the download directory and every chip/group sub-directory exist
    os.makedirs(DOWNLOAD_PATH, exist_ok=True)
    for groups in JSON_PATHS.values():
        for path in groups.values():
            os.makedirs(path, exist_ok=True)

    parser = argparse.ArgumentParser(description='AI芯片性能监控服务')
    parser.add_argument('--host', type=str, default='127.0.0.1', help='服务监听地址')
    parser.add_argument('--port', type=int, default=8080, help='服务监听端口')
    parser.add_argument('--print', action='store_true', help='启动时打印所有.om和mean_ddr值到终端')
    args = parser.parse_args()

    if args.print:
        print_all_om_and_mean_ddr()

    # Honour --host/--port; they were parsed but previously ignored.
    # NOTE(security): debug=True enables the interactive debugger; do not
    # combine it with a non-loopback --host.
    app.run(host=args.host, port=args.port, debug=True)

# Page text that followed the code in the original post: "检查后端" ("check the backend").
最新发布
08-15
<think>
我们正在讨论 PHP pathinfo() 函数,特别是使用 PATHINFO_EXTENSION 参数来获取文件扩展名的用法。根据引用[1],我们知道 pathinfo() 函数可以以数组形式返回文件路径的信息,包括扩展名、文件名等。同时,引用[2]提到,当路径中包含中文字符时,pathinfo 函数可能会出现乱码或解析错误,但我们的问题集中在基本用法上,暂时不涉及中文问题。

用户要求提供 PATHINFO_EXTENSION 参数的具体用法和示例。PATHINFO_EXTENSION 是 pathinfo 函数第二个(可选)参数的取值之一,用于直接获取文件扩展名。

基本语法:

```php
mixed pathinfo(string $path [, int $options = PATHINFO_DIRNAME | PATHINFO_BASENAME | PATHINFO_EXTENSION | PATHINFO_FILENAME])
```

当使用 PATHINFO_EXTENSION 作为选项时,函数将返回文件的扩展名(不带点)。

示例:假设我们有一个文件路径 "/test/dir/test.txt",我们想要获取它的扩展名 "txt"。我们可以这样写:

```php
<?php
$file_path = '/test/dir/test.txt';
$extension = pathinfo($file_path, PATHINFO_EXTENSION);
echo $extension; // 输出:txt
?>
```

注意:如果文件没有扩展名,则返回空字符串。如果文件名以点开头(例如 ".test"),pathinfo 会把点之后的部分视为扩展名(返回 "test"),此时 filename 为空字符串。另外,如果路径中有多个点,PATHINFO_EXTENSION 只返回最后一个点之后的部分。

另一个示例,路径为 "/path/to/my.file.tar.gz":

```php
<?php
$file_path = '/path/to/my.file.tar.gz';
$extension = pathinfo($file_path, PATHINFO_EXTENSION);
echo $extension; // 输出:gz
?>
```

如果需要同时获取多个信息,可以不指定第二个参数,这样会返回一个关联数组。例如:

```php
<?php
$file_path = '/test/dir/test.txt';
$file_info = pathinfo($file_path);
echo $file_info['extension']; // 输出:txt
// 同时还可以获取其他信息,如:
echo $file_info['dirname'];   // 输出:/test/dir
echo $file_info['basename'];  // 输出:test.txt
echo $file_info['filename'];  // 输出:test(注意:PHP 5.2 之后支持)
?>
```

注意:在旧版本的 PHP(< 5.2)中,返回的数组中没有 'filename' 键,只有 'dirname'、'basename'、'extension'(如果有扩展名的话)。

另外,引用[3]提到了 parse_url 函数,但用户的问题集中在 pathinfo,所以我们不展开。最后,关于中文路径问题,引用[2]提到可能会出现乱码,可以通过自定义函数解决,但用户当前没有要求,我们可以不提。如果用户后续问及,我们再提供解决方案。

现在,我们根据用户要求,提供 PATHINFO_EXTENSION 参数的用法示例即可。
</think>

### PHP `pathinfo()` 函数:`PATHINFO_EXTENSION` 参数详解

#### 基础功能说明

`pathinfo()` 是 PHP 的内置文件路径解析函数,通过 `PATHINFO_EXTENSION` 参数可直接获取文件扩展名(不带前导点)。其核心特性:

1. **仅返回扩展名字符串**(如 `txt`/`jpg`)
2. **自动过滤路径中的目录和文件名**
3. **识别最后一个点 `.` 后的字符作为扩展名**[^1]

#### 用法示例

```php
<?php
// 示例1:基础用法
$path = '/var/www/index.html';
echo pathinfo($path, PATHINFO_EXTENSION); // 输出: html

// 示例2:复杂路径解析
$imgPath = 'uploads/2023/photo.tar.gz';
$extension = pathinfo($imgPath, PATHINFO_EXTENSION);
echo $extension; // 输出: gz(注意:仅返回最后一级扩展名)

// 示例3:中文路径兼容(需确保系统字符编码一致)
$cnPath = '文档/报告.docx';
echo pathinfo($cnPath, PATHINFO_EXTENSION); // 输出: docx
?>
```

#### 注意事项

1. **无扩展名文件**

   ```php
   $dirPath = '/etc/nginx/conf.d/default';
   echo pathinfo($dirPath, PATHINFO_EXTENSION); // 输出空字符串
   ```

2. **隐藏文件处理**

   ```php
   $hiddenFile = '.env.production';
   echo pathinfo($hiddenFile, PATHINFO_EXTENSION); // 输出: production
   ```

3. **多扩展名文件**

   始终返回最后一级扩展名(如 `file.min.js` → `js`)

#### 兼容性优化建议

当路径含中文字符可能解析异常时,推荐使用组合方案:

```php
function safe_extension($path) {
    return pathinfo(iconv('UTF-8', 'GBK//IGNORE', $path), PATHINFO_EXTENSION);
}

// 调用示例
echo safe_extension('图片/北京故宫.jpg'); // 稳定输出: jpg[^2]
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值