PHP 使用pathinfo(), parse_url(), basename()解析URL

本文介绍了如何使用PHP内置函数pathinfo(), parse_url() 和 basename()来解析URL的不同部分,包括路径、参数、文件名等。

原文链接:点击打开链接

1、利用pathinfo解析URL

<?php
// pathinfo() splits a path (URL here) into dirname, basename,
// extension and filename components.
// NOTE: use the full "<?php" open tag — the short "<?" tag only works
// when short_open_tag is enabled in php.ini and is not portable.
$test = pathinfo("http://localhost/index.php");
print_r($test);
?>
结果如下
    Array
   (
      [dirname] => http://localhost //url的路径
      [basename] => index.php //完整文件名
      [extension] => php //文件名后缀
      [filename] => index //文件名
   )

2、利用parse_url()函数解析

<?php
// parse_url() decomposes a URL into scheme, host, path, query and
// fragment parts and returns them as an associative array.
// NOTE: use the full "<?php" open tag — the short "<?" tag only works
// when short_open_tag is enabled in php.ini and is not portable.
$test = parse_url("http://localhost/index.php?name=tank&sex=1#top");
print_r($test);
?>
结果如下
   Array
   (
      [scheme] => http //使用什么协议
      [host] => localhost //主机名
      [path] => /index.php //路径
      [query] => name=tank&sex=1 // 所传的参数
      [fragment] => top //后面根的锚点
   )

3、使用basename()解析

<?php
// basename() returns everything after the last "/" — note that the
// query string and fragment are NOT stripped, as the sample output shows.
// NOTE: use the full "<?php" open tag — the short "<?" tag only works
// when short_open_tag is enabled in php.ini and is not portable.
$test = basename("http://localhost/index.php?name=tank&sex=1#top");
echo $test;
?>
结果如下
    index.php?name=tank&sex=1#top




def _parse_sheet(self, sheet, shapes):
    """Parse the contents of one worksheet into a DocumentBlockObject.

    :params sheet: openpyxl-style Worksheet object (has .title) — TODO confirm library
    :params shapes: list of shape dicts extracted for this workbook
    :returns: a populated DocumentBlockObject for this sheet
    """
    # Mirror of the sheet through the pywin32 COM Excel application;
    # presumably needed for formatting info openpyxl does not expose — verify.
    wb_with_pywin32 = self._excel_app
    ws_with_pywin32 = wb_with_pywin32.Sheets[sheet.title]
    sheet_name = sheet.title
    # Reset per-sheet caches (background colors, borders, merged cells)
    # so values from the previously parsed sheet do not leak in.
    self._cell_bg_colors = {}
    self._cell_border = {}
    self._merged_cells_cache = {}
    logging.debug(f"parse_excel start sheet_name: {sheet_name}")
    # Ranges occupied by tables, plus the cells lying outside any table.
    table_range_list, out_table_cells = self._get_start_end_of_table(sheet)
    table_range_list = sorted(table_range_list)  # sort the table range list
    logging.debug(f"parse_excel sheet_name: {sheet_name} get_table_range_list {table_range_list}")
    # One DocumentBlockObject aggregates everything found on this sheet.
    block = DocumentBlockObject()
    block.file_name = os.path.basename(self._file_path)
    shapes = handle_shape_in_merged_cell(sheet, shapes)
    # Tables inside the detected ranges.
    tables = self._parse_table(sheet, block, table_range_list, ws_with_pywin32, shapes)
    logging.info(f"parse_excel sheet_name: {sheet_name} _parse_table {len(tables)}")
    # Pictures anchored on this sheet.
    pictures = self._parse_picture(sheet_name, block, shapes)
    logging.info(f"parse_excel sheet_name: {sheet_name} _parse_picture {len(pictures)}")
    # Drawing/graphic objects on this sheet.
    graphics = self._parse_graphic(sheet_name, block, shapes)
    logging.info(f"parse_excel sheet_name: {sheet_name} _parse_graphic {len(graphics)}")
    # Free-standing text cells that are not part of any table.
    texts = self._parse_text(sheet, block, table_range_list, out_table_cells, shapes, ws_with_pywin32)
    logging.info(f"parse_excel sheet_name: {sheet_name} _parse_text {len(texts)}")
    block.name = sheet_name
    block.add_table(tables)
    block.add_text(texts)
    block.add_graphic(graphics)
    block.add_picture(pictures)
    self._assign_ref(block)
    logging.debug(f"parse_excel end sheet_name: {sheet_name}")
    # Merging of graphics/pictures spanning virtual (merged) cells is disabled:
    # self._merge_graphic_pic_cross_multi_cell(block, ws_with_pywin32, sheet)
    # Merge graphics that share one cell.
    self._merge_graphics(block, ws_with_pywin32)
    # Merge pictures that share one cell.
    self._merge_pictures(block, ws_with_pywin32)
    # Merge graphics together with pictures sharing one cell.
    self._merge_graphics_and_pictures(block, ws_with_pywin32)
    return block
最新发布
10-29
import os
import json
import requests
from flask import Flask, jsonify, request, render_template
from datetime import datetime
import logging
import glob
import time
import argparse
import re

app = Flask(__name__)

# Logging configuration.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Base paths for the .track metadata and downloaded JSON files.
BASE_PATH = r"C:\Users\l30078648\Desktop\250730"
DOWNLOAD_PATH = os.path.join(BASE_PATH, "downloads")

# S3 OBS download API configuration.
S3_OBS_API_URL = "https://adsci.api.ias.huawei.com/icollect/get_s3_obs_file_download_url/"
S3_OBS_HEADERS = {
    "Accept-Language": "zh-CN",
    "Content-Type": "application/json"
}
USER_ID = "h00512356"

# Chip model -> file-name prefix used inside the profiling archives.
CHIP_FILENAME_MAP = {
    "BS9SX1A": "BS9SX1AA",  # this chip's files carry an extra trailing 'A'
    "Ascend610Lite": "Ascend610Lite"
}

# Per chip / model-group source paths (.track metadata).
TRACK_PATHS = {
    "Ascend610Lite": {
        "rl_nn": os.path.join(BASE_PATH, ".track", "Ascend610Lite", "rl_nn"),
        "rsc_nn": os.path.join(BASE_PATH, ".track", "Ascend610Lite", "rsc_nn"),
        "prediction_nn": os.path.join(BASE_PATH, ".track", "Ascend610Lite", "prediction_nn")
    },
    "BS9SX1A": {
        "rl_nn": os.path.join(BASE_PATH, ".track", "BS9SX1A", "rl_nn"),
        "rsc_nn": os.path.join(BASE_PATH, ".track", "BS9SX1A", "rsc_nn"),
        "prediction_nn": os.path.join(BASE_PATH, ".track", "BS9SX1A", "prediction_nn")
    }
}

# Per chip / model-group destination paths for downloaded JSON files.
JSON_PATHS = {
    "Ascend610Lite": {
        "rl_nn": os.path.join(DOWNLOAD_PATH, "Ascend610Lite", "rl_nn"),
        "rsc_nn": os.path.join(DOWNLOAD_PATH, "Ascend610Lite", "rsc_nn"),
        "prediction_nn": os.path.join(DOWNLOAD_PATH, "Ascend610Lite", "prediction_nn")
    },
    "BS9SX1A": {
        "rl_nn": os.path.join(DOWNLOAD_PATH, "BS9SX1A", "rl_nn"),
        "rsc_nn": os.path.join(DOWNLOAD_PATH, "BS9SX1A", "rsc_nn"),
        "prediction_nn": os.path.join(DOWNLOAD_PATH, "BS9SX1A", "prediction_nn")
    }
}


def send_request(method, url, headers=None, data=None, timeout=30, max_try_times=1, **kwargs):
    """Send an HTTP request with a simple retry loop.

    :param method: HTTP verb ("GET", "POST", ...)
    :param data: dict (JSON-encoded) or str (utf-8 encoded) request body
    :param max_try_times: number of attempts before giving up
    :return: the last requests.Response obtained, or None if nothing was sent
    """
    try_times = 0
    response = None
    if data is None:
        data = {}
    # When uploading files the body must stay a plain dict for requests;
    # otherwise serialize dicts to JSON and encode raw strings to bytes.
    if 'files' in kwargs:
        request_body = data
    else:
        request_body = json.dumps(data) if not isinstance(data, str) else data.encode("utf-8")
    while try_times < max_try_times:
        try:
            # SECURITY NOTE: verify=False disables TLS certificate checks;
            # kept as-is for the internal endpoint, but should be revisited.
            response = requests.request(
                method=method, url=url, headers=headers, data=request_body,
                timeout=timeout, verify=False, **kwargs
            )
            if response.status_code in (200, 201):
                break
        except requests.exceptions.Timeout:
            logger.warning(f"请求超时: url={url}")
        except Exception as e:
            logger.exception(f"请求失败: url={url}, 错误: {str(e)}")
            if response:
                logger.error(f"响应内容: {response.text}")
        try_times += 1
        logger.warning(f"请求失败, url={url}, 重试中 ({try_times}/{max_try_times})")
        if max_try_times > 1:
            time.sleep(5)
    return response


def extract_build_ids_from_track(chip, group):
    """Extract build_id + full model name from the .track folder and build OBS links.

    :return: (build_data dict keyed by model name, list of models missing a build_id)
    """
    build_data = {}
    build_missing = []
    if chip not in TRACK_PATHS or group not in TRACK_PATHS[chip]:
        logger.error(f"无效路径: {chip}/{group}")
        return build_data, build_missing
    group_path = TRACK_PATHS[chip][group]
    if not os.path.exists(group_path):
        logger.error(f"原始路径不存在: {group_path}")
        return build_data, build_missing
    # Scan every JSON file in the track folder.
    json_files = glob.glob(os.path.join(group_path, "*.json"))
    logger.info(f"在路径 {group_path} 中找到 {len(json_files)} 个JSON文件")
    for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Full model name (with suffix) comes from the file name itself.
            model_full_name = os.path.splitext(os.path.basename(json_file))[0]
            build_id = data.get('build_id')
            # Normalize integer IDs to strings.
            if isinstance(build_id, int):
                build_id = str(build_id)
            # Build the browsable OBS link when a build_id exists.
            obs_url = None
            if build_id:
                obs_url = f"https://adsci.ias.huawei.com/icollect/show_obs_objects/?s3_obs_url=hpct/nn_model_prebuild/{build_id}_PREBUILD_OUTPUTS"
            build_data[model_full_name] = {
                "build_id": build_id,
                "model_full_name": model_full_name,
                "obs_url": obs_url
            }
            if build_id:
                logger.debug(f"提取成功: {model_full_name} -> build_id: {build_id}, OBS链接: {obs_url}")
            else:
                logger.warning(f"文件 {json_file} 中没有找到build_id字段")
                build_missing.append(model_full_name)
        except Exception as e:
            logger.error(f"解析原始文件 {json_file} 时出错: {str(e)}")
            build_missing.append(os.path.basename(json_file))
            continue
    logger.info(f"成功提取 {len(build_data) - len(build_missing)} 个build_id")
    return build_data, build_missing


def get_chip_filename(chip):
    """Return the file-name prefix for a chip (falls back to the chip name)."""
    return CHIP_FILENAME_MAP.get(chip, chip)


def download_file_with_build_id(model_full_name, chip, group, build_id):
    """Download one profiling JSON file identified by its build_id.

    :return: (local save path or None, parsed JSON dict or None)
    """
    if not build_id:
        logger.warning(f"跳过缺少build_id的文件: {model_full_name}")
        return None, None
    chip_filename = get_chip_filename(chip)
    # Build the S3 OBS object path of the profiling summary.
    s3_obs_url = f"hpct/nn_model_prebuild/{build_id}_PREBUILD_OUTPUTS/profiling_summary/"
    file_name = f"{chip_filename}_prof_record.json"
    download_url, error = get_download_url(build_id, s3_obs_url, file_name)
    if not download_url:
        logger.error(f"获取下载URL失败: {error}")
        return None, None
    save_path, file_data = download_and_save(download_url, model_full_name, chip, group, file_name)
    return save_path, file_data


def get_download_url(build_id, s3_obs_url, file_name):
    """Ask the S3 OBS API for a presigned download URL.

    :return: (fixed download URL or None, error message or None)
    """
    payload = {
        "user_id": USER_ID,
        "s3_obs_url": s3_obs_url,
        "file_name": file_name
    }
    logger.info(f"请求下载URL: {payload}")
    response = send_request("POST", S3_OBS_API_URL, headers=S3_OBS_HEADERS, data=payload)
    if not response or response.status_code != 200:
        logger.error(f"获取下载URL失败: 状态码={getattr(response, 'status_code', '无响应')}")
        return None, "下载URL获取失败"
    try:
        result = response.json()
        download_url = result.get("download_url")
        if not download_url:
            logger.error(f"响应中缺少download_url字段: {result}")
            return None, "响应中缺少download_url字段"
        # The API sometimes returns a wrong port; force port 10001.
        fixed_url = fix_download_url_port(download_url)
        return fixed_url, None
    except Exception as e:
        logger.error(f"解析响应失败: {str(e)}")
        return None, f"解析响应失败: {str(e)}"


def fix_download_url_port(url):
    """Rewrite the port of any *.huawei.com host in the URL to 10001."""
    pattern = r"(\w+\.huawei\.com):\d+"
    replacement = r"\1:10001"
    fixed_url = re.sub(pattern, replacement, url)
    if fixed_url != url:
        logger.info(f"URL端口已修正: {url} -> {fixed_url}")
    return fixed_url


def download_and_save(download_url, model_full_name, chip, group, file_name):
    """Download a file and save it under JSON_PATHS[chip][group].

    :return: (save path or None, parsed JSON dict or None)
    """
    logger.info(f"下载文件: {download_url}")
    file_response = send_request("GET", download_url, timeout=60)
    if not file_response or file_response.status_code != 200:
        logger.error(f"文件下载失败: 状态码={getattr(file_response, 'status_code', '无响应')}")
        return None, None
    save_dir = JSON_PATHS[chip][group]
    os.makedirs(save_dir, exist_ok=True)
    save_file_name = f"{model_full_name}_{file_name}"
    save_path = os.path.join(save_dir, save_file_name)
    try:
        with open(save_path, 'wb') as f:
            f.write(file_response.content)
        logger.info(f"文件保存成功: {save_path}")
        # Best-effort parse of the saved file; a parse failure still keeps the file.
        try:
            with open(save_path, 'r', encoding='utf-8') as f:
                file_data = json.load(f)
            return save_path, file_data
        except Exception as e:
            logger.error(f"解析下载的JSON文件失败: {save_path}, 错误: {str(e)}")
            return save_path, None
    except Exception as e:
        logger.error(f"保存文件失败: {save_path}, 错误: {str(e)}")
        return None, None


def download_files_with_build_ids(build_id_map, chip, group):
    """Download every file referenced in build_id_map.

    :return: (per-file status list, list of error records)
    """
    downloaded_files = []
    download_errors = []
    for model_full_name, build_info in build_id_map.items():
        build_id = build_info["build_id"]
        if not build_id:
            logger.warning(f"跳过缺少build_id的文件: {model_full_name}")
            downloaded_files.append({
                "model": model_full_name, "status": "skipped", "reason": "missing_build_id"
            })
            continue
        logger.info(f"下载文件: {model_full_name} (build_id={build_id})")
        save_path, file_data = download_file_with_build_id(model_full_name, chip, group, build_id)
        if save_path:
            # File landed on disk; "partial_success" means it could not be parsed.
            if file_data:
                status = "success"
            else:
                status = "partial_success"
                download_errors.append({"model": model_full_name, "error": "文件解析失败"})
            downloaded_files.append({"model": model_full_name, "status": status})
        else:
            logger.warning(f"下载失败: {model_full_name}")
            download_errors.append({"model": model_full_name, "error": "下载失败"})
            downloaded_files.append({"model": model_full_name, "status": "failed"})
    return downloaded_files, download_errors


def extract_om_and_mean_ddr(data):
    """Extract the first *.om metric, the mean_ddr value and derive bandwidth.

    :param data: parsed JSON dict
    :return: tuple (om_value, mean_ddr, bandwidth) — any may be None
    """
    om_value = None
    mean_ddr = None
    bandwidth = None
    # First key ending in ".om" carries the latency value.
    om_keys = [key for key in data.keys() if key.endswith('.om')]
    if om_keys:
        om_value = data[om_keys[0]]
    # mean_ddr may be top-level or nested one level deep.
    if 'mean_ddr' in data:
        mean_ddr = data['mean_ddr']
    else:
        for value in data.values():
            if isinstance(value, dict) and 'mean_ddr' in value:
                mean_ddr = value['mean_ddr']
                break
    # Bandwidth = latency * mean_ddr * 10 / 1000, rounded to 4 decimals.
    if om_value is not None and mean_ddr is not None:
        try:
            bandwidth = (float(om_value) * float(mean_ddr) * 10) / 1000
            bandwidth = round(bandwidth, 4)
        except (TypeError, ValueError):
            logger.error(f"带宽计算失败: om_value={om_value}, mean_ddr={mean_ddr}")
    return om_value, mean_ddr, bandwidth


def parse_json_file(file_path, file_data=None):
    """Parse one downloaded profiling JSON file into a metrics record.

    :param file_data: pre-parsed JSON dict; read from file_path when None
    :return: metrics dict, or None when the file cannot be read
    """
    if file_data is None:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                file_data = json.load(f)
        except Exception as e:
            logger.error(f"读取文件出错 {file_path}: {e}")
            return None
    file_name = os.path.basename(file_path)
    # File name format: "<model>_<chip>_prof_record.json"; drop the last two parts.
    # NOTE(review): this keeps the chip prefix attached to the model name
    # ("<model>_<chip>"), which may not match the .track keys used in
    # get_performance_data's build_id_map lookup — confirm against real data.
    model_full_name = file_name.rsplit('_', 2)[0]
    om_value, mean_ddr, bandwidth = extract_om_and_mean_ddr(file_data)
    file_size = os.path.getsize(file_path)
    last_modified = datetime.fromtimestamp(os.path.getmtime(file_path)).isoformat()
    return {
        "model_full_name": model_full_name,
        "file_name": file_name,
        "file_path": file_path,
        "file_size": file_size,
        "last_modified": last_modified,
        "om_value": om_value,
        "mean_ddr": mean_ddr,
        "bandwidth": bandwidth
    }


def print_all_om_and_mean_ddr():
    """Print all .om, mean_ddr and bandwidth values for every device/group."""
    print("\n" + "=" * 100)
    print("JSON性能数据提取报告 - 时延(ms)、DDR和带宽(Mbps)")
    print("=" * 100)
    devices = [d for d in os.listdir(DOWNLOAD_PATH) if os.path.isdir(os.path.join(DOWNLOAD_PATH, d))]
    if not devices:
        print(f"在 {DOWNLOAD_PATH} 中未找到设备目录")
        return
    total_files = 0
    valid_files = 0
    for device in devices:
        device_path = os.path.join(DOWNLOAD_PATH, device)
        groups = [g for g in os.listdir(device_path) if os.path.isdir(os.path.join(device_path, g))]
        if not groups:
            print(f"\n设备 {device} 中没有模型组")
            continue
        print(f"\n设备: {device} ({len(groups)}个模型组)")
        print("-" * 90)
        for group in groups:
            group_path = os.path.join(device_path, group)
            json_files = glob.glob(os.path.join(group_path, "*.json"))
            if not json_files:
                print(f" ├── 组 {group}: 没有JSON文件")
                continue
            print(f" ├── 组 {group}: {len(json_files)}个模型")
            for json_file in json_files:
                metrics = parse_json_file(json_file)
                # FIX: parse_json_file returns None on read errors; skip those
                # instead of crashing on metrics['om_value'].
                if metrics is None:
                    continue
                om_str = f"{metrics['om_value']:.4f}" if metrics['om_value'] is not None else "N/A"
                ddr_str = f"{metrics['mean_ddr']:.4f}" if metrics['mean_ddr'] is not None else "N/A"
                bw_str = f"{metrics['bandwidth']:.4f}" if metrics['bandwidth'] is not None else "N/A"
                print(f" │ ├── {metrics['model_full_name']} | 时延: {om_str:<8} | DDR: {ddr_str:<8} | 带宽: {bw_str:<8}")
                total_files += 1
                if metrics['om_value'] is not None and metrics['mean_ddr'] is not None:
                    valid_files += 1
    # FIX: guard against division by zero when no files were processed.
    valid_pct = (valid_files / total_files * 100) if total_files else 0.0
    print("\n" + "=" * 100)
    print(f"扫描完成: 共处理 {total_files} 个文件, 有效数据 {valid_files} 个 ({valid_pct:.1f}%)")
    print("=" * 100 + "\n")


def get_performance_data(chip, group, refresh_data=False):
    """Collect performance data; refresh_data controls re-downloading files."""
    performance_data = {
        "status": "success",
        "models": [],
        "timestamp": datetime.now().isoformat(),
        "chip_type": chip,
        "group": group,
        "json_path": JSON_PATHS[chip].get(group, "") if chip in JSON_PATHS else "",
        "track_path": TRACK_PATHS[chip].get(group, "") if chip in TRACK_PATHS else "",
        "file_count": 0,
        "build_id_count": 0,
        "refresh_performed": refresh_data,
        "download_errors": [],
        "downloaded_files": [],
        "build_missing": []
    }
    # 1. Pull build_ids and model names from the .track folder.
    build_id_map, build_missing = extract_build_ids_from_track(chip, group)
    performance_data["build_id_count"] = len(build_id_map) - len(build_missing)
    performance_data["build_missing"] = build_missing
    # 2. Validate the requested chip/group.
    if chip not in JSON_PATHS or group not in JSON_PATHS[chip]:
        performance_data["status"] = "error"
        performance_data["error"] = "无效芯片或模型组"
        return performance_data
    group_path = JSON_PATHS[chip][group]
    if not os.path.exists(group_path):
        performance_data["status"] = "error"
        performance_data["error"] = "JSON路径不存在"
        return performance_data
    # NOTE(review): unreachable when the path is missing (early return above);
    # kept for parity with the original flow.
    os.makedirs(group_path, exist_ok=True)
    # 3. Optionally download fresh copies of every file.
    if refresh_data:
        logger.info(f"开始刷新下载,共有{len(build_id_map)}个文件需要处理")
        downloaded_files, download_errors = download_files_with_build_ids(build_id_map, chip, group)
        performance_data["downloaded_files"] = downloaded_files
        performance_data["download_errors"] = download_errors
        performance_data["refresh_status"] = "executed"
    else:
        performance_data["refresh_status"] = "skipped"
        logger.info("跳过文件下载,仅使用本地现有数据")
    # 4. Parse every local JSON file into a frontend-friendly record.
    json_files = glob.glob(os.path.join(group_path, "*.json"))
    performance_data["file_count"] = len(json_files)
    for json_file in json_files:
        model_data_parsed = parse_json_file(json_file)
        if model_data_parsed:
            model_full_name = model_data_parsed["model_full_name"]
            build_info = build_id_map.get(model_full_name, {})
            build_id = build_info.get("build_id", "NA")
            obs_url = build_info.get("obs_url", None)
            model_for_frontend = {
                "model_full_name": model_full_name,
                "prebuild_id": build_id,
                "obs_url": obs_url,
                "latency": model_data_parsed["om_value"],
                "mean_ddr": model_data_parsed["mean_ddr"],
                "bandwidth": model_data_parsed["bandwidth"],
                "timestamp": model_data_parsed["last_modified"]
            }
            performance_data["models"].append(model_for_frontend)
    return performance_data


@app.route('/api/refresh', methods=['POST'])
def refresh_data_api():
    """Manually trigger a refresh (download) for one chip/group."""
    start_time = time.time()
    try:
        data = request.json
        chip = data.get('chip', 'Ascend610Lite')
        group = data.get('group', 'rl_nn')
        logger.info(f"手动刷新请求 - 芯片: {chip}, 组: {group}")
        build_id_map, build_missing = extract_build_ids_from_track(chip, group)
        downloaded_files, download_errors = download_files_with_build_ids(build_id_map, chip, group)
        response = {
            "status": "success",
            "chip": chip,
            "group": group,
            "downloaded_files": downloaded_files,
            "download_errors": download_errors,
            "build_missing": build_missing,
            "process_time": round(time.time() - start_time, 4)
        }
        return jsonify(response)
    except Exception as e:
        logger.exception("刷新数据时出错")
        return jsonify({
            "status": "error",
            "error": "服务器内部错误",
            "details": str(e),
            "process_time": round(time.time() - start_time, 4)
        }), 500


@app.route('/api/performance', methods=['GET'])
def performance_api():
    """Performance data API; ?refresh=true forces a re-download."""
    start_time = time.time()
    try:
        device = request.args.get('device', 'Ascend610Lite')
        group = request.args.get('type', 'rl_nn')
        refresh = request.args.get('refresh', 'false').lower() == 'true'
        logger.info(f"性能API请求 - 设备: {device}, 组: {group}, 刷新: {refresh}")
        performance_data = get_performance_data(device, group, refresh_data=refresh)
        process_time = time.time() - start_time
        response = jsonify({
            **performance_data,
            "process_time": round(process_time, 4)
        })
        response.headers['Cache-Control'] = 'public, max-age=300'
        return response
    except Exception as e:
        logger.exception("处理请求时出错")
        return jsonify({
            "status": "error",
            "error": "服务器内部错误",
            "details": str(e),
            "process_time": round(time.time() - start_time, 4)
        }), 500


@app.route('/')
def home():
    """Index page."""
    return render_template('index.html')


if __name__ == '__main__':
    # Make sure the download tree exists before serving.
    os.makedirs(DOWNLOAD_PATH, exist_ok=True)
    for chip, groups in JSON_PATHS.items():
        for group, path in groups.items():
            os.makedirs(path, exist_ok=True)
    parser = argparse.ArgumentParser(description='AI芯片性能监控服务')
    parser.add_argument('--host', type=str, default='127.0.0.1', help='服务监听地址')
    parser.add_argument('--port', type=int, default=8080, help='服务监听端口')
    parser.add_argument('--print', action='store_true', help='启动时打印所有.om和mean_ddr值到终端')
    args = parser.parse_args()
    if args.print:
        print_all_om_and_mean_ddr()
    # FIX: honor the parsed --host/--port arguments (previously hard-coded).
    app.run(host=args.host, port=args.port, debug=True)
08-15
<think>我们被要求详细解释PHPURL解析和生成机制。根据引用[1],我们可以使用`parse_url()`函数来解析URL,该函数可以返回URL的各个组成部分。同时,我们也可以使用`http_build_query()`来生成查询字符串,并结合其他函数来构建完整的URL。 ### 1. URL解析PHP中,解析URL主要使用`parse_url()`函数,该函数将URL分解成多个组成部分,并返回一个关联数组(或指定部分)。组成部分包括: - scheme (如http, https) - host - port - user - pass - path - query (在问号?之后的部分) - fragment (在散列符号#之后的部分) #### 示例:解析URL ```php $url = 'https://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor'; $components = parse_url($url); print_r($components); ``` 输出: ``` Array ( [scheme] => https [host] => hostname [port] => 9090 [user] => username [pass] => password [path] => /path [query] => arg1=value1&arg2=value2 [fragment] => anchor ) ``` 如果只需要某一部分,可以传递第二个参数指定,例如: ```php $scheme = parse_url($url, PHP_URL_SCHEME); // 返回 'https' ``` #### 解析查询字符串 `parse_url()`返回的查询部分是一个字符串(如`arg1=value1&arg2=value2`),我们可以使用`parse_str()`将其解析为关联数组: ```php $queryString = $components['query']; parse_str($queryString, $queryParams); print_r($queryParams); ``` 输出: ``` Array ( [arg1] => value1 [arg2] => value2 ) ``` ### 2. URL生成 生成URL通常涉及构建各个部分,然后组合成完整的URL。我们可以使用以下方法: - 对于查询字符串,使用`http_build_query()`将关联数组转换为查询字符串。 - 使用`sprintf()`或直接拼接字符串来构建URL。 #### 示例:生成URL ```php $baseUrl = 'https://hostname/path'; $queryParams = [ 'arg1' => 'value1', 'arg2' => 'value2' ]; $fragment = 'anchor'; // 生成查询字符串 $queryString = http_build_query($queryParams); // 输出:arg1=value1&arg2=value2 // 构建完整URL $url = $baseUrl . '?' . $queryString . '#' . $fragment; echo $url; // 输出:https://hostname/path?arg1=value1&arg2=value2#anchor ``` #### 使用`http_build_url`(需要安装PECL扩展) 注意:PHP标准库中没有`http_build_url`函数,它是pecl_http扩展的一部分。如果需要,可以使用上述字符串拼接方法。 ### 3. 
处理相对URL 有时我们需要将相对URL转换为绝对URL。可以使用`parse_url()`解析基础URL,然后根据相对URL的组成部分进行合并。 PHP标准库中没有内置的函数来直接解析相对URL,但我们可以使用以下方法: ```php function resolve_url($base, $relative) { // 如果相对URL是绝对URL(包含scheme),则直接返回 if (parse_url($relative, PHP_URL_SCHEME) != '') { return $relative; } $baseParts = parse_url($base); $relativeParts = parse_url($relative); // 合并路径 if (isset($relativeParts['path'])) { // 如果相对路径以/开头,则直接使用 if ($relativeParts['path'][0] == '/') { $result = $relativeParts; } else { // 否则,将相对路径附加到基础路径的后面 $path = isset($baseParts['path']) ? $baseParts['path'] : ''; // 如果基础路径不是以/结尾,则去除最后一个路径部分 if (substr($path, -1) != '/') { $path = dirname($path) . '/'; } $result['path'] = $path . $relativeParts['path']; } } // 合并其他部分(如query, fragment) $result = array_merge($baseParts, $relativeParts, $result ?? []); // 重新构建URL $url = ''; if (isset($result['scheme'])) { $url .= $result['scheme'] . '://'; } if (isset($result['user'])) { $url .= $result['user']; if (isset($result['pass'])) { $url .= ':' . $result['pass']; } $url .= '@'; } if (isset($result['host'])) { $url .= $result['host']; } if (isset($result['port'])) { $url .= ':' . $result['port']; } if (isset($result['path'])) { $url .= $result['path']; } if (isset($result['query'])) { $url .= '?' . $result['query']; } if (isset($result['fragment'])) { $url .= '#' . $result['fragment']; } return $url; } // 示例 $base = 'https://example.com/dir1/dir2/file.html'; $relative = '../file2.html'; echo resolve_url($base, $relative); // 输出:https://example.com/dir1/file2.html ``` ### 4. URL编码 在构建URL时,需要对参数进行适当的编码。可以使用`urlencode()`或`rawurlencode()`函数,两者的区别在于对空格的处理(`urlencode`将空格编码为`+`,而`rawurlencode`编码为`%20`)。在查询字符串中,通常使用`urlencode`,而在路径部分,推荐使用`rawurlencode`。 ### 总结 - 解析URL使用`parse_url()`和`parse_str()`。 - 生成URL使用`http_build_query()`构建查询字符串,然后拼接。 - 相对URL转绝对URL:需要自行处理,根据基础URL和相对URL的组成部分合并。 引用[1]中提到的`parse_url()`函数是PHP中处理URL的核心函数,它提供了强大的解析能力[^1]。引用[2]则展示了在WordPress中如何处理URL路由,这涉及到对URL的进一步处理(如去除查询参数、解码等)[^2]。 §§相关问题§§ 1. 如何在PHP中处理URL编码和解码? 2. 
如何使用PHP解析和生成URL中的查询参数? 3. 在PHP中如何安全地处理用户提供的URL?</think>### PHP URL解析和生成机制详解 在PHP中,URL处理主要涉及解析和生成两个核心功能,使用内置函数可以高效完成这些操作。以下是详细机制解析: --- #### 一、URL解析机制 PHP使用`parse_url()`函数解析URL,该函数将URL分解为多个组成部分并返回关联数组: ```php $url = "https://user:pass@example.com:8080/path/to/file.php?arg1=value1&arg2=value2#anchor"; $components = parse_url($url); print_r($components); /* 输出: Array ( [scheme] => https [host] => example.com [port] => 8080 [user] => user [pass] => pass [path] => /path/to/file.php [query] => arg1=value1&arg2=value2 [fragment] => anchor ) */ ``` **参数解析说明**: 1. **获取特定部分**:通过第二个参数指定组件类型 ```php $scheme = parse_url($url, PHP_URL_SCHEME); // "https" $query = parse_url($url, PHP_URL_QUERY); // "arg1=value1&arg2=value2" ``` 2. **查询参数解析**:使用`parse_str()`将查询字符串转为数组 ```php parse_str($components['query'], $params); print_r($params); /* 输出: Array ( [arg1] => value1 [arg2] => value2 ) */ ``` 3. **路径解析**:使用`pathinfo()`获取路径详情 ```php $pathInfo = pathinfo($components['path']); echo $pathInfo['dirname']; // "/path/to" echo $pathInfo['basename']; // "file.php" echo $pathInfo['extension']; // "php" ``` --- #### 二、URL生成机制 URL生成主要通过字符串拼接和专用函数实现: 1. **构建查询字符串**:使用`http_build_query()` ```php $params = [ 'page' => 2, 'sort' => 'desc' ]; $queryString = http_build_query($params); // "page=2&sort=desc" ``` 2. **完整URL生成**:组合各组件 ```php $url = sprintf( "%s://%s%s?%s#%s", 'https', 'example.com', '/api/users', $queryString, 'section1' ); // 输出:https://example.com/api/users?page=2&sort=desc#section1 ``` 3. **URL编码**:确保特殊字符安全传输 ```php $param = "data with spaces"; $safeParam = urlencode($param); // "data+with+spaces" ``` --- #### 三、高级应用场景 1. **路由解析**(如WordPress): - 获取请求URI:`$_SERVER['REQUEST_URI']` - 标准化处理:去除查询参数、URL解码、去除首尾斜杠 - 匹配路由规则(如`/post/{slug}`)[^2] 2. **相对URL转绝对URL**: ```php function makeAbsolute($base, $relative) { $baseParts = parse_url($base); if (strpos($relative, '://') !== false) return $relative; if ($relative[0] == '/') { return $baseParts['scheme'] . '://' . $baseParts['host'] . 
$relative; } $path = dirname($baseParts['path']) . '/' . $relative; return $baseParts['scheme'] . '://' . $baseParts['host'] . $path; } ``` 3. **URL验证**: ```php if (filter_var($url, FILTER_VALIDATE_URL)) { // 有效URL } ``` --- #### 四、安全注意事项 1. **输入过滤**:始终验证用户提供的URL ```php $cleanUrl = filter_input(INPUT_GET, 'url', FILTER_SANITIZE_URL); ``` 2. **避免拼接漏洞**:使用`http_build_query()`而非手动拼接 3. **处理编码问题**:统一使用UTF-8编码防止乱码 --- ### 总结 PHP通过`parse_url()`和`http_build_query()`等函数提供完整的URL处理能力: - **解析**:分解URL为结构化数据 - **生成**:安全构建查询字符串和完整URL - **路由**:框架通常扩展基础功能实现高级路由[^2] 实际开发中应结合`filter_var()`验证和`urlencode()`编码确保安全性[^1][^5]。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值