D_Double's Journey 的博客

CSDN 博客分类概览
本文概述了 CSDN 博客中涵盖的各种信息技术领域的分类与子分类,包括前端、后端、移动开发等,为读者提供了丰富的技术知识点。
%% Data cleaning and matching system
% Author: MATLAB
% Date: 2023-10-20
% Description: integrates maritime track data, port information and vessel data.
%
% Pipeline: read three Excel workbooks -> clean the track data -> attach port
% and vessel attributes -> report and drop unmatched rows -> derive speed
% metrics and journey ids -> save the combined table -> print a summary.

%% Step 1: read the raw data
totalTimer = tic;   % separate handle so the total time is not reset by the per-step timers
stepTimer = tic;
disp('正在读取数据...');

trackFile = '海运轨迹数据.xlsx';
portFile = '港口数据.xlsx';
vesselFile = '船舶信息数据.xlsx';

% Expected column layout of each workbook.
trackNames = {'timestamp', 'vessel_id', 'origin_port', 'destination_port', ...
              'latitude', 'longitude', 'speed', 'status', 'anomaly_type', 'fuel_consumption'};
trackTypes = {'double', 'char', 'char', 'char', 'double', 'double', 'double', 'char', 'char', 'double'};
portNames = {'port_name', 'port_code', 'country', 'port_latitude', 'port_longitude'};
vesselNames = {'vessel_id', 'vessel_type', 'build_year', 'dwt', 'max_speed'};

% Assigning Properties.VariableNames with a name count that differs from the
% table's column count raises the "VariableNames" error quoted at the end of
% this listing, so each column count is checked first and the offending file
% is named in the message.
columnError = '文件 %s 包含 %d 列,但需要 %d 列,请检查文件格式';

% Track data: the column count is checked on the import options because
% VariableTypes must also match the detected number of columns.
opts = detectImportOptions(trackFile);
if numel(opts.VariableNames) ~= numel(trackNames)
    error('maritime:columnCount', columnError, ...
        trackFile, numel(opts.VariableNames), numel(trackNames));
end
opts.VariableTypes = trackTypes;
trackData = readtable(trackFile, opts);
trackData.Properties.VariableNames = trackNames;

% Port data
portData = readtable(portFile);
if width(portData) ~= numel(portNames)
    error('maritime:columnCount', columnError, ...
        portFile, width(portData), numel(portNames));
end
portData.Properties.VariableNames = portNames;

% Vessel information
vesselData = readtable(vesselFile);
if width(vesselData) ~= numel(vesselNames)
    error('maritime:columnCount', columnError, ...
        vesselFile, width(vesselData), numel(vesselNames));
end
vesselData.Properties.VariableNames = vesselNames;

disp(['数据读取完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);
fprintf('轨迹数据记录数: %d\n', height(trackData));
fprintf('港口数据记录数: %d\n', height(portData));
fprintf('船舶信息记录数: %d\n', height(vesselData));

%% Step 2: data cleaning
stepTimer = tic;
disp('正在进行数据清洗...');

% Convert Excel serial dates to MATLAB datetime values.
trackData.timestamp = datetime(trackData.timestamp, 'ConvertFrom', 'excel');

% Keep rows whose speed is within 0-50 knots and whose coordinates are valid.
% Rows with NaN in any of these fields fail the comparisons and are dropped.
validSpeed = trackData.speed >= 0 & trackData.speed <= 50;
validLatitude = trackData.latitude >= -90 & trackData.latitude <= 90;
validLongitude = trackData.longitude >= -180 & trackData.longitude <= 180;
validCoords = validLatitude & validLongitude;
trackData = trackData(validSpeed & validCoords, :);

% Fill missing anomaly types.
missingAnomaly = cellfun(@isempty, trackData.anomaly_type);
trackData.anomaly_type(missingAnomaly) = {'NONE'};

% Normalize port names to upper case on both sides of the lookup.
portData.port_name = upper(portData.port_name);
trackData.origin_port = upper(trackData.origin_port);
trackData.destination_port = upper(trackData.destination_port);

% Normalize vessel ids by removing all whitespace.
trackData.vessel_id = regexprep(trackData.vessel_id, '\s+', '');
vesselData.vessel_id = regexprep(vesselData.vessel_id, '\s+', '');

disp(['数据清洗完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);
fprintf('清洗后轨迹数据记录数: %d\n', height(trackData));

%% Step 3: data matching
stepTimer = tic;
disp('正在进行数据匹配...');

% Lookup tables from port name to port code / country.
portCodeMap = containers.Map(portData.port_name, portData.port_code);
portCountryMap = containers.Map(portData.port_name, portData.country);

% Attach port information to the track data; unmatched cells stay empty.
trackData.origin_port_code = cell(height(trackData), 1);
trackData.destination_port_code = cell(height(trackData), 1);
trackData.origin_country = cell(height(trackData), 1);
trackData.destination_country = cell(height(trackData), 1);

for i = 1:height(trackData)
    originPort = trackData.origin_port{i};
    if isKey(portCodeMap, originPort)
        trackData.origin_port_code{i} = portCodeMap(originPort);
        trackData.origin_country{i} = portCountryMap(originPort);
    end

    destPort = trackData.destination_port{i};
    if isKey(portCodeMap, destPort)
        trackData.destination_port_code{i} = portCodeMap(destPort);
        trackData.destination_country{i} = portCountryMap(destPort);
    end
end

% Vessel matching: map vessel id -> row index in vesselData.
vesselIDMap = containers.Map(vesselData.vessel_id, num2cell(1:height(vesselData)));
vesselTypes = cell(height(trackData), 1);
buildYears = nan(height(trackData), 1);
dwtValues = nan(height(trackData), 1);
maxSpeeds = nan(height(trackData), 1);
% Explicit match flag: a matched vessel whose build_year is NaN in the source
% must not be reported (and dropped) as "unmatched".
vesselMatched = false(height(trackData), 1);

for i = 1:height(trackData)
    vesselID = trackData.vessel_id{i};
    if isKey(vesselIDMap, vesselID)
        idx = vesselIDMap(vesselID);
        vesselTypes{i} = vesselData.vessel_type{idx};
        buildYears(i) = vesselData.build_year(idx);
        dwtValues(i) = vesselData.dwt(idx);
        maxSpeeds(i) = vesselData.max_speed(idx);
        vesselMatched(i) = true;
    end
end

% Attach vessel information to the track data.
trackData.vessel_type = vesselTypes;
trackData.build_year = buildYears;
trackData.dwt = dwtValues;
trackData.max_speed = maxSpeeds;

disp(['数据匹配完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);

%% Step 4: data quality check
stepTimer = tic;
disp('正在进行数据质量检查...');

% Unmatched ports
missingOrigin = cellfun(@isempty, trackData.origin_port_code);
missingDest = cellfun(@isempty, trackData.destination_port_code);
fprintf('未匹配的起始港: %.2f%%\n', 100*nnz(missingOrigin)/height(trackData));
fprintf('未匹配的目的港: %.2f%%\n', 100*nnz(missingDest)/height(trackData));

% Unmatched vessels
missingVessel = ~vesselMatched;
fprintf('未匹配的船舶信息: %.2f%%\n', 100*nnz(missingVessel)/height(trackData));

% Build the anomaly report. Tables can only be stacked vertically when they
% share the same variable names, so both sub-reports are renamed to a common
% schema and tagged with the kind of issue.
anomalyReport = table();
if nnz(missingOrigin) > 0
    tempReport = trackData(missingOrigin, {'origin_port', 'timestamp', 'vessel_id'});
    tempReport.Properties.VariableNames = {'Port', 'Timestamp', 'Vessel_ID'};
    tempReport.Issue = repmat({'Missing Origin Port'}, height(tempReport), 1);
    anomalyReport = [anomalyReport; tempReport];
end
if nnz(missingDest) > 0
    tempReport = trackData(missingDest, {'destination_port', 'timestamp', 'vessel_id'});
    tempReport.Properties.VariableNames = {'Port', 'Timestamp', 'Vessel_ID'};
    tempReport.Issue = repmat({'Missing Destination Port'}, height(tempReport), 1);
    anomalyReport = [anomalyReport; tempReport];
end

% Save the anomaly report.
if ~isempty(anomalyReport)
    writetable(anomalyReport, '数据匹配异常报告.xlsx');
    disp('已保存异常报告到: 数据匹配异常报告.xlsx');
end

% Drop every record with an unmatched origin, destination or vessel.
validRecords = ~(missingOrigin | missingDest | missingVessel);
trackData = trackData(validRecords, :);
fprintf('有效记录数: %d (已移除 %d 条无效记录)\n', ...
    height(trackData), nnz(~validRecords));

disp(['数据质量检查完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);

%% Step 5: derived metrics
stepTimer = tic;
disp('正在计算衍生指标...');

% Speed ratio (current speed / vessel maximum speed).
trackData.speed_ratio = trackData.speed ./ trackData.max_speed;

% Flag abnormal speed (ratio < 0.2 or > 1.1).
trackData.speed_anomaly = trackData.speed_ratio < 0.2 | trackData.speed_ratio > 1.1;

% Journey ids. A journey is a run of consecutive rows that share the same
% vessel / origin / destination key; all rows in the run get the same id
% "<key>_<n>", where n counts how many runs of that key have been seen.
% (Previously the counter advanced on every row, so each row received its own
% id and the "unique journeys" figure equalled the row count.)
% NOTE(review): this relies on the rows being in chronological order per
% vessel - confirm against the input file.
trackData.journey_id = cell(height(trackData), 1);
runCounts = containers.Map('KeyType', 'char', 'ValueType', 'double');
currentKey = '';
journeyNumber = 0;
for i = 1:height(trackData)
    newKey = [trackData.vessel_id{i} '_' trackData.origin_port{i} '_' trackData.destination_port{i}];
    if ~strcmp(newKey, currentKey)
        currentKey = newKey;
        if isKey(runCounts, currentKey)
            journeyNumber = runCounts(currentKey) + 1;
        else
            journeyNumber = 1;
        end
        runCounts(currentKey) = journeyNumber;
    end
    trackData.journey_id{i} = [currentKey '_' num2str(journeyNumber)];
end

disp(['衍生指标计算完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);

%% Step 6: save the final result
stepTimer = tic;
disp('正在保存结果...');

% Reorder the columns.
finalData = trackData(:, {
    'timestamp', 'vessel_id', 'vessel_type', 'build_year', 'dwt', 'journey_id', ...
    'origin_port', 'origin_port_code', 'origin_country', ...
    'destination_port', 'destination_port_code', 'destination_country', ...
    'latitude', 'longitude', 'speed', 'max_speed', 'speed_ratio', ...
    'fuel_consumption', 'status', 'anomaly_type', 'speed_anomaly'
});

% MAT file (complete data)
save('combinedMarineData.mat', 'finalData', '-v7.3');

% Parquet file (columnar storage)
parquetwrite('combinedMarineData.parquet', finalData);

% Excel (convenient for analysis)
writetable(finalData, 'combinedMarineData.xlsx');

% Port and vessel tables
save('portData.mat', 'portData');
save('vesselData.mat', 'vesselData');

disp(['结果保存完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);
% Total time is measured from the start of the script, not from the last step.
disp(['总处理时间: ', num2str(toc(totalTimer)), ' 秒']);
fprintf('最终数据集大小: %d 行 x %d 列\n', size(finalData, 1), size(finalData, 2));

%% Step 7: verify the result
disp('数据整合报告:');
disp('========================================');
fprintf('轨迹记录: %d 条\n', height(finalData));
fprintf('唯一船舶: %d 艘\n', numel(unique(finalData.vessel_id)));
fprintf('唯一航程: %d 个\n', numel(unique(finalData.journey_id)));
fprintf('覆盖港口: %d 个\n', numel(unique([finalData.origin_port; finalData.destination_port])));
fprintf('时间范围: %s 至 %s\n', ...
    datestr(min(finalData.timestamp), 'yyyy-mm-dd'), ...
    datestr(max(finalData.timestamp), 'yyyy-mm-dd'));
fprintf('异常速度记录: %d 条 (%.2f%%)\n', ...
    nnz(finalData.speed_anomaly), 100*nnz(finalData.speed_anomaly)/height(finalData));
disp('========================================');

% --- Question text that followed the original listing, quoted verbatim ---
% (The console transcript is the error reported against the unvalidated
% version of this script; the request asks for the corrected full code.)
% (>> maritime_data_cleaning 正在读取数据... 错误使用 maritime_data_cleaning 对于表中的每个变量,VariableNames 属性必须包含一个名称。 >> )修改该代码并给出修改后的完整代码
08-25
%% Maritime data cleaning and matching (revised listing)
% Reads track, port and vessel workbooks, cleans the track data, attaches port
% and vessel attributes, reports and drops unmatched rows, derives speed
% metrics and journey ids, saves the combined table and prints a summary.

%% Step 1: read the raw data
clc,clear
totalTimer = tic;   % separate handle so the total time is not reset by the per-step timers
stepTimer = tic;
disp('正在读取数据...');

trackNames = {'timestamp', 'vessel_id', 'origin_port', 'destination_port', ...
              'latitude', 'longitude', 'speed', 'status', 'anomaly_type', 'fuel_consumption'};
portNames = {'port_name', 'port_code', 'country', 'port_latitude', 'port_longitude'};
vesselNames = {'vessel_id', 'vessel_type', 'build_year', 'dwt', 'max_speed'};

% Track data. The column count is checked before VariableTypes and
% VariableNames are assigned, because both require one entry per column.
opts = detectImportOptions('gj.xlsx');
if numel(opts.VariableTypes) ~= numel(trackNames)
    error('轨迹数据文件列数不是10列,请检查文件格式');
end
opts.VariableTypes = {'double', 'char', 'char', 'char', 'double', 'double', 'double', 'char', 'char', 'double'};
trackData = readtable('gj.xlsx', opts);
trackData.Properties.VariableNames = trackNames;

% Port data. The same "VariableNames" error is raised here when the workbook
% does not have exactly five columns, so it is validated as well.
portData = readtable('zc.xlsx');
if width(portData) ~= numel(portNames)
    error('港口数据文件列数为%d,不是5列,请检查文件格式', width(portData));
end
portData.Properties.VariableNames = portNames;

% Vessel information, validated the same way.
vesselData = readtable('yy.xlsx');
if width(vesselData) ~= numel(vesselNames)
    error('船舶信息文件列数为%d,不是5列,请检查文件格式', width(vesselData));
end
vesselData.Properties.VariableNames = vesselNames;

disp(['数据读取完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);
fprintf('轨迹数据记录数: %d\n', height(trackData));
fprintf('港口数据记录数: %d\n', height(portData));
fprintf('船舶信息记录数: %d\n', height(vesselData));

%% Step 2: data cleaning
stepTimer = tic;
disp('正在进行数据清洗...');

% Convert Excel serial dates to MATLAB datetime values.
trackData.timestamp = datetime(trackData.timestamp, 'ConvertFrom', 'excel');

% Keep rows whose speed is within 0-50 knots and whose coordinates are valid.
% Rows with NaN in any of these fields fail the comparisons and are dropped.
validSpeed = trackData.speed >= 0 & trackData.speed <= 50;
validLatitude = trackData.latitude >= -90 & trackData.latitude <= 90;
validLongitude = trackData.longitude >= -180 & trackData.longitude <= 180;
validCoords = validLatitude & validLongitude;
trackData = trackData(validSpeed & validCoords, :);

% Fill missing anomaly types.
missingAnomaly = cellfun(@isempty, trackData.anomaly_type);
trackData.anomaly_type(missingAnomaly) = {'NONE'};

% Normalize port names to upper case on both sides of the lookup.
portData.port_name = upper(portData.port_name);
trackData.origin_port = upper(trackData.origin_port);
trackData.destination_port = upper(trackData.destination_port);

% Normalize vessel ids by removing all whitespace.
trackData.vessel_id = regexprep(trackData.vessel_id, '\s+', '');
vesselData.vessel_id = regexprep(vesselData.vessel_id, '\s+', '');

disp(['数据清洗完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);
fprintf('清洗后轨迹数据记录数: %d\n', height(trackData));

%% Step 3: data matching
stepTimer = tic;
disp('正在进行数据匹配...');

% Lookup tables from port name to port code / country.
portCodeMap = containers.Map(portData.port_name, portData.port_code);
portCountryMap = containers.Map(portData.port_name, portData.country);

% Attach port information; unmatched ports are marked 'UNKNOWN'.
trackData.origin_port_code = cell(height(trackData), 1);
trackData.destination_port_code = cell(height(trackData), 1);
trackData.origin_country = cell(height(trackData), 1);
trackData.destination_country = cell(height(trackData), 1);

for i = 1:height(trackData)
    originPort = trackData.origin_port{i};
    if isKey(portCodeMap, originPort)
        trackData.origin_port_code{i} = portCodeMap(originPort);
        trackData.origin_country{i} = portCountryMap(originPort);
    else
        trackData.origin_port_code{i} = 'UNKNOWN';
        trackData.origin_country{i} = 'UNKNOWN';
    end

    destPort = trackData.destination_port{i};
    if isKey(portCodeMap, destPort)
        trackData.destination_port_code{i} = portCodeMap(destPort);
        trackData.destination_country{i} = portCountryMap(destPort);
    else
        trackData.destination_port_code{i} = 'UNKNOWN';
        trackData.destination_country{i} = 'UNKNOWN';
    end
end

% Vessel matching, vectorized with ismember.
[vesselMatched, idx] = ismember(trackData.vessel_id, vesselData.vessel_id);
trackData.vessel_type = repmat({''}, height(trackData), 1);
trackData.build_year = nan(height(trackData), 1);
trackData.dwt = nan(height(trackData), 1);
trackData.max_speed = nan(height(trackData), 1);

trackData.vessel_type(vesselMatched) = vesselData.vessel_type(idx(vesselMatched));
trackData.build_year(vesselMatched) = vesselData.build_year(idx(vesselMatched));
trackData.dwt(vesselMatched) = vesselData.dwt(idx(vesselMatched));
trackData.max_speed(vesselMatched) = vesselData.max_speed(idx(vesselMatched));

disp(['数据匹配完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);

%% Step 4: data quality check
stepTimer = tic;
disp('正在进行数据质量检查...');

% Unmatched ports
missingOrigin = strcmp(trackData.origin_port_code, 'UNKNOWN');
missingDest = strcmp(trackData.destination_port_code, 'UNKNOWN');
fprintf('未匹配的起始港: %.2f%%\n', 100*nnz(missingOrigin)/height(trackData));
fprintf('未匹配的目的港: %.2f%%\n', 100*nnz(missingDest)/height(trackData));

% Unmatched vessels. The ismember mask is used instead of isnan(build_year)
% so that a matched vessel with a missing build year is not treated as
% unmatched.
missingVessel = ~vesselMatched;
fprintf('未匹配的船舶信息: %.2f%%\n', 100*nnz(missingVessel)/height(trackData));

% Build the anomaly report. Every sub-report uses the same four variables
% (Port, Timestamp, Vessel_ID, Issue) because tables can only be stacked
% vertically when their variable names agree.
reportColumns = {'Port', 'Timestamp', 'Vessel_ID', 'Issue'};
anomalyReport = table();
if nnz(missingOrigin) > 0
    tempReport = trackData(missingOrigin, {'origin_port', 'timestamp', 'vessel_id'});
    tempReport.Properties.VariableNames = {'Port', 'Timestamp', 'Vessel_ID'};
    tempReport.Issue = repmat({'Missing Origin Port'}, height(tempReport), 1);
    anomalyReport = [anomalyReport; tempReport(:, reportColumns)];
end
if nnz(missingDest) > 0
    tempReport = trackData(missingDest, {'destination_port', 'timestamp', 'vessel_id'});
    tempReport.Properties.VariableNames = {'Port', 'Timestamp', 'Vessel_ID'};
    tempReport.Issue = repmat({'Missing Destination Port'}, height(tempReport), 1);
    anomalyReport = [anomalyReport; tempReport(:, reportColumns)];
end
if nnz(missingVessel) > 0
    tempReport = trackData(missingVessel, {'vessel_id', 'timestamp'});
    tempReport.Properties.VariableNames = {'Vessel_ID', 'Timestamp'};
    % No port is involved in a vessel issue; an empty Port keeps the schema uniform.
    tempReport.Port = repmat({''}, height(tempReport), 1);
    tempReport.Issue = repmat({'Missing Vessel Info'}, height(tempReport), 1);
    anomalyReport = [anomalyReport; tempReport(:, reportColumns)];
end

% Save the anomaly report.
if ~isempty(anomalyReport)
    writetable(anomalyReport, '数据匹配异常报告.xlsx');
    disp('已保存异常报告到: 数据匹配异常报告.xlsx');
end

% Drop every record with an unmatched origin, destination or vessel.
validRecords = ~(missingOrigin | missingDest | missingVessel);
trackData = trackData(validRecords, :);
fprintf('有效记录数: %d (已移除 %d 条无效记录)\n', ...
    height(trackData), nnz(~validRecords));

disp(['数据质量检查完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);

%% Step 5: derived metrics
stepTimer = tic;
disp('正在计算衍生指标...');

% Speed ratio (current speed / vessel maximum speed).
trackData.speed_ratio = trackData.speed ./ trackData.max_speed;

% Flag abnormal speed (ratio < 0.2 or > 1.1).
trackData.speed_anomaly = trackData.speed_ratio < 0.2 | trackData.speed_ratio > 1.1;

% Journey ids: one group per distinct vessel / origin / destination key.
[~, ~, journeyGroup] = unique(strcat(trackData.vessel_id, '_', trackData.origin_port, '_', trackData.destination_port));
trackData.journey_id = arrayfun(@(x) sprintf('JID_%d', x), journeyGroup, 'UniformOutput', false);

disp(['衍生指标计算完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);

%% Step 6: save the final result
stepTimer = tic;
disp('正在保存结果...');

% Reorder the columns.
finalData = trackData(:, {
    'timestamp', 'vessel_id', 'vessel_type', 'build_year', 'dwt', 'journey_id', ...
    'origin_port', 'origin_port_code', 'origin_country', ...
    'destination_port', 'destination_port_code', 'destination_country', ...
    'latitude', 'longitude', 'speed', 'max_speed', 'speed_ratio', ...
    'fuel_consumption', 'status', 'anomaly_type', 'speed_anomaly'
});

% MAT file (complete data)
save('combinedMarineData.mat', 'finalData', '-v7.3');

% Parquet file (columnar storage)
parquetwrite('combinedMarineData.parquet', finalData);

% Excel (convenient for analysis)
writetable(finalData, 'combinedMarineData.xlsx');

% Port and vessel tables
save('portData.mat', 'portData');
save('vesselData.mat', 'vesselData');

disp(['结果保存完成! 耗时: ', num2str(toc(stepTimer)), ' 秒']);
% Total time is measured from the start of the script, not from the last step.
disp(['总处理时间: ', num2str(toc(totalTimer)), ' 秒']);
fprintf('最终数据集大小: %d 行 x %d 列\n', size(finalData, 1), size(finalData, 2));

%% Step 7: verify the result
disp('数据整合报告:');
disp('========================================');
fprintf('轨迹记录: %d 条\n', height(finalData));
fprintf('唯一船舶: %d 艘\n', numel(unique(finalData.vessel_id)));
fprintf('唯一航程: %d 个\n', numel(unique(finalData.journey_id)));
fprintf('覆盖港口: %d 个\n', numel(unique([finalData.origin_port; finalData.destination_port])));
fprintf('时间范围: %s 至 %s\n', ...
    datestr(min(finalData.timestamp), 'yyyy-mm-dd'), ...
    datestr(max(finalData.timestamp), 'yyyy-mm-dd'));
fprintf('异常速度记录: %d 条 (%.2f%%)\n', ...
    nnz(finalData.speed_anomaly), 100*nnz(finalData.speed_anomaly)/height(finalData));
disp('========================================');

% --- Console transcript that followed the original listing, quoted verbatim ---
% (It shows the error raised by the version that validated only the track file.)
% (>> maritime_data_cleaning 正在读取数据... 错误使用 maritime_data_cleaning 对于表中的每个变量,VariableNames 属性必须包含一个名称。)
最新发布
08-25
"""C function analyzer with a small Tkinter front end.

The module contains three cooperating pieces:

* ``SimpleCLexer`` turns C source text into a flat list of token dicts.
* ``EnhancedFunctionAnalyzer`` walks those tokens and collects the function
  signature, local variables, referenced globals and function calls.
* ``FunctionParserApp`` is the GUI that drives the two and prints a report.

Reported issue (from the original post): "the program hangs once lexical
analysis starts; please fix it".  The lexer itself always made progress; the
freeze was in the analyzer, whose declaration handler rewound the cursor when
a type token was not followed by a variable name (every cast such as
``(U1)X``), after which the main loop retried the same token forever.  The GUI
log was not refreshed between steps, so "lexical analysis" was merely the last
message that reached the screen.  Every handler below now consumes at least
one token.
"""
import re
import logging
import traceback
from datetime import datetime

try:
    import tkinter as tk
    from tkinter import scrolledtext, ttk, messagebox
except ImportError:
    # Interpreter built without Tk: the lexer and analyzer stay importable
    # (e.g. for headless use); only the GUI class needs these names.
    tk = scrolledtext = ttk = messagebox = None


# Sample shown in the input box at start-up.
EXAMPLE_CODE = """static void Diag21_PID_C9(U1 u1_a_num)
{
    U1 u1_t_cmplt;
    U1 u1_t_cnt;
    struct SensorData sensor;
    U2 u2_array[10][20];

    if((U1)DIAG_CNT_ZERO == u1_t_swrstcnt) /* Determine if a software reset is in progress */
    {
        for(u1_t_cnt = (U1)DIAG21_ZERO; u1_t_cnt < (U1)DIAG21_PIDC9_FLAG; u1_t_cnt ++)
        {
            u1_t_cmplt = u1_g_InspSoftwareVersion(u4_g_cmd, &u4_g_data, (U1)TRUE);
        }
        vd_s_Diag21_U2ToU1(u2_g_buf, u1_g_data, (U1)DIAG21_PIDC9_FLAG);
    }
    else
    {
        /* Do Nothing */
    }
}"""


class SimpleCLexer:
    """Minimal tokenizer for C source text.

    Each token is a dict with the keys ``type`` (``KEYWORD``, ``TYPE``,
    ``IDENTIFIER``, ``NUMBER``, ``STRING``, ``CHAR``, ``OPERATOR`` or
    ``UNKNOWN``), ``text``, ``line`` (1-based) and ``column`` (0-based).
    Comments and whitespace produce no tokens.
    """

    KEYWORDS = frozenset({
        'void', 'int', 'char', 'float', 'double', 'short', 'long',
        'signed', 'unsigned', 'struct', 'union', 'enum', 'typedef',
        'static', 'extern', 'auto', 'register', 'const', 'volatile',
        'return', 'if', 'else', 'switch', 'case', 'default', 'for',
        'while', 'do', 'break', 'continue', 'goto', 'sizeof',
    })
    # Project-specific type aliases recognised as types.
    TYPES = frozenset({
        'U1', 'U2', 'U4', 'S1', 'S2', 'S4', 'BOOL', 'BYTE', 'WORD', 'DWORD',
    })
    OPERATORS = frozenset({
        '(', ')', '{', '}', '[', ']', ';', ',', '.', '->', '++', '--',
        '&', '*', '+', '-', '~', '!', '/', '%', '<<', '>>', '<', '>',
        '<=', '>=', '==', '!=', '^', '|', '&&', '||', '?', ':', '=',
        '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '&=', '^=', '|=',
    })
    # Characters allowed after the first digit of a number: decimal point,
    # hex prefix/digits and the integer suffixes u/U/l/L.
    _NUMBER_TAIL = frozenset('.xXabcdefABCDEFuUlL')

    def __init__(self):
        self.tokens = []

    @staticmethod
    def _skip_quoted(text, pos):
        """Return the index just past the quoted literal starting at *pos*.

        Backslash escapes are skipped as a pair; an unterminated literal
        runs to the end of *text*.
        """
        quote = text[pos]
        length = len(text)
        pos += 1
        while pos < length and text[pos] != quote:
            pos += 2 if text[pos] == '\\' and pos + 1 < length else 1
        if pos < length:
            pos += 1  # closing quote
        return min(pos, length)

    def tokenize(self, input_str):
        """Split *input_str* into tokens and return them as a list of dicts."""
        tokens = []
        pos = 0
        line = 1
        line_start = 0  # index of the first character of the current line
        length = len(input_str)

        while pos < length:
            char = input_str[pos]

            if char == '\n':
                pos += 1
                line += 1
                line_start = pos
                continue
            if char in ' \t\r\f\v':
                pos += 1
                continue

            # Single-line comment: stop at the newline so it is counted above.
            if input_str.startswith('//', pos):
                end = input_str.find('\n', pos)
                pos = length if end == -1 else end
                continue

            # Block comment: keep the line counter in sync with the newlines
            # it spans, otherwise every later token reports a wrong line.
            if input_str.startswith('/*', pos):
                close = input_str.find('*/', pos + 2)
                end = length if close == -1 else close + 2
                newlines = input_str.count('\n', pos, end)
                if newlines:
                    line += newlines
                    line_start = input_str.rfind('\n', pos, end) + 1
                pos = end
                continue

            start = pos
            if char.isalpha() or char == '_':
                pos += 1
                while pos < length and (input_str[pos].isalnum() or input_str[pos] == '_'):
                    pos += 1
                word = input_str[start:pos]
                if word in self.KEYWORDS:
                    kind = 'KEYWORD'
                elif word in self.TYPES:
                    kind = 'TYPE'
                else:
                    kind = 'IDENTIFIER'
            elif char.isdigit():
                pos += 1
                while pos < length and (input_str[pos].isdigit()
                                        or input_str[pos] in self._NUMBER_TAIL):
                    pos += 1
                kind = 'NUMBER'
            elif char in '"\'':
                pos = self._skip_quoted(input_str, pos)
                kind = 'STRING' if char == '"' else 'CHAR'
            else:
                # Longest operator first; anything else is a one-char UNKNOWN.
                kind = 'UNKNOWN'
                pos = start + 1
                for op_len in (3, 2, 1):
                    candidate = input_str[start:start + op_len]
                    if candidate in self.OPERATORS:
                        kind = 'OPERATOR'
                        pos = start + len(candidate)
                        break

            text = input_str[start:pos]
            tokens.append({
                'type': kind,
                'text': text,
                'line': line,
                'column': start - line_start,
            })
            if kind in ('STRING', 'CHAR') and '\n' in text:
                line += text.count('\n')
                line_start = start + text.rfind('\n') + 1

        self.tokens = tokens
        return tokens


class EnhancedFunctionAnalyzer:
    """Collect signature, variables and calls of the first C function found.

    Results are exposed as attributes after ``analyze``: ``function_name``,
    ``parameters``, ``local_vars``, ``global_vars``, ``function_calls``,
    ``structs`` and ``arrays`` (lists of dicts).  The analysis is heuristic:
    an identifier that is used but neither declared locally, nor a parameter,
    nor all-uppercase (treated as a macro) is reported as a global.
    """

    _QUALIFIERS = frozenset({'const', 'volatile'})
    _TAGGED_KEYWORDS = frozenset({'struct', 'union', 'enum'})
    _MEMBER_OPERATORS = frozenset({'.', '->'})
    _OPENERS = frozenset({'(', '[', '{'})
    _CLOSERS = frozenset({')', ']', '}'})
    # Tokens that cannot appear between a return type and the '(' of a
    # function definition.
    _SIGNATURE_STOPS = frozenset({';', '=', '{', '}'})

    def __init__(self):
        self.function_name = ""
        self.parameters = []
        self.global_vars = []
        self.function_calls = []
        self.current_function = None
        self.in_function_body = False
        self.brace_depth = 0
        self.variable_declarations = {}
        self.macro_definitions = set()
        self.storage_classes = {"static", "extern", "auto", "register"}
        self.local_vars = []
        self.structs = []
        self.arrays = []
        self.struct_tags = set()
        self.recorded_locals = set()
        self.recorded_globals = set()
        self.recorded_params = set()
        self.local_scope_stack = [set()]

        # Basic types and project type aliases.
        self.basic_types = {'void', 'int', 'char', 'float', 'double',
                            'short', 'long', 'signed', 'unsigned'}
        self.type_aliases = {"U1", "U2", "U4", "S1", "S2", "S4",
                             "BOOL", "BYTE", "WORD", "DWORD"}
        self.allowed_types = self.basic_types | self.type_aliases
        self.allowed_types.add('struct')

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    def analyze(self, tokens):
        """Analyze *tokens* (as produced by ``SimpleCLexer``); return self."""
        self.tokens = tokens
        self.pos = 0
        self.current_line = 0
        self.brace_depth = 0
        self.local_scope_stack = [set()]

        self._identify_macros()
        self._find_function_definition()
        if self.function_name:
            self._analyze_function_body()
        return self

    # ------------------------------------------------------------------
    # Small cursor helpers
    # ------------------------------------------------------------------
    def _text_at(self, index):
        """Return the text of token *index*, or None when out of range."""
        if 0 <= index < len(self.tokens):
            return self.tokens[index]['text']
        return None

    def _is_identifier_at(self, index):
        """Return True when token *index* exists and is an IDENTIFIER."""
        return (0 <= index < len(self.tokens)
                and self.tokens[index]['type'] == 'IDENTIFIER')

    def _split_arguments(self, start):
        """Split a parenthesised list into comma-separated token groups.

        *start* is the index just after the opening '('.  Returns
        ``(groups, index)`` where *index* is the position of the matching
        ')' (or ``len(tokens)`` when it is missing).  The cursor is not moved.
        """
        groups = []
        current = []
        depth = 1
        index = start
        while index < len(self.tokens):
            token = self.tokens[index]
            text = token['text']
            if text == '(':
                depth += 1
            elif text == ')':
                depth -= 1
                if depth == 0:
                    break
            elif text == ',' and depth == 1:
                groups.append(current)
                current = []
                index += 1
                continue
            current.append(token)
            index += 1
        if current:
            groups.append(current)
        return groups, index

    # ------------------------------------------------------------------
    # Signature
    # ------------------------------------------------------------------
    def _identify_macros(self):
        """Treat every all-uppercase identifier as a macro name."""
        for token in self.tokens:
            if token['type'] == 'IDENTIFIER' and token['text'].isupper():
                self.macro_definitions.add(token['text'])

    def _find_function_definition(self):
        """Locate the first function definition and read its signature."""
        self.pos = 0
        while self.pos < len(self.tokens):
            token = self.tokens[self.pos]
            self.current_line = token['line']
            starts_declaration = (token['text'] in self.allowed_types
                                  or token['text'] in self.storage_classes)
            if starts_declaration and self._is_function_definition():
                self._extract_function_signature()
                return
            self.pos += 1

    def _is_function_definition(self):
        """Return True when the cursor sits on ``[storage] type ... name (``."""
        start_pos = self.pos
        if self.tokens[start_pos]['text'] in self.storage_classes:
            start_pos += 1
        if self._text_at(start_pos) not in self.allowed_types:
            return False

        found_name = False
        for index in range(start_pos + 1, len(self.tokens)):
            token = self.tokens[index]
            if token['text'] == '(':
                return found_name
            if token['text'] in self._SIGNATURE_STOPS:
                # A statement ended before any '(' appeared: this is a
                # variable declaration, not a function header.
                return False
            if token['type'] == 'IDENTIFIER':
                found_name = True
        return False

    def _extract_function_signature(self):
        """Record the function name and parameters, then enter the body."""
        if self._text_at(self.pos) in self.storage_classes:
            self.pos += 1
        self.pos += 1  # first word of the return type (not recorded)

        # The function name is the last identifier before '('.
        func_name = None
        while self.pos < len(self.tokens) and self.tokens[self.pos]['text'] != '(':
            if self.tokens[self.pos]['type'] == 'IDENTIFIER':
                func_name = self.tokens[self.pos]['text']
            self.pos += 1
        if not func_name:
            return
        self.function_name = func_name
        self.current_function = func_name

        if self._text_at(self.pos) == '(':
            self.pos += 1
        groups, self.pos = self._split_arguments(self.pos)

        params = []
        for group in groups:
            param_type, param_name = self._extract_param_info(group)
            if param_type and param_name:
                params.append({
                    'type': param_type,
                    'name': param_name,
                    'line': group[0]['line'],
                })
                self.variable_declarations[param_name] = True
        self.parameters = params
        self.recorded_params.update(param['name'] for param in params)

        # Advance to the '{' that opens the body.
        while self.pos < len(self.tokens) and self.tokens[self.pos]['text'] != '{':
            self.pos += 1
        if self.pos < len(self.tokens):
            self.in_function_body = True
            self.brace_depth = 1
            self.pos += 1

    def _extract_param_info(self, tokens):
        """Return ``(type_text, name)`` for one parameter's token list.

        *name* is None when no lowercase-containing identifier follows the
        type words (e.g. a bare ``void``).
        """
        type_words = []
        index = 0
        while index < len(tokens):
            token = tokens[index]
            is_type_word = (token['type'] in ('KEYWORD', 'TYPE')
                            or token['text'] in self.allowed_types)
            if not is_type_word:
                break
            type_words.append(token['text'])
            index += 1
            # ``struct Foo *p``: the tag belongs to the type, not the name.
            if (token['text'] in self._TAGGED_KEYWORDS and index < len(tokens)
                    and tokens[index]['type'] == 'IDENTIFIER'):
                type_words.append(tokens[index]['text'])
                index += 1

        param_name = None
        for token in tokens[index:]:
            if token['type'] == 'IDENTIFIER' and not token['text'].isupper():
                param_name = token['text']
                break
        return ' '.join(type_words), param_name

    # ------------------------------------------------------------------
    # Body
    # ------------------------------------------------------------------
    def _analyze_function_body(self):
        """Walk the body until its closing brace.

        Every branch advances the cursor by at least one token, which is
        what guarantees termination.
        """
        while self.pos < len(self.tokens) and self.brace_depth > 0:
            token = self.tokens[self.pos]
            text = token['text']
            self.current_line = token['line']

            if text == '{':
                self.brace_depth += 1
                self.local_scope_stack.append(set())
                self.pos += 1
            elif text == '}':
                self.brace_depth -= 1
                if self.local_scope_stack:
                    self.local_scope_stack.pop()
                self.pos += 1
            elif text == 'struct':
                # Must precede the generic branch: 'struct' is also in
                # allowed_types (for return/parameter types).
                self._handle_struct_declaration()
            elif text in self.allowed_types or text in self.storage_classes:
                self._handle_variable_declaration()
            else:
                self._analyze_expression_token()

    def _analyze_expression_token(self):
        """Process the single token at the cursor as part of an expression."""
        token = self.tokens[self.pos]
        if token['type'] == 'IDENTIFIER':
            follower = self._text_at(self.pos + 1)
            if follower == '(':
                self._handle_function_call()
                return
            if self._text_at(self.pos - 1) in self._MEMBER_OPERATORS:
                pass  # member name after '.' / '->': not a variable
            else:
                if follower in self._MEMBER_OPERATORS:
                    self._record_struct_use(token)
                self._note_variable_use(token)
        self.pos += 1

    def _record_struct_use(self, token):
        """Remember *token* as a struct variable (once per name)."""
        name = token['text']
        if any(entry['name'] == name for entry in self.structs):
            return
        self.structs.append({
            'name': name,
            'line': token['line'],
            'scope': 'local' if name in self.recorded_locals else 'global',
        })

    def _note_variable_use(self, token):
        """Record *token* as a global if nothing in the function declares it."""
        name = token['text']
        if name.isupper():
            return  # all-uppercase identifiers are treated as macros
        if (name in self.variable_declarations
                or name in self.macro_definitions
                or name == self.function_name
                or name in self.struct_tags):
            return
        if name not in self.recorded_globals:
            self.global_vars.append({
                'name': name,
                'line': token['line'],
                'scope': 'global',
            })
            self.recorded_globals.add(name)
        self.variable_declarations[name] = True

    def _scan_initializer(self):
        """Analyze an initializer up to the ',' or ';' that ends it.

        Brackets, parentheses and braces are balanced so that commas inside
        ``{1, 2}`` or ``f(a, b)`` do not end the initializer and the braces
        of an aggregate initializer never reach the body's brace counter.
        """
        end = self.pos
        depth = 0
        while end < len(self.tokens):
            text = self.tokens[end]['text']
            if text in self._OPENERS:
                depth += 1
            elif text in self._CLOSERS:
                if depth == 0:
                    break  # closer of an enclosing construct
                depth -= 1
            elif text in (',', ';') and depth == 0:
                break
            end += 1
        while self.pos < end:
            self._analyze_expression_token()

    def _parse_declarators(self, base_type, *, is_struct=False, struct_type=None):
        """Parse ``*name[dims] = init`` items separated by commas.

        Records each declared variable and returns how many were found.
        When the cursor is not on a declarator the cursor is left where the
        attempt started and 0 is returned.
        """
        count = 0
        while True:
            mark = self.pos
            var_type = base_type
            while self._text_at(self.pos) == '*':
                var_type += '*'
                self.pos += 1

            if not self._is_identifier_at(self.pos):
                self.pos = mark
                break
            name_token = self.tokens[self.pos]
            # Outside struct declarations an all-uppercase name is a macro.
            if not is_struct and name_token['text'].isupper():
                self.pos = mark
                break
            var_name = name_token['text']
            self.pos += 1

            array_dims = []
            while self._text_at(self.pos) == '[':
                self.pos += 1
                dim = ""
                while self.pos < len(self.tokens) and self.tokens[self.pos]['text'] != ']':
                    dim += self.tokens[self.pos]['text']
                    self.pos += 1
                if self._text_at(self.pos) == ']':
                    self.pos += 1
                array_dims.append(dim)

            # Declare before scanning the initializer so a self-reference
            # is not mistaken for a global.
            self.variable_declarations[var_name] = True
            self.recorded_locals.add(var_name)
            if self._text_at(self.pos) == '=':
                self.pos += 1
                self._scan_initializer()

            for dim in array_dims:
                var_type += f"[{dim}]"
            var_info = {
                'type': var_type,
                'name': var_name,
                'line': name_token['line'],
                'is_struct': is_struct,
                'is_array': bool(array_dims),
                'array_dims': array_dims,
            }
            if is_struct:
                var_info['struct_type'] = struct_type
            self.local_vars.append(var_info)
            if array_dims:
                self.arrays.append(var_info)
            if is_struct:
                self.structs.append(var_info)
            count += 1

            if self._text_at(self.pos) == ',':
                self.pos += 1
                continue
            break
        return count

    def _handle_struct_declaration(self):
        """Handle ``struct [Tag] [{...}] [declarators]`` at the cursor."""
        self.pos += 1  # 'struct'

        struct_type = None
        if self._is_identifier_at(self.pos):
            struct_type = self.tokens[self.pos]['text']
            self.struct_tags.add(struct_type)
            self.allowed_types.add(struct_type)
            self.pos += 1

        # Skip an inline member list.
        if self._text_at(self.pos) == '{':
            depth = 1
            self.pos += 1
            while self.pos < len(self.tokens) and depth > 0:
                text = self.tokens[self.pos]['text']
                if text == '{':
                    depth += 1
                elif text == '}':
                    depth -= 1
                self.pos += 1

        base_type = f"struct {struct_type}" if struct_type else "struct"
        self._parse_declarators(base_type, is_struct=True, struct_type=struct_type)

    def _handle_variable_declaration(self):
        """Handle a declaration that starts with a type or storage class.

        Returns True when at least one variable was recorded.  When the
        tokens turn out not to be a declaration (a cast, ``sizeof(type)``,
        ...) the leading token is skipped so the caller always progresses.
        """
        start_pos = self.pos

        specifiers = []
        while self.pos < len(self.tokens):
            text = self.tokens[self.pos]['text']
            is_specifier = (text in self.storage_classes
                            or text in self._QUALIFIERS
                            or (text in self.allowed_types and text != 'struct'))
            if not is_specifier:
                break
            specifiers.append(text)
            self.pos += 1

        type_words = [word for word in specifiers
                      if word not in self.storage_classes]
        if type_words and self._parse_declarators(' '.join(type_words)):
            return True

        # Not a declaration: never rewind onto the same token, that was the
        # cause of the endless loop.
        self.pos = start_pos + 1
        return False

    def _handle_function_call(self):
        """Record the call at the cursor (``name (``).

        The argument text is captured by look-ahead; the cursor only moves
        past ``name (`` so the arguments themselves are analyzed by the
        caller (globals and nested calls inside them are then seen too).
        """
        name_token = self.tokens[self.pos]
        func_name = name_token['text']

        groups, _ = self._split_arguments(self.pos + 2)
        params = [''.join(t['text'] for t in group).strip() for group in groups]

        # Return type is inferred from the project's naming prefix.
        return_type = "unknown"
        if func_name.startswith("vd_"):
            return_type = "void"
        elif func_name.startswith(("u1_", "u2_", "u4_", "s1_", "s2_", "s4_")):
            return_type = func_name.split("_")[0].upper()

        self.function_calls.append({
            'name': func_name,
            'return_type': return_type,
            'params': ", ".join(params),
            'line': name_token['line'],
        })
        self.pos += 2


class FunctionParserApp:
    """Tkinter window: paste a C function, parse it, read the report."""

    def __init__(self, root):
        if tk is None:
            raise RuntimeError("tkinter is required for FunctionParserApp")
        self.root = root
        self.root.title("C语言函数解析器")
        self.root.geometry("1000x800")
        self.root.configure(bg="#f0f0f0")

        self.setup_logging()

        style = ttk.Style()
        style.configure("TFrame", background="#f0f0f0")
        style.configure("TLabelFrame", background="#f0f0f0", font=("Arial", 10, "bold"))
        style.configure("TButton", font=("Arial", 10), padding=5)
        style.configure("TProgressbar", thickness=10)

        main_frame = ttk.Frame(root)
        main_frame.pack(fill="both", expand=True, padx=15, pady=15)

        # Input area
        input_frame = ttk.LabelFrame(main_frame, text="输入C语言函数体")
        input_frame.pack(fill="both", expand=True, padx=5, pady=5)
        self.input_text = scrolledtext.ScrolledText(
            input_frame, width=100, height=15, font=("Consolas", 11), bg="#ffffff")
        self.input_text.pack(fill="both", expand=True, padx=10, pady=10)

        # Button row with progress bar
        btn_frame = ttk.Frame(main_frame)
        btn_frame.pack(fill="x", padx=5, pady=5)
        parse_btn = ttk.Button(btn_frame, text="解析函数", command=self.parse_function)
        parse_btn.pack(side="left", padx=5)
        self.progress = ttk.Progressbar(
            btn_frame, orient="horizontal", length=300, mode="determinate")
        self.progress.pack(side="left", padx=10, fill="x", expand=True)
        example_btn = ttk.Button(btn_frame, text="加载示例", command=self.load_example)
        example_btn.pack(side="right", padx=5)

        # Output area (read-only except while a report is being written)
        output_frame = ttk.LabelFrame(main_frame, text="解析结果")
        output_frame.pack(fill="both", expand=True, padx=5, pady=5)
        self.output_text = scrolledtext.ScrolledText(
            output_frame, width=100, height=15, font=("Consolas", 11), bg="#ffffff")
        self.output_text.pack(fill="both", expand=True, padx=10, pady=10)
        self.output_text.config(state=tk.DISABLED)

        # Log area
        log_frame = ttk.LabelFrame(main_frame, text="日志信息")
        log_frame.pack(fill="both", expand=True, padx=5, pady=5)
        self.log_text = scrolledtext.ScrolledText(
            log_frame, width=100, height=6, font=("Consolas", 10), bg="#f8f8f8")
        self.log_text.pack(fill="both", expand=True, padx=10, pady=10)
        self.log_text.config(state=tk.DISABLED)

        self.example_code = EXAMPLE_CODE
        self.load_example()

    def setup_logging(self):
        """Attach a timestamped UTF-8 log file handler to the root logger."""
        self.log_filename = f"parser_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

        file_handler = logging.FileHandler(self.log_filename, encoding='utf-8')
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))

        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        root_logger.addHandler(file_handler)

    def log_to_gui(self, message, level="info"):
        """Append *message* to the log pane and mirror it to the log file."""
        try:
            self.log_text.config(state=tk.NORMAL)
            timestamp = datetime.now().strftime("%H:%M:%S")
            self.log_text.insert(tk.END, f"[{timestamp}] {message}\n")
            self.log_text.see(tk.END)
            self.log_text.config(state=tk.DISABLED)
            # Repaint now so the pane shows the step that is actually
            # running, even while the event loop is busy parsing.
            self.root.update_idletasks()

            if level == "info":
                logging.info(message)
            elif level == "warning":
                logging.warning(message)
            elif level == "error":
                logging.error(message)
        except Exception as e:
            # Logging must never take the GUI down.
            logging.error(f"GUI日志错误: {str(e)}")

    def update_progress(self, value):
        """Set the progress bar to *value* and repaint."""
        self.progress['value'] = value
        self.root.update_idletasks()

    def load_example(self):
        """Replace the input box content with the bundled example."""
        self.input_text.delete("1.0", tk.END)
        self.input_text.insert(tk.END, self.example_code)
        self.log_to_gui("已加载示例函数体")

    def parse_function(self):
        """Lex and analyze the input box content and show the report."""
        try:
            code = self.input_text.get("1.0", tk.END)
            if not code.strip():
                self.log_to_gui("错误: 没有输入函数体", "error")
                messagebox.showerror("错误", "请输入要解析的C语言函数体")
                return

            self.log_to_gui("开始解析函数体...")
            self.output_text.config(state=tk.NORMAL)
            self.output_text.delete("1.0", tk.END)
            self.update_progress(0)

            self.log_to_gui("执行词法分析...")
            lexer = SimpleCLexer()
            tokens = lexer.tokenize(code)
            self.update_progress(30)

            self.log_to_gui("执行语法分析...")
            analyzer = EnhancedFunctionAnalyzer()
            analyzer.analyze(tokens)

            self.log_to_gui("生成解析报告...")
            self.display_results(
                analyzer.local_vars,
                analyzer.global_vars,
                analyzer.function_calls,
                analyzer.function_name,
                analyzer.parameters,
            )

            self.update_progress(100)
            self.output_text.config(state=tk.DISABLED)
            self.log_to_gui("解析完成!")
            messagebox.showinfo("完成", "函数体解析成功完成!")
        except Exception as e:
            # Top-level GUI boundary: report instead of crashing the window.
            self.log_to_gui(f"解析错误: {str(e)}", "error")
            self.log_to_gui(f"错误详情: {traceback.format_exc()}", "error")
            messagebox.showerror("解析错误", f"发生错误:\n{str(e)}")
            self.update_progress(0)
        finally:
            # The report pane must not stay editable after a failure.
            self.output_text.config(state=tk.DISABLED)

    @staticmethod
    def _category(var):
        """Return the category column text for a variable entry."""
        struct_type = var.get('struct_type', '')
        if struct_type:
            return f"结构体({struct_type})"
        return "结构体" if var.get('is_struct', False) else "基本类型"

    def display_results(self, local_vars, global_vars, function_calls, func_name, func_params):
        """Write the full analysis report into the output pane.

        The caller is expected to have made the output widget writable.
        """
        def write(text, *tags):
            self.output_text.insert(tk.END, text, *tags)

        # Signature
        write("=== 函数签名 ===\n", "header")
        if func_name:
            write(f"函数名: {func_name}\n")
            if func_params:
                param_list = [f"{param['type']} {param['name']}" for param in func_params]
                write(f"参数: {', '.join(param_list)}\n\n")
            else:
                write("参数: 无\n\n")
        else:
            write("警告: 无法识别函数签名\n\n")

        # Overview table
        write("=== 变量分析 ===\n", "header")
        write("类型 | 名称 | 作用域 | 行号 | 类别\n", "subheader")
        write("-" * 60 + "\n")
        for param in func_params:
            write(f"参数 | {param['name']} | 参数 | {param['line']} | 基本类型\n")
        for var in local_vars:
            write(f"变量 | {var['name']} | 局部 | {var['line']} | {self._category(var)}\n")
        for var in global_vars:
            write(f"变量 | {var['name']} | 全局 | {var['line']} | {self._category(var)}\n")
        for func in function_calls:
            write(f"函数调用 | {func['name']} | 调用 | {func['line']} | 函数\n")
        write("\n")

        struct_locals = [v for v in local_vars if v.get('is_struct', False)]
        struct_globals = [v for v in global_vars if v.get('is_struct', False)]

        # Local variable details
        if local_vars:
            write("=== 局部变量详情 ===\n", "header")
            basic_locals = [v for v in local_vars if not v.get('is_struct', False)]
            if basic_locals:
                write("基本类型变量:\n")
                for var in basic_locals:
                    write(f"{var['type']} {var['name']} (行号: {var['line']})\n")
            if struct_locals:
                write("\n结构体变量:\n")
                for var in struct_locals:
                    # An anonymous struct stores None as its tag.
                    struct_type = var.get('struct_type') or '未知结构体'
                    write(f"{var['type']} {var['name']} "
                          f"(类型: {struct_type}, 行号: {var['line']})\n")
        else:
            write("未找到局部变量\n\n")

        # Global variable details
        if global_vars:
            write("=== 使用的全局变量 ===\n", "header")
            basic_globals = [v for v in global_vars if not v.get('is_struct', False)]
            if basic_globals:
                write("基本类型变量:\n")
                for var in basic_globals:
                    write(f"{var['name']} (行号: {var['line']})\n")
            if struct_globals:
                write("\n结构体变量:\n")
                for var in struct_globals:
                    struct_type = var.get('struct_type') or '未知结构体'
                    write(f"{var['name']} (类型: {struct_type}, 行号: {var['line']})\n")
            write("\n")
        else:
            write("未使用全局变量\n\n")

        # Function call details
        if function_calls:
            write("=== 函数调用详情 ===\n", "header")
            for func in function_calls:
                write(f"函数名: {func['name']} (行号: {func['line']})\n")
                write(f"返回类型: {func['return_type']}\n")
                write(f"参数: {func['params']}\n")
                write("-" * 50 + "\n")
        else:
            write("未调用任何函数\n\n")

        # Statistics
        write("=== 解析统计 ===\n", "header")
        write(f"参数数量: {len(func_params)}\n")
        write(f"局部变量数量: {len(local_vars)}\n")
        write(f"全局变量数量: {len(global_vars)}\n")
        write(f"函数调用数量: {len(function_calls)}\n")
        write(f"结构体变量数量: {len(struct_locals) + len(struct_globals)}\n")
        write(f"总变量数量: {len(func_params) + len(local_vars) + len(global_vars) + len(function_calls)}\n")

        self.output_text.tag_config("header", font=("Arial", 12, "bold"), foreground="#2c3e50")
        self.output_text.tag_config("subheader", font=("Arial", 10, "bold"), foreground="#34495e")


if __name__ == "__main__":
    if tk is None:
        raise SystemExit("tkinter is required to run the GUI")
    root = tk.Tk()
    app = FunctionParserApp(root)
    root.mainloop()
07-19
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值