取唯一grpid

ZZ HK SCG0003 1 LIN 801
ZZ HK SCG0003 1 HGL KEYLOOK
ZZ HK SCG0003 1 CAT DRS

ZZ HK SCG0008 2 LIN 801
ZZ HK SCG0008 2 CAT DRS
ZZ HK SCG0008 2 HGL MUSTHAVE

 

SELECT LVL1.M142ILVLGRPID
FROM
(SELECT M142ILVLGRPID FROM ZTM142I WHERE M142ICOMCOD='ZZ' AND M142ISCGNUM='SCG0003' AND M142IMKTGRP='HK' AND M142IPLUATB='LIN' AND M142IATBCOD='801') LVL1,
(SELECT M142ILVLGRPID FROM ZTM142I WHERE M142ICOMCOD='ZZ' AND M142ISCGNUM='SCG0003' AND M142IMKTGRP='HK' AND M142IPLUATB='HGL' AND M142IATBCOD='KEYLOOK') LVL2,
(SELECT M142ILVLGRPID FROM ZTM142I WHERE M142ICOMCOD='ZZ' AND M142ISCGNUM='SCG0003' AND M142IMKTGRP='HK' AND M142IPLUATB='CAT' AND M142IATBCOD='DRS') LVL3
WHERE LVL1.M142ILVLGRPID = LVL2.M142ILVLGRPID AND LVL2.M142ILVLGRPID = LVL3.M142ILVLGRPID
% 主程序:孕周数据解析、BMI分组和风险评估 % 1. 导入并清洗数据 data = readtable("D:\附件-男胎检测数据2.xlsx", 'VariableNamingRule', 'preserve'); % 保留关键列 coreFields = {'孕妇代码', '孕妇BMI', '检测孕周', 'Y染色体浓度', '检测抽血次数'}; data = data(:, coreFields); disp(['原始数据行数:', num2str(height(data))]); % 清洗缺失值 cleanFields = {'孕妇BMI', '检测孕周', 'Y染色体浓度', '检测抽血次数'}; data = rmmissing(data, 'DataVariables', cleanFields); disp(['清洗后行数:', num2str(height(data))]); % 重命名列为英文 data.Properties.VariableNames = {'pID', 'BMI', 'GA_str', 'Y_concentration', 'test_count'}; % 修复错误:确保正确处理字符串类型 if iscellstr(data.GA_str) data.GA_str = string(data.GA_str); end % 2. 解析孕周数据 gaValues = nan(height(data), 1); for i = 1:height(data) % 修复错误:使用圆括号()索引代替花括号{} input = data.GA_str(i); % 处理数值输入 if isnumeric(input) ga = double(input); if ~isnan(ga) && ga >= 5 && ga <= 40 gaValues(i) = ga; end continue; end % 确保输入为字符串 if ~isstring(input) input = string(input); end str = input(1); % 提第一个元素 str = lower(strtrim(str)); % 去除非法字符 str = regexprep(str, '[^\w\d\.\+\-]', '', 'once'); % 包含"周"和"天"的格式 if contains(str, '周') && (contains(str, '天') || contains(str, '+')) weekPart = extractBefore(str, '周'); if contains(str, '+') dayPart = extractAfter(str, '+'); else dayPart = extractAfter(str, '周'); end % 提数字部分 weekPart = regexprep(weekPart, '[^0-9]', '', 'once'); dayPart = regexprep(dayPart, '[^0-9]', '', 'once'); week = str2double(weekPart); day = str2double(dayPart); if ~isnan(week) && ~isnan(day) gaValues(i) = week + day / 7; end % 只包含"周"的格式 elseif contains(str, '周') weekPart = regexprep(extractBefore(str, '周'), '[^0-9.]', '', 'once'); gaValues(i) = str2double(weekPart); % 只包含"天"的格式 elseif contains(str, '天') dayPart = regexprep(str, '[^0-9]', '', 'once'); days = str2double(dayPart); gaValues(i) = days / 7; % 纯数字格式 else gaValues(i) = str2double(str); end end % 添加孕周列并验证范围 data.GA = gaValues; validGA = (data.GA >= 10 & data.GA <= 25) & ~isnan(data.GA); data = data(validGA, :); disp(['有效孕周数据:', num2str(height(data))]); % 3. 按孕妇分组数据 [uniqueIDs, ~, grpID] = unique(data.pID); groupedData = cell(numel(uniqueIDs), 1); for i = 1:numel(uniqueIDs) idx = (grpID == i); groupedData{i} = sortrows(data(idx, :), 'test_count'); end % 4. 计算最早达标时间 threshold = 0.04; % Y染色体浓度阈值 earlyT = nan(numel(uniqueIDs), 1); bmiVals = nan(numel(uniqueIDs), 1); for i = 1:numel(uniqueIDs) patientData = groupedData{i}; bmiVals(i) = patientData.BMI(1); % 存储BMI值 aboveIdx = find(patientData.Y_concentration >= threshold, 1); if ~isempty(aboveIdx) earlyT(i) = patientData.GA(aboveIdx); else earlyT(i) = Inf; % 未达标 end end % 5. 提有效数据并排序 validIdx = ~isnan(bmiVals) & ~isinf(earlyT); sortedBMI = bmiVals(validIdx); sortedT = earlyT(validIdx); [sortedBMI, sortIdx] = sort(sortedBMI); sortedT = sortedT(sortIdx); n = numel(sortedBMI); % 6. 肘部法则确定分组数 maxM = min(5, n); sse = zeros(maxM, 1); for m = 1:maxM % 等间距分组 edges = linspace(min(sortedBMI), max(sortedBMI), m+1); groups = discretize(sortedBMI, edges); for j = 1:m groupIdx = (groups == j); if any(groupIdx) groupMean = mean(sortedT(groupIdx), 'omitnan'); sse(m) = sse(m) + sum((sortedT(groupIdx) - groupMean).^2, 'omitnan'); end end end % 计算肘点 if maxM > 1 diffs = diff(sse); relDiffs = diffs(1:end-1) ./ diffs(2:end); [~, bestM] = max(relDiffs); bestM = bestM + 1; % 补偿索引 bestM = min(bestM, maxM); else bestM = 1; end % 7. 最终分组 edges = linspace(min(sortedBMI), max(sortedBMI), bestM + 1); groups = discretize(sortedBMI, edges); % 8. 计算每组最佳时间和风险 bestGA = zeros(bestM, 1); groupRisk = zeros(bestM, 1); candidateGA = linspace(10, 25, 101); % 10-25周之间评估 for i = 1:bestM groupIdx = (groups == i); groupT = sortedT(groupIdx); validT = groupT(~isinf(groupT)); if isempty(validT) bestGA(i) = NaN; groupRisk(i) = 0; continue; end % 计算每个候选时间的风险 risks = arrayfun(@(ga) sum(validT > ga), candidateGA); [minRisk, idx] = min(risks); bestGA(i) = candidateGA(idx); groupRisk(i) = minRisk; end totalRisk = sum(groupRisk); % 9. 输出结果 fprintf('\n===== BMI分组区间 =====\n'); for i = 1:bestM if i < bestM fprintf('组%d: [%.2f, %.2f)\n', i, edges(i), edges(i+1)); else fprintf('组%d: [%.2f, %.2f]\n', i, edges(i), edges(i+1)); end end fprintf('\n===== 最佳NIPT时点 =====\n'); for i = 1:bestM fprintf('组%d: %.2f周\n', i, bestGA(i)); end fprintf('\n===== 潜在风险值 =====\n'); for i = 1:bestM fprintf('组%d: %.0f\n', i, groupRisk(i)); end fprintf('总风险值: %.0f\n', totalRisk); 原始数据行数:1082 清洗后行数:1082 有效孕周数据:1069 ===== BMI分组区间 ===== 组1: [20.70, 32.84) 组2: [32.84, 44.98] ===== 最佳NIPT时点 ===== 组1: 23.65周 组2: 24.10周 ===== 潜在风险值 ===== 组1: 0 组2: 0 总风险值: 0 >> 这代码运行出来的结果有点扯,给我修改一下代码
09-08
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值