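"""Data-processing code for brain glioma MRI.

This script builds a tumor dataset from DICOM files containing the
contrast-enhanced T1 (CET1), FLAIR and T2 image series. It reads the DICOM
images, preprocesses them, combines the three series into one three-channel
image, and creates COCO-format annotation files that include the image
information and the segmentation annotations.

(The summary above was auto-generated on the source blog page by "C知道",
powered by DeepSeek-R1.)
"""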

import pandas as pd
import time
import os
from collections import Counter
import numpy as np
import cv2
import SimpleITK
from tqdm import tqdm
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
import matplotlib.pyplot as plt


def getFiles(path):
    """Collect the full path of every file found beneath ``path``.

    The directory tree is traversed with ``os.walk``; every file name is
    joined with the directory it was found in.

    Args:
        path: Directory to scan.

    Returns:
        A list of file paths, in the order ``os.walk`` produces them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
    ]
   
# Dataset root; the CSV index, the DICOM folders and the mask files are all
# addressed relative to this directory.
root_path = '/home/tione/notebook/taop-2021/100003/'

# Training index: only the columns listed here are loaded.
train_files = pd.read_csv(
    root_path + 'train2_data_info.csv',
    usecols=[
        'id_patient',
        'id_series',
        'id_area',
        'mask_path',
        'class(1:LBSA;2:EA;3:NA;4:CA)',
    ],
)

# Dataset-level metadata shared by the "info" section of both COCO files.
INFO = {
    "description": "Tumor Dataset",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2017,
    "contributor": "xiao",
    # Naive UTC timestamp ("YYYY-MM-DD HH:MM:SS.ffffff"), produced without the
    # deprecated datetime.utcnow().
    "date_created": datetime.datetime.now(datetime.timezone.utc)
    .replace(tzinfo=None)
    .isoformat(' ')
}

LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
]

# Add or adapt categories as needed.

# Step 1: all lesions of a slice are merged into one mask and annotated with
# category id 1, so a single category is sufficient.
# NOTE(review): the original script referenced CATEGORIES1 without defining
# it; the name 'tumor' is an assumption -- rename if a different label is
# expected downstream.
CATEGORIES1 = [
    {
        'id': 1,
        'name': 'tumor',
        'supercategory': 'tumor',
    },
]

# Step 2: one category per tumor class. The ids are the raw values of the CSV
# column 'class(1:LBSA;2:EA;3:NA;4:CA)', so the names follow that mapping.
CATEGORIES2 = [
    {
        'id': 1,
        'name': 'LBSA',
        'supercategory': 'tumor',
    },
    {
        'id': 2,
        'name': 'EA',
        'supercategory': 'tumor',
    },
    {
        'id': 3,
        'name': 'NA',
        'supercategory': 'tumor',
    },
    {
        'id': 4,
        'name': 'CA',
        'supercategory': 'tumor',
    },
]

# COCO document for step 1 (single merged tumor mask per image).
coco_output1 = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES1,
    "images": [],
    "annotations": []
}

# COCO document for step 2 (one annotation per mask, with its tumor class).
# It gets its own "images" / "annotations" lists so the two outputs never
# share mutable state.
coco_output2 = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES2,
    "images": [],
    "annotations": []
}

class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    NumPy integers are emitted as ``int``, NumPy floats as ``float`` and
    ``ndarray`` objects as (nested) lists. Any other object is passed on to
    ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

def _read_slice(dicom_path):
    """Read one DICOM file and return its pixel data as a 2-D array."""
    volume = SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(dicom_path))
    # Reshaping to the last two axes only succeeds when the leading axis has
    # length 1, i.e. the file holds a single slice.
    return np.reshape(volume, (volume.shape[1], volume.shape[2]))


def _load_volume(volume_path):
    """Read an image file with SimpleITK and return it as a NumPy array."""
    return SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(volume_path))


def _normalize_to_255(pixels):
    """Z-score ``pixels``, then min-max rescale to [0, 255] as float32.

    A slice without contrast (all pixels equal) would divide by zero and
    produce NaNs; it is mapped to an all-zero image instead.
    """
    spread = np.std(pixels)
    if spread == 0:
        return np.zeros(pixels.shape, dtype=np.float32)
    scaled = (pixels - np.mean(pixels)) / spread
    scaled = ((scaled - scaled.min()) / (scaled.max() - scaled.min())) * 255
    return np.asarray(scaled).astype(np.float32)


# Number of CSV rows belonging to each patient id.
c = Counter(train_files['id_patient'])

# Offset of the current patient's first row inside train_files.
# NOTE(review): this assumes that all rows of one patient are contiguous in
# the CSV and that patients appear in the order the Counter first saw them --
# confirm against the data.
idx = 0

image_id = 1
segmentation_id1 = 1
segmentation_id2 = 1

class_column = 'class(1:LBSA;2:EA;3:NA;4:CA)'

# Output folder for the three-channel PNG images.
image_path = '/home/tione/notebook/img-2021-11-7-xhliu-step2/'
os.makedirs(image_path, exist_ok=True)

# Patient 0078 does not follow the regular row layout: its CET1 / T2 series
# and its masks sit at the hard-coded row offsets below.
special_patient = '0078_Flair'
special_mask_rows = {0: 4, 1: 5, 2: 6, 3: 9, 4: 12}

# Original author's notes: every patient has the same number of T1, T2 and
# FLAIR slices (20 each), 321 x 20 = 6420 images in total; maybe the images of
# one patient should later be tiled into one big image. A patient can have
# several masks, so more masks than images may be loaded.
for i in tqdm(c):
    idx_each_patient = idx

    # Number of masks of this patient (three CSV rows per mask).
    nums_per_patient = c[i] // 3

    # Series folder names, e.g. '0001_Flair', '0001_CET1', '0001_T2'.
    Flair_path = train_files.iloc[idx_each_patient]['id_series']
    Flair_path = Flair_path.split('_')[0] + '_' + Flair_path.split('_')[1].capitalize()

    is_special = Flair_path == special_patient
    cet1_offset, t2_offset = (2, 4) if is_special else (1, 2)

    CET1_path = train_files.iloc[idx_each_patient + cet1_offset]['id_series']
    if CET1_path.split('_')[1] in ('t1ce', 'T1CE'):
        CET1_path = CET1_path.split('_')[0] + '_CET1'

    T2_path = train_files.iloc[idx_each_patient + t2_offset]['id_series']
    T2_path = T2_path.split('_')[0] + '_' + T2_path.split('_')[1].upper()

    # DICOM folders, e.g. <root>/0001/0001_Flair, <root>/0001/0001_CET1, <root>/0001/0001_T2.
    patient_id = Flair_path.split('_')[0]
    dicoms_flair_path = root_path + patient_id + '/' + Flair_path
    dicoms_cet1_path = root_path + patient_id + '/' + CET1_path
    dicoms_t2_path = root_path + patient_id + '/' + T2_path

    # (mask file, class id) for every mask of this patient. The class is read
    # from the same CSV row as the mask path so that both always describe the
    # same lesion.
    mask_specs = []
    for j in range(nums_per_patient):
        row_offset = j * 3
        if is_special:
            row_offset = special_mask_rows.get(j, row_offset)
        mask_row = train_files.iloc[idx_each_patient + row_offset]
        mask_specs.append((root_path + mask_row['mask_path'], mask_row[class_column]))

    # Mask volumes are loaded on first use and reused for every slice of the
    # patient instead of being re-read from disk per slice.
    mask_volumes = {}

    # Iterate over the FLAIR DICOM slices of this patient.
    flair_dicoms = getFiles(dicoms_flair_path)
    for flair_dicom in flair_dicoms:
        # e.g. '10.dcm'
        pic_name = os.path.split(flair_dicom)[1]
        # Slices 0 and 1 are deliberately skipped (the original script gives
        # no reason for this).
        if pic_name == '0.dcm' or pic_name == '1.dcm':
            continue
        cet1_dicom = dicoms_cet1_path + '/' + pic_name
        t2_dicom = dicoms_t2_path + '/' + pic_name

        flair_array = _normalize_to_255(_read_slice(flair_dicom))
        cet1_array = _normalize_to_255(_read_slice(cet1_dicom))
        t2_array = _normalize_to_255(_read_slice(t2_dicom))

        # Merge the three series into one three-channel image (CET1, FLAIR, T2).
        stacked_img = np.stack((cet1_array, flair_array, t2_array), axis=-1)
        h, w, _ = stacked_img.shape

        image_name = pic_name.split('.')[0] + '.png'
        image_file = patient_id + '_' + image_name

        # create_image_info expects the size as (width, height), whereas a
        # NumPy shape is (height, width).
        image_info = pycococreatortools.create_image_info(image_id, image_file, (w, h))
        coco_output1["images"].append(image_info)
        coco_output2["images"].append(image_info)

        # Union of all masks of this slice; used for the step-1 annotation and
        # for blanking the background of the written image.
        out_mask = np.zeros((h, w))

        for j, (mask_path, class_id2) in enumerate(mask_specs):
            if j not in mask_volumes:
                mask_volumes[j] = _load_volume(mask_path)

            # The numeric file name of the slice is its index in the mask volume.
            mask_img = mask_volumes[j][int(pic_name.split('.')[0])]

            category_info2 = {'id': class_id2, 'is_crowd': 0}
            binary_mask2 = np.asarray(mask_img).astype(np.uint8)
            # np.int was removed from NumPy; the builtin int is the same dtype.
            binary_mask = np.asarray(mask_img).astype(int)
            out_mask[binary_mask == 1] = 1

            if not binary_mask2.any():
                continue

            annotation_info2 = pycococreatortools.create_annotation_info(
                segmentation_id2, image_id, category_info2, binary_mask2, tolerance=2)
            if annotation_info2 is not None:
                coco_output2["annotations"].append(annotation_info2)
                segmentation_id2 = segmentation_id2 + 1

        # Step 1: a single "tumor" annotation built from the merged mask.
        class_id1 = 1
        category_info1 = {'id': class_id1, 'is_crowd': 0}

        out_mask = out_mask.astype(np.uint8)
        if out_mask.any():
            annotation_info1 = pycococreatortools.create_annotation_info(
                segmentation_id1, image_id, category_info1, out_mask, tolerance=2)
            if annotation_info1 is not None:
                coco_output1["annotations"].append(annotation_info1)
                segmentation_id1 = segmentation_id1 + 1
        image_id = image_id + 1

        # Turn everything outside the merged mask into background.
        stacked_img[out_mask != 1] = 0

        cv2.imwrite(image_path + image_file, stacked_img)

    idx = idx + c[i]

with open('step1_xhliu.json', 'w') as output_json_file1:
    json.dump(coco_output1, output_json_file1, cls=NpEncoder)
with open('step2_xhliu.json', 'w') as output_json_file2:
    json.dump(coco_output2, output_json_file2, cls=NpEncoder)
print('it is ok!')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

骨子带刺

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值