import pandas as pd
import time
import os
from collections import Counter
import numpy as np
import cv2
import SimpleITK
from tqdm import tqdm
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
import matplotlib.pyplot as plt
def getFiles(path):
    """Return the full path of every file found under ``path``.

    The directory tree is walked recursively with ``os.walk``; each file
    name is joined to the directory that contains it.  A path that does not
    exist or contains no files yields an empty list.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(path)
        for entry in entries
    ]
# Root directory of the dataset.  DICOM series folders and the mask paths
# listed in the CSV are resolved relative to this directory.
root_path = '/home/tione/notebook/taop-2021/100003/'

# Index of the training data.  The CSV location is derived from root_path so
# the two cannot drift apart.  Columns loaded:
#   id_patient - grouping key used to count rows per patient
#   id_series  - series identifier used to derive the DICOM folder names
#   id_area    - loaded but not used elsewhere in this script
#   mask_path  - mask file path, appended to root_path
#   class(...) - integer class label, used as the COCO category id
train_files = pd.read_csv(
    os.path.join(root_path, 'train2_data_info.csv'),
    usecols=[
        'id_patient',
        'id_series',
        'id_area',
        'mask_path',
        'class(1:LBSA;2:EA;3:NA;4:CA)',
    ],
)
# COCO "info" section shared by the generated annotation files.
# date_created is a naive UTC timestamp formatted as "YYYY-MM-DD HH:MM:SS[.ffffff]".
# datetime.utcnow() is deprecated since Python 3.12, so the timestamp is taken
# from an aware UTC "now" and the tzinfo is dropped to keep the same format.
INFO = {
    "description": "Tumor Dataset",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2017,
    "contributor": "xiao",
    "date_created": datetime.datetime.now(datetime.timezone.utc)
    .replace(tzinfo=None)
    .isoformat(' '),
}
# COCO "licenses" section: a single entry with id 1.
_BY_NC_SA_LICENSE = dict(
    id=1,
    name="Attribution-NonCommercial-ShareAlike License",
    url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
)
LICENSES = [_BY_NC_SA_LICENSE]
# COCO categories for the per-class dataset.
# The annotation category id is taken verbatim from the CSV column
# 'class(1:LBSA;2:EA;3:NA;4:CA)', so the id -> name mapping here must follow
# that column header (previously ids 2/3/4 were labelled NA/CA/EA, which
# mislabelled every class except LBSA).
CATEGORIES2 = [
    {
        'id': 1,
        'name': 'LBSA',
        'supercategory': 'tumor',
    },
    {
        'id': 2,
        'name': 'EA',
        'supercategory': 'tumor',
    },
    {
        'id': 3,
        'name': 'NA',
        'supercategory': 'tumor',
    },
    {
        'id': 4,
        'name': 'CA',
        'supercategory': 'tumor',
    },
]
# Categories for the step-1 dataset.  The script tags every step-1 annotation
# with category id 1 (the union of all tumour areas on a slice), so a single
# category is declared.  CATEGORIES1 was referenced but never defined.
# NOTE(review): the display name 'tumor' is a placeholder choice - confirm.
CATEGORIES1 = [
    {
        'id': 1,
        'name': 'tumor',
        'supercategory': 'tumor',
    },
]

# Step-1 COCO document: one merged foreground annotation per slice.
coco_output1 = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES1,
    "images": [],
    "annotations": []
}

# Step-2 COCO document: one annotation per tumour area, labelled with its
# class.  The script appends to and dumps coco_output2, but it was never
# defined; it gets its own independent "images"/"annotations" lists.
coco_output2 = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES2,
    "images": [],
    "annotations": []
}
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python types."""

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        NumPy arrays become nested lists, NumPy integers become ``int`` and
        NumPy floats become ``float``.  Anything else is delegated to the
        base encoder, which raises ``TypeError`` for unsupported objects.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)
# ---------------------------------------------------------------------------
# Conversion script: DICOM slices + mask volumes -> PNG images + COCO JSON.
#
# For every patient the FLAIR, CET1 and T2 slices with the same file name are
# normalised and stacked into a 3-channel image.  Two COCO documents are
# filled in parallel:
#   coco_output1 - one annotation per slice covering the union of all masks
#                  (category id 1);
#   coco_output2 - one annotation per tumour area, labelled with the class id
#                  read from the CSV.
# Pixels outside the merged mask are zeroed before the image is written.
#
# Rows of train_files are consumed positionally: a patient owns c[i]
# consecutive rows and every tumour area accounts for three of them
# (presumably one per modality - confirm against the CSV).
# ---------------------------------------------------------------------------

# Patient whose rows are laid out differently in the CSV; the offsets below
# replace the regular "3 * area index" rule for that patient only.
_IRREGULAR_PATIENT = '0078_Flair'
_IRREGULAR_MASK_ROWS = {0: 4, 1: 5, 2: 6, 3: 9, 4: 12}


def _flair_dir(series_id):
    """Build the FLAIR folder name: '<patient>_<Tag>' with the tag capitalised."""
    parts = series_id.split('_')
    return parts[0] + '_' + parts[1].capitalize()


def _cet1_dir(series_id):
    """Build the CET1 folder name; 't1ce'/'T1CE' series map to '<patient>_CET1'."""
    parts = series_id.split('_')
    if parts[1] in ('t1ce', 'T1CE'):
        return parts[0] + '_CET1'
    return series_id


def _t2_dir(series_id):
    """Build the T2 folder name: '<patient>_<TAG>' with the tag upper-cased."""
    parts = series_id.split('_')
    return parts[0] + '_' + parts[1].upper()


def _read_slice(dicom_file):
    """Read one DICOM file and return its pixels as a 2-D array.

    The array returned by SimpleITK is reshaped to its last two axes, so the
    leading axis must have length 1.
    """
    volume = SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(dicom_file))
    return np.reshape(volume, (volume.shape[1], volume.shape[2]))


def _normalize_slice(pixels):
    """Z-score a slice, then min-max rescale it to [0, 255] as float32.

    A constant slice has zero standard deviation; dividing by it would fill
    the image with NaN, so such a slice is returned as all zeros instead.
    """
    spread = np.std(pixels)
    if spread == 0:
        return np.zeros(pixels.shape, dtype=np.float32)
    scored = (pixels - np.mean(pixels)) / spread
    low, high = scored.min(), scored.max()
    return (((scored - low) / (high - low)) * 255).astype(np.float32)


c = Counter(train_files['id_patient'])
idx = 0                 # first CSV row of the current patient
image_id = 1
segmentation_id1 = 1    # annotation id counter for coco_output1
segmentation_id2 = 1    # annotation id counter for coco_output2

# Output folder for the masked PNG images; created once up front.
image_path = '/home/tione/notebook/img-2021-11-7-xhliu-step2/'
os.makedirs(image_path, exist_ok=True)

for i in tqdm(c):
    idx_each_patient = idx
    nums_per_patient = c[i] // 3    # number of tumour areas for this patient

    Flair_path = _flair_dir(train_files.iloc[idx_each_patient]['id_series'])
    irregular = Flair_path == _IRREGULAR_PATIENT
    # Regular patients: CET1 on the 2nd row, T2 on the 3rd.  The irregular
    # patient keeps them on the 3rd and 5th rows.
    cet1_row, t2_row = (2, 4) if irregular else (1, 2)
    CET1_path = _cet1_dir(train_files.iloc[idx_each_patient + cet1_row]['id_series'])
    T2_path = _t2_dir(train_files.iloc[idx_each_patient + t2_row]['id_series'])

    patient_id = Flair_path.split('_')[0]
    dicoms_flair_path = root_path + patient_id + '/' + Flair_path
    dicoms_cet1_path = root_path + patient_id + '/' + CET1_path
    dicoms_t2_path = root_path + patient_id + '/' + T2_path

    # Load every mask volume of the patient once instead of re-reading the
    # same file from disk for each slice.
    area_masks = []
    for j in range(nums_per_patient):
        mask_row = j * 3
        if irregular:
            mask_row = _IRREGULAR_MASK_ROWS.get(j, mask_row)
        mask_path = root_path + train_files.iloc[idx_each_patient + mask_row]['mask_path']
        mask_volume = SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(mask_path))
        # NOTE(review): the class is read from row idx + j while the mask comes
        # from row idx + 3*j (or the irregular offset).  Harmless if all rows
        # of a patient share one class; otherwise this picks the wrong label -
        # confirm against the CSV before changing.
        class_id2 = train_files.iloc[idx_each_patient + j]['class(1:LBSA;2:EA;3:NA;4:CA)']
        area_masks.append((class_id2, mask_volume))

    for flair_dicom in getFiles(dicoms_flair_path):
        pic_name = os.path.split(flair_dicom)[1]
        # The first two slices are never exported.
        if pic_name in ('0.dcm', '1.dcm'):
            continue

        # The same file name is expected in the CET1 and T2 folders.
        flair_array = _normalize_slice(_read_slice(flair_dicom))
        cet1_array = _normalize_slice(_read_slice(dicoms_cet1_path + '/' + pic_name))
        t2_array = _normalize_slice(_read_slice(dicoms_t2_path + '/' + pic_name))
        stacked_img = np.stack((cet1_array, flair_array, t2_array), axis=-1)

        # The file stem is the slice number and indexes the mask volumes.
        slice_index = int(pic_name.split('.')[0])
        image_name = pic_name.split('.')[0] + '.png'
        file_name = patient_id + '_' + image_name

        h, w, _ = stacked_img.shape
        # create_image_info stores image_size[0] as width and [1] as height,
        # so pass (w, h) rather than the array's (rows, cols) shape.
        image_info = pycococreatortools.create_image_info(image_id, file_name, (w, h))
        coco_output1["images"].append(image_info)
        coco_output2["images"].append(image_info)

        out_mask = np.zeros((h, w))     # union of all area masks on this slice
        for class_id2, mask_volume in area_masks:
            mask_img = mask_volume[slice_index]
            binary_mask2 = np.asarray(mask_img).astype(np.uint8)
            # Plain int replaces the np.int alias removed in NumPy 1.24.
            binary_mask = np.asarray(mask_img).astype(int)
            out_mask[binary_mask == 1] = 1
            print(np.all(out_mask == binary_mask))   # debug trace kept from the original

            if not binary_mask2.any():
                continue    # this area does not appear on the slice

            if slice_index in (0, 1):
                print(str(slice_index))              # debug trace kept from the original
            category_info2 = {'id': class_id2, 'is_crowd': 0}
            annotation_info2 = pycococreatortools.create_annotation_info(
                segmentation_id2, image_id, category_info2, binary_mask2, tolerance=2)
            if annotation_info2 is not None:
                coco_output2["annotations"].append(annotation_info2)
            segmentation_id2 = segmentation_id2 + 1

        # Step-1 annotation: merged mask, always category 1.
        class_id1 = 1
        category_info1 = {'id': class_id1, 'is_crowd': 0}
        out_mask = out_mask.astype(np.uint8)
        if out_mask.any():
            if slice_index in (0, 1):
                print(str(slice_index))              # debug trace kept from the original
            annotation_info1 = pycococreatortools.create_annotation_info(
                segmentation_id1, image_id, category_info1, out_mask, tolerance=2)
            if annotation_info1 is not None:
                coco_output1["annotations"].append(annotation_info1)
            segmentation_id1 = segmentation_id1 + 1

        image_id = image_id + 1

        # Keep only the pixels inside the merged mask, then save the image.
        stacked_img[out_mask != 1] = 0
        cv2.imwrite(image_path + file_name, stacked_img)

    idx = idx + c[i]

with open('step1_xhliu.json', 'w') as output_json_file1:
    json.dump(coco_output1, output_json_file1, cls=NpEncoder)
with open('step2_xhliu.json', 'w') as output_json_file2:
    json.dump(coco_output2, output_json_file2, cls=NpEncoder)
print('it is ok!')