# Source code for WORC.facade.helpers.processing

import pandas as pd
import os
from WORC.addexceptions import WORCKeyError

# Name prefixes of all standard texture feature families that are accepted.
texture_features = [
    'GLCM',
    'GLDZM',
    'GLRLM',
    'GLSZM',
    'NGLDM',
    'NGTDM',
]


def _worc_feature_label(label, prefix_groups):
    '''Return the WORC label for a single RadiomiX feature label.

    prefix_groups is an ordered iterable of (worc_prefix, radiomix_prefixes)
    pairs, where radiomix_prefixes is a tuple of strings. The first pair for
    which the label starts with one of the RadiomiX prefixes determines the
    WORC prefix. Raises WORCKeyError when no pair matches.
    '''
    for worc_prefix, radiomix_prefixes in prefix_groups:
        # str.startswith accepts a tuple: True if any of the prefixes match
        if label.startswith(radiomix_prefixes):
            return worc_prefix + 'RadiomiX_' + label

    raise WORCKeyError(f'Unknown feature {label}.')


def convert_radiomix_features(input_file, output_folder):
    '''Convert .xlsx from RadiomiX to WORC compatible .hdf5 format.

    The sheet is read with pandas.read_excel. The column at index 4 is used
    as patient ID, the column at index 5 as segmentation identifier, and all
    columns from index 10 onwards as feature values, with the column names
    as feature labels. Each label is renamed to
    '<group>_RadiomiX_<original label>', where <group> depends on the prefix
    of the original label (e.g. 'tf' for texture, 'hf' for histogram).

    For every row, the file
    '<output_folder>/<patient ID><segmentation>/features.hdf5' is written,
    with spaces and parentheses in the folder name replaced by underscores.
    It contains a pandas Series named 'Image features' with the entries
    'feature_values' and 'feature_labels', stored under the key
    'image_features'.

    Input:
    --------------
    input_file: .xlsx in which the features are stored.
    output_folder: folder in which the converted features are stored;
                   created (including parent folders) if it does not exist.

    Raises:
    --------------
    WORCKeyError: if a feature label does not start with a known prefix.
    '''
    print('Converting .xlsx from RadiomiX to WORC compatible .hdf5 format...')

    # Create the output folder if needed. makedirs also creates missing
    # parents and does not fail when the folder already exists.
    os.makedirs(output_folder, exist_ok=True)

    # Read the input file and extract relevant fields
    f = pd.read_excel(input_file)
    values = f.values
    pids = values[:, 4]
    segs = values[:, 5]
    features = values[:, 10:]

    # WORC prefix per feature group, tested in this order
    prefix_groups = (
        ('tf_', tuple(texture_features)),   # Texture features
        ('hf_', ('IH_', 'Stats_')),         # Histogram features
        ('sf_', ('Shape_',)),               # Shape features
        ('logf_', ('LoG_',)),               # LoG features
        ('fracf_', ('Fractal_',)),          # Fractal features
        ('locf_', ('LocInt_',)),            # Location features
        ('rgrdf_', ('RGRD_',)),             # RGRD features
        ('waveletf_', ('Wavelet_',)),       # Wavelet features
    )

    # Read the feature labels, and rename them according to the group they
    # belong to
    feature_labels = [_worc_feature_label(label, prefix_groups)
                      for label in f.keys()[10:]]

    # Initiate labels for pandas file
    panda_labels = ['feature_values', 'feature_labels']

    # Symbols that are replaced by underscores in the folder names
    invalid_symbols = str.maketrans({' ': '_', '(': '_', ')': '_'})

    # For each patient, convert features
    for pid, seg, patient_features in zip(pids, segs, features):
        feature_values = patient_features.tolist()

        # Make an output folder per patient, remove invalid symbols.
        # str() guards against IDs that pandas parsed as numbers, which
        # would otherwise be added up or raise a TypeError.
        patient_name = (str(pid) + str(seg)).translate(invalid_symbols)
        patient_folder = os.path.join(output_folder, patient_name)
        os.makedirs(patient_folder, exist_ok=True)

        output = os.path.join(patient_folder, 'features.hdf5')
        print(f'\t Writing {output}')

        # Convert to pandas Series and save as hdf5
        panda_data = pd.Series([feature_values, feature_labels],
                               index=panda_labels,
                               name='Image features')

        # Save the features to the .hdf5 file. The key is passed by keyword,
        # as newer pandas versions deprecate passing it positionally.
        print('\t Saving image features')
        panda_data.to_hdf(output, key='image_features')