#!/usr/bin/env python
# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sklearn.svm import SVC
from sklearn.svm import SVR as SVMR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import SGDClassifier, ElasticNet, SGDRegressor
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.naive_bayes import GaussianNB, ComplementNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
import scipy
from WORC.classification.estimators import RankedSVM
from WORC.classification.AdvancedSampler import log_uniform, discrete_uniform
import WORC.addexceptions as ae
def construct_classifier(config):
    """Interface to create an (unfitted) classifier or regressor.

    Different estimators can be created using this common interface; the
    value of ``config['classifiers']`` selects which one is constructed.

    Parameters
    ----------
    config: dict, mandatory
        Contains the required config settings. See the Github Wiki for
        all available fields. ``config['max_iter']`` and
        ``config['classifiers']`` are always read; the remaining keys are
        only read by the branch of the selected estimator.
        NOTE: for ``'LR'`` with an ``'l1'`` or ``'elasticnet'`` penalty,
        ``config['LR_solver']`` is overwritten in place with ``'saga'``
        when it holds a different value.

    Returns
    -------
    Constructed classifier

    Raises
    ------
    WORC.addexceptions.WORCKeyError
        If ``config['classifiers']`` does not name a known estimator.
    """
    # NOTE: Function is not working anymore for regression: need
    # to move param grid creation to the create_param_grid function
    max_iter = config['max_iter']
    name = config['classifiers']

    if 'SVM' in name:
        # Support Vector Machine. This is a substring test, so any name
        # containing 'SVM' (e.g. 'RankedSVM') is routed to construct_SVM.
        classifier = construct_SVM(config)

    elif name == 'SVR':
        # Support Vector Regression
        classifier = construct_SVM(config, True)

    elif name == 'RF':
        # Random forest kernel
        classifier = RandomForestClassifier(
            verbose=0,
            class_weight='balanced',
            n_estimators=config['RFn_estimators'],
            min_samples_split=config['RFmin_samples_split'],
            max_depth=config['RFmax_depth'],
            random_state=config['random_seed'])

    elif name == 'RFR':
        # Random forest kernel regression
        classifier = RandomForestRegressor(
            verbose=0,
            n_estimators=config['RFn_estimators'],
            min_samples_split=config['RFmin_samples_split'],
            max_depth=config['RFmax_depth'],
            random_state=config['random_seed'])

    elif name == 'ElasticNet':
        # Elastic Net Regression
        classifier = ElasticNet(alpha=config['ElasticNet_alpha'],
                                l1_ratio=config['ElasticNet_l1_ratio'],
                                max_iter=max_iter,
                                random_state=config['random_seed'])

    elif name == 'Lasso':
        # LASSO Regression
        classifier = Lasso(max_iter=max_iter,
                           random_state=config['random_seed'])

    elif name == 'SGD':
        # Stochastic Gradient Descent classifier.
        # The iteration budget is passed as max_iter: the former n_iter
        # keyword is no longer accepted by scikit-learn's SGD estimators.
        classifier = SGDClassifier(max_iter=max_iter,
                                   alpha=config['SGD_alpha'],
                                   l1_ratio=config['SGD_l1_ratio'],
                                   loss=config['SGD_loss'],
                                   penalty=config['SGD_penalty'],
                                   random_state=config['random_seed'])

    elif name == 'SGDR':
        # Stochastic Gradient Descent regressor (max_iter: see SGD above)
        classifier = SGDRegressor(max_iter=max_iter,
                                  alpha=config['SGD_alpha'],
                                  l1_ratio=config['SGD_l1_ratio'],
                                  loss=config['SGD_loss'],
                                  penalty=config['SGD_penalty'],
                                  random_state=config['random_seed'])

    elif name == 'LR':
        # Logistic Regression
        if config['LRpenalty'] in ('elasticnet', 'l1'):
            # For these penalties the solver is forced to saga
            if config['LR_solver'] != 'saga':
                p = config['LRpenalty']
                print(f"[WORC Warning] {p} penalty requires saga "
                      f"solver, got {config['LR_solver']}. Changing solver.")
                config['LR_solver'] = 'saga'

        classifier = LogisticRegression(max_iter=max_iter,
                                        penalty=config['LRpenalty'],
                                        solver=config['LR_solver'],
                                        l1_ratio=config['LR_l1_ratio'],
                                        C=config['LRC'],
                                        random_state=config['random_seed'])

    elif name == 'GaussianNB':
        # Naive Bayes classifier using Gaussian distributions
        classifier = GaussianNB()

    elif name == 'ComplementNB':
        # Complement Naive Bayes classifier
        classifier = ComplementNB()

    elif name == 'LDA':
        # Linear Discriminant Analysis
        if config['LDA_solver'] == 'svd':
            # Shrinkage does not work with svd solver
            shrinkage = None
        else:
            shrinkage = config['LDA_shrinkage']

        classifier = LDA(solver=config['LDA_solver'],
                         shrinkage=shrinkage)

    elif name == 'QDA':
        # Quadratic Discriminant Analysis
        classifier = QDA(reg_param=config['QDA_reg_param'])

    else:
        message = ('Classifier {} unknown.').format(str(name))
        raise ae.WORCKeyError(message)

    return classifier
def construct_SVM(config, regression=False):
    """
    Constructs a SVM classifier, SVM regressor or ranked SVM

    Args:
        config (dict): Dictionary of the required config settings. The
            'classifiers' entry is always read. Unless it equals
            'RankedSVM', the entries 'max_iter', 'SVMKernel', 'SVMC',
            'SVMdegree', 'SVMcoef0' and 'SVMgamma' are read as well, plus
            'random_seed' when constructing a classifier.
        regression (bool): If True, construct a support vector regressor
            instead of a support vector classifier.

    Returns:
        Unfitted SVM/SVR estimator, or a RankedSVM when
        config['classifiers'] is 'RankedSVM'
    """
    # Check if we need to use a ranked SVM: it takes none of the settings
    # below, so return before building an estimator that would be discarded
    if config['classifiers'] == 'RankedSVM':
        return RankedSVM()

    hyperparameters = {
        'kernel': str(config['SVMKernel']),
        'C': config['SVMC'],
        'degree': config['SVMdegree'],
        'coef0': config['SVMcoef0'],
        'gamma': config['SVMgamma'],
        'max_iter': config['max_iter'],
    }

    if regression:
        # scikit-learn's SVR constructor has no random_state parameter, so
        # the seed is only passed to the classifier below
        return SVMR(**hyperparameters)

    return SVC(class_weight='balanced', probability=True,
               random_state=config['random_seed'], **hyperparameters)
def create_param_grid(config):
    """Build the parameter grid for the WORC classifiers.

    Only the ``'Classification'`` section of ``config`` is used. Each
    entry of the returned dict is either copied verbatim from that section
    or wrapped in a sampling distribution whose ``loc`` and ``scale`` are
    the first and second element of the configured value.
    """
    settings = config['Classification']
    uniform = scipy.stats.uniform

    # (field, distribution) pairs in the order they end up in the grid;
    # a distribution of None means the configured value is copied as-is.
    layout = (
        # Name of classifiers and maximum iterations
        ('classifiers', None),
        ('max_iter', None),
        # SVM parameters
        ('SVMKernel', None),
        ('SVMC', log_uniform),
        ('SVMdegree', uniform),
        ('SVMcoef0', uniform),
        ('SVMgamma', log_uniform),
        # RF parameters
        ('RFn_estimators', discrete_uniform),
        ('RFmin_samples_split', discrete_uniform),
        ('RFmax_depth', discrete_uniform),
        # Logistic Regression parameters
        ('LRpenalty', None),
        ('LR_solver', None),
        ('LR_l1_ratio', uniform),
        ('LRC', uniform),
        # LDA/QDA parameters
        ('LDA_solver', None),
        ('LDA_shrinkage', log_uniform),
        ('QDA_reg_param', log_uniform),
        # ElasticNet parameters
        ('ElasticNet_alpha', log_uniform),
        ('ElasticNet_l1_ratio', uniform),
        # SGD Regression parameters
        ('SGD_alpha', log_uniform),
        ('SGD_l1_ratio', uniform),
        ('SGD_loss', None),
        ('SGD_penalty', None),
        # Naive Bayes parameters
        ('CNB_alpha', uniform),
    )

    param_grid = {}
    for field, distribution in layout:
        value = settings[field]
        if distribution is None:
            param_grid[field] = value
        else:
            param_grid[field] = distribution(loc=value[0], scale=value[1])

    return param_grid