Source code for autopandas.utils.benchmark

# Benchmark Functions

import numpy as np
from sklearn.metrics import accuracy_score, r2_score, classification_report
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
#from autosklearn.classification import AutoSklearnClassifier
#from autosklearn.regression import AutoSklearnRegressor

# TODO:
# - Train error bars
# - Test error bars
# - Allow score to take a list of metrics and a list of models

def score_error_bars(data, n=10, model=None, metric=None, method='baseline', fit=True, test=None, verbose=False):
    """ Run the score function several times to compute error bars.

        The other parameters are the same as for the score function.
        TODO: optimize computation.
        TODO: cross-validation.

        :param n: Number of runs.
        :return: Mean and variance of the scores.
    """
    scores = []
    for _ in range(n):
        scores.append(score(data, model=model, metric=metric, method=method,
                            fit=fit, test=test, verbose=verbose))
    mean = np.mean(scores)
    var = np.var(scores)
    return mean, var
def score(data, model=None, metric=None, method='baseline', fit=True, test=None, average='weighted', verbose=False):
    """ Benchmark, a.k.a. utility: return the metric score of a model trained and tested on data.

        If a test set is given (test parameter), the model is trained on data and tested on test.

        :param model: Model to fit and test on data.
        :param metric: Scoring function.
        :param method: 'baseline' or 'auto'. Used only if model is None.
        :param fit: If True, fit the model before predicting.
        :param test: Optional DataFrame to use as the test set.
        :param average: Averaging method for the multi-class One-vs-One metric scheme.
        :param verbose: If True, print the model, the classification report and the metric function.
        :rtype: float
        :return: Metric score of the model trained and tested on data.
    """
    if 'y' not in data.indexes:
        raise Exception('No class defined. Please use set_class method before calling score.')
    if model is None:
        # Select the model
        if method == 'baseline':
            clf = RandomForestClassifier()
            reg = RandomForestRegressor()
        elif method in ['auto', 'autosklearn', 'automl', 'automatic']:
            raise Exception('autosklearn was removed from the requirements. This method is currently not implemented.')
            #clf = AutoSklearnClassifier()
            #reg = AutoSklearnRegressor() # multi-output?
        else:
            raise Exception('Unknown method: {}'.format(method))
        # Select the task and the default metric
        task = data.get_task()
        if task == 'classification':
            model = clf
            if metric is None:
                metric = accuracy_score
        elif task == 'regression':
            model = reg
            if metric is None:
                metric = r2_score
        else:
            raise Exception('Unknown task: {}.'.format(task))
    if test is not None: # a test set is defined
        X_train = data.get_data('X')
        y_train = data.get_data('y')
        X_test = test.get_data('X')
        y_test = test.get_data('y')
    else:
        if 'test' not in data.indexes:
            raise Exception('No train/test split. Please use train_test_split method before calling score.')
        else:
            X_train = data.get_data('X_train')
            y_train = data.get_data('y_train')
            X_test = data.get_data('X_test')
            y_test = data.get_data('y_test')
    # Mono-class target: flatten to 1-D arrays
    if y_train.shape[1] == 1:
        y_train = y_train.values.ravel()
        y_test = y_test.values.ravel()
    if fit:
        model.fit(X_train, y_train) # Let's go!
    # Try soft predictions first, then hard predictions, attempting the most
    # specific metric signature before the simpler ones.
    ### /!\ TODO: CLEAN CODE BELOW /!\ ###
    try:
        y_pred = model.predict_proba(X_test) # SOFT
        try:
            score = metric(y_test, y_pred, average=average, multi_class='ovo') #labels=np.unique(y_pred))
        except Exception:
            try:
                score = metric(y_test, y_pred, average=average)
            except Exception:
                score = metric(y_test, y_pred)
    except Exception:
        y_pred = model.predict(X_test) # HARD
        try:
            score = metric(y_test, y_pred, average=average, multi_class='ovo')
        except Exception:
            try:
                score = metric(y_test, y_pred, average=average)
            except Exception:
                try:
                    score = metric(y_test, y_pred)
                except Exception:
                    labels = np.unique(y_pred)
                    score = metric(y_test, y_pred, labels=labels)
    if verbose:
        print(model)
        print(classification_report(y_test, y_pred))
        print('Metric: {}'.format(metric))
    return score
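
# ----------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes the
# AutoData wrapper exposed by the autopandas package, together with the
# set_class and train_test_split methods referenced in the error messages
# above; the import path, constructor and call signatures below are
# assumptions, not the documented API.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from autopandas import AutoData  # assumed import path

    frame = load_iris(as_frame=True).frame  # feature columns + 'target' column
    data = AutoData(frame)                  # assumed constructor
    data.set_class('target')                # defines the 'y' index checked by score()
    data.train_test_split()                 # creates the split checked by score()

    # Baseline RandomForestClassifier with accuracy_score by default.
    print('Accuracy: {:.3f}'.format(score(data)))
    mean, var = score_error_bars(data, n=5)
    print('Mean: {:.3f}  Variance: {:.5f}'.format(mean, var))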