# Benchmark Functions
import numpy as np
from sklearn.metrics import accuracy_score, r2_score, classification_report
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
#from autosklearn.classification import AutoSklearnClassifier
#from autosklearn.regression import AutoSklearnRegressor
# TODO:
# - Train error bars
# - Test error bars
# - Score can take list of metrics, list of models
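# --- Assumed data interface (inferred from how `score` uses its arguments
# below; this sketch is illustrative and not part of the original module):
# `data` and `test` objects must expose `.indexes` (available keys),
# `.get_data(key)` returning a pandas DataFrame, and `.get_task()` returning
# 'classification' or 'regression'.
from typing import Protocol
import pandas as pd

class DatasetLike(Protocol):
    """Hypothetical sketch of the dataset interface used by score()."""
    indexes: list
    def get_data(self, key: str) -> pd.DataFrame: ...
    def get_task(self) -> str: ...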
def score_error_bars(data, n=10, model=None, metric=None, method='baseline', fit=True, test=None, verbose=False):
    """ Run the score method several times to compute error bars.

    The parameters are the same as for the score method.

    TODO: optimize computation.
    TODO: cross-validation.

    :param n: Number of repetitions.
    :return: Mean and variance of the n scores.
    """
    scores = []
    for _ in range(n):
        scores.append(score(data, model=model, metric=metric, method=method, fit=fit, test=test, verbose=verbose))
    return np.mean(scores), np.var(scores)
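# Illustrative call (assuming `ds` is a dataset with a class column set and a
# train/test split already defined; the name `ds` is hypothetical):
#
#     mean, var = score_error_bars(ds, n=10)
#     print('score = {:.3f} +/- {:.3f}'.format(mean, np.sqrt(var)))
#
# Note: with fit=True each repetition refits the model, so the spread mostly
# reflects training randomness (e.g. random forest bootstrapping), not
# cross-validation variance (see the TODO above).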
def score(data, model=None, metric=None, method='baseline', fit=True, test=None, average='weighted', verbose=False):
    """ Benchmark, a.k.a. utility.

    Return the metric score of a model trained and tested on data.
    If a test set is given (test parameter), the model is trained on data and tested on test;
    otherwise, data must already contain a train/test split.

    :param data: Dataset to train and test on.
    :param model: Model to fit and test on data. If None, a model is selected according to method.
    :param metric: Scoring function. If None, defaults to accuracy (classification) or R2 (regression).
    :param method: 'baseline' or 'auto'. Used only if model is None.
    :param fit: If True, fit the model before scoring.
    :param test: Optional dataset to use as the test set.
    :param average: Averaging strategy passed to multi-class metrics (e.g. 'weighted').
    :param verbose: If True, print the model, the classification report and the metric function.
    :rtype: float
    :return: Metric score of the model trained and tested on data.
    """
    if 'y' not in data.indexes:
        raise ValueError('No class defined. Please use the set_class method before calling score.')
    if model is None:
        # Select a default model according to the requested method.
        if method == 'baseline':
            clf = RandomForestClassifier()
            reg = RandomForestRegressor()
        elif method in ['auto', 'autosklearn', 'automl', 'automatic']:
            raise NotImplementedError('autosklearn was removed from the requirements, so this method is currently not implemented.')
            #clf = AutoSklearnClassifier()
            #reg = AutoSklearnRegressor() # multi-output?
        else:
            raise ValueError('Unknown method: {}'.format(method))
        # Select the model and default metric matching the task.
        task = data.get_task()
        if task == 'classification':
            model = clf
            if metric is None:
                metric = accuracy_score
        elif task == 'regression':
            model = reg
            if metric is None:
                metric = r2_score
        else:
            raise ValueError('Unknown task: {}.'.format(task))
    if test is not None:
        # An explicit test set is given: train on data, test on test.
        X_train = data.get_data('X')
        y_train = data.get_data('y')
        X_test = test.get_data('X')
        y_test = test.get_data('y')
    else:
        if 'test' not in data.indexes:
            raise ValueError('No train/test split. Please use the train_test_split method before calling score.')
        X_train = data.get_data('X_train')
        y_train = data.get_data('y_train')
        X_test = data.get_data('X_test')
        y_test = data.get_data('y_test')
    # Mono-label target: flatten to the 1D arrays expected by sklearn.
    if y_train.shape[1] == 1:
        y_train = y_train.values.ravel()
        y_test = y_test.values.ravel()
    if fit:
        model.fit(X_train, y_train)
    # Fallback strategy: try soft (probability) predictions first, then hard
    # labels; for each, try the richest metric signature first.
    try:
        y_pred = model.predict_proba(X_test) # SOFT predictions
        try:
            score = metric(y_test, y_pred, average=average, multi_class='ovo')
        except (TypeError, ValueError): # metric rejects these arguments
            try:
                score = metric(y_test, y_pred, average=average)
            except (TypeError, ValueError):
                score = metric(y_test, y_pred)
    except (AttributeError, TypeError, ValueError):
        # The model has no predict_proba (e.g. regressors), or the metric
        # rejects probabilities: fall back to HARD (label) predictions.
        y_pred = model.predict(X_test)
        try:
            score = metric(y_test, y_pred, average=average, multi_class='ovo')
        except (TypeError, ValueError):
            try:
                score = metric(y_test, y_pred, average=average)
            except (TypeError, ValueError):
                try:
                    score = metric(y_test, y_pred)
                except (TypeError, ValueError):
                    # Some metrics need the label set made explicit.
                    labels = np.unique(y_pred)
                    score = metric(y_test, y_pred, labels=labels)
    if verbose:
        print(model)
        if np.ndim(y_pred) == 1: # classification_report needs hard labels
            print(classification_report(y_test, y_pred))
        print('Metric: {}'.format(metric))
    return score
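# --- Illustrative smoke test. Assumption: `_ToyData` is a hypothetical
# stand-in implementing just the interface used above; it is not part of the
# original module.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    class _ToyData:
        """Minimal stand-in dataset exposing the interface used by score()."""
        def __init__(self):
            X, y = load_iris(return_X_y=True, as_frame=True)
            X_tr, X_te, y_tr, y_te = train_test_split(X, y.to_frame('y'), random_state=0)
            self._parts = {'X_train': X_tr, 'X_test': X_te, 'y_train': y_tr, 'y_test': y_te}
            self.indexes = ['X', 'y', 'test'] # 'y' and 'test' flags checked by score()

        def get_data(self, key):
            return self._parts[key]

        def get_task(self):
            return 'classification'

    # One score, then error bars over a few refits.
    print('accuracy: {:.3f}'.format(score(_ToyData())))
    mean, var = score_error_bars(_ToyData(), n=5)
    print('accuracy: {:.3f} +/- {:.3f}'.format(mean, np.sqrt(var)))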