Optimizing hyperparameters with hyperopt

I sometimes use the hyperopt library when I need to tune hyperparameters. It works well and can give you a noticeably better result. Below is a simple example of how to use it for a multiclass classification problem with XGBoost; a toy sketch of the basic fmin API comes first, right after the list.

  • Input data: X, y
  • Output: best (the best parameters found)
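
hyperopt's fmin minimizes an objective function over a search space. To see the shape of the API before the full example, here is a minimal sketch on a toy quadratic (unrelated to the classifier code below):

from hyperopt import fmin, tpe, hp

# minimize (x - 3)^2 over x in [-5, 5]; TPE proposes new points based on past trials
best = fmin(fn=lambda x: (x - 3) ** 2,
            space=hp.uniform('x', -5, 5),
            algo=tpe.suggest,
            max_evals=50)
print(best)  # something close to {'x': 3.0}

The real objective below does the same thing, except the function being minimized is 1 minus the cross-validated accuracy of an XGBoost model.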
import time

import numpy as np
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

def objective(params):
    # hp.quniform returns floats, so cast the integer-valued parameter back
    params['max_depth'] = int(params['max_depth'])
    # fixed (non-searched) parameters
    params['objective'] = 'multi:softprob'
    params['eval_metric'] = 'merror'
    params['booster'] = 'gbtree'
    params['num_class'] = 5
    params['silent'] = 1  # removed in XGBoost >= 1.0; use 'verbosity': 0 there
    params['nthread'] = 10
    params['seed'] = 2017
    
    # X and y are expected to be defined at module level (the input data above)
    RANDOM_STATE = 2
    n_folds = 18
    scores = []

    kf = StratifiedKFold(n_folds, shuffle=True, random_state=RANDOM_STATE)
    print('..........................')

    for train_index, test_index in kf.split(X, y):
        X_train, X_val = X[train_index], X[test_index]
        y_train, y_val = y[train_index], y[test_index]

        xgtrain = xgb.DMatrix(X_train, label=y_train)
        xgval = xgb.DMatrix(X_val, label=y_val)

        watchlist = [(xgtrain, 'train'), (xgval, 'eval')]

        # a huge num_boost_round plus early stopping lets every parameter set
        # find its own best number of trees
        model = xgb.train(params, xgtrain, 60000, watchlist,
                          early_stopping_rounds=100, verbose_eval=False)

        # multi:softprob yields one probability per class; take the argmax
        # (on XGBoost >= 1.6 use iteration_range instead of ntree_limit)
        probs = model.predict(xgval, ntree_limit=model.best_ntree_limit)
        preds = np.argmax(probs, axis=1)
        scores.append(accuracy_score(y_val, preds))
        print('Xgboost', scores[-1])
    
    score = np.mean(scores)

    print("############### Score: {0}".format(score))
    print("############### Prms: ", params)
    print('..........................')

    # fmin minimizes, so report 1 - accuracy as the loss
    return {
        'loss': 1 - score,
        'status': STATUS_OK,
        'eval_time': time.time(),
    }

# Search space: keep the label passed to hp.quniform identical to the dict key
# (the original 'eta' label would make fmin return the best value under 'eta'
# while the objective reads 'learning_rate')
xgb_space = {
    'learning_rate': hp.quniform('learning_rate', 0.005, 0.05, 0.005),
    'max_depth': hp.quniform('max_depth', 3, 14, 1),
    'min_child_weight': hp.quniform('min_child_weight', 1, 10, 1),
    'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
    'gamma': hp.quniform('gamma', 0.5, 1, 0.01),
    'colsample_bytree': hp.quniform('colsample_bytree', 0.4, 1, 0.05),
}

trials = Trials()
best = fmin(objective,
            space=xgb_space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print(best)
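
One caveat: fmin returns the raw sampled values (hp.quniform yields floats), so cast them again before training a final model. hyperopt's space_eval helper maps the returned dict back through the search space; a short sketch:

from hyperopt import space_eval

# map the raw labels back through the space, then cast the integer parameter
best_params = space_eval(xgb_space, best)
best_params['max_depth'] = int(best_params['max_depth'])
print(best_params)
# re-add the fixed parameters ('objective', 'num_class', ...) before retraining
# on the full data, picking num_boost_round from the early-stopped CV runs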

 
