Chapter 12 Portfolio backtesting

In this section, we introduce the notation and framework that will be used when analyzing and comparing investment strategies. Portfolio backtesting is often conceived and perceived as a quest to find the best strategy, or at least a solidly profitable one. When carried out thoroughly, this possibly long endeavor may entice the layman to mistake a fluke for a robust policy. Two papers published back-to-back warn against the perils of data snooping, which is closely related to $p$-hacking. In both cases, the researcher tortures the data until the sought result is found.

Fabozzi and López de Prado (2018) acknowledge that only strategies that work make it to the public, while thousands (at least) have been tested. Picking the pleasing outlier (the only strategy that seemed to work) is likely to generate disappointment when switching to real trading. In a similar vein, R. Arnott, Harvey, and Markowitz (2019) provide a list of principles and safeguards that any analyst should follow to avoid common backtesting errors. The worst type is arguably false positives, whereby strategies are found (often by cherry-picking) to outperform in one very particular setting but are likely to fail in live implementation.

In addition to these recommendations on portfolio construction, R. Arnott et al. (2019) also warn against the hazards of blindly investing in smart beta products related to academic factors. Plainly, expectations should not be set too high, lest they lead to disappointment. Another takeaway from their article is that economic cycles have a strong impact on factor returns: correlations change quickly and drawdowns can be magnified in times of major downturns.

Backtesting is more complicated than it seems and it is easy to make small mistakes that lead to apparently good portfolio policies. This chapter lays out a rigorous approach to this exercise, discusses a few caveats, and proposes a lengthy example.

12.1 Setting the protocol

We consider a dataset with three dimensions: time $t=1,\dots,T$, assets $n=1,\dots,N$ and characteristics $k=1,\dots,K$. One of these attributes must be the price of asset $n$ at time $t$, which we will denote $p_{t,n}$. From that, the computation of the arithmetic return is straightforward ($r_{t,n}=p_{t,n}/p_{t-1,n}-1$) and so is any heuristic measure of profitability. For simplicity, we assume that time points are equidistant or uniform, i.e., that $t$ is the index of a trading day or of a month for example. If each point in time $t$ has data available for all assets, then this makes a dataset with $I=T\times N$ rows.

The dataset is first split into two: the out-of-sample period and the initial buffer period. The buffer period is required to train the models for the first portfolio composition. This period is determined by the size of the training sample. There are two options for this size: fixed (usually equal to 2 to 10 years) and expanding. In the first case, the training sample rolls over time, taking into account only the most recent data. In the second case, models are built on all of the available data, the size of which increases with time. This latter option can create problems because the first dates of the backtest rely on much less information than the last ones. Moreover, there is an ongoing debate on whether including the full history of returns and characteristics is advantageous or not. Proponents argue that it allows models to see many different market conditions. Opponents make the case that old data is by definition outdated, thus useless and possibly misleading because it will not reflect current or future short-term fluctuations.

Henceforth, we choose the rolling period option for the training sample, as depicted in Figure 12.1.

FIGURE 12.1: Backtesting with rolling windows. The training set of the first period is simply the buffer period.

Two crucial design choices are the rebalancing frequency and the horizon at which the label is computed. It is not obvious that they should be equal but their choice should make sense. It can seem right to train on a 12-month forward label (which captures longer trends) and invest monthly or quarterly. However, it seems odd to do the opposite and train on short-term movements (monthly) and invest at a long horizon.

These choices have a direct impact on how the backtest is carried out. If we note:

  1. $\Delta_s$ for the size (chronological depth) of the training sample, and
  2. $\Delta_l$ for the horizon at which the label is computed,

then the total depth of the data required to build the training sample should be $\Delta_s+\Delta_l$. Indeed, at any moment $t$, the training sample should stop at $t-\Delta_l$, so that the last point corresponds to a label that is calculated with data up to time $t$ only. This is highlighted in Figure 12.2 in the form of the red danger zone. We call it the red zone because any observation with a time index $s$ inside the interval $(t-\Delta_l,t]$ would engender a forward-looking bias: if a feature is indexed by $s \in (t-\Delta_l,t]$, then by definition the label covers the period $[s,s+\Delta_l]$ with $s+\Delta_l>t$. At time $t$, this would require knowledge of the future and is naturally not realistic.

FIGURE 12.2: The subtleties in rolling training samples.

12.2 Turning signals into portfolio weights

The predictive tools outlined in Chapters 5 to 11 are only meant to provide a signal that is expected to give some information on the future profitability of assets. There are many ways that this signal can be integrated in an investment decision (see Snow (2020) for ways to integrate ML tools into this task).

First and foremost, there are at least two steps in the portfolio construction process and the signal can be used at any of these stages. Relying on the signal for both steps puts a lot of emphasis on the predictions and should only be considered when the level of confidence in the forecasts is high.

The first step is selection. While a forecasting exercise can be carried out on a large number of assets, it is not compulsory to invest in all of them. In fact, for long-only portfolios, it makes sense to take advantage of the signal to exclude the assets that are likely to underperform in the future. Often, portfolio policies have fixed sizes that impose a constant number of assets. One heuristic way to exploit the signal is to select the assets with the most favorable predictions and to discard the others. This naive idea is often used in the asset pricing literature: portfolios are formed according to the quantiles of underlying characteristics, and a characteristic is deemed interesting if the corresponding sorted portfolios exhibit very different profitabilities (e.g., high average return for high quantiles versus low average return for low quantiles).

This is for instance an efficient way to test the relevance of the signal. If $Q$ portfolios $q=1,\dots,Q$ are formed according to the rankings of the assets with respect to the signal, then one would expect the out-of-sample performance of the portfolios to be monotonic in $q$. While a rigorous test of monotonicity would require accounting for all portfolios (see, e.g., Romano and Wolf (2013)), it is often assumed that the extreme portfolios suffice: if the difference in performance between portfolio 1 and portfolio $Q$ is substantial, then the signal is valuable. Whenever the investor is able to short assets, this difference amounts to the return of a dollar-neutral long-short strategy, as illustrated in the sketch below.
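As an illustration, here is a minimal sketch of such a sorting procedure, assuming a tibble `signals` with (hypothetical) columns date, stock_id, signal and fwd_return:

```r
library(dplyr)

Q <- 5                                          # Number of sorted portfolios
sorted_perf <- signals %>%
    group_by(date) %>%
    mutate(q = ntile(signal, Q)) %>%            # Quantile rank of the signal
    group_by(q) %>%
    summarise(avg_return = mean(fwd_return, na.rm = TRUE))
sorted_perf                                     # Ideally monotonic in q
```

If average returns increase with $q$, the signal carries information; the spread between the $q=Q$ and $q=1$ portfolios is the return of the corresponding dollar-neutral strategy.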

The second step is weighting. If the selection process relied on the signal, then a simple weighting scheme is often a good idea. Equally weighted portfolios are known to be hard to beat (see DeMiguel, Garlappi, and Uppal (2009)), especially compared to their cap-weighted alternative, as is shown in Plyakha, Uppal, and Vilkov (2016). More advanced schemes include equal risk contributions (Maillard, Roncalli, and Teiletche (2010)) and constrained minimum variance (Coqueret (2015)). Both only rely on the covariance matrix of the assets and not on any proxy for the vector of expected returns.

For the sake of completeness, we spell out a generalization of Coqueret (2015), which is a generic constrained quadratic program:

\begin{equation} \tag{12.1} \underset{\textbf{w}}{\text{min}} \ \frac{\lambda}{2} \textbf{w}'\boldsymbol{\Sigma}\textbf{w}-\textbf{w}'\boldsymbol{\mu} , \quad \text{s.t.} \quad \begin{array}{ll} \textbf{w}'\textbf{1}=1, \\ (\textbf{w}-\textbf{w}_-)'\boldsymbol{\Lambda}(\textbf{w}-\textbf{w}_-) \le \delta_R,\\ \textbf{w}'\textbf{w} \le \delta_D, \end{array} \end{equation}

where it is easy to recognize the usual mean-variance optimization in the left-hand side. We impose three constraints on the right-hand side.¹ The first one is the budget constraint (weights sum to one). The second one penalizes variations in weights (compared to the current allocation, $\textbf{w}_-$) via a diagonal matrix $\boldsymbol{\Lambda}$ that reflects trading costs. This is a crucial point. Portfolios are rarely constructed from scratch and are most of the time adjustments of existing positions. In order to reduce the orders and the corresponding transaction costs, it is possible to penalize large variations from the existing portfolio. In the above program, the current weights are written $\textbf{w}_-$ and the desired ones $\textbf{w}$, so that $\textbf{w}-\textbf{w}_-$ is the vector of deviations from the current positions. The term $(\textbf{w}-\textbf{w}_-)'\boldsymbol{\Lambda}(\textbf{w}-\textbf{w}_-)$ is the sum of squared deviations, weighted by the diagonal coefficients $\Lambda_{n,n}$. This is helpful because some assets may be more costly to trade due to liquidity (large cap stocks are more liquid and their trading costs are lower). When $\delta_R$ decreases, rotation is reduced because the weights are not allowed to deviate too much from the current positions $\textbf{w}_-$. The last constraint enforces diversification via the Herfindahl-Hirschman index of the portfolio: the smaller $\delta_D$, the more diversified the portfolio.

Recalling that there are $N$ assets in the universe, the Lagrange form of (12.1) is:

\begin{equation} \tag{12.2} L(\textbf{w})= \frac{\lambda}{2} \textbf{w}'\boldsymbol{\Sigma}\textbf{w}-\textbf{w}'\boldsymbol{\mu}-\eta (\textbf{w}'\textbf{1}_N-1)+\kappa_R ( (\textbf{w}-\textbf{w}_-)'\boldsymbol{\Lambda}(\textbf{w}-\textbf{w}_-) - \delta_R)+\kappa_D(\textbf{w}'\textbf{w}-\delta_D), \end{equation}

and the first order condition

\begin{align} \frac{\partial}{\partial \textbf{w}}L(\textbf{w})= \lambda \boldsymbol{\Sigma}\textbf{w}-\boldsymbol{\mu}-\eta\textbf{1}_N+2\kappa_R \boldsymbol{\Lambda}(\textbf{w}-\textbf{w}_-)+2\kappa_D\textbf{w}=0, \end{align}

yields

\begin{equation} \tag{12.3} \textbf{w}^*_\kappa= (\lambda \boldsymbol{\Sigma}+2\kappa_R \boldsymbol{\Lambda} +2\kappa_D\textbf{I}_N)^{-1} \left(\boldsymbol{\mu} + \eta_{\lambda,\kappa_R,\kappa_D} \textbf{1}_N+2\kappa_R \boldsymbol{\Lambda}\textbf{w}_-\right), \end{equation}

with

\begin{align} \eta_{\lambda,\kappa_R,\kappa_D}=\frac{1- \textbf{1}_N'(\lambda\boldsymbol{\Sigma}+2\kappa_R \boldsymbol{\Lambda}+2\kappa_D\textbf{I}_N)^{-1}(\boldsymbol{\mu}+2\kappa_R\boldsymbol{\Lambda}\textbf{w}_-)}{\textbf{1}'_N(\lambda \boldsymbol{\Sigma}+2\kappa_R \boldsymbol{\Lambda}+2\kappa_D\textbf{I}_N)^{-1}\textbf{1}_N}. \end{align}

This parameter ensures that the budget constraint is satisfied. The optimal weights in (12.3) depend on three tuning parameters: $\lambda$, $\kappa_R$ and $\kappa_D$.

This seemingly complex formula is in fact very flexible and tractable. It requires some tests and adjustments before realistic values for $\lambda$, $\kappa_R$ and $\kappa_D$ are found (see the exercises at the end of the chapter). In Pedersen, Babu, and Levine (2020), the authors recommend a similar form, except that the covariance matrix is shrunk towards the diagonal matrix of sample variances and the expected returns are a mix between a signal and an anchor portfolio. The authors argue that their general formulation has links with robust optimization (see also Kim, Kim, and Fabozzi (2014)), Bayesian inference (Lai et al. (2011)), matrix denoising via random matrix theory, and, naturally, shrinkage. In fact, shrunk expected returns have been around for quite some time (Jorion (1985), Kan and Zhou (2007) and Bodnar, Parolya, and Schmid (2013)) and simply seek to diversify and reduce estimation risk.
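Before moving on, here is a minimal sketch of the closed-form solution (12.3); all names are ours, and the simulated inputs are purely illustrative (see also the coding exercises at the end of the chapter):

```r
# Closed-form weights of Equation (12.3). Inputs: Sigma (covariance matrix),
# mu (expected returns), w_cur (current weights w_-), Lambda (diagonal
# trading-cost matrix) and the tuning parameters lambda, kappa_R, kappa_D.
weights_qp <- function(Sigma, mu, w_cur, Lambda, lambda, kappa_R, kappa_D) {
    N    <- length(mu)
    M    <- solve(lambda * Sigma + 2 * kappa_R * Lambda + 2 * kappa_D * diag(N))
    base <- mu + 2 * kappa_R * Lambda %*% w_cur          # Term without eta
    eta  <- as.numeric((1 - sum(M %*% base)) / sum(M))   # Budget multiplier
    as.numeric(M %*% (base + eta * rep(1, N)))           # Optimal weights
}

# Example with simulated inputs
N <- 10; Sigma <- diag(N) * 0.04; mu <- rep(0.08, N)
w <- weights_qp(Sigma, mu, w_cur = rep(1/N, N), Lambda = diag(N),
                lambda = 2, kappa_R = 1, kappa_D = 1)
sum(w)   # Budget constraint check: equals 1
```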

12.3 Performance metrics

The evaluation of performance is a key stage in a backtest. This section, while not exhaustive, is intended to cover the most important facets of portfolio assessment.

12.3.1 Discussion

While the evaluation of the accuracy of ML tools (see Section 10.1) is of course valuable (and imperative!), the portfolio returns are the ultimate yardstick in a backtest. One essential element in such an exercise is a benchmark, because raw and absolute metrics do not mean much on their own.

This is not only true at the portfolio level, but also at the ML engine level. In most of the trials of the previous chapters, the MSE of the models on the testing set revolves around 0.037. An interesting figure is the variance of one-month returns on this set, which corresponds to the error made by a constant prediction of 0 all the time. This figure is equal to 0.037, which means that the sophisticated algorithms don’t really improve on a naive heuristic. This benchmark is the one used in the out-of-sample $R^2$ of Gu, Kelly, and Xiu (2020b).

In portfolio choice, the most elementary allocation is the uniform one, whereby each asset receives the same weight. This seemingly simplistic solution is in fact an incredible benchmark, one that is hard to beat consistently (see DeMiguel, Garlappi, and Uppal (2009) and Plyakha, Uppal, and Vilkov (2016)). Theoretically, uniform portfolios are optimal when uncertainty, ambiguity or estimation risk is high (Pflug, Pichler, and Wozabal (2012), Maillet, Tokpavi, and Vaucher (2015)) and empirically, it cannot be outperformed even at the factor level (Dichtl, Drobetz, and Wendt (2020)). Below, we will pick an equally weighted (EW) portfolio of all stocks as our benchmark.

12.3.2 Pure performance and risk indicators

We then turn to the definition of the usual metrics used both by practitioners and academics alike. Henceforth, we write $r^P=(r_t^P)_{1\le t\le T}$ and $r^B=(r_t^B)_{1\le t\le T}$ for the returns of the portfolio and those of the benchmark, respectively. When referring to some generic returns, we simply write $r_t$. There are many ways to analyze them and most of them rely on their distribution.

The simplest indicator is the average return:

\begin{align} \bar{r}_P=\mu_P=\mathbb{E}[r^P]\approx \frac{1}{T}\sum_{t=1}^T r_t^P, \quad \bar{r}_B=\mu_B=\mathbb{E}[r^B]\approx \frac{1}{T}\sum_{t=1}^T r_t^B, \end{align}

where, obviously, the portfolio is noteworthy if $\mathbb{E}[r^P]>\mathbb{E}[r^B]$. Note that we use the arithmetic average above but the geometric one is also an option, in which case:

\begin{align} \tilde{\mu}_P\approx \left(\prod_{t=1}^T(1+r^P_t) \right)^{1/T}-1 , \quad \tilde{\mu}_B \approx \left(\prod_{t=1}^T(1+r^B_t) \right)^{1/T}-1. \end{align}

The benefit of this second definition is that it takes the compounding of returns into account and hence compensates for volatility pumping. To see this, consider a very simple two-period model with returns $−r$ and $+r$. The arithmetic average is zero, but the geometric one $\sqrt{1-r^2}-1$ is negative.

Akin to accuracy, hit ratios evaluate the proportion of times when the position is in the right direction (long when the realized return is positive and short when it is negative); in short, they measure the propensity to make good guesses. This can be computed at the asset level (the proportion of positions in the correct direction²) or at the portfolio level. In all cases, the computation can be performed on raw returns or on relative returns (e.g., compared to a benchmark). A meaningful hit ratio is the proportion of times that a strategy beats its benchmark. This is of course not sufficient, as many small gains can be offset by a few large losses.

Lastly, one important clarification. In all examples of supervised learning tools in the book, we compared hit ratios to 0.5. This is in fact misleading because a bullish investor may always bet on upward moves, in which case the hit ratio is simply the percentage of time that returns are positive. Over the long run, this probability is above 0.5. In our sample, it is equal to 0.556, which is well above 0.5 and could be viewed as the benchmark to surpass.

Pure performance measures are almost always accompanied by risk measures. The second moment of returns is usually used to quantify the magnitude of fluctuations of the portfolio. A large variance implies sizable movements in returns, and hence in portfolio values. This is why the standard deviation of returns is called the volatility of the portfolio.

\begin{align} \sigma^2_P=\mathbb{V}[r^P]\approx \frac{1}{T-1}\sum_{t=1}^T (r_t^P-\mu_P)^2, \quad \sigma^2_B=\mathbb{V}[r^B]\approx \frac{1}{T-1}\sum_{t=1}^T (r_t^B-\mu_B)^2. \end{align}

In this case, the portfolio can be preferred if it is less risky compared to the benchmark, i.e., when $\sigma_P^2<\sigma_B^2$ and when average returns are equal (or comparable).

Higher order moments of returns are sometimes used (skewness and kurtosis), but they are far less common. We refer for instance to Harvey et al. (2010) for one method that takes them into account in the portfolio construction process.

For some investors, volatility is an incomplete measure of risk: it can be argued that it should be decomposed into 'good' volatility (when prices go up) and 'bad' volatility (when they go down). The downward semi-variance is computed as the variance taken over the negative returns only:

\begin{align} \sigma^2_-\approx \frac{1}{\text{card}\{t:r_t<0\}}\sum_{t=1}^T (r_t-\mu_P)^2\,1_{\{r_t<0\}}. \end{align}

The average return and the volatility are the typical moment-based metrics used by practitioners. Other indicators focus on different aspects of the distribution of returns, notably tails and extreme events. The Value-at-Risk (VaR) is one such example. If $F_r$ is the empirical cdf of returns, the VaR at a confidence level $\alpha$ (often taken to be 95%) is the $(1-\alpha)$ quantile of returns:

\begin{align} \text{VaR}_\alpha(\textbf{r})=F_r^{-1}(1-\alpha). \end{align}

It is equal to a bad return scenario that is expected to occur with probability $1-\alpha$ (e.g., 5% of the time when $\alpha=95$%). An even more conservative measure is the so-called Conditional Value-at-Risk (CVaR), also known as expected shortfall, which computes the average loss over the worst $(1-\alpha)$ proportion of scenarios. Its empirical evaluation is

\begin{align} \text{CVaR}_\alpha(\textbf{r})=\frac{1}{\text{card}\{t: r_t < \text{VaR}_\alpha(\textbf{r})\}}\sum_{r_t < \text{VaR}_\alpha(\textbf{r})}r_t. \end{align}

Going crescendo in the severity of risk measures, the ultimate evaluation of loss is the maximum drawdown. It is equal to the maximum loss suffered from the peak value of the strategy. If we write $P_t$ for the time-$t$ value of a portfolio, the drawdown is

\begin{align} D_T^P=\underset{0 \le t \le T}{\text{max}} P_t-P_T , \end{align}

and the maximum drawdown is

\begin{align} MD_T^P=\underset{0 \le s \le T}{\text{max}} \left(\underset{0 \le t \le s}{\text{max}} P_t-P_s, 0\right) . \end{align}
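To fix ideas, here is a minimal sketch of these risk indicators applied to a vector of returns; the VaR, CVaR and maximum drawdown follow the empirical definitions above, and the final line runs the function on simulated data:

```r
risk_metrics <- function(returns, alpha = 0.95) {
    var_a  <- quantile(returns, 1 - alpha)          # Empirical Value-at-Risk
    cvar_a <- mean(returns[returns < var_a])        # Expected shortfall (CVaR)
    values <- cumprod(1 + returns)                  # Portfolio value P_t (P_0 = 1)
    max_dd <- max(cummax(values) - values)          # Maximum drawdown
    c(vol = sd(returns), VaR = as.numeric(var_a), CVaR = cvar_a, MaxDD = max_dd)
}
risk_metrics(rnorm(120, mean = 0.005, sd = 0.04))   # Example on simulated returns
```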

12.3.3 Factor-based evaluation

In the spirit of factor models, performance can also be assessed through the lens of exposures. If we recall the original formulation from Equation (3.1):

\begin{align} r_{t,n}= \alpha_n+\sum_{k=1}^K\beta_{t,k,n}f_{t,k}+\epsilon_{t,n}, \end{align}

then the estimated $\hat{\alpha}_n$ is the performance that cannot be explained by the factors. When returns are excess returns (over the risk-free rate) and when there is only one factor, the market factor, this quantity is called Jensen's alpha (Jensen (1968)). Often, it is simply referred to as alpha. The other estimate, $\hat{\beta}_{t,M,n}$ (where $M$ stands for market), is the market beta.

Because of the rise of factor investing, it has become customary to also report the alpha of more exhaustive regressions. Adding the size and value premium (as in Fama and French (1993)) and even momentum (Carhart (1997)) helps understand if a strategy generates value beyond that which can be obtained through the usual factors.
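In R, such an evaluation boils down to one regression. Below is a minimal sketch with hypothetical inputs: `reg_data` gathers the portfolio's excess returns `r_P` and the factor series `MKT_RF`, `SMB`, `HML` and `MOM` (e.g., downloaded from Kenneth French's data library; all names are assumptions):

```r
fit <- lm(r_P ~ MKT_RF + SMB + HML + MOM, data = reg_data)  # Carhart regression
coef(summary(fit))["(Intercept)", ]   # Four-factor alpha, with t-stat & p-value
```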

12.3.4 Risk-adjusted measures

The tradeoff between average return and volatility has been a cornerstone of modern finance since Markowitz (1952). The simplest way to synthesize both metrics is via the information ratio:

\begin{align} IR(P,B)=\frac{\mu_{P-B}}{\sigma_{P-B}}, \end{align}

where the index $P-B$ implies that the mean and standard deviations are computed on the long-short portfolio with returns $r_t^P-r_t^B$. The denominator $\sigma_{P-B}$ is sometimes called the tracking error.

The most widespread information ratio is the Sharpe ratio (Sharpe (1966)) for which the benchmark is some riskless asset. Instead of directly computing the information ratio between two portfolios or strategies, it is often customary to compare their Sharpe ratios. Simple comparisons can benefit from statistical tests (see, e.g., Ledoit and Wolf (2008)).

More extreme risk measures can serve as denominator in risk-adjusted indicators. The Managed Account Report (MAR) ratio is, for example, computed as

\begin{align} MAR^P = \frac{\tilde{\mu}_P}{MD^P}, \end{align}

while the Treynor ratio is equal to

\begin{align} \text{Treynor}=\frac{\mu_P}{\hat{\beta}_M}, \end{align}

i.e., the (excess) return divided by the market beta (see Treynor (1965)). This definition was generalized to multifactor exposures by Hübner (2005) into the generalized Treynor ratio:

\begin{align} \text{GT}=\mu_P\frac{\sum_{k=1}^K\bar{f}_k}{\sum_{k=1}^K\hat{\beta}_k\bar{f}_k}, \end{align}

where the $\bar{f}_k$ are the sample average of the factors $f_{t,k}$. We refer to the original article for a detailed account of the analytical properties of this ratio.

12.3.5 Transaction costs and turnover

Updating the portfolio composition is not free. In all generality, the total cost of rebalancing at time $t$ is $C_t=\sum_{n=1}^N | \Delta w_{t,n}|c_{t,n}$, where $\Delta w_{t,n}$ is the change in position for asset $n$ and $c_{t,n}$ the corresponding fee. This last quantity is often hard to predict, and it is thus customary to use a proxy that depends, for instance, on market capitalization (large stocks have more liquid shares and thus command smaller fees) or on bid-ask spreads (smaller spreads imply smaller costs).

As a first-order approximation, it is often useful to compute the average turnover:

\begin{align} \text{Turnover}=\frac{1}{T-1}\sum_{t=2}^T\sum_{n=1}^N|w_{t,n}-w_{t-,n}|, \end{align}

where $w_{t,n}$ are the desired weights at time $t$ and $w_{t-,n}$ are the weights just before the rebalancing. By convention, the positions of the first period (launching weights) are excluded from the computation. Transaction costs can then be proxied as a multiple of turnover (times some average or median cost in the cross-section of firms). This is a first-order estimate of realized costs that does not take into consideration the evolution of the scale of the portfolio. Nonetheless, a rough figure is much better than none at all.

Once transaction costs (TCs) have been annualized, they can be deducted from average returns to yield a more realistic picture of profitability. In the same vein, the transaction cost-adjusted Sharpe ratio of a portfolio $P$ is given by

\begin{equation} \tag{12.4} SR_{TC}=\frac{\mu_P-TC}{\sigma_P}. \end{equation}

Transaction costs are often overlooked in academic articles but can have a sizable impact in real life trading (see, e.g., Novy-Marx and Velikov (2015)). DeMiguel et al. (2020) show how to use factor investing (and exposures) to combine and offset positions and reduce overall fees.
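A minimal sketch of these two quantities, with hypothetical inputs: `weights` is a $T \times N$ matrix of successive allocations and `port_returns` the vector of portfolio returns; the 50 bps fee is an arbitrary figure. This simple version ignores the drift of weights between rebalancing dates; a drift-adjusted version appears in Section 12.6.

```r
turn  <- mean(rowSums(abs(diff(weights))))              # Average turnover
TC    <- 0.005 * turn                                   # Cost proxy per period
SR_TC <- (mean(port_returns) - TC) / sd(port_returns)   # Equation (12.4)
```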

12.4 Common errors and issues

12.4.1 Forward-looking data

One of the most common mistakes in portfolio backtesting is the use of forward-looking data. It is for instance easy to fall into the trap of the danger zone depicted in Figure 12.2. In this case, the labels used at time $t$ are computed with knowledge of what happens at times $t+1$, $t+2$, etc. It is worth triple checking every step in the code to make sure that strategies are not built on prescient data.

12.4.2 Backtest overfitting

The second major problem is backtest overfitting. The analogy with training set overfitting is easy to grasp. It is a well-known issue and was formalized for instance in White (2000) and Romano and Wolf (2005). In portfolio choice, we refer to Bajgrowicz and Scaillet (2012), Bailey and López de Prado (2014) and Lopez de Prado and Bailey (2020), and the references therein.

At any given moment, a backtest depends on only one particular dataset. Often, the result of the first backtest will not be satisfactory, for many possible reasons. Hence, it is tempting to have another try, altering some parameters that were probably not optimal. This second test may be better, though still not quite good enough. Thus, in a third trial, a new weighting scheme can be tested, along with a new (more sophisticated) forecasting engine. Iteratively, the backtester is bound to end up with a strategy that performs well enough; it is just a matter of time and trials.

One consequence of backtest overfitting is that it is illusory to hope for the same Sharpe ratios in live trading as those obtained in the backtest. Reasonable professionals divide backtested Sharpe ratios by at least two (Harvey and Liu (2015), Suhonen, Lennkh, and Perez (2017)). In Bailey and López de Prado (2014), the authors even propose a statistical test for Sharpe ratios, provided that some metrics of all tested strategies are stored in memory. The formula for the deflated Sharpe ratio is:

\begin{equation} \tag{12.5} t = \Phi\left((SR-SR^*)\sqrt{\frac{T-1}{1-\gamma_3SR+\frac{\gamma_4-1}{4}SR^2}} \right), \end{equation}

where $SR$ is the Sharpe ratio obtained by the best strategy among all those that were tested, and

\begin{align} SR^*=\mathbb{E}[SR]+\sqrt{\mathbb{V}[SR]}\left((1-\gamma)\Phi^{-1}\left(1-\frac{1}{N}\right)+\gamma \Phi^{-1}\left(1-\frac{1}{Ne}\right) \right), \end{align}

is the theoretical average maximum SR. Moreover, $\gamma_3$ and $\gamma_4$ stand for the skewness and kurtosis of the returns of the tested strategy, $\gamma \approx 0.5772$ is the Euler-Mascheroni constant, $N$ is the number of strategy trials, $T$ is the number of return observations, and $\Phi$ denotes the cdf of the standard Gaussian distribution.

If the $t$ defined above is below a certain threshold (e.g., 0.95), then the $SR$ cannot be deemed significant compared to all of those that were tested. Most of the time, sadly, that is the case. In Equation (12.5), the realized $SR$ must be above the theoretical maximum $SR^*$ and the scaling factor must be sufficiently large to push the argument inside $\Phi$ above roughly 1.645 (the 95% Gaussian quantile), so that $t$ surpasses 0.95.

In the scientific community, test overfitting is also known as p-hacking. It is rather common in financial economics and the reading of Harvey (2017) is strongly advised to grasp the magnitude of the phenomenon. p-hacking is also present in most fields that use statistical tests (see, e.g., Head et al. (2015) to cite but one reference). There are several ways to cope with p-hacking:

  1. don’t rely on p-values (Amrhein, Greenland, and McShane (2019));
  2. use detection tools (Elliott, Kudrin, and Wuthrich (2019)); or, finally,
  3. use advanced methods that process arrays of statistics (e.g., the Bayesianized versions of p-values that include some prior assessment from Harvey (2017), or other tests such as those proposed in Romano and Wolf (2005) and Simonsohn, Nelson, and Simmons (2014)).

The first option is wise, but the drawback is that the decision process is then left to another arbitrary yardstick.

12.4.3 Simple safeguards

As mentioned at the beginning of the chapter, two common-sense references for backtesting are Fabozzi and López de Prado (2018) and R. Arnott, Harvey, and Markowitz (2019). The advice provided in these two articles is often judicious and thoughtful.

One additional comment pertains to the output of the backtest. One simple, intuitive and widespread metric is the transaction cost-adjusted Sharpe ratio defined in Equation (12.4). In the backtest, let us call $SR_{TC}^B$ the corresponding value for the benchmark, which we like to define as the equally-weighted portfolio of all assets in the trading universe (in our dataset, roughly one thousand US equities). If the $SR_{TC}^P$ of the best strategy is above $2\times SR_{TC}^B$, then there is probably a glitch somewhere in the backtest.

This criterion holds under two assumptions:

  1. a sufficiently long out-of-sample period;
  2. long-only portfolios.

It is unlikely that any realistic strategy can outperform a solid benchmark by a very wide margin over the long term. Being able to improve the benchmark’s annualized return by 150 basis points (with comparable volatility) is already a great achievement. Backtests that deliver returns more than 5% above those of the benchmark are dubious.

12.5 Implication of non-stationarity: forecasting is hard

This section is split into two parts: in the first, we discuss what makes forecasting such a difficult task, and in the second we present an important theoretical result, originally developed for machine learning, that sheds light on any discipline confronted with out-of-sample tests. An interesting contribution related to this topic is the study by Farmer, Schmidt, and Timmermann (2019). The authors assess the predictive fit of linear models through time and show that this fit varies strongly: sometimes the model performs very well, sometimes not so much. There is no reason why this should not be the case for ML algorithms as well.

12.5.1 General comments

The careful reader must have noticed that throughout Chapters 5 to 11, the performance of ML engines is underwhelming. These disappointing results are there on purpose and highlight the crucial truth that machine learning is no panacea, no magic wand, no philosopher’s stone that can transform data into golden predictions. Most ML-based forecasts fail. This is in fact not only true for very enhanced and sophisticated techniques, but also for simpler econometric approaches (Dichtl et al. (2020)), which again underlines the need to replicate results to challenge their validity.

One reason for this is that datasets are full of noise and extracting the slightest amount of signal is a tough challenge (we recommend a careful reading of the introduction of Timmermann (2018) for more details on this topic). Another is the ever time-varying nature of factor returns in the equity space: some factors can perform very well during one year and poorly the next, and these reversals can be costly in the context of fully automated data-based allocation processes.

In fact, this is one major difference with many fields in which ML has made huge advances. In image recognition, numbers will always have the same shape, and so will cats, buses, etc. Likewise, a verb will always be a verb and the syntax of a language does not change. This invariance, though sometimes hard to grasp,³ is nonetheless key to the great improvements both in computer vision and natural language processing.

In factor investing, there does not seem to be such invariance (see Cornell (2020)). There is no factor, and no (possibly nonlinear) combination of factors, that can explain and accurately forecast returns over long periods of several decades.⁴ The academic literature has yet to find such a model; but even if it did, a simple arbitrage argument would logically invalidate its conclusions in future datasets.

12.5.2 The no free lunch theorem

We start by underlining that the no free lunch theorem in machine learning has nothing to do with the asset pricing condition of the same name (see, e.g., Delbaen and Schachermayer (1994), or, more recently, Cuchiero, Klein, and Teichmann (2016)). The original formulation was given by Wolpert (1992a), but we also recommend a look at the more recent reference Ho and Pepyne (2002). There are in fact several such theorems and two of them can be found in Wolpert and Macready (1997).

The statement of the theorem is very abstract and requires some notational conventions. We assume that any training sample $S=(\{\textbf{x}_1,y_1\}, \dots, \{\textbf{x}_I,y_I\})$ is such that there exists an oracle function $f$ that perfectly maps the features to the labels: $y_i=f(\textbf{x}_i)$. The oracle function $f$ belongs to a very large set of functions $\mathcal{F}$. In addition, we write $\mathcal{H}$ for the set of functions to which the forecaster resorts to approximate $\mathcal{F}$. For instance, $\mathcal{H}$ can be the space of feed-forward neural networks, the space of decision trees, or the union of both. Elements of $\mathcal{H}$ are written $h$ and $\mathbb{P}[h|S]$ stands for the (largely unknown) distribution of $h$ knowing the sample $S$. Similarly, $\mathbb{P}[f|S]$ is the distribution of the oracle function knowing $S$. Finally, the features have a given law, $\mathbb{P}[\textbf{x}]$.

Let us now consider two models, say $h_1$ and $h_2$. The statement of the theorem is usually formulated with respect to a classification task. Knowing $S$, the error induced by choosing $h_k$, evaluated on samples outside of the training sample $S$, can be quantified as

\begin{equation} \tag{12.6} E_k(S)= \int_{f,h}\int_{\textbf{x}\notin S} \underbrace{ (1-\delta(f(\textbf{x}),h_k(\textbf{x})))}_{\text{error term}} \underbrace{\mathbb{P}[f|S]\mathbb{P}[h|S]\mathbb{P}[\textbf{x}]}_{\text{distributional terms}}, \end{equation}

where $\delta(\cdot,\cdot)$ is the Kronecker delta function:

\begin{equation} \tag{12.7} \delta(x,y)=\left\{\begin{array}{ll} 0 & \text{if } x\neq y \\ 1 & \text{if } x = y \end{array} .\right. \end{equation}

One of the no free lunch theorems states that $E_1(S)=E_2(S)$, that is, that with the sole knowledge of $S$, there can be no superior algorithm on average. In order to build a performing algorithm, the analyst or econometrician must have prior views on the structure of the relationship between $y$ and $\textbf{x}$ and integrate these views in the construction of the model. Unfortunately, this can also yield underperforming models if the views are incorrect.

12.6 First example: a complete backtest

We finally propose a fully detailed example of one implementation of an ML-based strategy run on a careful backtest. What follows is a generalization of the content of Section 5.2.2. In the same spirit, we split the backtest into four parts:

  1. the creation/initialization of variables;
  2. the definition of the strategies in one main function;
  3. the backtesting loop itself;
  4. the performance indicators.

Accordingly, we start with initializations.
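Below is a minimal sketch of these initializations. It assumes the rectangular dataset used throughout the book, stored in a tibble `data_ml` (columns stock_id, date, the characteristics, and the forward returns R1M_Usd and R12M_Usd); the column names and the out-of-sample start date are assumptions.

```r
library(tidyverse)
library(lubridate)
library(xgboost)

stock_ids <- data_ml$stock_id %>% unique()               # Investment universe
t_oos <- data_ml$date[data_ml$date > "2006-12-31"] %>%   # Out-of-sample dates
    unique() %>% sort()
features <- setdiff(colnames(data_ml),                   # Predictor columns
                    c("stock_id", "date", "R1M_Usd", "R12M_Usd"))
nb_port <- 2                                             # ML strategy + EW benchmark
portf_weights <- array(0, dim = c(length(t_oos), nb_port, length(stock_ids)))
portf_returns <- matrix(0, nrow = length(t_oos), ncol = nb_port)
```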

This first step is crucial: it lays the groundwork for the core of the backtest. We consider only two strategies: one ML-based and the EW (1/N) benchmark. The main (weighting) function will consist of these two components, but we define the sophisticated one in a dedicated wrapper. The ML-based weights are derived from XGBoost predictions with 80 trees, a learning rate of 0.3 and a maximum tree depth of 4. This makes the model complex, but not exceedingly so. Once the predictions are obtained, the weighting scheme is simple: it is an EW portfolio over the best half of the stocks (those with above-median prediction).

In the function below, all parameters (e.g., the learning rate eta or the number of trees nrounds) are hard-coded. They could easily be passed as arguments next to the data inputs. One very important detail is that, in contrast to the rest of the book, the label is the 12-month future return. The main reason for this is rooted in the discussion of Section 4.6. Also, to speed up the computations, we remove the bulk of the distribution of the labels and keep only the top 20% and bottom 20%, as advised in Coqueret and Guida (2020). The filtering levels could also be passed as arguments.

Compared to the structure proposed in Section 6.4.6, the differences are that the label is not only based on long-term returns, but it also relies on a volatility component. Even though the denominator in the label is the exponential quantile of the volatility, it seems fair to say that it is inspired by the Sharpe ratio and that the model seeks to explain and forecast a risk-adjusted return instead of a raw return. A stock with very low volatility will have its return unchanged in the label, while a stock with very high volatility will see its return divided by a factor close to three (exp(1)=2.718).
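A minimal sketch of such a function is shown below. The column names (R12M_Usd, Vol1Y_Usd) follow the book's dataset, in which characteristics are uniformized in [0,1] (so that Vol1Y_Usd is directly the volatility's quantile); treat the whole function as an illustration rather than exact code.

```r
weights_xgb <- function(train_data, test_data, features) {
    train_data <- train_data %>%
        # Risk-adjusted label: 12M return scaled by the exponential of the
        # uniformized volatility, as discussed above
        mutate(label = R12M_Usd / exp(Vol1Y_Usd)) %>%
        # Keep only the tails of the label distribution (top/bottom 20%)
        filter(label < quantile(label, 0.2, na.rm = TRUE) |
               label > quantile(label, 0.8, na.rm = TRUE))
    train_mat <- xgb.DMatrix(data  = as.matrix(train_data[, features]),
                             label = train_data$label)
    fit <- xgb.train(params = list(eta = 0.3,           # Hard-coded learning rate
                                   max_depth = 4,       # Maximum tree depth
                                   objective = "reg:squarederror"),
                     data = train_mat, nrounds = 80, verbose = 0)
    pred <- predict(fit, as.matrix(test_data[, features]))
    w <- pred > median(pred)          # Select the best half of predictions
    w / sum(w)                        # EW weights on the selected stocks
}
```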

This function is then embedded in the global weighting function which only wraps two schemes: the EW benchmark and the ML-based policy.
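A minimal sketch of this wrapper (the function and argument names are ours):

```r
portf_compose <- function(train_data, test_data, features, j) {
    N <- nrow(test_data)                                   # Number of assets
    if (j == 1) return(weights_xgb(train_data, test_data, features))  # ML-based
    if (j == 2) return(rep(1 / N, N))                      # EW benchmark
}
```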

Equipped with this function, we can turn to the main backtesting loop. Given that we use a large-scale model, the computation time for the loop is substantial (possibly a few hours on a slow machine relying on CPU only). Resorting to functional programming can speed up the loop (see the exercises at the end of the chapter). Also, a simple benchmark equally weighted portfolio can be coded with tidyverse functions only.
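A minimal sketch of the loop follows; it assumes the rectangular panel and the objects initialized above, and the two parameters defined in its first lines are discussed next.

```r
m_offset   <- 12     # Length of the buffer period: the 12-month label horizon
train_size <- 60     # Size of the training sample: 5 years of monthly data

for (t in seq_along(t_oos)) {
    # Rolling training sample, stopping m_offset months before the trade date
    # to stay out of the danger zone of Figure 12.2
    train_data <- data_ml %>%
        filter(date <  t_oos[t] %m-% months(m_offset),
               date >= t_oos[t] %m-% months(m_offset + train_size))
    test_data <- data_ml %>% filter(date == t_oos[t])      # Current snapshot
    for (j in 1:nb_port) {
        w <- portf_compose(train_data, test_data, features, j)
        portf_weights[t, j, ] <- w
        # R1M_Usd holds the return realized over the following month
        portf_returns[t, j]   <- sum(w * test_data$R1M_Usd)
    }
}
```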

There are two important comments to be made on the above code. The first comment pertains to the two parameters that are defined in the first lines. They refer to the size of the training sample (5 years) and the length of the buffer period shown in Figure 12.2. This buffer period is imperative because the label is based on a long-term (12-month) return. This lag is compulsory to avoid any forward-looking bias in the backtest.

Below, we create a function that computes the turnover (variation in weights). It requires both the weight values as well as the returns of all assets because the weights just before a rebalancing depend on the weights assigned in the previous period, as well as on the returns of the assets that have altered these original weights during the holding period.
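A minimal sketch of such a function: `weights` is a $T \times N$ matrix of desired weights and `asset_returns` a $T \times N$ matrix in which row $t$ holds the returns realized over the month following date $t$ (the R1M_Usd convention used above).

```r
turnover <- function(weights, asset_returns) {
    T_   <- nrow(weights)
    turn <- 0
    for (t in 2:T_) {
        # Weights just before rebalancing: previous weights drifted by returns
        drifted <- weights[t - 1, ] * (1 + asset_returns[t - 1, ])
        turn <- turn + sum(abs(weights[t, ] - drifted / sum(drifted)))
    }
    turn / (T_ - 1)                       # Average over rebalancing dates
}
```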

Once turnover is defined, we embed it into a function that computes several key indicators.
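A minimal sketch of this indicator function (arithmetic mean, volatility, Sharpe ratio with a zero risk-free rate, 95% VaR and average turnover; the names are ours):

```r
perf_met <- function(port_returns, weights, asset_returns) {
    data.frame(avg_ret = mean(port_returns),                     # Average return
               vol     = sd(port_returns),                       # Volatility
               Sharpe  = mean(port_returns) / sd(port_returns),  # Sharpe ratio
               VaR_95  = as.numeric(quantile(port_returns, 0.05)),
               turn    = turnover(weights, asset_returns))
}
```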

Lastly, we build a function that loops on the various strategies.
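A minimal sketch, stacking the indicators of each strategy into one table:

```r
perf_met_multi <- function(portf_returns, portf_weights, asset_returns) {
    res <- lapply(1:ncol(portf_returns), function(j)      # Loop on strategies
        perf_met(portf_returns[, j], portf_weights[, j, ], asset_returns))
    res <- do.call(rbind, res)
    rownames(res) <- c("XGBoost-based", "EW benchmark")    # Strategy names
    res
}
```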

Given the weights and the returns of the portfolios, it remains to compute the returns of the individual assets in order to plug them into the aggregate metrics function.
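A minimal sketch: we pivot the one-month returns into a $T \times N$ matrix aligned with the rebalancing dates, then compute all indicators.

```r
asset_returns <- data_ml %>%
    filter(date %in% t_oos) %>%
    select(date, stock_id, R1M_Usd) %>%
    pivot_wider(names_from = stock_id, values_from = R1M_Usd) %>%
    arrange(date) %>% select(-date) %>% as.matrix()

perf_met_multi(portf_returns, portf_weights, asset_returns)   # Final table
```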

The ML-based strategy finally performs well! The gain comes mostly from the average return, while the volatility is higher than that of the benchmark. The net effect is that the Sharpe ratio improves over the benchmark. The improvement is not breathtaking, but (hence?) it seems reasonable. It is noteworthy that turnover is substantially higher for the sophisticated strategy. Deducting costs in the numerator (say, 0.005 times the turnover, as in Goto and Xu (2015), which is a conservative figure) only mildly reduces the superiority in Sharpe ratio of the ML-based strategy.

Finally, it is always tempting to plot the corresponding portfolio values and we display two related graphs in Figure 12.3.

FIGURE 12.3: Graphical representation of the performance of the portfolios.

Out of the 12 years of the backtest, the advanced strategy outperforms the benchmark during 10 years. It is less hurtful in two of the four years of aggregate losses (2015 and 2018). This is a satisfactory improvement because the EW benchmark is tough to beat!

12.7 Second example: backtest overfitting

To end this chapter, we quantify the concepts of Section 12.4.2. First, we build a function that is able to generate performance metrics for simple strategies that can be evaluated in batches. The strategies are pure factor bets and depend on three inputs: the chosen characteristic (e.g., market capitalization), a threshold level (quantile of the characteristic) and a direction (long position in the top or bottom of the distribution).
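A minimal sketch of this strategy generator is given below. Characteristics in `data_ml` are assumed to be uniformized in [0,1], so that `thresh` is directly a quantile level; `direction = 1` (resp. `-1`) buys the top (resp. bottom) of the distribution. The function returns the three per-strategy quantities used in (12.5).

```r
strat <- function(data, feature, thresh, direction) {
    rets <- data %>%
        filter(if (direction == 1) .data[[feature]] > thresh
               else                .data[[feature]] < thresh) %>%
        group_by(date) %>%
        summarise(ret = mean(R1M_Usd)) %>%     # EW return of selected stocks
        pull(ret)
    m <- mean(rets); s <- sd(rets)
    c(SR   = m / s,                            # Sharpe ratio (zero risk-free rate)
      skew = mean((rets - m)^3) / s^3,         # gamma_3
      kurt = mean((rets - m)^4) / s^4)         # gamma_4
}
```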

Then, we test the function on a triplet of arguments. We pick the price-to-book (Pb) ratio. The position is positive and the threshold is 0.3, which means that the strategy buys the stocks that have a Pb value above the 0.3 quantile of the distribution.
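With the sketch above, this single test reads:

```r
# Long position in stocks with a price-to-book above the 0.3 quantile
strat(data_ml, feature = "Pb", thresh = 0.3, direction = 1)
```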

The output keeps three quantities that will be useful to compute the statistic (12.5). We must now generate these indicators for many strategies. We start by creating the grid of parameters.
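A minimal sketch of the grid and of the batch evaluation; the 7 characteristics times 6 thresholds times 2 directions yield the 84 strategies mentioned below, and the feature names (taken from the book's dataset) are assumptions.

```r
pars <- expand.grid(
    feature   = c("Div_Yld", "Eps", "Mkt_Cap_12M_Usd", "Mom_11M_Usd",
                  "Ocf", "Pb", "Vol1Y_Usd"),
    thresh    = seq(0.2, 0.7, by = 0.1),
    direction = c(1, -1),
    stringsAsFactors = FALSE
)
grid_res <- purrr::pmap_dfr(pars, function(feature, thresh, direction)
    as.data.frame(t(strat(data_ml, feature, thresh, direction))))
```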

This makes 84 strategies in total. We can proceed to see how they fare. We plot the corresponding Sharpe ratios below in Figure 12.4. The top plot shows the strategies that invest in the bottoms of the distributions of characteristics, while the bottom plot pertains to the portfolios that are long in the upper parts of these distributions.

FIGURE 12.4: Sharpe ratios of all backtested strategies.

The last step is to compute the statistic (12.5). We code it here:
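A minimal sketch of statistic (12.5); $\gamma$ is the Euler-Mascheroni constant and the argument names are ours.

```r
DSR <- function(SR, Tt, g3, g4, SR_m, SR_v, N) {
    gamma   <- -digamma(1)                              # 0.5772...
    SR_star <- SR_m + sqrt(SR_v) *                      # Expected maximum SR
        ((1 - gamma) * qnorm(1 - 1 / N) + gamma * qnorm(1 - 1 / (N * exp(1))))
    pnorm((SR - SR_star) *                              # pnorm = Gaussian cdf Phi
          sqrt((Tt - 1) / (1 - g3 * SR + (g4 - 1) / 4 * SR^2)))
}
```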

All that remains to do is to evaluate the arguments of the function. The “best” strategy is the one on the top left corner of Figure 12.4 and it is based on market capitalization.
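With our hypothetical sketch, the call would look as follows (the exact output may differ from the value reported below):

```r
best <- which.max(grid_res$SR)               # The "best" strategy of the batch
DSR(SR   = grid_res$SR[best],
    Tt   = length(unique(data_ml$date)),     # Number of return observations
    g3   = grid_res$skew[best],              # Skewness of its returns
    g4   = grid_res$kurt[best],              # Kurtosis of its returns
    SR_m = mean(grid_res$SR),                # Average SR across all trials
    SR_v = var(grid_res$SR),                 # Variance of SRs across trials
    N    = nrow(grid_res))                   # N = 84 strategies
```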

The value 0.6657 is not high enough (it does not reach the 90% or 95% threshold) to make the strategy significantly superior to the other ones that were considered in the batch of tests.

12.8 Coding exercises

  1. Code the returns of the EW portfolio with tidyverse functions only (no loop).
  2. Code the advanced weighting function defined in Equation (12.3).
  3. Test it in a small backtest and check its sensitivity to the parameters.
  4. Using the functional programming package purrr, avoid the loop in the backtest.

  5. Build an integrated ensemble on top of 3 neural networks trained entirely with Keras. Each network obtains one third of predictors as input. The three networks yield a classification (yes/no or buy/sell). The overarching network aggregates the three outputs into a final decision. Evaluate its performance on the testing sample. Use the functional API.

References

Amrhein, Valentin, Sander Greenland, and Blake McShane. 2019. “Scientists Rise up Against Statistical Significance.” Nature 567: 305–7.

Arnott, Rob, Campbell R Harvey, Vitali Kalesnik, and Juhani Linnainmaa. 2019. “Alice’s Adventures in Factorland: Three Blunders That Plague Factor Investing.” Journal of Portfolio Management 45 (4): 18–36.

Arnott, Rob, Campbell R Harvey, and Harry Markowitz. 2019. “A Backtesting Protocol in the Era of Machine Learning.” Journal of Financial Data Science 1 (1): 64–74.

Bailey, David H, and Marcos López de Prado. 2014. “The Deflated Sharpe Ratio: Correcting for Selection Bias, Backtest Overfitting, and Non-Normality.” Journal of Portfolio Management 40 (5): 39–59.

Bajgrowicz, Pierre, and Olivier Scaillet. 2012. “Technical Trading Revisited: False Discoveries, Persistence Tests, and Transaction Costs.” Journal of Financial Economics 106 (3): 473–91.

Bodnar, Taras, Nestor Parolya, and Wolfgang Schmid. 2013. “On the Equivalence of Quadratic Optimization Problems Commonly Used in Portfolio Theory.” European Journal of Operational Research 229 (3): 637–44.

Carhart, Mark M. 1997. “On Persistence in Mutual Fund Performance.” Journal of Finance 52 (1): 57–82.

Coqueret, Guillaume. 2015. “Diversified Minimum-Variance Portfolios.” Annals of Finance 11 (2): 221–41.

Coqueret, Guillaume, and Tony Guida. 2020. “Training Trees on Tails with Applications to Portfolio Choice.” Annals of Operations Research 288: 181–221.

Cornell, Bradford. 2020. “Stock Characteristics and Stock Returns: A Skeptic’s Look at the Cross Section of Expected Returns.” Journal of Portfolio Management.

Cuchiero, Christa, Irene Klein, and Josef Teichmann. 2016. “A New Perspective on the Fundamental Theorem of Asset Pricing for Large Financial Markets.” Theory of Probability & Its Applications 60 (4): 561–79.

Delbaen, Freddy, and Walter Schachermayer. 1994. “A General Version of the Fundamental Theorem of Asset Pricing.” Mathematische Annalen 300 (1): 463–520.

DeMiguel, Victor, Lorenzo Garlappi, and Raman Uppal. 2009. “Optimal Versus Naive Diversification: How Inefficient Is the 1/N Portfolio Strategy?” Review of Financial Studies 22 (5): 1915–53.

DeMiguel, Victor, Alberto Martin Utrera, Raman Uppal, and Francisco J Nogales. 2020. “A Transaction-Cost Perspective on the Multitude of Firm Characteristics.” Review of Financial Studies 33 (5): 2180–2222.

Dichtl, Hubert, Wolfgang Drobetz, Andreas Neuhierl, and Viktoria-Sophie Wendt. 2020. “Data Snooping in Equity Premium Prediction.” Journal of Forecasting Forthcoming.

Dichtl, Hubert, Wolfgang Drobetz, and Viktoria-Sophie Wendt. 2020. “How to Build a Factor Portfolio: Does the Allocation Strategy Matter?” European Financial Management Forthcoming.

Elliott, Graham, Nikolay Kudrin, and Kaspar Wuthrich. 2019. “Detecting P-Hacking.” arXiv Preprint, no. 1906.06711.

Fabozzi, Frank J, and Marcos López de Prado. 2018. “Being Honest in Backtest Reporting: A Template for Disclosing Multiple Tests.” Journal of Portfolio Management 45 (1): 141–47.

Fama, Eugene F, and Kenneth R French. 1993. “Common Risk Factors in the Returns on Stocks and Bonds.” Journal of Financial Economics 33 (1): 3–56.

Farmer, Leland, Lawrence Schmidt, and Allan Timmermann. 2019. “Pockets of Predictability.” SSRN Working Paper 3152386.

Goto, Shingo, and Yan Xu. 2015. “Improving Mean Variance Optimization Through Sparse Hedging Restrictions.” Journal of Financial and Quantitative Analysis 50 (6): 1415–41.

Gu, Shihao, Bryan T Kelly, and Dacheng Xiu. 2020b. “Empirical Asset Pricing via Machine Learning.” Review of Financial Studies 33 (5): 2223–73.

Harvey, Campbell R. 2017. “Presidential Address: The Scientific Outlook in Financial Economics.” Journal of Finance 72 (4): 1399–1440.

Harvey, Campbell R, John C Liechty, Merrill W Liechty, and Peter Müller. 2010. “Portfolio Selection with Higher Moments.” Quantitative Finance 10 (5): 469–85.

Harvey, Campbell R, and Yan Liu. 2015. “Backtesting.” Journal of Portfolio Management 42 (1): 13–28.

Head, Megan L, Luke Holman, Rob Lanfear, Andrew T Kahn, and Michael D Jennions. 2015. “The Extent and Consequences of P-Hacking in Science.” PLoS Biology 13 (3): e1002106.

Ho, Yu-Chi, and David L Pepyne. 2002. “Simple Explanation of the No-Free-Lunch Theorem and Its Implications.” Journal of Optimization Theory and Applications 115 (3): 549–70.

Hübner, Georges. 2005. “The Generalized Treynor Ratio.” Review of Finance 9 (3): 415–35.

Jensen, Michael C. 1968. “The Performance of Mutual Funds in the Period 1945–1964.” Journal of Finance 23 (2): 389–416.

Jorion, Philippe. 1985. “International Portfolio Diversification with Estimation Risk.” Journal of Business, 259–78.

Kan, Raymond, and Guofu Zhou. 2007. “Optimal Portfolio Choice with Parameter Uncertainty.” Journal of Financial and Quantitative Analysis 42 (3): 621–56.

Kim, Woo Chang, Jang Ho Kim, and Frank J Fabozzi. 2014. “Deciphering Robust Portfolios.” Journal of Banking & Finance 45: 1–8.

Lai, Tze Leung, Haipeng Xing, Zehao Chen, and others. 2011. “Mean–Variance Portfolio Optimization When Means and Covariances Are Unknown.” Annals of Applied Statistics 5 (2A): 798–823.

Ledoit, Olivier, and Michael Wolf. 2008. “Robust Performance Hypothesis Testing with the Sharpe Ratio.” Journal of Empirical Finance 15 (5): 850–59.

Lopez de Prado, Marcos, and David H Bailey. 2020. “The False Strategy Theorem: A Financial Application of Experimental Mathematics.” American Mathematical Monthly Forthcoming.

Maillard, Sébastien, Thierry Roncalli, and Jérôme Teiletche. 2010. “The Properties of Equally Weighted Risk Contribution Portfolios.” Journal of Portfolio Management 36 (4): 60–70.

Maillet, Bertrand, Sessi Tokpavi, and Benoit Vaucher. 2015. “Global Minimum Variance Portfolio Optimisation Under Some Model Risk: A Robust Regression-Based Approach.” European Journal of Operational Research 244 (1): 289–99.

Markowitz, Harry. 1952. “Portfolio Selection.” Journal of Finance 7 (1): 77–91.

Novy-Marx, Robert, and Mihail Velikov. 2015. “A Taxonomy of Anomalies and Their Trading Costs.” Review of Financial Studies 29 (1): 104–47.

Pedersen, Lasse Heje, Abhilash Babu, and Ari Levine. 2020. “Enhanced Portfolio Optimization.” SSRN Working Paper 3530390.

Pflug, Georg Ch, Alois Pichler, and David Wozabal. 2012. “The 1/N Investment Strategy Is Optimal Under High Model Ambiguity.” Journal of Banking & Finance 36 (2): 410–17.

Plyakha, Yuliya, Raman Uppal, and Grigory Vilkov. 2016. “Equal or Value Weighting? Implications for Asset-Pricing Tests.” SSRN Working Paper 1787045.

Romano, Joseph P, and Michael Wolf. 2005. “Stepwise Multiple Testing as Formalized Data Snooping.” Econometrica 73 (4): 1237–82.

Romano, Joseph P, and Michael Wolf. 2013. “Testing for Monotonicity in Expected Asset Returns.” Journal of Empirical Finance 23: 93–116.

Sharpe, William F. 1966. “Mutual Fund Performance.” Journal of Business 39 (1): 119–38.

Simonsohn, Uri, Leif D Nelson, and Joseph P Simmons. 2014. “P-Curve: A Key to the File-Drawer.” Journal of Experimental Psychology: General 143 (2): 534.

Snow, Derek. 2020. “Machine Learning in Asset Management: Part 2: Portfolio Construction—Weight Optimization.” Journal of Financial Data Science Forthcoming.

Suhonen, Antti, Matthias Lennkh, and Fabrice Perez. 2017. “Quantifying Backtest Overfitting in Alternative Beta Strategies.” Journal of Portfolio Management 43 (2): 90–104.

Timmermann, Allan. 2018. “Forecasting Methods in Finance.” Annual Review of Financial Economics 10: 449–79.

Treynor, Jack L. 1965. “How to Rate Management of Investment Funds.” Harvard Business Review 43 (1): 63–75.

White, Halbert. 2000. “A Reality Check for Data Snooping.” Econometrica 68 (5): 1097–1126.

Wolpert, David H. 1992a. “On the Connection Between in-Sample Testing and Generalization Error.” Complex Systems 6 (1): 47.

Wolpert, David H, and William G Macready. 1997. “No Free Lunch Theorems for Optimization.” IEEE Transactions on Evolutionary Computation 1 (1): 67–82.


  1. Constraints often have beneficial effects on portfolio composition; see Jagannathan and Ma (2003) and DeMiguel et al. (2009).

  2. A long position in an asset with positive return or a short position in an asset with negative return.

  3. We invite the reader to have a look at the thoughtful albeit theoretical paper by Arjovsky et al. (2019).

  4. In the thread https://twitter.com/fchollet/status/1177633367472259072, François Chollet, the creator of Keras, argues that ML predictions based on price data cannot be profitable in the long term. Given the wide access to financial data, it is likely that the statement holds for predictions stemming from factor-related data as well.