Train day-by-day models

#all_no_test
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

source

load_cfg

 load_cfg (path_cfg)
cfg = load_cfg('final_cfg.json')
cfg
{'start_test': 1942,
 'start_train': 140,
 'days_to_predict': 'all',
 'fobj': None,
 'fobj_weight_col': 'total_scaled_weight',
 'weight_hess': 1,
 'feval': 'mse',
 'feval_weight_col': 'scale',
 'weight_col': None,
 'lgb_params': {'boosting_type': 'gbdt',
  'objective': 'regression',
  'metric': None,
  'subsample': 0.5,
  'subsample_freq': 1,
  'learning_rate': 0.03,
  'num_leaves': 255,
  'min_data_in_leaf': 255,
  'feature_fraction': 0.8,
  'n_estimators': 5000,
  'early_stopping_rounds': 50,
  'device_type': 'cpu',
  'seed': 42,
  'verbose': -1},
 'target': 'sales',
 'p_horizon': 28,
 'num_series': 30490,
 'features_json': 'final_features.json',
 'path_data_raw': '../../../data/raw',
 'path_features': '../../../data/features',
 'path_models': '../../../data/models',
 'use_neptune': 0,
 'neptune_project': None,
 'neptune_api_token': None}
FINAL_CFG
{'start_test': 1942,
 'start_train': 140,
 'days_to_predict': 'all',
 'fobj': 'mse',
 'fobj_weight_col': 'total_scaled_weight',
 'weight_hess': 1,
 'feval': 'mse',
 'feval_weight_col': 'scale',
 'weight_col': None,
 'lgb_params': {'boosting_type': 'gbdt',
  'objective': None,
  'metric': None,
  'subsample': 0.5,
  'subsample_freq': 1,
  'learning_rate': 0.03,
  'num_leaves': 255,
  'min_data_in_leaf': 255,
  'feature_fraction': 0.8,
  'n_estimators': 1,
  'early_stopping_rounds': 50,
  'device_type': 'cpu',
  'seed': 42,
  'verbose': -1},
 'target': 'sales',
 'p_horizon': 28,
 'num_series': 30490,
 'features_json': 'pkl_final_features.json',
 'path_data_raw': 'data/raw',
 'path_features': 'data/features',
 'path_models': 'data/models',
 'use_neptune': 0,
 'neptune_project': 0,
 'neptune_api_token': None}
DICT_FEATURES
{'fe_base.csv': ['dept_id', 'store_id'],
 'fe_cal.csv': ['event_name_1', 'tm_d', 'tm_w', 'tm_m', 'tm_dw', 'tm_w_end'],
 'fe_price.csv': ['sell_price',
  'price_min',
  'price_max',
  'price_median',
  'price_mode',
  'price_mean',
  'price_std',
  'price_norm_max',
  'price_norm_mode',
  'price_norm_mean',
  'price_momentum',
  'price_roll_momentum_4',
  'price_roll_momentum_24',
  'price_end_digits'],
 'fe_snap_event.csv': ['snap_transform_1',
  'snap_transform_2',
  'next_event_type_1',
  'last_event_type_1',
  'days_since_event',
  'days_until_event'],
 'shift_fe_dow_means_and_days_since_sale.csv': ['mean_4_dow_0',
  'mean_4_dow_1',
  'mean_4_dow_2',
  'mean_4_dow_3',
  'mean_4_dow_4',
  'mean_4_dow_5',
  'mean_4_dow_6',
  'mean_20_dow_0',
  'mean_20_dow_1',
  'mean_20_dow_2',
  'mean_20_dow_3',
  'mean_20_dow_4',
  'mean_20_dow_5',
  'mean_20_dow_6',
  'days_since_sale'],
 'shift_fe_ipca_15_84.csv': ['index',
  'ipca_15_84_comp_1',
  'ipca_15_84_comp_2',
  'ipca_15_84_comp_3',
  'ipca_15_84_comp_4',
  'ipca_15_84_comp_5',
  'ipca_15_84_comp_6',
  'ipca_15_84_comp_7',
  'ipca_15_84_comp_8',
  'ipca_15_84_comp_9',
  'ipca_15_84_comp_10',
  'ipca_15_84_comp_11',
  'ipca_15_84_comp_12',
  'ipca_15_84_comp_13',
  'ipca_15_84_comp_14'],
 'shift_fe_lags_1_14.csv': ['lag_1',
  'lag_2',
  'lag_3',
  'lag_4',
  'lag_5',
  'lag_6',
  'lag_7',
  'lag_8',
  'lag_9',
  'lag_10',
  'lag_11',
  'lag_12',
  'lag_13',
  'lag_14'],
 'shift_fe_rw_1.csv': ['shift_1_rolling_nanmean_3',
  'shift_1_rolling_mean_decay_3',
  'shift_1_rolling_nanmean_7',
  'shift_1_rolling_mean_decay_7',
  'shift_1_rolling_nanstd_7'],
 'shift_fe_rw_2.csv': ['shift_1_rolling_nanmean_14',
  'shift_1_rolling_mean_decay_14',
  'shift_1_rolling_diff_nanmean_14',
  'shift_1_rolling_nanstd_14',
  'shift_1_rolling_nanmean_30',
  'shift_1_rolling_mean_decay_30'],
 'shift_fe_rw_3.csv': ['shift_1_rolling_nanmean_60',
  'shift_1_rolling_nanmedian_60',
  'shift_1_rolling_mean_decay_60',
  'shift_1_rolling_nanstd_60',
  'shift_1_rolling_nanmean_140',
  'shift_1_rolling_mean_decay_140',
  'shift_1_rolling_nanstd_140'],
 'shift_fe_shifts_mom_1.csv': ['shift_8_rolling_nanmean_7',
  'momentum_7_rolling_nanmean_7',
  'shift_8_rolling_mean_decay_7',
  'momentum_7_rolling_mean_decay_7',
  'momentum_7_rolling_diff_nanmean_7',
  'shift_29_rolling_nanmean_7',
  'momentum_28_rolling_nanmean_7',
  'shift_29_rolling_mean_decay_7',
  'momentum_28_rolling_mean_decay_7',
  'shift_29_rolling_diff_nanmean_7',
  'momentum_28_rolling_diff_nanmean_7'],
 'shift_fe_shifts_mom_2.csv': ['shift_8_rolling_nanmean_30',
  'momentum_7_rolling_nanmean_30',
  'shift_8_rolling_mean_decay_30',
  'shift_29_rolling_nanmean_30',
  'momentum_28_rolling_nanmean_30',
  'shift_29_rolling_mean_decay_30'],
 'shift_fe_shifts_mom_3.csv': ['shift_29_rolling_nanmean_60',
  'shift_91_rolling_nanmean_60',
  'shift_91_rolling_mean_decay_60']}

source

prep_data

 prep_data (cfg)

source

neptune

 neptune (cfg)

Not implemented

neptune(cfg)

source

cli_lgb_daily

 cli_lgb_daily (path_cfg:str<path to the configuration json>='cfg.json')

source

lgb_daily

 lgb_daily (path_cfg:str='cfg.json')

Train one model for each day of prediction, according to the configuration in path_cfg.