33 changes: 33 additions & 0 deletions DeepRecommender.py
@@ -0,0 +1,33 @@
import run
from run import DeepRecommenderTrainBenchmark
import infer
from infer import DeepRecommenderInferenceBenchmark

class DeepRecommenderBenchmark:
    def __init__(self, device="cpu", jit=False):
        # Keep the two benchmarks on dedicated attributes so they do not
        # shadow the train()/eval() methods defined below.
        self.train_bench = DeepRecommenderTrainBenchmark(device=device, jit=jit)
        self.infer_bench = DeepRecommenderInferenceBenchmark(device=device, jit=jit)

    def train(self, niter=1):
        # Run training for the number of epochs configured on the train benchmark.
        self.train_bench.train(self.train_bench.args.num_epochs)

    def eval(self, niter=1):
        self.infer_bench.eval(niter)

    def timedInfer(self):
        self.infer_bench.TimedInferenceRun()

    def timedTrain(self):
        self.train_bench.TimedTrainingRun()

def main():
cudaBenchMark = DeepRecommenderBenchmark(device = 'cuda', jit = False)
cudaBenchMark.timedTrain()
cudaBenchMark.timedInfer()

cpuBenchMark = DeepRecommenderBenchmark(device = 'cpu', jit = False)
cpuBenchMark.timedTrain()
cpuBenchMark.timedInfer()

if __name__ == '__main__':
main()
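A minimal usage sketch for the wrapper above (not part of this PR): it assumes `run.py` and `infer.py` from this repo are importable and that PyTorch is installed, and, unlike `main()`, it falls back to CPU when no GPU is available.

```python
# Hypothetical usage sketch: pick the device from CUDA availability instead of
# benchmarking both devices unconditionally.
import torch

from DeepRecommender import DeepRecommenderBenchmark

device = "cuda" if torch.cuda.is_available() else "cpu"
bench = DeepRecommenderBenchmark(device=device, jit=False)
bench.timedTrain()   # timed end-to-end training run
bench.timedInfer()   # timed inference run
```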
20 changes: 5 additions & 15 deletions README.md
@@ -58,21 +58,7 @@ $ python ./data_utils/netflix_data_convert.py training_set Netflix
### Train the model
In this example, the model is trained for 12 epochs; in the paper we train for 102.
```
python run.py --gpu_ids 0 \
--path_to_train_data Netflix/NF_TRAIN \
--path_to_eval_data Netflix/NF_VALID \
--hidden_layers 512,512,1024 \
--non_linearity_type selu \
--batch_size 128 \
--logdir model_save \
--drop_prob 0.8 \
--optimizer momentum \
--lr 0.005 \
--weight_decay 0 \
--aug_step 1 \
--noise_prob 0 \
--num_epochs 12 \
--summary_frequency 1000
python run.py --gpu_ids 0 --path_to_train_data Netflix/NF_TRAIN --path_to_eval_data Netflix/NF_VALID --hidden_layers 512,512,1024 --non_linearity_type selu --batch_size 128 --logdir model_save --drop_prob 0.8 --optimizer momentum --lr 0.005 --weight_decay 0 --aug_step 1 --noise_prob 0 --num_epochs 12 --summary_frequency 1000
```
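For a quick smoke test on the 1-week split added by this PR, the same command can be pointed at the `N1W` directories; the paths and the epoch count below are illustrative, all other flags are unchanged from the command above.

```
python run.py --gpu_ids 0 --path_to_train_data Netflix/N1W_TRAIN --path_to_eval_data Netflix/N1W_VALID --hidden_layers 512,512,1024 --non_linearity_type selu --batch_size 128 --logdir model_save --drop_prob 0.8 --optimizer momentum --lr 0.005 --weight_decay 0 --aug_step 1 --noise_prob 0 --num_epochs 1 --summary_frequency 1000
```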

Note that you can run TensorBoard in parallel.
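A sketch of how that might look, assuming TensorBoard is installed and pointed at the `model_save` directory passed via `--logdir` above:

```
tensorboard --logdir=model_save
```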
@@ -92,6 +78,10 @@ python infer.py \
--predictions_path preds.txt
```

For the 1-week benchmark split (`N1W`) produced by the converter, the same command looks like:
```
python infer.py --path_to_train_data Netflix/N1W_TRAIN --path_to_eval_data Netflix/N1W_TEST --hidden_layers 512,512,1024 --non_linearity_type selu --save_path model_save/model.epoch_0 --drop_prob 0.8 --predictions_path preds.txt
```

### Compute Test RMSE
```
python compute_RMSE.py --path_to_predictions=preds.txt
28 changes: 25 additions & 3 deletions data_utils/netflix_data_convert.py
@@ -85,9 +85,11 @@ def main(args):
out_folder = args[2]
# create necessary folders:
for output_dir in [(out_folder + f) for f in [
"/N3M_TRAIN", "/N3M_VALID", "/N3M_TEST", "/N6M_TRAIN",
"/N6M_VALID", "/N6M_TEST", "/N1Y_TRAIN", "/N1Y_VALID",
"/N1Y_TEST", "/NF_TRAIN", "/NF_VALID", "/NF_TEST"]]:
"/N1W_TRAIN", "/N1W_VALID", "/N1W_TEST",
"/N3M_TRAIN", "/N3M_VALID", "/N3M_TEST",
"/N6M_TRAIN", "/N6M_VALID", "/N6M_TEST",
"/N1Y_TRAIN", "/N1Y_VALID", "/N1Y_TEST",
"/NF_TRAIN", "/NF_VALID", "/NF_TEST"]]:
makedirs(output_dir, exist_ok=True)

text_files = [path.join(folder, f)
@@ -118,6 +120,26 @@ def main(args):
print("STATS FOR ALL INPUT DATA")
print_stats(all_data)

# Netflix 1 week, for benchmark
(n1w_train, n1w_valid, n1w_test) = create_NETFLIX_data_timesplit(all_data,
"2005-09-01",
"2005-09-07",
"2005-09-10",
"2005-09-11")
print("Netflix 1w train")
print_stats(n1w_train)
save_data_to_file(n1w_train, out_folder + "/N1W_TRAIN/n1w.train.txt")
print("Netflix 1w valid")
print_stats(n1w_valid)
save_data_to_file(n1w_valid, out_folder + "/N1W_VALID/n1w.valid.txt")
print("Netflix 1w test")
print_stats(n1w_test)
save_data_to_file(n1w_test, out_folder + "/N1W_TEST/n1w.test.txt")

print("finished 1 week!")

# Stop here for now: only the 1-week (N1W) benchmark split is generated;
# the full/3M/6M/1Y Netflix splits below are skipped.
quit()

# Netflix full
(nf_train, nf_valid, nf_test) = create_NETFLIX_data_timesplit(all_data,
"1999-12-01",