33 changes: 33 additions & 0 deletions DeepRecommender.py
@@ -0,0 +1,33 @@
import run
from run import DeepRecommenderTrainBenchmark
import infer
from infer import DeepRecommenderInferenceBenchmark

class DeepRecommenderBenchmark:
    def __init__(self, device="cpu", jit=False):
        # Keep the two benchmarks on dedicated attributes so they do not
        # shadow the train()/eval() methods defined below.
        self.train_bench = DeepRecommenderTrainBenchmark(device=device, jit=jit)
        self.infer_bench = DeepRecommenderInferenceBenchmark(device=device, jit=jit)

    def train(self, niter=1):
        # Run training for the number of epochs configured on the train benchmark.
        self.train_bench.train(self.train_bench.args.num_epochs)

    def eval(self, niter=1):
        self.infer_bench.eval(niter)

    def timedInfer(self):
        self.infer_bench.TimedInferenceRun()

    def timedTrain(self):
        self.train_bench.TimedTrainingRun()

def main():
cudaBenchMark = DeepRecommenderBenchmark(device = 'cuda', jit = False)
cudaBenchMark.timedTrain()
cudaBenchMark.timedInfer()

cpuBenchMark = DeepRecommenderBenchmark(device = 'cpu', jit = False)
cpuBenchMark.timedTrain()
cpuBenchMark.timedInfer()

if __name__ == '__main__':
main()
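A minimal usage sketch for the wrapper above (not part of this PR): it assumes `run.py` and `infer.py` from this repo are importable and that PyTorch is installed, and, unlike `main()`, it falls back to CPU when no GPU is available.

```python
# Hypothetical usage sketch: pick the device from CUDA availability instead of
# benchmarking both devices unconditionally.
import torch

from DeepRecommender import DeepRecommenderBenchmark

device = "cuda" if torch.cuda.is_available() else "cpu"
bench = DeepRecommenderBenchmark(device=device, jit=False)
bench.timedTrain()   # timed end-to-end training run
bench.timedInfer()   # timed inference run
```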
20 changes: 5 additions & 15 deletions README.md
@@ -58,21 +58,7 @@ $ python ./data_utils/netflix_data_convert.py training_set Netflix
### Train the model
In this example, the model is trained for 12 epochs; in the paper we train for 102.
```
python run.py --gpu_ids 0 \
--path_to_train_data Netflix/NF_TRAIN \
--path_to_eval_data Netflix/NF_VALID \
--hidden_layers 512,512,1024 \
--non_linearity_type selu \
--batch_size 128 \
--logdir model_save \
--drop_prob 0.8 \
--optimizer momentum \
--lr 0.005 \
--weight_decay 0 \
--aug_step 1 \
--noise_prob 0 \
--num_epochs 12 \
--summary_frequency 1000
python run.py --gpu_ids 0 --path_to_train_data Netflix/NF_TRAIN --path_to_eval_data Netflix/NF_VALID --hidden_layers 512,512,1024 --non_linearity_type selu --batch_size 128 --logdir model_save --drop_prob 0.8 --optimizer momentum --lr 0.005 --weight_decay 0 --aug_step 1 --noise_prob 0 --num_epochs 12 --summary_frequency 1000
```
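For a quick smoke test on the 1-week split added by this PR, the same command can be pointed at the `N1W` directories; the paths and the epoch count below are illustrative, all other flags are unchanged from the command above.

```
python run.py --gpu_ids 0 --path_to_train_data Netflix/N1W_TRAIN --path_to_eval_data Netflix/N1W_VALID --hidden_layers 512,512,1024 --non_linearity_type selu --batch_size 128 --logdir model_save --drop_prob 0.8 --optimizer momentum --lr 0.005 --weight_decay 0 --aug_step 1 --noise_prob 0 --num_epochs 1 --summary_frequency 1000
```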

Note that you can run TensorBoard in parallel.
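A sketch of how that might look, assuming TensorBoard is installed and pointed at the `model_save` directory passed via `--logdir` above:

```
tensorboard --logdir=model_save
```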
@@ -92,6 +78,10 @@ python infer.py \
--predictions_path preds.txt
```

For the 1-week benchmark split (`N1W`) produced by the converter, the same command looks like:
```
python infer.py --path_to_train_data Netflix/N1W_TRAIN --path_to_eval_data Netflix/N1W_TEST --hidden_layers 512,512,1024 --non_linearity_type selu --save_path model_save/model.epoch_0 --drop_prob 0.8 --predictions_path preds.txt
```

### Compute Test RMSE
```
python compute_RMSE.py --path_to_predictions=preds.txt
28 changes: 25 additions & 3 deletions data_utils/netflix_data_convert.py
@@ -85,9 +85,11 @@ def main(args):
out_folder = args[2]
# create necessary folders:
for output_dir in [(out_folder + f) for f in [
"/N3M_TRAIN", "/N3M_VALID", "/N3M_TEST", "/N6M_TRAIN",
"/N6M_VALID", "/N6M_TEST", "/N1Y_TRAIN", "/N1Y_VALID",
"/N1Y_TEST", "/NF_TRAIN", "/NF_VALID", "/NF_TEST"]]:
"/N1W_TRAIN", "/N1W_VALID", "/N1W_TEST",
"/N3M_TRAIN", "/N3M_VALID", "/N3M_TEST",
"/N6M_TRAIN", "/N6M_VALID", "/N6M_TEST",
"/N1Y_TRAIN", "/N1Y_VALID", "/N1Y_TEST",
"/NF_TRAIN", "/NF_VALID", "/NF_TEST"]]:
makedirs(output_dir, exist_ok=True)

text_files = [path.join(folder, f)
@@ -118,6 +120,26 @@ def main(args):
print("STATS FOR ALL INPUT DATA")
print_stats(all_data)

# Netflix 1 week, for benchmark
(n1w_train, n1w_valid, n1w_test) = create_NETFLIX_data_timesplit(all_data,
"2005-09-01",
"2005-09-07",
"2005-09-10",
"2005-09-11")
print("Netflix 1w train")
print_stats(n1w_train)
save_data_to_file(n1w_train, out_folder + "/N1W_TRAIN/n1w.train.txt")
print("Netflix 1w valid")
print_stats(n1w_valid)
save_data_to_file(n1w_valid, out_folder + "/N1W_VALID/n1w.valid.txt")
print("Netflix 1w test")
print_stats(n1w_test)
save_data_to_file(n1w_test, out_folder + "/N1W_TEST/n1w.test.txt")

print("finished 1 week!")

# Stop here for now: only the 1-week (N1W) benchmark split is generated;
# the full/3M/6M/1Y Netflix splits below are skipped.
quit()

# Netflix full
(nf_train, nf_valid, nf_test) = create_NETFLIX_data_timesplit(all_data,
"1999-12-01",