这是indexloc提供的服务,不要输入任何密码
Skip to content
This repository was archived by the owner on Aug 3, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Data files and folders
download/
training_set/
nf_prize_dataset.tar.gz
Netflix/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
31 changes: 9 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,16 @@ $ python -m unittest test/test_model.py

### Get the data

**Note: Run all these commands within your `DeepRecommender` folder**

[Netflix prize](http://netflixprize.com/)

* ```$ mkdir -p ~/Recommendations``` (you can use any other folder name)
* Download from [here](http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a) to ```~/Recommendations```
* ```$ cd ~/Recommendations```
* ```$ tar -xvf nf_prize_dataset.tar.gz```
* ```$ tar -xf download/training_set.tar ```
* Create necessary folders
* Download from [here](http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a) into your ```DeepRecommender``` folder
```
mkdir -p Netflix/N3M_TRAIN
mkdir -p Netflix/N3M_VALID
mkdir -p Netflix/N3M_TEST
mkdir -p Netflix/N6M_TRAIN
mkdir -p Netflix/N6M_VALID
mkdir -p Netflix/N6M_TEST
mkdir -p Netflix/N1Y_TRAIN
mkdir -p Netflix/N1Y_VALID
mkdir -p Netflix/N1Y_TEST
mkdir -p Netflix/NF_TRAIN
mkdir -p Netflix/NF_VALID
mkdir -p Netflix/NF_TEST
$ tar -xvf nf_prize_dataset.tar.gz
$ tar -xf download/training_set.tar
$ python ./data_utils/netflix_data_convert.py training_set Netflix
```
* ```$ python ~/repos/DeepRecoEncoders/data_utils/netflix_data_convert.py training_set Netflix```. Here ```~/repos/DeepRecoEncoders``` is the path to this repo.

#### Data stats
| Dataset | Netflix 3 months | Netflix 6 months | Netflix 1 year | Netflix full |
Expand All @@ -62,7 +49,7 @@ mkdir -p Netflix/NF_TEST
### Train the model
In this example, the model will be trained for 12 epochs. In the paper, we train for 102 epochs.
```
python ~/repos/DeepRecoEncoders/run.py --gpu_ids 0 \
python run.py --gpu_ids 0 \
--path_to_train_data Netflix/NF_TRAIN \
--path_to_eval_data Netflix/NF_VALID \
--hidden_layers 512,512,1024 \
Expand All @@ -86,7 +73,7 @@ $ tensorboard --logdir=model_save

### Run inference on the Test set
```
python ~/repos/DeepRecoEncoders/infer.py \
python infer.py \
--path_to_train_data Netflix/NF_TRAIN \
--path_to_eval_data Netflix/NF_TEST \
--hidden_layers 512,512,1024 \
Expand All @@ -98,7 +85,7 @@ python ~/repos/DeepRecoEncoders/infer.py \

### Compute Test RMSE
```
python ~/repos/DeepRecoEncoders/compute_RMSE.py --path_to_predictions=preds.txt
python compute_RMSE.py --path_to_predictions=preds.txt
```
After 12 epochs you should get an RMSE of around 0.927. Train longer to get below 0.92.

Expand Down
9 changes: 8 additions & 1 deletion data_utils/netflix_data_convert.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2017 NVIDIA Corporation
from os import listdir, path
from os import listdir, path, makedirs
import random
import sys
import time
Expand Down Expand Up @@ -75,6 +75,13 @@ def create_NETFLIX_data_timesplit(all_data,


def main(args):
# create necessary folders:
for output_dir in [
"Netflix/N3M_TRAIN", "Netflix/N3M_VALID", "Netflix/N3M_TEST", "Netflix/N6M_TRAIN",
"Netflix/N6M_VALID", "Netflix/N6M_TEST", "Netflix/N1Y_TRAIN", "Netflix/N1Y_VALID",
"Netflix/N1Y_TEST", "Netflix/NF_TRAIN", "Netflix/NF_VALID", "Netflix/NF_TEST"]:
makedirs(output_dir, exist_ok=True)

user2id_map = dict()
item2id_map = dict()
userId = 0
Expand Down
5 changes: 0 additions & 5 deletions infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,9 @@
import argparse
from reco_encoder.data import input_layer
from reco_encoder.model import model
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable
import copy
import time
from pathlib import Path
import numpy as np

parser = argparse.ArgumentParser(description='RecoEncoder')

Expand Down Expand Up @@ -92,4 +88,3 @@ def main():
if __name__ == '__main__':
main()


2 changes: 0 additions & 2 deletions reco_encoder/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
# Copyright (c) 2017 NVIDIA Corporation
from . import data
from . import model
1 change: 0 additions & 1 deletion reco_encoder/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# Copyright (c) 2017 NVIDIA Corporation
from . import input_layer
2 changes: 0 additions & 2 deletions reco_encoder/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
# Copyright (c) 2017 NVIDIA Corporation
from .model import AutoEncoder
from .model import MSEloss
2 changes: 0 additions & 2 deletions test/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,3 @@
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import reco_encoder
1 change: 0 additions & 1 deletion test/data_layer_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) 2017 NVIDIA Corporation
import unittest
import sys
from .context import reco_encoder

class UserItemRecDataProviderTest(unittest.TestCase):
Expand Down
2 changes: 0 additions & 2 deletions test/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
import sys
sys.path.append('data')
sys.path.append('model')
import torch
from .context import reco_encoder
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable

class iRecAutoEncoderTest(unittest.TestCase):
Expand Down