From 4aa21a62719be5bb32f17ea1e7454742365257c2 Mon Sep 17 00:00:00 2001
From: Thomas Nyberg <tomnyberg@gmail.com>
Date: Wed, 20 Sep 2017 17:32:01 +0200
Subject: [PATCH 1/4] Make LDAResults._expElogbeta() method constant

The method's result was being recomputed each time it was called, but
all data used to do the computation is constant throughout the life of
the class.
---
 rosetta/text/vw_helpers.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/rosetta/text/vw_helpers.py b/rosetta/text/vw_helpers.py
index dfcb857..93f8efc 100644
--- a/rosetta/text/vw_helpers.py
+++ b/rosetta/text/vw_helpers.py
@@ -332,6 +332,9 @@ def _set_probabilities(self, topics, predictions):
         self.pr_doc = doc_sums / doc_sums.sum()
         self.pr_doc_topic = predictions / predictions.sum().sum()
 
+        lam = self._lambda_word_sums * self.pr_token_topic
+        self._constExpElogbeta = np.exp(self._dirichlet_expectation(lam + EPS))
+
     def prob_token_topic(self, token=None, topic=None, c_token=None,
                          c_topic=None):
         """
@@ -565,9 +568,7 @@ def _expElogbeta(self):
         topic-word weights.
         """
         # Get lambda, the dirichlet parameter originally returned by VW.
-        lam = self._lambda_word_sums * self.pr_token_topic
-
-        return np.exp(self._dirichlet_expectation(lam + EPS))
+        return self._constExpElogbeta
 
     def _dirichlet_expectation(self, alpha):
         """

From 63e116584d25bab24ad895684b9dddbe55c331e8 Mon Sep 17 00:00:00 2001
From: Thomas Nyberg <tomnyberg@gmail.com>
Date: Fri, 22 Sep 2017 11:06:09 +0200
Subject: [PATCH 2/4] Add missing dependencies to requirements.txt

---
 requirements.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 58c53ba..03b800e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,6 @@ pyth
 pymongo
 MySQL-python
 scipy
+unidecode
+multiprocess
+nltk

From 8f73f7063ea1b07c5001d5b0b4ceb979daa09cc9 Mon Sep 17 00:00:00 2001
From: Thomas Nyberg <tomnyberg@gmail.com>
Date: Mon, 2 Oct 2017 09:50:02 +0200
Subject: [PATCH 3/4] Pin pandas to version == 0.16.2

There have been backwards-incompatible changes in later versions of
pandas for which rosetta has not been updated. Until rosetta is updated,
the version should be pinned to reflect this.
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 03b800e..6fb272a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-pandas
+pandas==0.16.2
 scikit-learn
 statsmodels
 gensim

From a574a0ea47d4410415f1686b20827f638d251dd9 Mon Sep 17 00:00:00 2001
From: Thomas Nyberg <tomnyberg@gmail.com>
Date: Mon, 2 Oct 2017 09:52:47 +0200
Subject: [PATCH 4/4] Remove TestLDAResults.test_expElogbeta()

This test was using unexposed internal class methods in a way which was
incompatible with legitimate usage. It worked before due to how the
internals of the class were designed, but it no longer works now that
the LDAResults._expElogbeta() method is const.
---
 rosetta/tests/test_text.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/rosetta/tests/test_text.py b/rosetta/tests/test_text.py
index d26d470..3bf9f90 100644
--- a/rosetta/tests/test_text.py
+++ b/rosetta/tests/test_text.py
@@ -323,16 +323,6 @@ def test_dirichlet_expectation(self):
             [-0.13470677,  -13.32429878]]).T
         assert_allclose(result, benchmark, atol=1e-4)
 
-    def test_expElogbeta(self):
-        # Make sure equal to exponential of dirichlet_expectation when we
-        # pass in all ones
-        lda = self.choose_lda('lda')
-        lda._lambda_word_sums = pd.Series(
-            np.ones(lda.num_topics), index=lda.topics)
-        result = lda._expElogbeta
-        benchmark = np.exp(lda._dirichlet_expectation(lda.pr_token_topic))
-        assert_frame_equal(result, benchmark)
-
     def test_predict_1(self):
         # Use fact that w0  <--> topic_0,  w1 <--> topic_1
         lda = self.choose_lda('lda_2')