4 changes: 0 additions & 4 deletions .github/workflows/testing.yml
@@ -73,10 +73,6 @@ jobs:
- name: Run more tests
run: poetry run python sycamore/tests/manual/test_fast_sycamore_import.py
working-directory: lib/sycamore
- name: Run query-ui test
run: poetry run bash -c 'cd ../../apps/query-ui/queryui && PYTHONPATH=. pytest .'
# run here because that's where the poetry venv is set up.
working-directory: lib/sycamore
- name: DF-6
run: df

7 changes: 6 additions & 1 deletion apps/query-eval/queryeval/driver.py
@@ -40,6 +40,7 @@ class QueryEvalDriver:
natural_language_response: If True, return the response in natural language format. Otherwise,
return the raw DocSet results.
doc_limit: Limit the number of documents in each result set to this number.
llm: LLM model name to use.
overwrite: If True, overwrite the results file if it already exists.
"""

@@ -54,6 +55,7 @@ def __init__(
dry_run: bool = False,
natural_language_response: bool = True,
doc_limit: Optional[int] = None,
llm: Optional[str] = None,
overwrite: bool = False,
):
console.print(":moon: Sycamore Query Eval Driver starting")
@@ -76,6 +78,7 @@ def __init__(
self.config.config.natural_language_response or natural_language_response
)
self.config.config.doc_limit = self.config.config.doc_limit or doc_limit
self.config.config.llm = self.config.config.llm or llm
self.config.config.overwrite = self.config.config.overwrite or overwrite

# Configure logging.
@@ -106,7 +109,9 @@ def __init__(

# Set up Sycamore Query Client.
self.client = SycamoreQueryClient(
s3_cache_path=self.config.config.llm_cache_path, cache_dir=self.config.config.query_cache_path
llm_cache_dir=self.config.config.llm_cache_path,
cache_dir=self.config.config.query_cache_path,
llm=self.config.config.llm,
)

# Use schema from the results file, input file, or OpenSearch, in that order.
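Note: like the other driver options above, the new `llm` setting is resolved with a "config file wins" rule: the constructor argument (which the CLI passes through) is only used when the loaded config has no value of its own. A tiny sketch of that `or`-based pattern, using a stand-in dataclass instead of the real `QueryEvalConfig`; the model-name strings are illustrative only:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class StubConfig:
    """Stand-in for the eval config, used only to illustrate the precedence rule."""
    llm: Optional[str] = None


def resolve_llm(config: StubConfig, llm_arg: Optional[str]) -> Optional[str]:
    # Mirrors `self.config.config.llm = self.config.config.llm or llm`.
    return config.llm or llm_arg


assert resolve_llm(StubConfig(), "openai.gpt-4o") == "openai.gpt-4o"  # argument used as fallback
assert resolve_llm(StubConfig(llm="bedrock.claude-v2"), "openai.gpt-4o") == "bedrock.claude-v2"  # config wins
```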
4 changes: 4 additions & 0 deletions apps/query-eval/queryeval/main.py
@@ -17,6 +17,7 @@
from rich.console import Console


from sycamore.llms import MODELS
from queryeval.driver import QueryEvalDriver


@@ -33,6 +34,7 @@
@click.option("--dry-run", help="Dry run - do not run any stages", is_flag=True)
@click.option("--doc-limit", help="Limit number of docs in result set", type=int)
@click.option("--overwrite", help="Overwrite existing results file", is_flag=True)
@click.option("--llm", help="LLM model name", type=click.Choice(list(MODELS.keys())))
@click.option(
"--raw-output", help="Output should be a raw DocSet, rather than natural language", is_flag=True, default=False
)
@@ -48,6 +50,7 @@ def cli(
dry_run: bool,
doc_limit: Optional[int],
overwrite: bool,
llm: Optional[str],
raw_output: bool,
):
ctx.ensure_object(dict)
@@ -61,6 +64,7 @@ def cli(
natural_language_response=not raw_output,
log_file=logfile,
doc_limit=doc_limit,
llm=llm,
overwrite=overwrite,
)
ctx.obj["driver"] = driver
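Because the `--llm` choices are built from the keys of the new `MODELS` registry, the accepted values can be listed directly. The exact key strings depend on the `OpenAIModels` and `BedrockModels` enums, so the names in the comment below are illustrative rather than guaranteed:

```python
# Print the model names accepted by the new --llm flag.
from sycamore.llms import MODELS

for name in sorted(MODELS):
    print(name)  # e.g. "openai.gpt-4o", "bedrock.<model name>"
```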
1 change: 1 addition & 0 deletions apps/query-eval/queryeval/types.py
@@ -16,6 +16,7 @@ class QueryEvalConfig(BaseModel):
config_file: Optional[str] = None
results_file: Optional[str] = None
log_file: Optional[str] = None
llm: Optional[str] = None
index: Optional[str] = None
query_cache_path: Optional[str] = None
llm_cache_path: Optional[str] = None
2 changes: 1 addition & 1 deletion apps/query-server/queryserver/main.py
@@ -22,7 +22,7 @@
CACHE_PATH = os.getenv("QUERYSERVER_CACHE_PATH", os.path.join(tempfile.gettempdir(), "queryserver_cache"))
LLM_CACHE_PATH = os.getenv("QUERYSERVER_LLM_CACHE_PATH", os.path.join(tempfile.gettempdir(), "queryserver_llm_cache"))

sqclient = SycamoreQueryClient(s3_cache_path=LLM_CACHE_PATH, cache_dir=CACHE_PATH)
sqclient = SycamoreQueryClient(llm_cache_dir=LLM_CACHE_PATH, cache_dir=CACHE_PATH)


class Index(BaseModel):
2 changes: 1 addition & 1 deletion apps/query-ui/queryui/Sycamore_Query.py
@@ -60,7 +60,7 @@ def show_code(code: str):
def run_query():
"""Run the given query."""
client = get_sycamore_query_client(
s3_cache_path=st.session_state.llm_cache_dir,
llm_cache_dir=st.session_state.llm_cache_dir,
trace_dir=st.session_state.trace_dir,
cache_dir=st.session_state.cache_dir,
exec_mode=ExecMode.LOCAL if st.session_state.local_mode else ExecMode.RAY,
8 changes: 4 additions & 4 deletions apps/query-ui/queryui/configuration.py
@@ -9,27 +9,27 @@

For example, you can write:
context = sycamore.init(params={
"default": {"llm": OpenAI(OpenAIModels.GPT_4O.value, cache=cache_from_path(s3_cache_path))},
"default": {"llm": OpenAI(OpenAIModels.GPT_4O.value, cache=cache_from_path(llm_cache_dir))},
"opensearch": {"os_client_args": get_opensearch_client_args(),
"text_embedder": SycamoreQueryClient.default_text_embedder()}
})
return SycamoreQueryClient(context=context, trace_dir=trace_dir)

Note that if you include keys in your configuration, it is safer to write:
import mycompany_configuration
return mycompany_configuration.get_sycamore_query_client(s3_cache_path, trace_dir)
return mycompany_configuration.get_sycamore_query_client(llm_cache_dir, trace_dir)

We require you to edit this file so that if the arguments to get_sycamore_query_client change you
will get a merge conflict.
"""


def get_sycamore_query_client(
s3_cache_path: Optional[str] = None,
llm_cache_dir: Optional[str] = None,
trace_dir: Optional[str] = None,
cache_dir: Optional[str] = None,
exec_mode: ExecMode = ExecMode.RAY,
) -> SycamoreQueryClient:
return SycamoreQueryClient(
s3_cache_path=s3_cache_path, trace_dir=trace_dir, cache_dir=cache_dir, sycamore_exec_mode=exec_mode
llm_cache_dir=llm_cache_dir, trace_dir=trace_dir, cache_dir=cache_dir, sycamore_exec_mode=exec_mode
)
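For reference, a company-specific module of the kind the docstring suggests might look like the sketch below. The module name is hypothetical, the import paths for `cache_from_path` and `SycamoreQueryClient` are assumptions, and the OpenSearch overrides shown in the docstring are omitted to keep the example self-contained:

```python
# mycompany_configuration.py (hypothetical module, following the docstring above)
from typing import Optional

import sycamore
from sycamore.llms import OpenAI, OpenAIModels
from sycamore.query.client import SycamoreQueryClient  # assumed import path
from sycamore.utils.cache import cache_from_path  # assumed import path


def get_sycamore_query_client(
    llm_cache_dir: Optional[str] = None,
    trace_dir: Optional[str] = None,
) -> SycamoreQueryClient:
    # Build a context whose default LLM caches responses under llm_cache_dir
    # (the parameter this PR renames from s3_cache_path).
    context = sycamore.init(
        params={"default": {"llm": OpenAI(OpenAIModels.GPT_4O.value, cache=cache_from_path(llm_cache_dir))}}
    )
    return SycamoreQueryClient(context=context, trace_dir=trace_dir)
```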
2 changes: 1 addition & 1 deletion apps/query-ui/queryui/pages/Chat.py
@@ -175,7 +175,7 @@ def query_data_source(query: str, index: str) -> Tuple[Any, Optional[Any], Optio
return do_rag_query(query, index), None, None
else:
sqclient = SycamoreQueryClient(
s3_cache_path=st.session_state.llm_cache_dir,
llm_cache_dir=st.session_state.llm_cache_dir,
trace_dir=st.session_state.trace_dir,
cache_dir=st.session_state.cache_dir,
sycamore_exec_mode=ExecMode.LOCAL if st.session_state.local_mode else ExecMode.RAY,
10 changes: 0 additions & 10 deletions apps/query-ui/queryui/test_configuration.py

This file was deleted.

21 changes: 21 additions & 0 deletions lib/sycamore/sycamore/llms/__init__.py
@@ -1,8 +1,29 @@
from typing import Callable, Dict

from sycamore.llms.llms import LLM
from sycamore.llms.openai import OpenAI, OpenAIClientType, OpenAIModels, OpenAIClientParameters, OpenAIClientWrapper
from sycamore.llms.bedrock import Bedrock, BedrockModels

# Register the model constructors.
MODELS: Dict[str, Callable[..., LLM]] = {}
MODELS.update(
    # Bind the loop variable as a default argument so each lambda constructs the
    # model it was registered for rather than the last member of the enum.
    {f"openai.{model.value.name}": lambda model=model, **kwargs: OpenAI(model.value.name, **kwargs) for model in OpenAIModels}
)
MODELS.update(
    {f"bedrock.{model.value.name}": lambda model=model, **kwargs: Bedrock(model.value.name, **kwargs) for model in BedrockModels}
)


def get_llm(model_name: str) -> Callable[..., LLM]:
"""Returns a function that instantiates the given model."""
if model_name not in MODELS:
raise ValueError(f"Unknown model name: {model_name}")
return MODELS[model_name]


__all__ = [
"MODELS",
"get_llm",
"LLM",
"OpenAI",
"OpenAIClientType",
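With the registry in place, a model-name string (for example the value given to the new `--llm` flag) can be turned into an LLM instance. A minimal sketch; "openai.gpt-4o" is assumed to be one of the registered keys, which depend on the `OpenAIModels` enum:

```python
from sycamore.llms import MODELS, get_llm

print(sorted(MODELS))  # registered keys look like "openai.<name>" and "bedrock.<name>"

# Look up a constructor by name and instantiate it; extra kwargs are forwarded
# to the underlying OpenAI/Bedrock constructor.
llm = get_llm("openai.gpt-4o")()
print(llm)  # the new LLM.__str__ below renders something like "OpenAI(gpt-4o)"

# Unknown names fail fast with a ValueError.
try:
    get_llm("no.such.model")
except ValueError as err:
    print(err)
```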
21 changes: 20 additions & 1 deletion lib/sycamore/sycamore/llms/bedrock.py
@@ -2,7 +2,7 @@
from enum import Enum
import boto3
import json
from typing import Dict, Optional, Union
from typing import Dict, List, Optional, Union


from sycamore.llms.llms import LLM
@@ -60,6 +60,23 @@ def is_chat_mode(self) -> bool:
"""Returns True if the LLM is in chat mode, False otherwise."""
return True

def _rewrite_system_messages(self, messages: Optional[List[Dict]]) -> Optional[List[Dict]]:
# Anthropic models don't accept messages with "role" set to "system", and
# require alternation between "user" and "assistant" roles. So, we rewrite
# the messages to fold all "system" messages into the "user" role.
if not messages:
return messages
orig_messages = messages.copy()
cur_system_message = ""
for i, message in enumerate(orig_messages):
if message.get("role") == "system":
cur_system_message += message.get("content", "")
else:
if cur_system_message:
messages[i]["content"] = cur_system_message + "\n" + message.get("content", "")
cur_system_message = ""
return [m for m in messages if m.get("role") != "system"]

def _get_generate_kwargs(self, prompt_kwargs: Dict, llm_kwargs: Optional[Dict] = None) -> Dict:
kwargs = {
"temperature": 0,
@@ -74,6 +91,8 @@ def _get_generate_kwargs(self, prompt_kwargs: Dict, llm_kwargs: Optional[Dict] =
kwargs.update({"messages": [{"role": "user", "content": f"{prompt}"}]})
elif "messages" in prompt_kwargs:
kwargs.update({"messages": prompt_kwargs["messages"]})
if self._model_name.startswith("anthropic."):
kwargs["messages"] = self._rewrite_system_messages(kwargs["messages"])
else:
raise ValueError("Either prompt or messages must be present in prompt_kwargs.")
return kwargs
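To see what the rewrite does, here is a standalone sketch of the same folding behavior applied to a toy message list (a copy of the helper's logic rather than a call into `Bedrock`, so it runs without AWS credentials):

```python
from typing import Dict, List, Optional


def fold_system_messages(messages: Optional[List[Dict]]) -> Optional[List[Dict]]:
    # Same behavior as Bedrock._rewrite_system_messages: accumulate "system"
    # content, prepend it to the next non-system message, then drop the
    # system entries entirely.
    if not messages:
        return messages
    cur_system = ""
    for i, message in enumerate(messages):
        if message.get("role") == "system":
            cur_system += message.get("content", "")
        elif cur_system:
            messages[i]["content"] = cur_system + "\n" + message.get("content", "")
            cur_system = ""
    return [m for m in messages if m.get("role") != "system"]


msgs = [
    {"role": "system", "content": "Answer in one word."},
    {"role": "user", "content": "What color is the sky?"},
    {"role": "assistant", "content": "Blue."},
]
print(fold_system_messages(msgs))
# [{'role': 'user', 'content': 'Answer in one word.\nWhat color is the sky?'},
#  {'role': 'assistant', 'content': 'Blue.'}]
```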
3 changes: 3 additions & 0 deletions lib/sycamore/sycamore/llms/llms.py
@@ -26,6 +26,9 @@ async def generate_async(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict
"""Generates a response from the LLM for the given prompt and LLM parameters asynchronously."""
raise NotImplementedError("This LLM does not support asynchronous generation.")

def __str__(self):
return f"{self.__class__.__name__}({self._model_name})"

def _get_cache_key(self, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str:
"""Return a cache key for the given prompt and LLM parameters."""
assert self._cache