From 85ff2c12b31d9478eef15887fb683c31b951489e Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Wed, 22 Jan 2025 17:18:25 -0800 Subject: [PATCH 01/10] capturing metadata and fixing prompt for instruct models --- lib/sycamore/sycamore/llms/openai.py | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index f90d3e1d9..67fc31cdd 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -22,7 +22,7 @@ from sycamore.llms.prompts import SimplePrompt from sycamore.utils.cache import Cache from sycamore.utils.image_utils import base64_data_url - +from sycamore.data.metadata import add_metadata logger = logging.getLogger(__name__) @@ -328,7 +328,7 @@ def _get_generate_kwargs(self, prompt_kwargs: dict, llm_kwargs: Optional[dict] = kwargs.update({"messages": [{"role": "user", "content": prompt}]}) else: if isinstance(prompt, SimplePrompt): - prompt = f"{prompt.system}\n{prompt.user}" + prompt = f"{prompt.system.format(**prompt_kwargs)}\n{prompt.user.format(**prompt_kwargs)}" kwargs.update({"prompt": prompt}) elif "messages" in prompt_kwargs: kwargs.update({"messages": prompt_kwargs["messages"]}) @@ -367,10 +367,37 @@ def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: if self.is_chat_mode(): completion = self.client_wrapper.get_client().chat.completions.create(model=self._model_name, **kwargs) logging.debug("OpenAI completion: %s", completion) + add_metadata( + **{ + "model": self._model_name, + "temperature": kwargs.get("temperature",None), + "usage": { + "completion_tokens": completion.usage.completion_tokens, + "prompt_tokens": completion.usage.prompt_tokens, + "total_tokens": completion.usage.total_tokens + }, + "prompt": kwargs.get("messages"), + "output": completion.choices[0].message.content, + "finish_reason": completion.choices[0].finish_reason + }) + + return completion.choices[0].message.content else: completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) logging.debug("OpenAI completion: %s", completion) + add_metadata( + **{ + "model": self._model_name, + "temperature": kwargs.get("temperature", None), + "usage": { + "completion_tokens": completion.usage.completion_tokens, + "prompt_tokens": completion.usage.prompt_tokens, + "total_tokens": completion.usage.total_tokens + }, + "prompt": kwargs.get("prompt"), + "output": completion.choices[0].text + }) return completion.choices[0].text def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: From 3be160992483f29cfb8b0950690ac569ea9e0cf0 Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Wed, 22 Jan 2025 17:38:10 -0800 Subject: [PATCH 02/10] lint smh --- lib/sycamore/sycamore/llms/openai.py | 96 +++++++++++++++------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index 67fc31cdd..0c6db6fc3 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -328,7 +328,9 @@ def _get_generate_kwargs(self, prompt_kwargs: dict, llm_kwargs: Optional[dict] = kwargs.update({"messages": [{"role": "user", "content": prompt}]}) else: if isinstance(prompt, SimplePrompt): - prompt = f"{prompt.system.format(**prompt_kwargs)}\n{prompt.user.format(**prompt_kwargs)}" + system_msg = prompt.system.format(**prompt_kwargs) if prompt.system else "" + user_msg = prompt.user.format(**prompt_kwargs) if 
prompt.user else "" + prompt = f"{system_msg}\n{user_msg}" if system_msg else user_msg kwargs.update({"prompt": prompt}) elif "messages" in prompt_kwargs: kwargs.update({"messages": prompt_kwargs["messages"]}) @@ -363,42 +365,42 @@ def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: kwargs = self._get_generate_kwargs(prompt_kwargs, llm_kwargs) - logging.debug("OpenAI prompt: %s", kwargs) - if self.is_chat_mode(): - completion = self.client_wrapper.get_client().chat.completions.create(model=self._model_name, **kwargs) - logging.debug("OpenAI completion: %s", completion) - add_metadata( - **{ - "model": self._model_name, - "temperature": kwargs.get("temperature",None), - "usage": { - "completion_tokens": completion.usage.completion_tokens, - "prompt_tokens": completion.usage.prompt_tokens, - "total_tokens": completion.usage.total_tokens - }, - "prompt": kwargs.get("messages"), - "output": completion.choices[0].message.content, - "finish_reason": completion.choices[0].finish_reason - }) - - - return completion.choices[0].message.content - else: - completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) - logging.debug("OpenAI completion: %s", completion) - add_metadata( - **{ - "model": self._model_name, - "temperature": kwargs.get("temperature", None), - "usage": { - "completion_tokens": completion.usage.completion_tokens, - "prompt_tokens": completion.usage.prompt_tokens, - "total_tokens": completion.usage.total_tokens - }, - "prompt": kwargs.get("prompt"), - "output": completion.choices[0].text - }) - return completion.choices[0].text + logging.debug("OpenAI request parameters: %s", kwargs) + + try: + if self.is_chat_mode(): + completion = self.client_wrapper.get_client().chat.completions.create(model=self._model_name, **kwargs) + response_text = completion.choices[0].message.content + else: + completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) + response_text = completion.choices[0].text + metadata = self.generate_metadata(kwargs, completion, response_text) + logging.debug("OpenAI completion response: %s", completion) + add_metadata(**metadata) + + if not response_text: + raise ValueError("OpenAI returned empty response") + + return response_text + + except Exception as e: + logging.error("Error in OpenAI API call: %s", str(e)) + raise + + def generate_metadata(self, kwargs, completion, response_text): + metadata = { + "model": self._model_name, + "temperature": kwargs.get("temperature", None), + "usage": { + "completion_tokens": completion.usage.completion_tokens, + "prompt_tokens": completion.usage.prompt_tokens, + "total_tokens": completion.usage.total_tokens, + }, + "prompt": kwargs.get("prompt"), + "output": response_text, + } + + return metadata def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: try: @@ -407,11 +409,14 @@ def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: completion = self.client_wrapper.get_client().beta.chat.completions.parse( model=self._model_name, **kwargs ) + response_text = completion.choices[0].message.content + metadata = self.generate_metadata(kwargs, completion, response_text) + add_metadata(**metadata) else: raise ValueError("This method doesn't support instruct models. 
Please use a chat model.") # completion = self.client_wrapper.get_client().beta.completions.parse(model=self._model_name, **kwargs) - assert completion.choices[0].message.content is not None, "OpenAI refused to respond to the query" - return completion.choices[0].message.content + assert response_text is not None, "OpenAI refused to respond to the query" + return response_text except Exception as e: # OpenAI will not respond in two scenarios: # 1.) The LLM ran out of output context length(usually do to hallucination of repeating the same phrase) @@ -439,12 +444,18 @@ async def _generate_awaitable_using_openai(self, prompt_kwargs, llm_kwargs) -> s completion = await self.client_wrapper.get_async_client().chat.completions.create( model=self._model_name, **kwargs ) - return completion.choices[0].message.content + response_text = completion.choices[0].message.content + metadata = self.generate_metadata(kwargs, completion, response_text) + add_metadata(**metadata) + return response_text else: completion = await self.client_wrapper.get_async_client().completions.create( model=self._model_name, **kwargs ) - return completion.choices[0].text + response_text = completion.choices[0].text + metadata = self.generate_metadata(kwargs, completion, response_text) + add_metadata(**metadata) + return response_text async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: try: @@ -455,9 +466,6 @@ async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_k ) else: raise ValueError("This method doesn't support instruct models. Please use a chat model.") - # completion = await self.client_wrapper.get_async_client().beta.completions.parse( - # model=self._model_name, **kwargs - # ) assert completion.choices[0].message.content is not None, "OpenAI refused to respond to the query" return completion.choices[0].message.content except Exception as e: From 346617fa1e07e30cbc66bab0187b5160dd99beda Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Thu, 23 Jan 2025 11:44:48 -0800 Subject: [PATCH 03/10] adding time to metadata for openai --- lib/sycamore/sycamore/llms/openai.py | 32 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index 0c6db6fc3..594405758 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -6,6 +6,7 @@ from enum import Enum from PIL import Image from typing import Any, Dict, Optional, Union +from datetime import datetime from openai import AzureOpenAI as AzureOpenAIClient from openai import AsyncAzureOpenAI as AsyncAzureOpenAIClient @@ -368,13 +369,15 @@ def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: logging.debug("OpenAI request parameters: %s", kwargs) try: + starttime = datetime.now() if self.is_chat_mode(): completion = self.client_wrapper.get_client().chat.completions.create(model=self._model_name, **kwargs) response_text = completion.choices[0].message.content else: completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) response_text = completion.choices[0].text - metadata = self.generate_metadata(kwargs, completion, response_text) + wall_latency = datetime.now() - starttime + metadata = self.generate_metadata(kwargs, completion, response_text,wall_latency) logging.debug("OpenAI completion response: %s", completion) add_metadata(**metadata) @@ -387,7 +390,7 @@ def _generate_using_openai(self, prompt_kwargs, 
llm_kwargs) -> str: logging.error("Error in OpenAI API call: %s", str(e)) raise - def generate_metadata(self, kwargs, completion, response_text): + def generate_metadata(self, kwargs, completion, response_text, wall_latency) -> dict: metadata = { "model": self._model_name, "temperature": kwargs.get("temperature", None), @@ -396,6 +399,7 @@ def generate_metadata(self, kwargs, completion, response_text): "prompt_tokens": completion.usage.prompt_tokens, "total_tokens": completion.usage.total_tokens, }, + "wall_latency": wall_latency, "prompt": kwargs.get("prompt"), "output": response_text, } @@ -406,11 +410,13 @@ def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: try: kwargs = self._get_generate_kwargs(prompt_kwargs, llm_kwargs) if self.is_chat_mode(): + starttime = datetime.now() completion = self.client_wrapper.get_client().beta.chat.completions.parse( model=self._model_name, **kwargs ) + wall_latency = datetime.now() - starttime response_text = completion.choices[0].message.content - metadata = self.generate_metadata(kwargs, completion, response_text) + metadata = self.generate_metadata(kwargs, completion, response_text, wall_latency) add_metadata(**metadata) else: raise ValueError("This method doesn't support instruct models. Please use a chat model.") @@ -440,34 +446,38 @@ async def generate_async(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict async def _generate_awaitable_using_openai(self, prompt_kwargs, llm_kwargs) -> str: kwargs = self._get_generate_kwargs(prompt_kwargs, llm_kwargs) + starttime = datetime.now() if self.is_chat_mode(): completion = await self.client_wrapper.get_async_client().chat.completions.create( model=self._model_name, **kwargs ) response_text = completion.choices[0].message.content - metadata = self.generate_metadata(kwargs, completion, response_text) - add_metadata(**metadata) - return response_text else: completion = await self.client_wrapper.get_async_client().completions.create( model=self._model_name, **kwargs ) response_text = completion.choices[0].text - metadata = self.generate_metadata(kwargs, completion, response_text) - add_metadata(**metadata) - return response_text + wall_latency = datetime.now() - starttime + metadata = self.generate_metadata(kwargs, completion, response_text, wall_latency) + add_metadata(**metadata) + return response_text async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: try: kwargs = self._get_generate_kwargs(prompt_kwargs, llm_kwargs) if self.is_chat_mode(): + starttime = datetime.now() completion = await self.client_wrapper.get_async_client().beta.chat.completions.parse( model=self._model_name, **kwargs ) + wall_latency = datetime.now() - starttime else: raise ValueError("This method doesn't support instruct models. Please use a chat model.") - assert completion.choices[0].message.content is not None, "OpenAI refused to respond to the query" - return completion.choices[0].message.content + response_text = completion.choices[0].message.content + assert response_text is not None, "OpenAI refused to respond to the query" + metadata = self.generate_metadata(kwargs, completion, response_text, wall_latency) + add_metadata(**metadata) + return response_text except Exception as e: # OpenAI will not respond in two scenarios: # 1.) 
The LLM ran out of output context length(usually do to hallucination of repeating the same phrase) From f4c2269de25f1bd58f3cfabe9c45f94443719136 Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Thu, 23 Jan 2025 12:59:58 -0800 Subject: [PATCH 04/10] capturing metadata from anthropic --- lib/sycamore/sycamore/llms/anthropic.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/sycamore/sycamore/llms/anthropic.py b/lib/sycamore/sycamore/llms/anthropic.py index 81bc4903d..6c34b66dd 100644 --- a/lib/sycamore/sycamore/llms/anthropic.py +++ b/lib/sycamore/sycamore/llms/anthropic.py @@ -10,7 +10,7 @@ from sycamore.utils.cache import Cache from sycamore.utils.image_utils import base64_data from sycamore.utils.import_utils import requires_modules - +from sycamore.data.metadata import add_metadata DEFAULT_MAX_TOKENS = 1000 @@ -140,18 +140,18 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = response = self._client.messages.create(model=self.model.value, **kwargs) wall_latency = datetime.now() - start - in_tokens = response.usage.input_tokens out_tokens = response.usage.output_tokens output = response.content[0].text + metadata = self.get_metadata(kwargs, response, output, wall_latency) ret = { "output": output, "wall_latency": wall_latency, "in_tokens": in_tokens, "out_tokens": out_tokens, } - + add_metadata(**metadata) logging.debug(f"Generated response from Anthropic model: {ret}") self._llm_cache_set(prompt_kwargs, llm_kwargs, ret) @@ -160,3 +160,18 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str: d = self.generate_metadata(prompt_kwargs=prompt_kwargs, llm_kwargs=llm_kwargs) return d["output"] + + def get_metadata(self, kwargs, response, response_text, wall_latency) -> dict: + metadata = { + "model": self._model_name, + "temperature": kwargs.get("temperature", None), + "usage": { + "completion_tokens": response.usage.input_tokens, + "prompt_tokens": response.usage.output_tokens, + "total_tokens":response.usage.input_tokens + response.usage.output_tokens, + }, + "wall_latency": wall_latency, + "prompt": kwargs.get("prompt"), + "output": response_text, + } + return metadata \ No newline at end of file From d2e6ad7aeda177d6f7a4fcb8609c0a08c8b46d8f Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Fri, 24 Jan 2025 11:47:24 -0800 Subject: [PATCH 05/10] adding openai changes --- lib/sycamore/sycamore/llms/anthropic.py | 25 ++----- lib/sycamore/sycamore/llms/bedrock.py | 23 ++++++ lib/sycamore/sycamore/llms/llms.py | 15 ++++ lib/sycamore/sycamore/llms/openai.py | 98 ++++++++++++++++++------- 4 files changed, 117 insertions(+), 44 deletions(-) diff --git a/lib/sycamore/sycamore/llms/anthropic.py b/lib/sycamore/sycamore/llms/anthropic.py index 6c34b66dd..b02f0a4b8 100644 --- a/lib/sycamore/sycamore/llms/anthropic.py +++ b/lib/sycamore/sycamore/llms/anthropic.py @@ -10,7 +10,9 @@ from sycamore.utils.cache import Cache from sycamore.utils.image_utils import base64_data from sycamore.utils.import_utils import requires_modules -from sycamore.data.metadata import add_metadata +from sycamore.data.metadata import add_metadata +from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT + DEFAULT_MAX_TOKENS = 1000 @@ -144,14 +146,16 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = out_tokens = response.usage.output_tokens output = response.content[0].text - 
metadata = self.get_metadata(kwargs, response, output, wall_latency) ret = { "output": output, "wall_latency": wall_latency, "in_tokens": in_tokens, "out_tokens": out_tokens, } - add_metadata(**metadata) + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) + add_metadata(**metadata) logging.debug(f"Generated response from Anthropic model: {ret}") self._llm_cache_set(prompt_kwargs, llm_kwargs, ret) @@ -160,18 +164,3 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str: d = self.generate_metadata(prompt_kwargs=prompt_kwargs, llm_kwargs=llm_kwargs) return d["output"] - - def get_metadata(self, kwargs, response, response_text, wall_latency) -> dict: - metadata = { - "model": self._model_name, - "temperature": kwargs.get("temperature", None), - "usage": { - "completion_tokens": response.usage.input_tokens, - "prompt_tokens": response.usage.output_tokens, - "total_tokens":response.usage.input_tokens + response.usage.output_tokens, - }, - "wall_latency": wall_latency, - "prompt": kwargs.get("prompt"), - "output": response_text, - } - return metadata \ No newline at end of file diff --git a/lib/sycamore/sycamore/llms/bedrock.py b/lib/sycamore/sycamore/llms/bedrock.py index a7d115540..47cdcb2e0 100644 --- a/lib/sycamore/sycamore/llms/bedrock.py +++ b/lib/sycamore/sycamore/llms/bedrock.py @@ -10,6 +10,8 @@ from sycamore.llms.llms import LLM from sycamore.llms.anthropic import format_image, get_generate_kwargs from sycamore.utils.cache import Cache +from sycamore.data.metadata import add_metadata +from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT DEFAULT_MAX_TOKENS = 1000 DEFAULT_ANTHROPIC_VERSION = "bedrock-2023-05-31" @@ -114,9 +116,30 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = "in_tokens": in_tokens, "out_tokens": out_tokens, } + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) + add_metadata(**metadata) + + # add_metadata(**metadata) self._llm_cache_set(prompt_kwargs, llm_kwargs, ret) return ret def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str: d = self.generate_metadata(prompt_kwargs=prompt_kwargs, llm_kwargs=llm_kwargs) return d["output"] + + # def get_metadata(self, kwargs, response_text, in_tokens, out_tokens, output, wall_latency) -> dict: + # metadata = { + # "model": self._model_name, + # "temperature": kwargs.get("temperature", None), + # "usage": { + # "completion_tokens": in_tokens, + # "prompt_tokens": out_tokens, + # "total_tokens": in_tokens + out_tokens, + # }, + # "wall_latency": wall_latency, + # "prompt": kwargs.get("prompt"), + # "output": response_text, + # } + # return metadata diff --git a/lib/sycamore/sycamore/llms/llms.py b/lib/sycamore/sycamore/llms/llms.py index dc0541862..494512b16 100644 --- a/lib/sycamore/sycamore/llms/llms.py +++ b/lib/sycamore/sycamore/llms/llms.py @@ -91,6 +91,21 @@ def _llm_cache_set(self, prompt_kwargs: dict, llm_kwargs: Optional[dict], result }, ) + def get_metadata(self, kwargs, response_text, wall_latency, in_tokens, out_tokens) -> dict: + """Generate metadata for the LLM response.""" + return { + "model": self._model_name, + "temperature": kwargs.get("temperature", None), + "usage": { + "completion_tokens": in_tokens, + 
"prompt_tokens": out_tokens, + "total_tokens": in_tokens + out_tokens, + }, + "wall_latency": wall_latency, + "prompt": kwargs.get("prompt"), + "output": response_text, + } + class FakeLLM(LLM): """Useful for tests where the fake LLM needs to run in a ray function because mocks are not serializable""" diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index 594405758..1436b8dec 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -24,6 +24,7 @@ from sycamore.utils.cache import Cache from sycamore.utils.image_utils import base64_data_url from sycamore.data.metadata import add_metadata +from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT logger = logging.getLogger(__name__) @@ -377,35 +378,34 @@ def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) response_text = completion.choices[0].text wall_latency = datetime.now() - starttime - metadata = self.generate_metadata(kwargs, completion, response_text,wall_latency) + + if completion.usage is not None: + completion_tokens = completion.usage.completion_tokens or 0 + prompt_tokens = completion.usage.prompt_tokens or 0 + else: + completion_tokens = 0 + prompt_tokens = 0 + metadata = self.get_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) logging.debug("OpenAI completion response: %s", completion) - add_metadata(**metadata) + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata( + kwargs, + response_text, + wall_latency, + completion_tokens, + prompt_tokens, + ) + add_metadata(**metadata) if not response_text: raise ValueError("OpenAI returned empty response") return response_text - except Exception as e: logging.error("Error in OpenAI API call: %s", str(e)) raise - def generate_metadata(self, kwargs, completion, response_text, wall_latency) -> dict: - metadata = { - "model": self._model_name, - "temperature": kwargs.get("temperature", None), - "usage": { - "completion_tokens": completion.usage.completion_tokens, - "prompt_tokens": completion.usage.prompt_tokens, - "total_tokens": completion.usage.total_tokens, - }, - "wall_latency": wall_latency, - "prompt": kwargs.get("prompt"), - "output": response_text, - } - - return metadata - def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: try: kwargs = self._get_generate_kwargs(prompt_kwargs, llm_kwargs) @@ -414,10 +414,24 @@ def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: completion = self.client_wrapper.get_client().beta.chat.completions.parse( model=self._model_name, **kwargs ) + if completion.usage is not None: + completion_tokens = completion.usage.completion_tokens or 0 + prompt_tokens = completion.usage.prompt_tokens or 0 + else: + completion_tokens = 0 + prompt_tokens = 0 wall_latency = datetime.now() - starttime response_text = completion.choices[0].message.content - metadata = self.generate_metadata(kwargs, completion, response_text, wall_latency) - add_metadata(**metadata) + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata( + kwargs, + response_text, + wall_latency, + completion_tokens, + prompt_tokens, + ) + add_metadata(**metadata) else: raise ValueError("This method doesn't support instruct models. 
Please use a chat model.") # completion = self.client_wrapper.get_client().beta.completions.parse(model=self._model_name, **kwargs) @@ -457,9 +471,26 @@ async def _generate_awaitable_using_openai(self, prompt_kwargs, llm_kwargs) -> s model=self._model_name, **kwargs ) response_text = completion.choices[0].text - wall_latency = datetime.now() - starttime - metadata = self.generate_metadata(kwargs, completion, response_text, wall_latency) - add_metadata(**metadata) + wall_latency = datetime.now() - starttime + response_text = completion.choices[0].message.content + + if completion.usage is not None: + completion_tokens = completion.usage.completion_tokens or 0 + prompt_tokens = completion.usage.prompt_tokens or 0 + else: + completion_tokens = 0 + prompt_tokens = 0 + + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata( + kwargs, + response_text, + wall_latency, + completion_tokens, + prompt_tokens, + ) + add_metadata(**metadata) return response_text async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: @@ -475,8 +506,23 @@ async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_k raise ValueError("This method doesn't support instruct models. Please use a chat model.") response_text = completion.choices[0].message.content assert response_text is not None, "OpenAI refused to respond to the query" - metadata = self.generate_metadata(kwargs, completion, response_text, wall_latency) - add_metadata(**metadata) + if completion.usage is not None: + completion_tokens = completion.usage.completion_tokens or 0 + prompt_tokens = completion.usage.prompt_tokens or 0 + else: + completion_tokens = 0 + prompt_tokens = 0 + + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata( + kwargs, + response_text, + wall_latency, + completion_tokens, + prompt_tokens, + ) + add_metadata(**metadata) return response_text except Exception as e: # OpenAI will not respond in two scenarios: From f3e2ab163bd646ce95e28d2983f3d47341e8d07c Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Fri, 24 Jan 2025 13:40:58 -0800 Subject: [PATCH 06/10] adding openai changes --- lib/sycamore/sycamore/llms/openai.py | 72 +++++++++++++--------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index 1436b8dec..62d833130 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -330,9 +330,7 @@ def _get_generate_kwargs(self, prompt_kwargs: dict, llm_kwargs: Optional[dict] = kwargs.update({"messages": [{"role": "user", "content": prompt}]}) else: if isinstance(prompt, SimplePrompt): - system_msg = prompt.system.format(**prompt_kwargs) if prompt.system else "" - user_msg = prompt.user.format(**prompt_kwargs) if prompt.user else "" - prompt = f"{system_msg}\n{user_msg}" if system_msg else user_msg + prompt = f"{prompt.system}\n{prompt.user}" kwargs.update({"prompt": prompt}) elif "messages" in prompt_kwargs: kwargs.update({"messages": prompt_kwargs["messages"]}) @@ -367,44 +365,40 @@ def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: kwargs = self._get_generate_kwargs(prompt_kwargs, llm_kwargs) - logging.debug("OpenAI request parameters: %s", kwargs) - - try: + logging.debug("OpenAI prompt: %s", kwargs) + if self.is_chat_mode(): starttime = datetime.now() - if 
self.is_chat_mode(): - completion = self.client_wrapper.get_client().chat.completions.create(model=self._model_name, **kwargs) - response_text = completion.choices[0].message.content - else: - completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) - response_text = completion.choices[0].text + completion = self.client_wrapper.get_client().chat.completions.create(model=self._model_name, **kwargs) + logging.debug("OpenAI completion: %s", completion) wall_latency = datetime.now() - starttime - - if completion.usage is not None: - completion_tokens = completion.usage.completion_tokens or 0 - prompt_tokens = completion.usage.prompt_tokens or 0 - else: - completion_tokens = 0 - prompt_tokens = 0 - metadata = self.get_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) - logging.debug("OpenAI completion response: %s", completion) - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata( - kwargs, - response_text, - wall_latency, - completion_tokens, - prompt_tokens, - ) - add_metadata(**metadata) - - if not response_text: - raise ValueError("OpenAI returned empty response") - - return response_text - except Exception as e: - logging.error("Error in OpenAI API call: %s", str(e)) - raise + response_text = completion.choices[0].message.content + else: + starttime = datetime.now() + completion = self.client_wrapper.get_client().completions.create(model=self._model_name, **kwargs) + logging.debug("OpenAI completion: %s", completion) + wall_latency = datetime.now() - starttime + response_text = completion.choices[0].text + if completion.usage is not None: + completion_tokens = completion.usage.completion_tokens or 0 + prompt_tokens = completion.usage.prompt_tokens or 0 + else: + completion_tokens = 0 + prompt_tokens = 0 + metadata = self.get_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) + logging.debug("OpenAI completion response: %s", completion) + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata( + kwargs, + response_text, + wall_latency, + completion_tokens, + prompt_tokens, + ) + add_metadata(**metadata) + if not response_text: + raise ValueError("OpenAI returned empty response") + return response_text def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: try: From 6b6acfe9e1652724cfd414f62515a6d3b6e1555e Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Fri, 24 Jan 2025 13:55:05 -0800 Subject: [PATCH 07/10] removing dead code --- lib/sycamore/sycamore/llms/bedrock.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/lib/sycamore/sycamore/llms/bedrock.py b/lib/sycamore/sycamore/llms/bedrock.py index 47cdcb2e0..486ffddaa 100644 --- a/lib/sycamore/sycamore/llms/bedrock.py +++ b/lib/sycamore/sycamore/llms/bedrock.py @@ -120,26 +120,9 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = if tls.present(): metadata = self.get_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) add_metadata(**metadata) - - # add_metadata(**metadata) self._llm_cache_set(prompt_kwargs, llm_kwargs, ret) return ret def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str: d = self.generate_metadata(prompt_kwargs=prompt_kwargs, llm_kwargs=llm_kwargs) return d["output"] - - # def get_metadata(self, kwargs, response_text, in_tokens, out_tokens, output, wall_latency) -> dict: - # metadata = { - # "model": self._model_name, 
- # "temperature": kwargs.get("temperature", None), - # "usage": { - # "completion_tokens": in_tokens, - # "prompt_tokens": out_tokens, - # "total_tokens": in_tokens + out_tokens, - # }, - # "wall_latency": wall_latency, - # "prompt": kwargs.get("prompt"), - # "output": response_text, - # } - # return metadata From 7dd09f38707972e2f1ac392f352d62f01a238b01 Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Mon, 27 Jan 2025 12:09:49 -0800 Subject: [PATCH 08/10] consolidating and adding unit tests --- lib/sycamore/sycamore/llms/anthropic.py | 7 +-- lib/sycamore/sycamore/llms/bedrock.py | 7 +-- lib/sycamore/sycamore/llms/llms.py | 8 +++ lib/sycamore/sycamore/llms/openai.py | 49 ++----------------- .../sycamore/tests/unit/llms/test_llms.py | 32 ++++++++++++ 5 files changed, 47 insertions(+), 56 deletions(-) diff --git a/lib/sycamore/sycamore/llms/anthropic.py b/lib/sycamore/sycamore/llms/anthropic.py index b02f0a4b8..ec400e9a0 100644 --- a/lib/sycamore/sycamore/llms/anthropic.py +++ b/lib/sycamore/sycamore/llms/anthropic.py @@ -10,8 +10,6 @@ from sycamore.utils.cache import Cache from sycamore.utils.image_utils import base64_data from sycamore.utils.import_utils import requires_modules -from sycamore.data.metadata import add_metadata -from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT DEFAULT_MAX_TOKENS = 1000 @@ -152,10 +150,7 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = "in_tokens": in_tokens, "out_tokens": out_tokens, } - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) - add_metadata(**metadata) + self.add_llm_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) logging.debug(f"Generated response from Anthropic model: {ret}") self._llm_cache_set(prompt_kwargs, llm_kwargs, ret) diff --git a/lib/sycamore/sycamore/llms/bedrock.py b/lib/sycamore/sycamore/llms/bedrock.py index 486ffddaa..07855062f 100644 --- a/lib/sycamore/sycamore/llms/bedrock.py +++ b/lib/sycamore/sycamore/llms/bedrock.py @@ -10,8 +10,6 @@ from sycamore.llms.llms import LLM from sycamore.llms.anthropic import format_image, get_generate_kwargs from sycamore.utils.cache import Cache -from sycamore.data.metadata import add_metadata -from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT DEFAULT_MAX_TOKENS = 1000 DEFAULT_ANTHROPIC_VERSION = "bedrock-2023-05-31" @@ -116,10 +114,7 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = "in_tokens": in_tokens, "out_tokens": out_tokens, } - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) - add_metadata(**metadata) + self.add_llm_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) self._llm_cache_set(prompt_kwargs, llm_kwargs, ret) return ret diff --git a/lib/sycamore/sycamore/llms/llms.py b/lib/sycamore/sycamore/llms/llms.py index 494512b16..2146b07db 100644 --- a/lib/sycamore/sycamore/llms/llms.py +++ b/lib/sycamore/sycamore/llms/llms.py @@ -3,6 +3,8 @@ from PIL import Image from typing import Any, Optional from sycamore.utils.cache import Cache +from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT +from sycamore.data.metadata import add_metadata class LLM(ABC): @@ -106,6 +108,12 @@ def get_metadata(self, kwargs, response_text, wall_latency, in_tokens, out_token "output": response_text, } + def 
add_llm_metadata(self, kwargs, output, wall_latency, in_tokens, out_tokens): + tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) + if tls.present(): + metadata = self.get_metadata(kwargs, output, wall_latency, in_tokens, out_tokens) + add_metadata(**metadata) + class FakeLLM(LLM): """Useful for tests where the fake LLM needs to run in a ray function because mocks are not serializable""" diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index 62d833130..4f97dff80 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -23,8 +23,6 @@ from sycamore.llms.prompts import SimplePrompt from sycamore.utils.cache import Cache from sycamore.utils.image_utils import base64_data_url -from sycamore.data.metadata import add_metadata -from sycamore.utils.thread_local import ThreadLocalAccess, ADD_METADATA_TO_OUTPUT logger = logging.getLogger(__name__) @@ -384,18 +382,8 @@ def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: else: completion_tokens = 0 prompt_tokens = 0 - metadata = self.get_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) - logging.debug("OpenAI completion response: %s", completion) - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata( - kwargs, - response_text, - wall_latency, - completion_tokens, - prompt_tokens, - ) - add_metadata(**metadata) + + self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) if not response_text: raise ValueError("OpenAI returned empty response") return response_text @@ -416,16 +404,7 @@ def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: prompt_tokens = 0 wall_latency = datetime.now() - starttime response_text = completion.choices[0].message.content - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata( - kwargs, - response_text, - wall_latency, - completion_tokens, - prompt_tokens, - ) - add_metadata(**metadata) + self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) else: raise ValueError("This method doesn't support instruct models. 
Please use a chat model.") # completion = self.client_wrapper.get_client().beta.completions.parse(model=self._model_name, **kwargs) @@ -475,16 +454,7 @@ async def _generate_awaitable_using_openai(self, prompt_kwargs, llm_kwargs) -> s completion_tokens = 0 prompt_tokens = 0 - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata( - kwargs, - response_text, - wall_latency, - completion_tokens, - prompt_tokens, - ) - add_metadata(**metadata) + self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) return response_text async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: @@ -507,16 +477,7 @@ async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_k completion_tokens = 0 prompt_tokens = 0 - tls = ThreadLocalAccess(ADD_METADATA_TO_OUTPUT) - if tls.present(): - metadata = self.get_metadata( - kwargs, - response_text, - wall_latency, - completion_tokens, - prompt_tokens, - ) - add_metadata(**metadata) + self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) return response_text except Exception as e: # OpenAI will not respond in two scenarios: diff --git a/lib/sycamore/sycamore/tests/unit/llms/test_llms.py b/lib/sycamore/sycamore/tests/unit/llms/test_llms.py index 62cc4aed1..ab76de5f3 100644 --- a/lib/sycamore/sycamore/tests/unit/llms/test_llms.py +++ b/lib/sycamore/sycamore/tests/unit/llms/test_llms.py @@ -5,6 +5,38 @@ from sycamore.llms.llms import FakeLLM from sycamore.llms.prompts import EntityExtractorFewShotGuidancePrompt, EntityExtractorZeroShotGuidancePrompt from sycamore.utils.cache import DiskCache +import datetime +from sycamore.utils.thread_local import ThreadLocalAccess + + +def test_get_metadata(): + llm = FakeLLM() + wall_latency = datetime.timedelta(seconds=1) + metadata = llm.get_metadata({"prompt": "Hello", "temperature": 0.7}, "Test output", wall_latency, 10, 5) + assert metadata["model"] == llm._model_name + assert metadata["usage"] == { + "completion_tokens": 10, + "prompt_tokens": 5, + "total_tokens": 15, + } + assert metadata["prompt"] == "Hello" + assert metadata["output"] == "Test output" + assert metadata["temperature"] == 0.7 + assert metadata["wall_latency"] == wall_latency + + +@patch("sycamore.llms.llms.add_metadata") +def test_add_llm_metadata(mock_add_metadata): + llm = FakeLLM() + with patch.object(ThreadLocalAccess, "present", return_value=True): + llm.add_llm_metadata({}, "Test output", datetime.timedelta(seconds=0.5), 1, 2) + mock_add_metadata.assert_called_once() + + # If TLS not present, add_metadata should not be called + mock_add_metadata.reset_mock() + with patch.object(ThreadLocalAccess, "present", return_value=False): + llm.add_llm_metadata({}, "Test output", datetime.timedelta(seconds=0.5), 1, 2) + mock_add_metadata.assert_not_called() def test_openai_davinci_fallback(): From 63e72e37e4a588ce604b017e22e93497dffd4615 Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Mon, 27 Jan 2025 13:36:03 -0800 Subject: [PATCH 09/10] calculate prompt tokens --- lib/sycamore/sycamore/llms/openai.py | 33 +++++++++++----------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/lib/sycamore/sycamore/llms/openai.py b/lib/sycamore/sycamore/llms/openai.py index 4f97dff80..6e44fd35b 100644 --- a/lib/sycamore/sycamore/llms/openai.py +++ b/lib/sycamore/sycamore/llms/openai.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from enum import Enum from PIL import Image -from 
typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Tuple, Union from datetime import datetime from openai import AzureOpenAI as AzureOpenAIClient @@ -291,6 +291,15 @@ def is_chat_mode(self): def format_image(self, image: Image.Image) -> dict[str, Any]: return {"type": "image_url", "image_url": {"url": base64_data_url(image)}} + def validate_tokens(self, completion) -> Tuple[int, int]: + if completion.usage is not None: + completion_tokens = completion.usage.completion_tokens or 0 + prompt_tokens = completion.usage.prompt_tokens or 0 + else: + completion_tokens = 0 + prompt_tokens = 0 + return completion_tokens, prompt_tokens + def _convert_response_format(self, llm_kwargs: Optional[Dict]) -> Optional[Dict]: """Convert the response_format parameter to the appropriate OpenAI format.""" if llm_kwargs is None: @@ -376,13 +385,8 @@ def _generate_using_openai(self, prompt_kwargs, llm_kwargs) -> str: logging.debug("OpenAI completion: %s", completion) wall_latency = datetime.now() - starttime response_text = completion.choices[0].text - if completion.usage is not None: - completion_tokens = completion.usage.completion_tokens or 0 - prompt_tokens = completion.usage.prompt_tokens or 0 - else: - completion_tokens = 0 - prompt_tokens = 0 + completion_tokens, prompt_tokens = self.validate_tokens(completion) self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) if not response_text: raise ValueError("OpenAI returned empty response") @@ -396,12 +400,7 @@ def _generate_using_openai_structured(self, prompt_kwargs, llm_kwargs) -> str: completion = self.client_wrapper.get_client().beta.chat.completions.parse( model=self._model_name, **kwargs ) - if completion.usage is not None: - completion_tokens = completion.usage.completion_tokens or 0 - prompt_tokens = completion.usage.prompt_tokens or 0 - else: - completion_tokens = 0 - prompt_tokens = 0 + completion_tokens, prompt_tokens = self.validate_tokens(completion) wall_latency = datetime.now() - starttime response_text = completion.choices[0].message.content self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) @@ -470,13 +469,7 @@ async def _generate_awaitable_using_openai_structured(self, prompt_kwargs, llm_k raise ValueError("This method doesn't support instruct models. 
Please use a chat model.") response_text = completion.choices[0].message.content assert response_text is not None, "OpenAI refused to respond to the query" - if completion.usage is not None: - completion_tokens = completion.usage.completion_tokens or 0 - prompt_tokens = completion.usage.prompt_tokens or 0 - else: - completion_tokens = 0 - prompt_tokens = 0 - + completion_tokens, prompt_tokens = self.validate_tokens(completion) self.add_llm_metadata(kwargs, response_text, wall_latency, completion_tokens, prompt_tokens) return response_text except Exception as e: From cd47f7d6dc56c46c25039776525dc2a4a68f92a5 Mon Sep 17 00:00:00 2001 From: Soeb Hussain Date: Mon, 27 Jan 2025 15:21:20 -0800 Subject: [PATCH 10/10] assertions and small bug fix --- lib/sycamore/sycamore/llms/llms.py | 2 +- .../transforms/test_data_extraction.py | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/sycamore/sycamore/llms/llms.py b/lib/sycamore/sycamore/llms/llms.py index 2146b07db..a3e1c4743 100644 --- a/lib/sycamore/sycamore/llms/llms.py +++ b/lib/sycamore/sycamore/llms/llms.py @@ -104,7 +104,7 @@ def get_metadata(self, kwargs, response_text, wall_latency, in_tokens, out_token "total_tokens": in_tokens + out_tokens, }, "wall_latency": wall_latency, - "prompt": kwargs.get("prompt"), + "prompt": kwargs.get("prompt") or kwargs.get("messages"), "output": response_text, } diff --git a/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py b/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py index cb6e226a6..e3f415417 100644 --- a/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py +++ b/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py @@ -43,12 +43,16 @@ def test_extract_properties_from_dict_schema(llm): docs = ctx.read.document(docs) docs = docs.extract_properties(property_extractor) - taken = docs.take_all() + taken = docs.take_all(include_metadata=True) assert taken[0].properties["entity"]["name"] == "Vinayak" assert taken[0].properties["entity"]["age"] == 74 assert "Honolulu" in taken[0].properties["entity"]["from_location"] + assert len(taken) == 3 + assert taken[2].metadata["usage"]["prompt_tokens"] > 0 + assert taken[2].metadata["usage"]["completion_tokens"] > 0 + @pytest.mark.parametrize("llm", llms) def test_extract_properties_from_schema(llm): @@ -61,6 +65,7 @@ def test_extract_properties_from_schema(llm): field_type="str", description="This is the name of an entity", examples=["Mark", "Ollie", "Winston"], + default="null", ), SchemaField(name="age", field_type="int", default=999), SchemaField(name="date", field_type="str", description="Any date in the doc in YYYY-MM-DD format"), @@ -80,14 +85,20 @@ def test_extract_properties_from_schema(llm): docs = ctx.read.document(docs) docs = docs.extract_properties(property_extractor) - taken = docs.take_all() + taken = docs.take_all(include_metadata=True) assert taken[0].properties["entity"]["name"] == "Vinayak" assert taken[0].properties["entity"]["age"] == 74 assert taken[0].properties["entity"]["from_location"] == "Honolulu, HI", "Invalid location extracted or formatted" assert taken[0].properties["entity"]["date"] == "1923-02-24" - assert taken[1].properties["entity"]["name"] is None, "Default None value not being used correctly" + assert taken[1].properties["entity"]["name"] == "None" # Anthropic isn't generating valid JSON with null values. 
assert taken[1].properties["entity"]["age"] == 999, "Default value not being used correctly" assert taken[1].properties["entity"]["from_location"] == "New Delhi" assert taken[1].properties["entity"]["date"] == "2014-01-11" + + assert len(taken) == 5 + assert taken[3].metadata["usage"]["prompt_tokens"] > 0 + assert taken[3].metadata["usage"]["completion_tokens"] > 0 + assert taken[4].metadata["usage"]["prompt_tokens"] > 0 + assert taken[4].metadata["usage"]["completion_tokens"] > 0
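
The metadata records emitted by add_llm_metadata follow the shape defined in LLM.get_metadata: a "usage" dict with prompt/completion/total token counts, "wall_latency" as a datetime.timedelta, the prompt, and the output text. Below is a minimal consumer-side sketch, not part of this patch (summarize_llm_usage is a hypothetical helper), of how those records could be aggregated once a docset has been materialized with take_all(include_metadata=True), as in the integration test above:

    from datetime import timedelta
    from typing import Iterable


    def summarize_llm_usage(metadata_records: Iterable[dict]) -> dict:
        """Sum token counts and wall-clock latency across LLM metadata records."""
        totals = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        latency = timedelta(0)
        calls = 0
        for record in metadata_records:
            usage = record.get("usage")
            if not usage:
                continue  # skip metadata entries that did not come from an LLM call
            calls += 1
            for key in totals:
                totals[key] += usage.get(key, 0)
            latency += record.get("wall_latency", timedelta(0))
        return {"calls": calls, "wall_latency": latency, **totals}

With the taken list from the test above, the records passed in would be the .metadata dictionaries of the metadata documents that take_all(include_metadata=True) interleaves with the output documents.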