Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libs/.docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ SEGMENT_WRITE_KEY=
STRIPE_SECRET_KEY=
# Research tool
TAVILY_API_KEY=
# Azure OpenAI embeddings
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=


###############################
Expand Down
1 change: 1 addition & 0 deletions libs/.docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ services:
- AGENTOPS_API_KEY=${AGENTOPS_API_KEY}
- AGENTOPS_ORG_KEY=${AGENTOPS_ORG_KEY}
- TAVILY_API_KEY=${TAVILY_API_KEY}
- AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=${AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT}
ports:
- 8080:8080
restart: unless-stopped
Expand Down
2 changes: 2 additions & 0 deletions libs/superagent/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,5 @@ STRIPE_SECRET_KEY=
# Tools
TAVILY_API_KEY=

# Optional for Azure Embeddings
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=
4 changes: 4 additions & 0 deletions libs/superagent/app/agents/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ async def _get_tools(
if agent_datasource.datasource.vectorDb
else None
),
# TODO: This will be removed in v0.3
# This is for the users who wants to
# use Azure both for LLM and embeddings
"embeddings_model_provider": self.agent_config.llms[0].llm.provider,
"query_type": "document",
}
if tool_type == DatasourceTool
Expand Down
1 change: 0 additions & 1 deletion libs/superagent/app/agents/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ class CustomAgentExecutor:
async def ainvoke(self, input, *_, **kwargs):
function_calling_res = {}

print("agent_config.tools", agent_config, input)
if len(agent_config.tools) > 0:
function_calling = await FunctionCalling(
enable_streaming=False,
Expand Down
10 changes: 5 additions & 5 deletions libs/superagent/app/api/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ async def get_llm_or_raise(data: LLMPayload) -> LLM:
where={"provider": provider, "apiUserId": data.user_id}
)

# if not llm:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Please set an LLM first",
# )
if not llm:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Please set an LLM first",
)

return llm

Expand Down
6 changes: 5 additions & 1 deletion libs/superagent/app/api/datasources.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from app.datasource.flow import delete_datasource, vectorize_datasource
from app.models.request import Datasource as DatasourceRequest
from app.models.request import EmbeddingsModelProvider
from app.models.response import (
Datasource as DatasourceResponse,
)
Expand Down Expand Up @@ -58,21 +59,23 @@ async def create(
data = await prisma.datasource.create(
{
"apiUserId": api_user.id,
**body.dict(),
**body.dict(exclude={"embeddingsModelProvider"}),
}
)

async def run_vectorize_flow(
datasource: Datasource,
options: Optional[dict],
vector_db_provider: Optional[str],
embeddings_model_provider: EmbeddingsModelProvider,
):
try:
await vectorize_datasource(
datasource=datasource,
# vector db configurations (api key, index name etc.)
options=options,
vector_db_provider=vector_db_provider,
embeddings_model_provider=embeddings_model_provider,
)
except Exception as flow_exception:
await prisma.datasource.update(
Expand All @@ -88,6 +91,7 @@ async def run_vectorize_flow(
vector_db_provider=(
vector_db.provider if vector_db is not None else None
),
embeddings_model_provider=body.embeddingsModelProvider,
)
)
return {"success": True, "data": data}
Expand Down
16 changes: 13 additions & 3 deletions libs/superagent/app/datasource/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from app.datasource.loader import DataLoader
from app.datasource.types import VALID_UNSTRUCTURED_DATA_TYPES
from app.models.request import EmbeddingsModelProvider
from app.utils.prisma import prisma
from app.vectorstores.base import VectorStoreMain
from prisma.enums import DatasourceStatus
Expand Down Expand Up @@ -39,12 +40,17 @@ async def handle_datasources(

@task
async def vectorize(
datasource: Datasource, options: Optional[dict], vector_db_provider: Optional[str]
datasource: Datasource,
options: Optional[dict],
vector_db_provider: Optional[str],
embeddings_model_provider: EmbeddingsModelProvider,
) -> None:
data = DataLoader(datasource=datasource).load()

vector_store = VectorStoreMain(
options=options, vector_db_provider=vector_db_provider
options=options,
vector_db_provider=vector_db_provider,
embeddings_model_provider=embeddings_model_provider,
)
vector_store.embed_documents(documents=data, datasource_id=datasource.id)

Expand Down Expand Up @@ -76,13 +82,17 @@ async def process_datasource(datasource_id: str, agent_id: str):
retries=0,
)
async def vectorize_datasource(
datasource: Datasource, options: Optional[dict], vector_db_provider: Optional[str]
datasource: Datasource,
options: Optional[dict],
vector_db_provider: Optional[str],
embeddings_model_provider: EmbeddingsModelProvider,
) -> None:
if datasource.type in VALID_UNSTRUCTURED_DATA_TYPES:
await vectorize(
datasource=datasource,
options=options,
vector_db_provider=vector_db_provider,
embeddings_model_provider=embeddings_model_provider,
)
await prisma.datasource.update(
where={"id": datasource.id}, data={"status": DatasourceStatus.DONE}
Expand Down
9 changes: 9 additions & 0 deletions libs/superagent/app/models/request.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from enum import Enum
from typing import Any, Dict, List, Optional

from openai.types.beta.assistant_create_params import Tool as OpenAiAssistantTool
Expand Down Expand Up @@ -71,6 +72,11 @@ class AgentInvoke(BaseModel):
llm_params: Optional[LLMParams]


class EmbeddingsModelProvider(str, Enum):
    """Providers supported for computing datasource embeddings.

    Subclasses ``str`` so values serialize transparently in request
    payloads and can be compared directly against plain strings.
    """

    # Default provider (text-embedding-3-small via the OpenAI API).
    OPENAI = "OPENAI"
    # Azure-hosted OpenAI deployment, configured via AZURE_OPENAI_* env vars.
    AZURE_OPENAI = "AZURE_OPENAI"


class Datasource(BaseModel):
name: str
description: Optional[str]
Expand All @@ -79,6 +85,9 @@ class Datasource(BaseModel):
url: Optional[str]
metadata: Optional[Dict[Any, Any]]
vectorDbId: Optional[str]
embeddingsModelProvider: Optional[
EmbeddingsModelProvider
] = EmbeddingsModelProvider.OPENAI


class DatasourceUpdate(BaseModel):
Expand Down
2 changes: 2 additions & 0 deletions libs/superagent/app/tools/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def _run(
vector_store = VectorStoreMain(
options=self.metadata["options"],
vector_db_provider=self.metadata["provider"],
embeddings_model_provider=self.metadata["embeddings_model_provider"],
)
result = vector_store.query_documents(
prompt=question,
Expand All @@ -94,6 +95,7 @@ async def _arun(
vector_store = VectorStoreMain(
options=self.metadata["options"],
vector_db_provider=self.metadata["provider"],
embeddings_model_provider=self.metadata["embeddings_model_provider"],
)
result = vector_store.query_documents(
prompt=question,
Expand Down
10 changes: 5 additions & 5 deletions libs/superagent/app/vectorstores/astra.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import logging
import os
import uuid
from typing import List, Literal, Optional

import backoff
from decouple import config
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings # type: ignore
from pydantic.dataclasses import dataclass

from app.models.request import EmbeddingsModelProvider
from app.utils.helpers import get_first_non_null
from app.vectorstores.abstract import VectorStoreBase
from app.vectorstores.astra_client import AstraClient, QueryResponse
from app.vectorstores.embeddings import get_embeddings_model_provider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -40,6 +40,7 @@ class AstraVectorStore(VectorStoreBase):
def __init__(
self,
options: dict,
embeddings_model_provider: EmbeddingsModelProvider,
astra_id: str = None,
astra_region: str = None,
astra_application_token: str = None,
Expand Down Expand Up @@ -91,9 +92,8 @@ def __init__(
variables["ASTRA_DB_COLLECTION_NAME"],
)

self.embeddings = OpenAIEmbeddings(
model="text-embedding-3-small",
openai_api_key=os.getenv("OPENAI_API_KEY", ""),
self.embeddings = get_embeddings_model_provider(
embeddings_model_provider=embeddings_model_provider
)

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
Expand Down
13 changes: 11 additions & 2 deletions libs/superagent/app/vectorstores/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from decouple import config
from langchain.docstore.document import Document

from app.models.request import EmbeddingsModelProvider
from app.utils.helpers import get_first_non_null
from app.vectorstores.abstract import VectorStoreBase
from app.vectorstores.astra import AstraVectorStore
Expand All @@ -28,7 +29,12 @@


class VectorStoreMain(VectorStoreBase):
def __init__(self, options: Optional[dict], vector_db_provider: Optional[str]):
def __init__(
self,
options: Optional[dict],
vector_db_provider: Optional[str],
embeddings_model_provider: EmbeddingsModelProvider,
):
"""
Determine the vectorstore
"""
Expand All @@ -41,6 +47,7 @@ def __init__(self, options: Optional[dict], vector_db_provider: Optional[str]):
VECTOR_DB_MAPPING.get(config("VECTORSTORE", None)),
VectorDbProvider.PINECONE.value,
)
self.embeddings_model_provider = embeddings_model_provider
self.instance = self.get_database()

def get_database(self, index_name: Optional[str] = None) -> Any:
Expand Down Expand Up @@ -84,7 +91,9 @@ def get_database(self, index_name: Optional[str] = None) -> Any:
if index_name is None:
index_name = index_names.get(self.vectorstore)
return vectorstore_classes.get(self.vectorstore)(
index_name=index_name, options=self.options
index_name=index_name,
options=self.options,
embeddings_model_provider=self.embeddings_model_provider,
)

def query(
Expand Down
28 changes: 28 additions & 0 deletions libs/superagent/app/vectorstores/embeddings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from decouple import config
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings

from app.models.request import EmbeddingsModelProvider
from app.utils.helpers import get_first_non_null


def get_embeddings_model_provider(embeddings_model_provider: EmbeddingsModelProvider):
    """Return a LangChain embeddings client for the requested provider.

    For ``AZURE_OPENAI``, the embeddings-specific settings
    (``AZURE_OPENAI_EMBEDDINGS_*``) take precedence, falling back to the
    shared Azure OpenAI settings when they are unset. Any other provider
    gets the default OpenAI ``text-embedding-3-small`` model.

    Args:
        embeddings_model_provider: Which provider to build a client for.

    Returns:
        An ``AzureOpenAIEmbeddings`` or ``OpenAIEmbeddings`` instance.
    """
    if embeddings_model_provider == EmbeddingsModelProvider.AZURE_OPENAI:
        return AzureOpenAIEmbeddings(
            # The deployment name has no shared fallback and is required.
            azure_deployment=config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"),
            # BUG FIX: config("KEY") without a default raises
            # UndefinedValueError when the key is unset, which defeated the
            # get_first_non_null fallback to the shared AZURE_OPENAI_*
            # settings. Passing None as the default makes the fallback work.
            api_version=get_first_non_null(
                config("AZURE_OPENAI_EMBEDDINGS_API_VERSION", None),
                config("AZURE_OPENAI_API_VERSION", None),
            ),
            api_key=get_first_non_null(
                config("AZURE_OPENAI_EMBEDDINGS_API_KEY", None),
                config("AZURE_OPENAI_API_KEY", None),
            ),
            azure_endpoint=get_first_non_null(
                config("AZURE_OPENAI_EMBEDDINGS_ENDPOINT", None),
                config("AZURE_OPENAI_ENDPOINT", None),
            ),
        )
    return OpenAIEmbeddings(
        model="text-embedding-3-small", openai_api_key=config("OPENAI_API_KEY")
    )
8 changes: 4 additions & 4 deletions libs/superagent/app/vectorstores/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
import pinecone
from decouple import config
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings # type: ignore
from pinecone.core.client.models import QueryResponse
from pydantic.dataclasses import dataclass

from app.models.request import EmbeddingsModelProvider
from app.utils.helpers import get_first_non_null
from app.vectorstores.abstract import VectorStoreBase
from app.vectorstores.embeddings import get_embeddings_model_provider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -40,6 +41,7 @@ class PineconeVectorStore(VectorStoreBase):
def __init__(
self,
options: dict,
embeddings_model_provider: EmbeddingsModelProvider,
index_name: str = None,
environment: str = None,
pinecone_api_key: str = None,
Expand Down Expand Up @@ -82,9 +84,7 @@ def __init__(
self.index_name = variables["PINECONE_INDEX"]
logger.info(f"Index name: {self.index_name}")
self.index = pinecone.Index(self.index_name)
self.embeddings = OpenAIEmbeddings(
model="text-embedding-3-small", openai_api_key=config("OPENAI_API_KEY")
) # type: ignore
self.embeddings = get_embeddings_model_provider(embeddings_model_provider)

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def _embed_with_retry(self, texts):
Expand Down
Loading