3 changes: 2 additions & 1 deletion python/README.md
@@ -38,7 +38,8 @@ Initialize the client by providing required configuration options:

```typescript
client = new ReagClient(
model: "gpt-4o-mini", // LiteLLM model name
model: "gpt-4.1-nano", // LiteLLM model name
filtration_model: "minimax", // Filtration model name
system: Optional[str] // Optional system prompt
batchSize: Optional[Number] // Optional batch size
schema: Optional[BaseModel] // Optional Pydantic schema
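For reference, a valid-Python version of the configuration above (the README snippet uses TypeScript-style syntax), sketched on the assumption that it matches the constructor added in `python/src/reag/client.py` below; the model names and system prompt are only examples, not defaults.

```python
# A minimal sketch, assuming the constructor signature added in
# python/src/reag/client.py; model names and the system prompt are illustrative.
from reag.client import ReagClient

client = ReagClient(
    model="gpt-4.1-nano",          # LiteLLM model name used for reasoning
    filtration_model="minimax",    # LiteLLM model name used to pre-filter documents
    system="Answer strictly from the provided sources.",  # optional system prompt
    batch_size=20,                 # optional batch size
)
```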
86 changes: 53 additions & 33 deletions python/src/reag/client.py
@@ -39,12 +39,14 @@ class ReagClient:
def __init__(
self,
model: str = "gpt-4o-mini",
filtration_model: str = "minimax",
system: str = None,
batch_size: int = DEFAULT_BATCH_SIZE,
schema: Optional[BaseModel] = None,
model_kwargs: Optional[Dict] = None,
):
self.model = model
self.filtration_model = filtration_model
self.system = system or REAG_SYSTEM_PROMPT
self.batch_size = batch_size
self.schema = schema or ResponseSchemaMessage
@@ -199,46 +201,64 @@ def format_doc(doc: Document) -> str:
)
)

# Process all documents in the batch concurrently
batch_responses = await asyncio.gather(*tasks)
# Use litellm for model completion with the filtration model
filtration_response = await acompletion(
model=self.filtration_model,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": prompt},
],
response_format=self.schema,
)

# Process the responses
for document, response in zip(batch, batch_responses):
message_content = response.choices[0].message.content
filtration_message_content = filtration_response.choices[
0
].message.content # Might be a JSON string

try:
if self.model.startswith("ollama/"):
content, reasoning, is_irrelevant = self._extract_think_content(message_content)
results.append(
QueryResult(
content=content,
reasoning=reasoning,
is_irrelevant=is_irrelevant,
document=document,
)
)
else:
# Ensure it's parsed as a dict
data = (
json.loads(message_content)
if isinstance(message_content, str)
else message_content
)
# Ensure it's parsed as a dict
filtration_data = (
json.loads(filtration_message_content)
if isinstance(filtration_message_content, str)
else filtration_message_content
)

if filtration_data["source"].get("is_irrelevant", True):
continue

# Use litellm for model completion with the reasoning model
reasoning_response = await acompletion(
model=self.model,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": prompt},
],
response_format=self.schema,
)

reasoning_message_content = reasoning_response.choices[
0
].message.content # Might be a JSON string

if data["source"].get("is_irrelevant", True):
continue
reasoning_data = (
json.loads(reasoning_message_content)
if isinstance(reasoning_message_content, str)
else reasoning_message_content
)

results.append(
QueryResult(
content=data["source"].get("content", ""),
reasoning=data["source"].get("reasoning", ""),
is_irrelevant=data["source"].get("is_irrelevant", False),
document=document,
)
results.append(
QueryResult(
content=reasoning_data["source"].get("content", ""),
reasoning=reasoning_data["source"].get("reasoning", ""),
is_irrelevant=reasoning_data["source"].get(
"is_irrelevant", False
),
document=document,
)
except json.JSONDecodeError:
print("Error: Could not parse response:", message_content)
continue
print("Error: Could not parse response:", filtration_message_content)
continue # Skip this iteration if parsing fails


return results

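Taken together, the change above splits each document pass into two model calls: the `filtration_model` first decides whether a document is relevant, and only documents it does not mark `is_irrelevant` are sent to the reasoning `model`. Below is a usage sketch of that flow, assuming the surrounding client API (the `Document` type, the `query` method, and async-context-manager support) is unchanged by this PR; the document and query are invented for illustration.

```python
# A usage sketch of the filtration + reasoning flow, assuming the rest of the
# Python client (Document, query, async context manager) works as before.
import asyncio
from reag.client import ReagClient, Document

async def main():
    async with ReagClient(
        model="gpt-4o-mini",          # reasoning model (LiteLLM name)
        filtration_model="minimax",   # relevance-filtering model (LiteLLM name)
    ) as client:
        docs = [
            Document(
                name="Superagent",
                content="Superagent is a workspace for AI agents.",
                metadata={"url": "https://superagent.sh"},
            ),
        ]
        # Each document is first screened by the filtration model; documents it
        # marks is_irrelevant are skipped before the reasoning model is called.
        results = await client.query("What is Superagent?", documents=docs)
        for result in results:
            print(result.content, result.reasoning, result.is_irrelevant)

asyncio.run(main())
```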
2 changes: 2 additions & 0 deletions typescript/README.md
Expand Up @@ -21,6 +21,7 @@ import { openai } from "@ai-sdk/openai";
// Initialize the SDK with required options
const client = new ReagClient({
model: openai("o3-mini", { structuredOutputs: true }),
filtrationModel: openai("minimax", { structuredOutputs: true }),
// system: optional system prompt here or use the default
});

@@ -62,6 +63,7 @@ Initialize the client by providing required configuration options:
```typescript
const client = new ReagClient({
model: openai("o3-mini", { structuredOutputs: true }),
filtrationModel: openai("minimax", { structuredOutputs: true }),
system?: string // Optional system prompt
batchSize?: number // Optional batch size
schema?: z.ZodSchema // Optional schema
30 changes: 27 additions & 3 deletions typescript/src/client.ts
@@ -16,6 +16,11 @@ export interface ClientOptions {
* See: https://sdk.vercel.ai/docs/foundations/providers-and-models
*/
model: LanguageModel;
/**
* The filtration model instance to use for document filtration.
* This should be an instance of a model that implements the Vercel AI SDK's LanguageModel interface.
*/
filtrationModel: LanguageModel;
/**
* The system prompt that provides context and instructions to the model.
* This string sets the behavior and capabilities of the model for all queries.
@@ -69,6 +74,7 @@ const DEFAULT_BATCH_SIZE = 20;
*/
export class ReagClient {
private readonly model: LanguageModel;
private readonly filtrationModel: LanguageModel;
private readonly system: string;
private readonly batchSize: number;
private readonly schema: z.ZodSchema;
@@ -79,6 +85,7 @@ export class ReagClient {
*/
constructor(options: ClientOptions) {
this.model = options.model;
this.filtrationModel = options.filtrationModel;
this.system = options.system || REAG_SYSTEM_PROMPT;
this.batchSize = options.batchSize || DEFAULT_BATCH_SIZE;
this.schema = options.schema || RESPONSE_SCHEMA;
@@ -171,20 +178,37 @@ export class ReagClient {
const system = `${
this.system
}\n\n# Available source\n\n${formatDoc(document)}`;
const response = await generateObject({

// Use the filtration model for document filtration
const filtrationResponse = await generateObject({
model: this.filtrationModel,
system,
prompt,
schema: this.schema,
});

const filtrationData = filtrationResponse.object;

if (filtrationData.isIrrelevant) {
return null;
}

// Use the reasoning model for generating the final answer
const reasoningResponse = await generateObject({
model: this.model,
system,
prompt,
schema: this.schema,
});

return {
response,
response: reasoningResponse,
document,
};
})
);
return batchResponses;

return batchResponses.filter((response) => response !== null);
})
);
