From a1b560072a655a894953e7fde4877f0c672ab138 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Tue, 18 Mar 2025 17:06:09 -0700 Subject: [PATCH 1/3] remove summarization from web-scraping flow block --- .../agentFlows/executors/web-scraping.js | 26 ++----------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js index 3e9d0462af4..b2b15f04880 100644 --- a/server/utils/agentFlows/executors/web-scraping.js +++ b/server/utils/agentFlows/executors/web-scraping.js @@ -1,7 +1,4 @@ const { CollectorApi } = require("../../collectorApi"); -const { TokenManager } = require("../../helpers/tiktoken"); -const Provider = require("../../agents/aibitat/providers/ai-provider"); -const { summarizeContent } = require("../../agents/aibitat/utils/summarize"); /** * Execute a web scraping flow step @@ -11,7 +8,7 @@ const { summarizeContent } = require("../../agents/aibitat/utils/summarize"); */ async function executeWebScraping(config, context) { const { url, captureAs = "text" } = config; - const { introspect, logger, aibitat } = context; + const { introspect, logger } = context; logger( `\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block` ); @@ -40,26 +37,7 @@ async function executeWebScraping(config, context) { throw new Error("There was no content to be collected or read."); } - const tokenCount = new TokenManager( - aibitat.defaultProvider.model - ).countFromString(content); - const contextLimit = Provider.contextLimit( - aibitat.defaultProvider.provider, - aibitat.defaultProvider.model - ); - if (tokenCount < contextLimit) return content; - - introspect( - `This page's content is way too long (${tokenCount} tokens). I will summarize it right now.` - ); - const summary = await summarizeContent({ - provider: aibitat.defaultProvider.provider, - model: aibitat.defaultProvider.model, - content, - }); - - introspect(`Successfully summarized content`); - return summary; + return content; } /** From 6040718116b2f78685768d64dba6f1870050c150 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Tue, 18 Mar 2025 17:18:49 -0700 Subject: [PATCH 2/3] add option to enable/disable summarization in web-scraping block --- .../nodes/WebScrapingNode/index.jsx | 36 ++++++++++++++++- .../agentFlows/executors/web-scraping.js | 39 +++++++++++++++++-- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx index 76655af74c8..053354e4fa0 100644 --- a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx +++ b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx @@ -31,7 +31,9 @@ export default function WebScrapingNode({ onConfigChange({ querySelector: e.target.value })} + onChange={(e) => + onConfigChange({ ...config, querySelector: e.target.value }) + } placeholder=".article-content, #content, .main-content, etc." className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5" /> )} +
+ +
+ +
+

+ When enabled, long webpage content will be automatically summarized to + reduce token usage. Note: This may affect data quality and remove + specific details from the original content. +

+
+
+ +

+ When enabled, long webpage content will be automatically summarized to + reduce token usage. +
+
+ Note: This may affect data quality and remove specific details from + the original content. +

+
); } diff --git a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx index 053354e4fa0..f1e48b24de2 100644 --- a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx +++ b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx @@ -1,3 +1,4 @@ +import { Info } from "@phosphor-icons/react"; import React from "react"; export default function WebScrapingNode({ @@ -71,15 +72,22 @@ export default function WebScrapingNode({ )} -
- +
+
+ + +
-

- When enabled, long webpage content will be automatically summarized to - reduce token usage. Note: This may affect data quality and remove - specific details from the original content. -

-