From a1b560072a655a894953e7fde4877f0c672ab138 Mon Sep 17 00:00:00 2001
From: shatfield4
Date: Tue, 18 Mar 2025 17:06:09 -0700
Subject: [PATCH 1/3] remove summarization from web-scraping flow block
---
.../agentFlows/executors/web-scraping.js | 26 ++-----------------
1 file changed, 2 insertions(+), 24 deletions(-)
diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js
index 3e9d0462af4..b2b15f04880 100644
--- a/server/utils/agentFlows/executors/web-scraping.js
+++ b/server/utils/agentFlows/executors/web-scraping.js
@@ -1,7 +1,4 @@
const { CollectorApi } = require("../../collectorApi");
-const { TokenManager } = require("../../helpers/tiktoken");
-const Provider = require("../../agents/aibitat/providers/ai-provider");
-const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
/**
* Execute a web scraping flow step
@@ -11,7 +8,7 @@ const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
*/
async function executeWebScraping(config, context) {
const { url, captureAs = "text" } = config;
- const { introspect, logger, aibitat } = context;
+ const { introspect, logger } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
);
@@ -40,26 +37,7 @@ async function executeWebScraping(config, context) {
throw new Error("There was no content to be collected or read.");
}
- const tokenCount = new TokenManager(
- aibitat.defaultProvider.model
- ).countFromString(content);
- const contextLimit = Provider.contextLimit(
- aibitat.defaultProvider.provider,
- aibitat.defaultProvider.model
- );
- if (tokenCount < contextLimit) return content;
-
- introspect(
- `This page's content is way too long (${tokenCount} tokens). I will summarize it right now.`
- );
- const summary = await summarizeContent({
- provider: aibitat.defaultProvider.provider,
- model: aibitat.defaultProvider.model,
- content,
- });
-
- introspect(`Successfully summarized content`);
- return summary;
+ return content;
}
/**
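
Taken together, the hunks in this patch reduce the executor to roughly the following. A minimal sketch: the collector call in the middle is a hypothetical stand-in (the patch does not show that part), and only the destructuring, the empty-content guard, and the final return come from the diff above.

const { CollectorApi } = require("../../collectorApi");

async function executeWebScraping(config, context) {
  const { url, captureAs = "text" } = config;
  const { introspect, logger } = context; // aibitat is no longer needed

  logger(
    `\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
  );

  // Hypothetical fetch through the collector -- the real call sits outside
  // the visible hunks, so the method name here is an assumption.
  const content = await new CollectorApi().getLinkContent(url, captureAs);
  if (!content || content?.length === 0) {
    throw new Error("There was no content to be collected or read.");
  }

  // After this patch the raw page content is always returned: no token
  // counting, no context-limit check, and no LLM summarization pass.
  return content;
}
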
From 6040718116b2f78685768d64dba6f1870050c150 Mon Sep 17 00:00:00 2001
From: shatfield4
Date: Tue, 18 Mar 2025 17:18:49 -0700
Subject: [PATCH 2/3] add option to enable/disable summarization in
web-scraping block
---
.../nodes/WebScrapingNode/index.jsx | 36 ++++++++++++++++-
.../agentFlows/executors/web-scraping.js | 39 +++++++++++++++++--
2 files changed, 70 insertions(+), 5 deletions(-)
diff --git a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
index 76655af74c8..053354e4fa0 100644
--- a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
+++ b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
@@ -31,7 +31,9 @@ export default function WebScrapingNode({
- onChange={(e) => onConfigChange({ captureAs: e.target.value })}
+ onChange={(e) =>
+ onConfigChange({ ...config, captureAs: e.target.value })
+ }
className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5"
>
{[
@@ -60,13 +62,43 @@ export default function WebScrapingNode({
- onChange={(e) => onConfigChange({ querySelector: e.target.value })}
+ onChange={(e) =>
+ onConfigChange({ ...config, querySelector: e.target.value })
+ }
placeholder=".article-content, #content, .main-content, etc."
className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5"
/>
)}
+
+
+ Content Summarization
+
+
+
+
+ onConfigChange({
+ ...config,
+ enableSummarization: e.target.checked,
+ })
+ }
+ className="sr-only peer"
+ aria-label="Toggle content summarization"
+ />
+
+
+
+
+ When enabled, long webpage content will be automatically summarized to
+ reduce token usage. Note: This may affect data quality and remove
+ specific details from the original content.
+
+
+
Result Variable
diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js
index b2b15f04880..7f5515faabb 100644
--- a/server/utils/agentFlows/executors/web-scraping.js
+++ b/server/utils/agentFlows/executors/web-scraping.js
@@ -1,4 +1,7 @@
const { CollectorApi } = require("../../collectorApi");
+const { TokenManager } = require("../../helpers/tiktoken");
+const Provider = require("../../agents/aibitat/providers/ai-provider");
+const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
/**
* Execute a web scraping flow step
@@ -7,8 +10,8 @@ const { CollectorApi } = require("../../collectorApi");
* @returns {Promise<string>} Scraped content
*/
async function executeWebScraping(config, context) {
- const { url, captureAs = "text" } = config;
- const { introspect, logger } = context;
+ const { url, captureAs = "text", enableSummarization = false } = config;
+ const { introspect, logger, aibitat } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
);
@@ -37,7 +40,37 @@ async function executeWebScraping(config, context) {
throw new Error("There was no content to be collected or read.");
}
- return content;
+ if (!enableSummarization) {
+ logger(`Returning raw content as summarization is disabled`);
+ return content;
+ }
+
+ const tokenCount = new TokenManager(
+ aibitat.defaultProvider.model
+ ).countFromString(content);
+ const contextLimit = Provider.contextLimit(
+ aibitat.defaultProvider.provider,
+ aibitat.defaultProvider.model
+ );
+
+ if (tokenCount < contextLimit) {
+ logger(
+ `Content within token limit (${tokenCount}/${contextLimit}). Returning raw content.`
+ );
+ return content;
+ }
+
+ introspect(
+ `This page's content is way too long (${tokenCount} tokens). I will summarize it right now.`
+ );
+ const summary = await summarizeContent({
+ provider: aibitat.defaultProvider.provider,
+ model: aibitat.defaultProvider.model,
+ content,
+ });
+
+ introspect(`Successfully summarized content`);
+ return summary;
}
/**
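
Because the markup in the WebScrapingNode hunk above is only partially legible, here is a rough sketch of the toggle it introduces. Only the strings that survive in the hunk are taken from the patch: the "Content Summarization" label, the onChange handler, the "sr-only peer" class, the aria-label, and the description copy. The element nesting, the checked binding, and every other class name are assumptions.

{/* Sketch of the Content Summarization toggle added to WebScrapingNode.
    Structure and styling classes (other than "sr-only peer") are assumed. */}
<div className="mt-4 flex flex-col gap-y-1">
  <label className="text-sm font-medium text-theme-text-primary">
    Content Summarization
  </label>
  <label className="relative inline-flex cursor-pointer items-center">
    <input
      type="checkbox"
      checked={config.enableSummarization || false}
      onChange={(e) =>
        onConfigChange({
          ...config,
          enableSummarization: e.target.checked,
        })
      }
      className="sr-only peer"
      aria-label="Toggle content summarization"
    />
    {/* The peer-styled track/knob element is not recoverable from the hunk. */}
  </label>
  <p className="text-xs text-theme-text-secondary">
    When enabled, long webpage content will be automatically summarized to
    reduce token usage. Note: This may affect data quality and remove
    specific details from the original content.
  </p>
</div>

The executor hunk in the same patch is intact above: with enableSummarization false (the default in this patch) the raw content is returned, and only over-limit pages are summarized when the toggle is enabled.
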
From 636e9faec1a230243e6b5079d2c048497cb02717 Mon Sep 17 00:00:00 2001
From: timothycarambat
Date: Thu, 15 May 2025 10:10:54 -0700
Subject: [PATCH 3/3] change layout; default summarization to true (was the
 normal behavior prior)
---
.../src/pages/Admin/AgentBuilder/index.jsx | 28 +++++++++++--------
.../nodes/WebScrapingNode/index.jsx | 24 ++++++++--------
.../agentFlows/executors/web-scraping.js | 2 +-
3 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/frontend/src/pages/Admin/AgentBuilder/index.jsx b/frontend/src/pages/Admin/AgentBuilder/index.jsx
index f658931dad5..930a575a7fc 100644
--- a/frontend/src/pages/Admin/AgentBuilder/index.jsx
+++ b/frontend/src/pages/Admin/AgentBuilder/index.jsx
@@ -1,5 +1,6 @@
import React, { useState, useEffect, useRef } from "react";
import { useNavigate, useParams } from "react-router-dom";
+import { Tooltip } from "react-tooltip";
import BlockList, { BLOCK_TYPES, BLOCK_INFO } from "./BlockList";
import AddBlockMenu from "./AddBlockMenu";
@@ -288,18 +289,6 @@ export default function AgentBuilder() {
});
};
- // const runFlow = async (uuid) => {
- // try {
- // const { success, error, _results } = await AgentFlows.runFlow(uuid);
- // if (!success) throw new Error(error);
-
- // showToast("Flow executed successfully!", "success", { clear: true });
- // } catch (error) {
- // console.error(error);
- // showToast("Failed to run agent flow", "error", { clear: true });
- // }
- // };
-
const clearFlow = () => {
if (!!flowId) navigate(paths.agents.builder());
setAgentName("");
@@ -356,6 +345,21 @@ export default function AgentBuilder() {
+
+
+ When enabled, long webpage content will be automatically summarized to
+ reduce token usage.
+
+
+ Note: This may affect data quality and remove specific details from
+ the original content.
+
+
);
}
diff --git a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
index 053354e4fa0..f1e48b24de2 100644
--- a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
+++ b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
@@ -1,3 +1,4 @@
+import { Info } from "@phosphor-icons/react";
import React from "react";
export default function WebScrapingNode({
@@ -71,15 +72,22 @@ export default function WebScrapingNode({
)}
-
-
- Content Summarization
-
+
+
+
+ Content Summarization
+
+
+
-
- When enabled, long webpage content will be automatically summarized to
- reduce token usage. Note: This may affect data quality and remove
- specific details from the original content.
-
-
Result Variable
diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js
index 7f5515faabb..e54b95fe6f7 100644
--- a/server/utils/agentFlows/executors/web-scraping.js
+++ b/server/utils/agentFlows/executors/web-scraping.js
@@ -10,7 +10,7 @@ const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
* @returns {Promise<string>} Scraped content
*/
async function executeWebScraping(config, context) {
- const { url, captureAs = "text", enableSummarization = false } = config;
+ const { url, captureAs = "text", enableSummarization = true } = config;
const { introspect, logger, aibitat } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
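
The layout change in this last patch is only partly legible above: the long description moves out of WebScrapingNode into a single react-tooltip instance rendered by AgentBuilder, and the node gains an Info icon that targets it. The sketch below shows that shape; the tooltip id, props, and class names are assumptions, while the imports, the Info icon, and the copy come from the hunks.

// AgentBuilder/index.jsx -- one shared tooltip rendered near the end of the page:
<Tooltip id="content-summarization-tooltip" place="top" className="tooltip !text-xs">
  <p>
    When enabled, long webpage content will be automatically summarized to
    reduce token usage.
  </p>
  <p>
    Note: This may affect data quality and remove specific details from
    the original content.
  </p>
</Tooltip>

// WebScrapingNode/index.jsx -- the label row now points at that tooltip:
<div className="flex items-center gap-x-1">
  <label className="text-sm font-medium text-theme-text-primary">
    Content Summarization
  </label>
  <Info size={16} data-tooltip-id="content-summarization-tooltip" />
</div>

The executor change is fully visible in the final hunk: enableSummarization now defaults to true, so existing flows that never set the flag keep the summarize-when-too-long behavior that PATCH 1/3 had removed.
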