From a1b560072a655a894953e7fde4877f0c672ab138 Mon Sep 17 00:00:00 2001
From: shatfield4
Date: Tue, 18 Mar 2025 17:06:09 -0700
Subject: [PATCH 1/3] remove summarization from web-scraping flow block
---
.../agentFlows/executors/web-scraping.js | 26 ++-----------------
1 file changed, 2 insertions(+), 24 deletions(-)
diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js
index 3e9d0462af4..b2b15f04880 100644
--- a/server/utils/agentFlows/executors/web-scraping.js
+++ b/server/utils/agentFlows/executors/web-scraping.js
@@ -1,7 +1,4 @@
const { CollectorApi } = require("../../collectorApi");
-const { TokenManager } = require("../../helpers/tiktoken");
-const Provider = require("../../agents/aibitat/providers/ai-provider");
-const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
/**
* Execute a web scraping flow step
@@ -11,7 +8,7 @@ const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
*/
async function executeWebScraping(config, context) {
const { url, captureAs = "text" } = config;
- const { introspect, logger, aibitat } = context;
+ const { introspect, logger } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
);
@@ -40,26 +37,7 @@ async function executeWebScraping(config, context) {
throw new Error("There was no content to be collected or read.");
}
- const tokenCount = new TokenManager(
- aibitat.defaultProvider.model
- ).countFromString(content);
- const contextLimit = Provider.contextLimit(
- aibitat.defaultProvider.provider,
- aibitat.defaultProvider.model
- );
- if (tokenCount < contextLimit) return content;
-
- introspect(
- `This page's content is way too long (${tokenCount} tokens). I will summarize it right now.`
- );
- const summary = await summarizeContent({
- provider: aibitat.defaultProvider.provider,
- model: aibitat.defaultProvider.model,
- content,
- });
-
- introspect(`Successfully summarized content`);
- return summary;
+ return content;
}
/**
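
Taken together, the hunks in this patch reduce the executor to roughly the following. A minimal sketch: the collector call in the middle is a hypothetical stand-in (the patch does not show that part), and only the destructuring, the empty-content guard, and the final return come from the diff above.

const { CollectorApi } = require("../../collectorApi");

async function executeWebScraping(config, context) {
  const { url, captureAs = "text" } = config;
  const { introspect, logger } = context; // aibitat is no longer needed

  logger(
    `\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
  );

  // Hypothetical fetch through the collector -- the real call sits outside
  // the visible hunks, so the method name here is an assumption.
  const content = await new CollectorApi().getLinkContent(url, captureAs);
  if (!content || content?.length === 0) {
    throw new Error("There was no content to be collected or read.");
  }

  // After this patch the raw page content is always returned: no token
  // counting, no context-limit check, and no LLM summarization pass.
  return content;
}
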
From 6040718116b2f78685768d64dba6f1870050c150 Mon Sep 17 00:00:00 2001
From: shatfield4
Date: Tue, 18 Mar 2025 17:18:49 -0700
Subject: [PATCH 2/3] add option to enable/disable summarization in
web-scraping block
---
.../nodes/WebScrapingNode/index.jsx | 36 ++++++++++++++++-
.../agentFlows/executors/web-scraping.js | 39 +++++++++++++++++--
2 files changed, 70 insertions(+), 5 deletions(-)
diff --git a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
index 76655af74c8..053354e4fa0 100644
--- a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
+++ b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
@@ -31,7 +31,9 @@ export default function WebScrapingNode({
- onChange={(e) => onConfigChange({ captureAs: e.target.value })}
+ onChange={(e) =>
+ onConfigChange({ ...config, captureAs: e.target.value })
+ }
className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5"
>
{[
@@ -60,13 +62,43 @@ export default function WebScrapingNode({
- onChange={(e) => onConfigChange({ querySelector: e.target.value })}
+ onChange={(e) =>
+ onConfigChange({ ...config, querySelector: e.target.value })
+ }
placeholder=".article-content, #content, .main-content, etc."
className="w-full border-none bg-theme-settings-input-bg text-theme-text-primary text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none p-2.5"
/>
)}
+
+
+ Content Summarization
+
+
+
+
+ onConfigChange({
+ ...config,
+ enableSummarization: e.target.checked,
+ })
+ }
+ className="sr-only peer"
+ aria-label="Toggle content summarization"
+ />
+
+
+
+
+ When enabled, long webpage content will be automatically summarized to
+ reduce token usage. Note: This may affect data quality and remove
+ specific details from the original content.
+
+
+
Result Variable
diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js
index b2b15f04880..7f5515faabb 100644
--- a/server/utils/agentFlows/executors/web-scraping.js
+++ b/server/utils/agentFlows/executors/web-scraping.js
@@ -1,4 +1,7 @@
const { CollectorApi } = require("../../collectorApi");
+const { TokenManager } = require("../../helpers/tiktoken");
+const Provider = require("../../agents/aibitat/providers/ai-provider");
+const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
/**
* Execute a web scraping flow step
@@ -7,8 +10,8 @@ const { CollectorApi } = require("../../collectorApi");
* @returns {Promise<string>} Scraped content
*/
async function executeWebScraping(config, context) {
- const { url, captureAs = "text" } = config;
- const { introspect, logger } = context;
+ const { url, captureAs = "text", enableSummarization = false } = config;
+ const { introspect, logger, aibitat } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
);
@@ -37,7 +40,37 @@ async function executeWebScraping(config, context) {
throw new Error("There was no content to be collected or read.");
}
- return content;
+ if (!enableSummarization) {
+ logger(`Returning raw content as summarization is disabled`);
+ return content;
+ }
+
+ const tokenCount = new TokenManager(
+ aibitat.defaultProvider.model
+ ).countFromString(content);
+ const contextLimit = Provider.contextLimit(
+ aibitat.defaultProvider.provider,
+ aibitat.defaultProvider.model
+ );
+
+ if (tokenCount < contextLimit) {
+ logger(
+ `Content within token limit (${tokenCount}/${contextLimit}). Returning raw content.`
+ );
+ return content;
+ }
+
+ introspect(
+ `This page's content is way too long (${tokenCount} tokens). I will summarize it right now.`
+ );
+ const summary = await summarizeContent({
+ provider: aibitat.defaultProvider.provider,
+ model: aibitat.defaultProvider.model,
+ content,
+ });
+
+ introspect(`Successfully summarized content`);
+ return summary;
}
/**
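
Because the markup in the WebScrapingNode hunk above is only partially legible, here is a rough sketch of the toggle it introduces. Only the strings that survive in the hunk are taken from the patch: the "Content Summarization" label, the onChange handler, the "sr-only peer" class, the aria-label, and the description copy. The element nesting, the checked binding, and every other class name are assumptions.

{/* Sketch of the Content Summarization toggle added to WebScrapingNode.
    Structure and styling classes (other than "sr-only peer") are assumed. */}
<div className="mt-4 flex flex-col gap-y-1">
  <label className="text-sm font-medium text-theme-text-primary">
    Content Summarization
  </label>
  <label className="relative inline-flex cursor-pointer items-center">
    <input
      type="checkbox"
      checked={config.enableSummarization || false}
      onChange={(e) =>
        onConfigChange({
          ...config,
          enableSummarization: e.target.checked,
        })
      }
      className="sr-only peer"
      aria-label="Toggle content summarization"
    />
    {/* The peer-styled track/knob element is not recoverable from the hunk. */}
  </label>
  <p className="text-xs text-theme-text-secondary">
    When enabled, long webpage content will be automatically summarized to
    reduce token usage. Note: This may affect data quality and remove
    specific details from the original content.
  </p>
</div>

The executor hunk in the same patch is intact above: with enableSummarization false (the default in this patch) the raw content is returned, and only over-limit pages are summarized when the toggle is enabled.
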
From 636e9faec1a230243e6b5079d2c048497cb02717 Mon Sep 17 00:00:00 2001
From: timothycarambat
Date: Thu, 15 May 2025 10:10:54 -0700
Subject: [PATCH 3/3] change layout; default summarization to true (was the
 normal behavior prior)
---
.../src/pages/Admin/AgentBuilder/index.jsx | 28 +++++++++++--------
.../nodes/WebScrapingNode/index.jsx | 24 ++++++++--------
.../agentFlows/executors/web-scraping.js | 2 +-
3 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/frontend/src/pages/Admin/AgentBuilder/index.jsx b/frontend/src/pages/Admin/AgentBuilder/index.jsx
index f658931dad5..930a575a7fc 100644
--- a/frontend/src/pages/Admin/AgentBuilder/index.jsx
+++ b/frontend/src/pages/Admin/AgentBuilder/index.jsx
@@ -1,5 +1,6 @@
import React, { useState, useEffect, useRef } from "react";
import { useNavigate, useParams } from "react-router-dom";
+import { Tooltip } from "react-tooltip";
import BlockList, { BLOCK_TYPES, BLOCK_INFO } from "./BlockList";
import AddBlockMenu from "./AddBlockMenu";
@@ -288,18 +289,6 @@ export default function AgentBuilder() {
});
};
- // const runFlow = async (uuid) => {
- // try {
- // const { success, error, _results } = await AgentFlows.runFlow(uuid);
- // if (!success) throw new Error(error);
-
- // showToast("Flow executed successfully!", "success", { clear: true });
- // } catch (error) {
- // console.error(error);
- // showToast("Failed to run agent flow", "error", { clear: true });
- // }
- // };
-
const clearFlow = () => {
if (!!flowId) navigate(paths.agents.builder());
setAgentName("");
@@ -356,6 +345,21 @@ export default function AgentBuilder() {
+
+
+ When enabled, long webpage content will be automatically summarized to
+ reduce token usage.
+
+
+ Note: This may affect data quality and remove specific details from
+ the original content.
+
+
);
}
diff --git a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
index 053354e4fa0..f1e48b24de2 100644
--- a/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
+++ b/frontend/src/pages/Admin/AgentBuilder/nodes/WebScrapingNode/index.jsx
@@ -1,3 +1,4 @@
+import { Info } from "@phosphor-icons/react";
import React from "react";
export default function WebScrapingNode({
@@ -71,15 +72,22 @@ export default function WebScrapingNode({
)}
-
-
- Content Summarization
-
+
+
+
+ Content Summarization
+
+
+
-
- When enabled, long webpage content will be automatically summarized to
- reduce token usage. Note: This may affect data quality and remove
- specific details from the original content.
-
-
Result Variable
diff --git a/server/utils/agentFlows/executors/web-scraping.js b/server/utils/agentFlows/executors/web-scraping.js
index 7f5515faabb..e54b95fe6f7 100644
--- a/server/utils/agentFlows/executors/web-scraping.js
+++ b/server/utils/agentFlows/executors/web-scraping.js
@@ -10,7 +10,7 @@ const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
* @returns {Promise<string>} Scraped content
*/
async function executeWebScraping(config, context) {
- const { url, captureAs = "text", enableSummarization = false } = config;
+ const { url, captureAs = "text", enableSummarization = true } = config;
const { introspect, logger, aibitat } = context;
logger(
`\x1b[43m[AgentFlowToolExecutor]\x1b[0m - executing Web Scraping block`
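
The layout change in this last patch is only partly legible above: the long description moves out of WebScrapingNode into a single react-tooltip instance rendered by AgentBuilder, and the node gains an Info icon that targets it. The sketch below shows that shape; the tooltip id, props, and class names are assumptions, while the imports, the Info icon, and the copy come from the hunks.

// AgentBuilder/index.jsx -- one shared tooltip rendered near the end of the page:
<Tooltip id="content-summarization-tooltip" place="top" className="tooltip !text-xs">
  <p>
    When enabled, long webpage content will be automatically summarized to
    reduce token usage.
  </p>
  <p>
    Note: This may affect data quality and remove specific details from
    the original content.
  </p>
</Tooltip>

// WebScrapingNode/index.jsx -- the label row now points at that tooltip:
<div className="flex items-center gap-x-1">
  <label className="text-sm font-medium text-theme-text-primary">
    Content Summarization
  </label>
  <Info size={16} data-tooltip-id="content-summarization-tooltip" />
</div>

The executor change is fully visible in the final hunk: enableSummarization now defaults to true, so existing flows that never set the flag keep the summarize-when-too-long behavior that PATCH 1/3 had removed.
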