diff --git a/collector/extensions/index.js b/collector/extensions/index.js index 0e91d173161..6a3f3393e13 100644 --- a/collector/extensions/index.js +++ b/collector/extensions/index.js @@ -4,69 +4,112 @@ const { reqBody } = require("../utils/http"); function extensions(app) { if (!app) return; - app.post("/ext/github-repo", [verifyPayloadIntegrity], async function (request, response) { - try { - const loadGithubRepo = require("../utils/extensions/GithubRepo"); - const { success, reason, data } = await loadGithubRepo(reqBody(request)); - response.status(200).json({ - success, - reason, - data - }); - } catch (e) { - console.error(e); - response.status(200).json({ - success: false, - reason: e.message || "A processing error occurred.", - data: {}, - }); + app.post( + "/ext/github-repo", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const loadGithubRepo = require("../utils/extensions/GithubRepo"); + const { success, reason, data } = await loadGithubRepo( + reqBody(request) + ); + response.status(200).json({ + success, + reason, + data, + }); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + reason: e.message || "A processing error occurred.", + data: {}, + }); + } + return; } - return; - }); + ); // gets all branches for a specific repo - app.post("/ext/github-repo/branches", [verifyPayloadIntegrity], async function (request, response) { - try { - const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader"); - const allBranches = await (new GithubRepoLoader(reqBody(request))).getRepoBranches() - response.status(200).json({ - success: true, - reason: null, - data: { - branches: allBranches - } - }); - } catch (e) { - console.error(e); - response.status(400).json({ - success: false, - reason: e.message, - data: { - branches: [] - } - }); + app.post( + "/ext/github-repo/branches", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader"); + const allBranches = await new GithubRepoLoader( + reqBody(request) + ).getRepoBranches(); + response.status(200).json({ + success: true, + reason: null, + data: { + branches: allBranches, + }, + }); + } catch (e) { + console.error(e); + response.status(400).json({ + success: false, + reason: e.message, + data: { + branches: [], + }, + }); + } + return; } - return; - }); + ); - app.post("/ext/youtube-transcript", [verifyPayloadIntegrity], async function (request, response) { - try { - const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript"); - const { success, reason, data } = await loadYouTubeTranscript(reqBody(request)); - response.status(200).json({ success, reason, data }); - } catch (e) { - console.error(e); - response.status(400).json({ - success: false, - reason: e.message, - data: { - title: null, - author: null - } - }); + app.post( + "/ext/youtube-transcript", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript"); + const { success, reason, data } = await loadYouTubeTranscript( + reqBody(request) + ); + response.status(200).json({ success, reason, data }); + } catch (e) { + console.error(e); + response.status(400).json({ + success: false, + reason: e.message, + data: { + title: null, + author: null, + }, + }); + } + return; } - return; - }); + ); + + app.post( + "/ext/confluence", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const loadConfluence = require("../utils/extensions/Confluence"); + const { success, reason, data } = await loadConfluence( + reqBody(request) + ); + response.status(200).json({ success, reason, data }); + } catch (e) { + console.error(e); + response.status(400).json({ + success: false, + reason: e.message, + data: { + title: null, + author: null, + }, + }); + } + return; + } + ); } module.exports = extensions; diff --git a/collector/package.json b/collector/package.json index 4a5a99fffde..5d2e5f0f58d 100644 --- a/collector/package.json +++ b/collector/package.json @@ -49,4 +49,4 @@ "nodemon": "^2.0.22", "prettier": "^2.4.1" } -} \ No newline at end of file +} diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js new file mode 100644 index 00000000000..1ea642e1aa3 --- /dev/null +++ b/collector/utils/extensions/Confluence/index.js @@ -0,0 +1,110 @@ +const fs = require("fs"); +const path = require("path"); +const { default: slugify } = require("slugify"); +const { v4 } = require("uuid"); +const { writeToServerDocuments } = require("../../files"); +const { tokenizeString } = require("../../tokenizer"); +const { + ConfluencePagesLoader, +} = require("langchain/document_loaders/web/confluence"); + +function validSpaceUrl(spaceUrl = "") { + const UrlPattern = require("url-pattern"); + const pattern = new UrlPattern( + "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*" + ); + const match = pattern.match(spaceUrl); + if (!match) return { valid: false, result: null }; + return { valid: true, result: match }; +} + +async function loadConfluence({ pageUrl, username, accessToken }) { + if (!pageUrl || !username || !accessToken) { + return { + success: false, + reason: + "You need either a username and access token, or a personal access token (PAT), to use the Confluence connector.", + }; + } + + const validSpace = validSpaceUrl(pageUrl); + if (!validSpace.result) { + return { + success: false, + reason: + "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*", + }; + } + + const { subdomain, spaceKey } = validSpace.result; + console.log(`-- Working Confluence ${subdomain}.atlassian.net --`); + const loader = new ConfluencePagesLoader({ + baseUrl: `https://${subdomain}.atlassian.net/wiki`, + spaceKey, + username, + accessToken, + }); + + const { docs, error } = await loader + .load() + .then((docs) => { + return { docs, error: null }; + }) + .catch((e) => { + return { + docs: [], + error: e.message?.split("Error:")?.[1] || e.message, + }; + }); + + if (!docs.length || !!error) { + return { + success: false, + reason: error ?? "No pages found for that Confluence space.", + }; + } + const outFolder = slugify( + `${subdomain}-confluence-${v4().slice(0, 4)}` + ).toLowerCase(); + const outFolderPath = path.resolve( + __dirname, + `../../../../server/storage/documents/${outFolder}` + ); + fs.mkdirSync(outFolderPath); + + docs.forEach((doc) => { + const data = { + id: v4(), + url: doc.metadata.url + ".page", + title: doc.metadata.title || doc.metadata.source, + docAuthor: subdomain, + description: doc.metadata.title, + docSource: `${subdomain} Confluence`, + chunkSource: `confluence://${doc.metadata.url}`, + published: new Date().toLocaleString(), + wordCount: doc.pageContent.split(" ").length, + pageContent: doc.pageContent, + token_count_estimate: tokenizeString(doc.pageContent).length, + }; + + console.log( + `[Confluence Loader]: Saving ${doc.metadata.title} to ${outFolder}` + ); + writeToServerDocuments( + data, + `${slugify(doc.metadata.title)}-${data.id}`, + outFolderPath + ); + }); + + return { + success: true, + reason: null, + data: { + spaceKey, + destination: outFolder, + }, + }; +} + +module.exports = loadConfluence; diff --git a/frontend/src/components/DataConnectorOption/media/confluence.jpeg b/frontend/src/components/DataConnectorOption/media/confluence.jpeg new file mode 100644 index 00000000000..7559663a68a Binary files /dev/null and b/frontend/src/components/DataConnectorOption/media/confluence.jpeg differ diff --git a/frontend/src/components/DataConnectorOption/media/index.js b/frontend/src/components/DataConnectorOption/media/index.js index 543bed5f74b..ac8105975ed 100644 --- a/frontend/src/components/DataConnectorOption/media/index.js +++ b/frontend/src/components/DataConnectorOption/media/index.js @@ -1,9 +1,11 @@ import Github from "./github.svg"; import YouTube from "./youtube.svg"; +import Confluence from "./confluence.jpeg"; const ConnectorImages = { github: Github, youtube: YouTube, + confluence: Confluence, }; export default ConnectorImages; diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Confluence/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Confluence/index.jsx new file mode 100644 index 00000000000..52ca7e63df6 --- /dev/null +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Confluence/index.jsx @@ -0,0 +1,164 @@ +import { useState } from "react"; +import System from "@/models/system"; +import showToast from "@/utils/toast"; +import { Warning } from "@phosphor-icons/react"; +import { Tooltip } from "react-tooltip"; + +export default function ConfluenceOptions() { + const [loading, setLoading] = useState(false); + + const handleSubmit = async (e) => { + e.preventDefault(); + const form = new FormData(e.target); + + try { + setLoading(true); + showToast( + "Fetching all pages for Confluence space - this may take a while.", + "info", + { + clear: true, + autoClose: false, + } + ); + const { data, error } = await System.dataConnectors.confluence.collect({ + pageUrl: form.get("pageUrl"), + username: form.get("username"), + accessToken: form.get("accessToken"), + }); + + if (!!error) { + showToast(error, "error", { clear: true }); + setLoading(false); + return; + } + + showToast( + `Pages collected from Confluence space ${data.spaceKey}. Output folder is ${data.destination}.`, + "success", + { clear: true } + ); + e.target.reset(); + setLoading(false); + } catch (e) { + console.error(e); + showToast(e.message, "error", { clear: true }); + setLoading(false); + } + }; + + return ( +