From 6369d5f4627065f5285feecb54adbdde16bfd998 Mon Sep 17 00:00:00 2001 From: Predrag Stojadinovic Date: Sat, 11 May 2024 18:43:55 +0200 Subject: [PATCH 1/3] chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones --- .../utils/extensions/Confluence/index.js | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 5a473f654cc..63094a1f99e 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -2,20 +2,30 @@ const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); +const UrlPattern = require("url-pattern"); const { writeToServerDocuments } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); -const { - ConfluencePagesLoader, -} = require("langchain/document_loaders/web/confluence"); +const { ConfluencePagesLoader } = require("langchain/document_loaders/web/confluence"); function validSpaceUrl(spaceUrl = "") { - const UrlPattern = require("url-pattern"); - const pattern = new UrlPattern( - "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*" - ); - const match = pattern.match(spaceUrl); - if (!match) return { valid: false, result: null }; - return { valid: true, result: match }; + // Atlassian default URL match + const atlassianPattern = new UrlPattern("https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*"); + const atlassianMatch = atlassianPattern.match(spaceUrl); + if (atlassianMatch) { + return { valid: true, result: atlassianMatch }; + } + + // Custom Confluence URL match + const customPattern = new UrlPattern("https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*"); + const customMatch = customPattern.match(spaceUrl); + if (customMatch) { + customMatch.customDomain = (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // + (`${customMatch.domain}.${customMatch.tld}`); + return { valid: true, result: customMatch, custom: true }; + } + + // No match + return { valid: false, result: null }; } async function loadConfluence({ pageUrl, username, accessToken }) { @@ -31,15 +41,19 @@ async function loadConfluence({ pageUrl, username, accessToken }) { if (!validSpace.result) { return { success: false, - reason: - "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*", + reason: "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", }; } - const { subdomain, spaceKey } = validSpace.result; - console.log(`-- Working Confluence ${subdomain}.atlassian.net --`); + const { subdomain, customDomain, spaceKey } = validSpace.result; + let baseUrl = `https://${subdomain}.atlassian.net/wiki`; + if (customDomain) { + baseUrl = `https://${customDomain}/wiki`; + } + + console.log(`-- Working Confluence ${baseUrl} --`); const loader = new ConfluencePagesLoader({ - baseUrl: `https://${subdomain}.atlassian.net/wiki`, + baseUrl, spaceKey, username, accessToken, From c73c5087e2f7a724d00a1186922f46c07622bc57 Mon Sep 17 00:00:00 2001 From: Predrag Stojadinovic Date: Sat, 11 May 2024 19:04:28 +0200 Subject: [PATCH 2/3] chore: formatting as per yarn lint --- .../utils/extensions/Confluence/index.js | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 63094a1f99e..5bb44deeb75 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -5,22 +5,29 @@ const { v4 } = require("uuid"); const UrlPattern = require("url-pattern"); const { writeToServerDocuments } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); -const { ConfluencePagesLoader } = require("langchain/document_loaders/web/confluence"); +const { + ConfluencePagesLoader, +} = require("langchain/document_loaders/web/confluence"); function validSpaceUrl(spaceUrl = "") { // Atlassian default URL match - const atlassianPattern = new UrlPattern("https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*"); + const atlassianPattern = new UrlPattern( + "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*" + ); const atlassianMatch = atlassianPattern.match(spaceUrl); if (atlassianMatch) { return { valid: true, result: atlassianMatch }; } // Custom Confluence URL match - const customPattern = new UrlPattern("https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*"); + const customPattern = new UrlPattern( + "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*" + ); const customMatch = customPattern.match(spaceUrl); if (customMatch) { - customMatch.customDomain = (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // - (`${customMatch.domain}.${customMatch.tld}`); + customMatch.customDomain = + (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // + `${customMatch.domain}.${customMatch.tld}`; return { valid: true, result: customMatch, custom: true }; } @@ -41,7 +48,8 @@ async function loadConfluence({ pageUrl, username, accessToken }) { if (!validSpace.result) { return { success: false, - reason: "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", + reason: + "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", }; } From ec4177340b097286a6924bb58b9b6113ba1d5cb6 Mon Sep 17 00:00:00 2001 From: Predrag Stojadinovic Date: Thu, 16 May 2024 18:20:53 +0200 Subject: [PATCH 3/3] chore: adding /display/ url matching to confluence data connector --- collector/utils/extensions/Confluence/index.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index da918b2d219..ff91773cbe3 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -24,7 +24,14 @@ function validSpaceUrl(spaceUrl = "") { "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*" ); const customMatch = customPattern.match(spaceUrl); - if (customMatch) { + + // Custom "display" Confluence URL match + const customDisplayPattern = new UrlPattern( + "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)/*" + ); + const customDisplayMatch = customDisplayPattern.match(spaceUrl); + + if (customMatch || customDisplayMatch) { customMatch.customDomain = (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // `${customMatch.domain}.${customMatch.tld}`;