这是indexloc提供的服务，不要输入任何密码
Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 93 additions & 40 deletions collector/utils/extensions/Confluence/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,6 @@ const {
ConfluencePagesLoader,
} = require("langchain/document_loaders/web/confluence");

/**
 * Matches a Confluence space URL against the supported URL shapes.
 *
 * The Atlassian-hosted shape is tried first. If it does not match, the two
 * self-hosted shapes (`/wiki/spaces/` and `/display/`) are tried in order and
 * the first one that matches wins.
 *
 * @param {string} spaceUrl - the Confluence space URL to inspect
 * @returns {{valid: boolean, result: (Object|null), custom: (boolean|undefined)}}
 *   `result` is the raw `UrlPattern` match. For self-hosted matches it also
 *   carries a `customDomain` property and the return value has `custom: true`.
 */
function validSpaceUrl(spaceUrl = "") {
  // Atlassian default URL match
  const cloudMatch = new UrlPattern(
    "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
  ).match(spaceUrl);
  if (cloudMatch) return { valid: true, result: cloudMatch };

  const selfHostedPatterns = [
    "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*", // Custom Confluence space
    "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*", // Custom Confluence space + Human-readable space tag.
  ];

  // Stop at the first pattern that matches; later patterns are never built.
  let selfHostedMatch = null;
  for (const candidate of selfHostedPatterns) {
    selfHostedMatch = new UrlPattern(candidate).match(spaceUrl);
    if (selfHostedMatch) break;
  }

  // No match
  if (!selfHostedMatch) return { valid: false, result: null };

  // The subdomain segment is optional, so only prefix it when it was captured.
  const { subdomain, domain, tld } = selfHostedMatch;
  const hostPrefix = subdomain ? `${subdomain}.` : "";
  selfHostedMatch.customDomain = `${hostPrefix}${domain}.${tld}`;
  return { valid: true, result: selfHostedMatch, custom: true };
}

async function loadConfluence({ pageUrl, username, accessToken }) {
if (!pageUrl || !username || !accessToken) {
return {
Expand All @@ -49,21 +18,16 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
};
}

const validSpace = validSpaceUrl(pageUrl);
if (!validSpace.result) {
const { valid, result } = validSpaceUrl(pageUrl);
if (!valid) {
return {
success: false,
reason:
"Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*",
"Confluence space URL is not in the expected format of one of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/* or https://customDomain/display/~SPACEID/*",
};
}

const { subdomain, customDomain, spaceKey } = validSpace.result;
let baseUrl = `https://${subdomain}.atlassian.net/wiki`;
if (customDomain) {
baseUrl = `https://${customDomain}/wiki`;
}

const { apiBase: baseUrl, spaceKey, subdomain } = result;
console.log(`-- Working Confluence ${baseUrl} --`);
const loader = new ConfluencePagesLoader({
baseUrl,
Expand Down Expand Up @@ -142,4 +106,93 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
};
}

/**
* A match result for a url-pattern of a Confluence URL
* @typedef {Object} ConfluenceMatchResult
* @property {string} subdomain - the subdomain of an organization's Confluence space
* @property {string} spaceKey - the spaceKey of an organization that determines the documents to collect.
* @property {string} apiBase - the correct REST API url to use for loader.
*/

/**
 * Generates the correct API base URL for interfacing with the Confluence REST API
 * depending on the URL pattern being used since there are various ways to host/access a
 * Confluence space.
 *
 * - Atlassian-hosted (default): `https://<subdomain>.atlassian.net/wiki`
 * - Custom domain: `https://<customDomain>` with no sub-path. The host is taken
 *   from `matchResult.customDomain` when present, otherwise it is rebuilt from
 *   the `subdomain` (optional), `domain` and `tld` captures of the match.
 *
 * @param {ConfluenceMatchResult} matchResult - result from `url-pattern`.match
 * @param {boolean} isCustomDomain - determines if we need to coerce the subpath of the provided URL
 * @returns {string} - the resulting REST API URL
 */
function generateAPIBaseUrl(matchResult = {}, isCustomDomain = false) {
  const { subdomain, domain, tld, customDomain } = matchResult;
  if (!isCustomDomain) return `https://${subdomain}.atlassian.net/wiki`;

  // `customDomain` used to be read as a free variable here, which threw a
  // ReferenceError on every custom-domain call. Derive it from the match.
  const subdomainPrefix = subdomain ? `${subdomain}.` : "";
  const host = customDomain || `${subdomainPrefix}${domain}.${tld}`;
  return `https://${host}`;
}

/**
 * Validates and parses the correct information from a given Confluence URL.
 *
 * Three URL shapes are tried in order and the first match wins:
 *  1. Atlassian-hosted: `https://<subdomain>.atlassian.net/wiki/spaces/<spaceKey>`
 *  2. Self-hosted:      `https://[<subdomain>.]<domain>.<tld>/wiki/spaces/<spaceKey>`
 *  3. Self-hosted mask: `https://[<subdomain>.]<domain>.<tld>/display/<spaceKey>`
 *
 * A URL can match a shape without a space key; in that case `result` is still
 * populated but `valid` is `false`.
 *
 * @param {string} spaceUrl - The organization's Confluence URL to parse
 * @returns {{
 *  valid: boolean,
 *  result: (ConfluenceMatchResult|null),
 * }} For self-hosted matches `result` also carries `customDomain`, the host
 * the `apiBase` was built from.
 */
function validSpaceUrl(spaceUrl = "") {
  // Does not rely on the match object inheriting from Object.prototype.
  const hasSpaceKey = (match) =>
    Object.prototype.hasOwnProperty.call(match, "spaceKey");

  // If using the default Atlassian Confluence URL pattern.
  // We can proceed because the Library/API can use this base url scheme.
  const defaultMatch = new UrlPattern(
    "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
  ).match(spaceUrl);
  if (defaultMatch) {
    return {
      valid: hasSpaceKey(defaultMatch),
      result: {
        ...defaultMatch,
        apiBase: generateAPIBaseUrl(defaultMatch),
      },
    };
  }

  // Self-hosted shapes. The REST API base must point at the custom host, not
  // at atlassian.net:
  //  - `/wiki/spaces/` keeps the `/wiki` sub-path.
  //  - `/display/` is basically a URL mask, so the API lives at the root domain.
  const selfHostedShapes = [
    {
      pattern: "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*",
      subpath: "/wiki",
    },
    {
      pattern: "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*",
      subpath: "",
    },
  ];

  for (const { pattern, subpath } of selfHostedShapes) {
    const match = new UrlPattern(pattern).match(spaceUrl);
    if (!match) continue;

    // The subdomain segment is optional, so only prefix it when captured.
    const subdomainPrefix = match.subdomain ? `${match.subdomain}.` : "";
    const customDomain = `${subdomainPrefix}${match.domain}.${match.tld}`;
    return {
      valid: hasSpaceKey(match),
      result: {
        ...match,
        customDomain,
        apiBase: `https://${customDomain}${subpath}`,
      },
    };
  }

  // No match
  return { valid: false, result: null };
}

module.exports = loadConfluence;