这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions collector/extensions/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const { reqBody } = require("../utils/http");

/**
 * Registers the collector's extension endpoints on the given Express-style app.
 *
 * Routes registered:
 *  - POST /ext/github-repo          -> runs the Github repo loader on the request body.
 *  - POST /ext/github-repo/branches -> lists the branches of the requested repo.
 *
 * @param {object} app - Object exposing `post(path, handler)`; nothing is registered when falsy.
 * @returns {void}
 */
function extensions(app) {
  if (!app) return;

  // Every endpoint answers with the same JSON envelope; only the status code varies.
  const reply = (response, statusCode, payload) =>
    response.status(statusCode).json(payload);

  app.post("/ext/github-repo", async (request, response) => {
    try {
      // Loaded lazily so the loader is only pulled in when the route is hit.
      const loadGithubRepo = require("../utils/extensions/GithubRepo");
      const result = await loadGithubRepo(reqBody(request));
      reply(response, 200, {
        success: result.success,
        reason: result.reason,
        data: result.data,
      });
    } catch (error) {
      console.error(error);
      // Failures on this route are still reported with a 200 and `success: false`.
      reply(response, 200, {
        success: false,
        reason: error.message || "A processing error occurred.",
        data: {},
      });
    }
    return;
  });

  // gets all branches for a specific repo
  app.post("/ext/github-repo/branches", async (request, response) => {
    try {
      const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
      const repoLoader = new GithubRepoLoader(reqBody(request));
      const allBranches = await repoLoader.getRepoBranches();
      reply(response, 200, {
        success: true,
        reason: null,
        data: { branches: allBranches },
      });
    } catch (error) {
      console.error(error);
      reply(response, 400, {
        success: false,
        reason: error.message,
        data: { branches: [] },
      });
    }
    return;
  });
}

module.exports = extensions;
3 changes: 3 additions & 0 deletions collector/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const { reqBody } = require("./utils/http");
const { processSingleFile } = require("./processSingleFile");
const { processLink } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions");
const app = express();

app.use(cors({ origin: true }));
Expand Down Expand Up @@ -57,6 +58,8 @@ app.post("/process-link", async function (request, response) {
return;
});

extensions(app);

app.get("/accepts", function (_, response) {
response.status(200).json(ACCEPTED_MIMES);
});
Expand Down
4 changes: 3 additions & 1 deletion collector/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"express": "^4.18.2",
"extract-zip": "^2.0.1",
"fluent-ffmpeg": "^2.1.2",
"ignore": "^5.3.0",
"js-tiktoken": "^1.0.8",
"langchain": "0.0.201",
"mammoth": "^1.6.0",
Expand All @@ -35,11 +36,12 @@
"pdf-parse": "^1.1.1",
"puppeteer": "^21.6.1",
"slugify": "^1.6.6",
"url-pattern": "^1.0.3",
"uuid": "^9.0.0",
"wavefile": "^11.0.0"
},
"devDependencies": {
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
}
}
149 changes: 149 additions & 0 deletions collector/utils/extensions/GithubRepo/RepoLoader/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/**
 * Resolves a Github repository URL into author/project/branch information and
 * loads its files through LangChain's Github document loader.
 *
 * Typical use: `const loader = await new RepoLoader(args).init();` followed by
 * `await loader.recursiveLoader()`. `getRepoBranches()` can be called on its
 * own (without `init`) to list the branches of a repo.
 */
class RepoLoader {
  // https://github.com/{author}/{project} with an optional trailing slash.
  // Dots are allowed so that repos such as `socket.io` validate, and both
  // segments are mandatory so a bare profile URL is rejected.
  static #REPO_URL = /^https:\/\/github\.com\/([\w.-]+)\/([\w.-]+)\/?$/;

  // Page size requested from the Github branches API.
  static #BRANCHES_PER_PAGE = 100;

  /**
   * @param {object} [args]
   * @param {string} [args.repo] - Full `https://github.com/{author}/{project}` URL.
   * @param {string} [args.branch] - Branch to load; auto-assigned by `init` when missing/unknown.
   * @param {string|null} [args.accessToken] - Optional Github access token.
   * @param {string[]} [args.ignorePaths] - Paths handed to the loader as `ignorePaths`.
   */
  constructor(args = {}) {
    this.ready = false;
    this.repo = args?.repo;
    this.branch = args?.branch;
    this.accessToken = args?.accessToken || null;
    this.ignorePaths = args?.ignorePaths || [];

    this.author = null;
    this.project = null;
    this.branches = [];
  }

  /**
   * Checks `this.repo` against the expected Github URL shape and, on success,
   * stores the parsed `author` and `project`.
   * @returns {boolean} true when the URL names both an author and a project.
   */
  #validGithubUrl() {
    if (typeof this.repo !== "string") return false;
    const match = RepoLoader.#REPO_URL.exec(this.repo);
    if (!match) return false;

    const [, author, project] = match;
    this.author = author;
    this.project = project;
    return true;
  }

  /**
   * Ensure the branch provided actually exists and, if it does not or has not
   * been set, auto-assign a default branch.
   *
   * Preference order for the default: `main`, `master`, then the first branch
   * the API listed. `master` is only a last resort when no branch is known.
   * @returns {Promise<void>}
   */
  async #validBranch() {
    await this.getRepoBranches();
    if (!!this.branch && this.branches.includes(this.branch)) return;

    // The branch listing failed or was empty, so the requested branch cannot be
    // verified. Keep the caller's explicit choice instead of replacing it.
    if (!!this.branch && this.branches.length === 0) {
      console.log(
        `[Github Loader]: Could not verify branch ${this.branch}. Using it as provided.`
      );
      return;
    }

    console.log(
      "[Github Loader]: Branch not set! Auto-assigning to a default branch."
    );
    const preferred = ["main", "master"].find((name) =>
      this.branches.includes(name)
    );
    this.branch = preferred ?? this.branches[0] ?? "master";
    console.log(`[Github Loader]: Branch auto-assigned to ${this.branch}.`);
    return;
  }

  /**
   * Probes the Github API with the configured access token. When the probe
   * does not succeed (non-OK response or request failure) the token is
   * discarded so later requests are sent without it.
   * @returns {Promise<void>}
   */
  async #validateAccessToken() {
    if (!this.accessToken) return;

    let valid = false;
    try {
      const res = await fetch("https://api.github.com/octocat", {
        method: "GET",
        headers: {
          Authorization: `Bearer ${this.accessToken}`,
          "X-GitHub-Api-Version": "2022-11-28",
        },
      });
      if (!res.ok) throw new Error(res.statusText);
      valid = true;
    } catch (e) {
      console.error(
        "Invalid Github Access Token provided! Access token will not be used",
        e.message
      );
    }

    if (!valid) this.accessToken = null;
    return;
  }

  /**
   * Validates the repo URL, branch and access token and marks the loader ready.
   * @returns {Promise<RepoLoader|undefined>} this loader, or undefined when the URL is invalid.
   */
  async init() {
    if (!this.#validGithubUrl()) return;
    await this.#validBranch();
    await this.#validateAccessToken();
    this.ready = true;
    return this;
  }

  /**
   * Streams every document of the repo/branch through LangChain's Github loader.
   * @returns {Promise<object[]>} the documents yielded by the loader.
   * @throws {Error} when `init()` has not completed successfully.
   */
  async recursiveLoader() {
    if (!this.ready) throw new Error("[Github Loader]: not in ready state!");
    const {
      GithubRepoLoader: LCGithubLoader,
    } = require("langchain/document_loaders/web/github");

    if (this.accessToken)
      console.log(
        `[Github Loader]: Access token set! Recursive loading enabled!`
      );

    const loader = new LCGithubLoader(this.repo, {
      accessToken: this.accessToken,
      branch: this.branch,
      recursive: !!this.accessToken, // Recursive will hit rate limits.
      maxConcurrency: 5,
      unknown: "ignore",
      ignorePaths: this.ignorePaths,
    });

    const docs = [];
    for await (const doc of loader.loadAsStream()) docs.push(doc);
    return docs;
  }

  /**
   * Sort branches to always show either main or master at the top of the result.
   * Each preferred branch is moved to the front as it is encountered; all other
   * branches keep their relative order.
   * @param {string[]} [branches]
   * @returns {string[]} a new, reordered array.
   */
  #branchPrefSort(branches = []) {
    const preferredSort = ["main", "master"];
    return branches.reduce((acc, branch) => {
      if (preferredSort.includes(branch)) return [branch, ...acc];
      return [...acc, branch];
    }, []);
  }

  /**
   * Get all branches for a given repo.
   *
   * Pages through the Github branches API and stores the de-duplicated names
   * on `this.branches`. Request failures end the paging and are logged rather
   * than thrown, so a partial (possibly empty) list may be returned.
   * @returns {Promise<string[]>} branch names with main/master first; `[]` for an invalid repo URL.
   */
  async getRepoBranches() {
    if (!this.#validGithubUrl()) return [];
    await this.#validateAccessToken(); // Ensure API access token is valid for pre-flight

    const perPage = RepoLoader.#BRANCHES_PER_PAGE;
    const names = [];
    let page = 1; // Github pagination is 1-based.
    let polling = true;

    while (polling) {
      console.log(`Fetching page ${page} of branches for ${this.project}`);
      try {
        const res = await fetch(
          `https://api.github.com/repos/${this.author}/${this.project}/branches?per_page=${perPage}&page=${page}`,
          {
            method: "GET",
            headers: {
              ...(this.accessToken
                ? { Authorization: `Bearer ${this.accessToken}` }
                : {}),
              "X-GitHub-Api-Version": "2022-11-28",
            },
          }
        );
        if (!res.ok)
          throw new Error(`Invalid request to Github API: ${res.statusText}`);

        const branchObjects = await res.json();
        names.push(...branchObjects.map((branch) => branch.name));
        // A page that is not full is the last one, so no further request is needed.
        polling = branchObjects.length >= perPage;
        page++;
      } catch (err) {
        polling = false;
        console.log(`RepoLoader.branches`, err);
      }
    }

    this.branches = [...new Set(names)];
    return this.#branchPrefSort(this.branches);
  }
}

module.exports = RepoLoader;
78 changes: 78 additions & 0 deletions collector/utils/extensions/GithubRepo/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
const RepoLoader = require("./RepoLoader");
const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const { writeToServerDocuments } = require("../../files");
const { tokenizeString } = require("../../tokenizer");

/**
 * Loads every file of a Github repository and writes each one as a document
 * into a new folder under the server's documents storage.
 *
 * @param {object} args - Options forwarded unchanged to `RepoLoader`
 *   (repo URL, branch, access token, ignore paths).
 * @returns {Promise<{success: boolean, reason: string|null, data?: object}>}
 *   On success `data` holds the author, repo, branch, number of files found
 *   and the destination folder name. On failure only `success` and `reason`
 *   are set.
 */
async function loadGithubRepo(args) {
  const repo = new RepoLoader(args);
  await repo.init();

  if (!repo.ready)
    return {
      success: false,
      reason: "Could not prepare Github repo for loading! Check URL",
    };

  console.log(
    `-- Working Github ${repo.author}/${repo.project}:${repo.branch} --`
  );
  const docs = await repo.recursiveLoader();
  if (!docs.length) {
    return {
      success: false,
      reason: "No files were found for those settings.",
    };
  }

  console.log(`[Github Loader]: Found ${docs.length} source files. Saving...`);
  // A short random suffix keeps repeated imports of the same repo/branch apart.
  const outFolder = slugify(
    `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}`
  ).toLowerCase();
  const outFolderPath = path.resolve(
    __dirname,
    `../../../../server/storage/documents/${outFolder}`
  );
  // `recursive` creates any missing parent folders, so a storage directory
  // that does not exist yet no longer makes this throw ENOENT.
  fs.mkdirSync(outFolderPath, { recursive: true });

  for (const doc of docs) {
    // Documents without content are skipped and not written.
    if (!doc.pageContent) continue;
    const data = {
      id: v4(),
      url: "github://" + doc.metadata.source,
      title: doc.metadata.source,
      docAuthor: repo.author,
      description: "No description found.",
      docSource: repo.repo,
      chunkSource: doc.metadata.source,
      published: new Date().toLocaleString(),
      wordCount: doc.pageContent.split(" ").length,
      pageContent: doc.pageContent,
      token_count_estimate: tokenizeString(doc.pageContent).length,
    };
    console.log(
      `[Github Loader]: Saving ${doc.metadata.source} to ${outFolder}`
    );
    writeToServerDocuments(
      data,
      `${slugify(doc.metadata.source)}-${data.id}`,
      outFolderPath
    );
  }

  return {
    success: true,
    reason: null,
    data: {
      author: repo.author,
      repo: repo.project,
      branch: repo.branch,
      files: docs.length,
      destination: outFolder,
    },
  };
}

module.exports = loadGithubRepo;
10 changes: 10 additions & 0 deletions collector/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,11 @@ ignore-by-default@^1.0.1:
resolved "https://registry.yarnpkg.com/ignore-by-default/-/ignore-by-default-1.0.1.tgz#48ca6d72f6c6a3af00a9ad4ae6876be3889e2b09"
integrity sha512-Ius2VYcGNk7T90CppJqcIkS5ooHUZyIQK+ClZfMfMNFEF9VSE73Fq+906u/CWu92x4gzZMWOwfFYckPObzdEbA==

ignore@^5.3.0:
version "5.3.0"
resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.0.tgz#67418ae40d34d6999c95ff56016759c718c82f78"
integrity sha512-g7dmpshy+gD7mh88OC9NwSGTKoc3kyLAZQRU1mt53Aw/vnvfXnbC+F/7F7QoYVKbV+KNvJx8wArewKy1vXMtlg==

immediate@~3.0.5:
version "3.0.6"
resolved "https://registry.yarnpkg.com/immediate/-/immediate-3.0.6.tgz#9db1dbd0faf8de6fbe0f5dd5e56bb606280de69b"
Expand Down Expand Up @@ -3127,6 +3132,11 @@ unpipe@1.0.0, unpipe@~1.0.0:
resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
integrity sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==

url-pattern@^1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/url-pattern/-/url-pattern-1.0.3.tgz#0409292471b24f23c50d65a47931793d2b5acfc1"
integrity sha512-uQcEj/2puA4aq1R3A2+VNVBgaWYR24FdWjl7VNW83rnWftlhyzOZ/tBjezRiC2UkIzuxC8Top3IekN3vUf1WxA==

url-template@^2.0.8:
version "2.0.8"
resolved "https://registry.yarnpkg.com/url-template/-/url-template-2.0.8.tgz#fc565a3cccbff7730c775f5641f9555791439f21"
Expand Down
1 change: 1 addition & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"react-loading-icons": "^1.1.0",
"react-loading-skeleton": "^3.1.0",
"react-router-dom": "^6.3.0",
"react-tag-input-component": "^2.0.2",
"react-toastify": "^9.1.3",
"text-case": "^1.0.9",
"truncate": "^3.0.0",
Expand Down
15 changes: 15 additions & 0 deletions frontend/src/App.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ const GeneralExportImport = lazy(() =>
import("@/pages/GeneralSettings/ExportImport")
);
const GeneralSecurity = lazy(() => import("@/pages/GeneralSettings/Security"));
const DataConnectors = lazy(() =>
import("@/pages/GeneralSettings/DataConnectors")
);
const DataConnectorSetup = lazy(() =>
import("@/pages/GeneralSettings/DataConnectors/Connectors")
);
const OnboardingFlow = lazy(() => import("@/pages/OnboardingFlow"));

export default function App() {
Expand Down Expand Up @@ -103,6 +109,15 @@ export default function App() {
path="/settings/workspaces"
element={<ManagerRoute Component={AdminWorkspaces} />}
/>
<Route
path="/settings/data-connectors"
element={<ManagerRoute Component={DataConnectors} />}
/>
<Route
path="/settings/data-connectors/:connector"
element={<ManagerRoute Component={DataConnectorSetup} />}
/>

{/* Onboarding Flow */}
<Route path="/onboarding" element={<OnboardingFlow />} />
</Routes>
Expand Down
Loading