这是indexloc提供的服务，不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
11497d4
create gitlab copies of github files
DipFlip Jul 10, 2024
341bd04
Added collector utilities for interacting with Gitlab repositories.
DipFlip Jul 10, 2024
d7dbc25
The GitHub methods have been converted to work with GitLab.
DipFlip Jul 10, 2024
7754723
Added GitLab option to the UI.
DipFlip Jul 10, 2024
33ee73b
change GitLab to Gitlab
DipFlip Jul 10, 2024
ef698ed
add gitlab file
DipFlip Jul 10, 2024
78a1421
Added GitLab as a new data connector option.
DipFlip Jul 10, 2024
b77fcc5
set to ollama
DipFlip Jul 11, 2024
8ea5fc6
Adapted the GitHub importer to work with GitLab repositories.
DipFlip Jul 11, 2024
2b12df2
Added the GitLab equivalent methods to the dataConnector.js file.
DipFlip Jul 11, 2024
076bd68
Added missing GitLab parts to the extensions/index.js file.
DipFlip Jul 11, 2024
ce80f16
Added missing GitLab parts to the extensions/index.js file.
DipFlip Jul 11, 2024
3392c77
Added GitLab endpoints to match the existing GitHub endpoints in serv…
DipFlip Jul 11, 2024
5cf3065
Replaced the GitLab RepoLoader's dependency on axios with the built-i…
DipFlip Jul 11, 2024
4ed56f5
Simplified the GitLab implementation to more closely match the GitHub…
DipFlip Jul 11, 2024
79f66b0
Refactor GitLabRepo implementation to match GitHub methods and endpoints
DipFlip Jul 22, 2024
fc85307
update loading of files within subfolders
DipFlip Jul 22, 2024
13b9aca
give logo a white background
DipFlip Jul 22, 2024
a1da17c
fix subfolder naming
DipFlip Jul 22, 2024
a253ce6
revert unecessary change
DipFlip Jul 22, 2024
f8653a3
correct gitlab repo name
DipFlip Jul 22, 2024
0c2b02e
run yarn lint
DipFlip Jul 22, 2024
7ce0b82
remove aider addition in gitignore
DipFlip Jul 22, 2024
9e69aa6
revert change to .env.example
DipFlip Jul 22, 2024
d4416dc
Merge branch 'master' into 812-gitlab-connector
timothycarambat Jul 22, 2024
ba83191
enable collecting multiple repos
DipFlip Jul 23, 2024
9c22ecb
refactor
timothycarambat Jul 23, 2024
6f15dc0
fix package
timothycarambat Jul 23, 2024
4f3094a
move middleware to correct utils dir
timothycarambat Jul 23, 2024
9d3434b
forgot file
timothycarambat Jul 23, 2024
9a22fbc
test dev build
timothycarambat Jul 23, 2024
f768123
stub dev build
timothycarambat Jul 23, 2024
d7f5f28
Merge branch 'master' into 812-gitlab-connector
timothycarambat Jul 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dev-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ concurrency:

on:
push:
branches: ['1915-docker-perms'] # master branch only. Do not modify.
branches: ['-dev'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'
Expand Down
14 changes: 8 additions & 6 deletions collector/extensions/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const { setDataSigner } = require("../middleware/setDataSigner");
const { verifyPayloadIntegrity } = require("../middleware/verifyIntegrity");
const { resolveRepoLoader, resolveRepoLoaderFunction } = require("../utils/extensions/RepoLoader");
const { reqBody } = require("../utils/http");
const { validURL } = require("../utils/url");
const RESYNC_METHODS = require("./resync");
Expand Down Expand Up @@ -28,15 +29,16 @@ function extensions(app) {
)

app.post(
"/ext/github-repo",
"/ext/:repo_platform-repo",
[verifyPayloadIntegrity, setDataSigner],
async function (request, response) {
try {
const { loadGithubRepo } = require("../utils/extensions/GithubRepo");
const { success, reason, data } = await loadGithubRepo(
const loadRepo = resolveRepoLoaderFunction(request.params.repo_platform);
const { success, reason, data } = await loadRepo(
reqBody(request),
response,
);
console.log({ success, reason, data })
response.status(200).json({
success,
reason,
Expand All @@ -56,12 +58,12 @@ function extensions(app) {

// gets all branches for a specific repo
app.post(
"/ext/github-repo/branches",
"/ext/:repo_platform-repo/branches",
[verifyPayloadIntegrity],
async function (request, response) {
try {
const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
const allBranches = await new GithubRepoLoader(
const RepoLoader = resolveRepoLoader(request.params.repo_platform);
const allBranches = await new RepoLoader(
reqBody(request)
).getRepoBranches();
response.status(200).json({
Expand Down
2 changes: 1 addition & 1 deletion collector/extensions/resync/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ async function resyncGithub({ chunkSource }, response) {
// Github file data is `payload` encrypted (might contain PAT). So we need to expand its
// encrypted payload back into query params so we can reFetch the page with same access token/params.
const source = response.locals.encryptionWorker.expandPayload(chunkSource);
const { fetchGithubFile } = require("../../utils/extensions/GithubRepo");
const { fetchGithubFile } = require("../../utils/extensions/RepoLoader/GithubRepo");
const { success, reason, content } = await fetchGithubFile({
repoUrl: `https:${source.pathname}`, // need to add back the real protocol
branch: source.searchParams.get('branch'),
Expand Down
3 changes: 2 additions & 1 deletion collector/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"mammoth": "^1.6.0",
"mbox-parser": "^1.0.1",
"mime": "^3.0.0",
"minimatch": "5.1.0",
"moment": "^2.29.4",
"multer": "^1.4.5-lts.1",
"node-html-parser": "^6.1.13",
Expand All @@ -50,4 +51,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
class RepoLoader {
/**
* @typedef {Object} RepoLoaderArgs
* @property {string} repo - The GitHub repository URL.
* @property {string} [branch] - The branch to load from (optional).
* @property {string} [accessToken] - GitHub access token for authentication (optional).
* @property {string[]} [ignorePaths] - Array of paths to ignore when loading (optional).
*/

/**
* @class
* @classdesc Loads and manages GitHub repository content.
*/
class GitHubRepoLoader {
/**
* Creates an instance of RepoLoader.
* @param {RepoLoaderArgs} [args] - The configuration options.
* @returns {GitHubRepoLoader}
*/
constructor(args = {}) {
this.ready = false;
this.repo = args?.repo;
Expand Down Expand Up @@ -67,6 +84,10 @@ class RepoLoader {
return;
}

/**
* Initializes the RepoLoader instance.
* @returns {Promise<RepoLoader>} The initialized RepoLoader instance.
*/
async init() {
if (!this.#validGithubUrl()) return;
await this.#validBranch();
Expand All @@ -75,6 +96,11 @@ class RepoLoader {
return this;
}

/**
* Recursively loads the repository content.
* @returns {Promise<Array<Object>>} An array of loaded documents.
* @throws {Error} If the RepoLoader is not in a ready state.
*/
async recursiveLoader() {
if (!this.ready) throw new Error("[Github Loader]: not in ready state!");
const {
Expand Down Expand Up @@ -109,7 +135,10 @@ class RepoLoader {
}, []);
}

// Get all branches for a given repo.
/**
* Retrieves all branches for the repository.
* @returns {Promise<string[]>} An array of branch names.
*/
async getRepoBranches() {
if (!this.#validGithubUrl() || !this.author || !this.project) return [];
await this.#validateAccessToken(); // Ensure API access token is valid for pre-flight
Expand Down Expand Up @@ -151,6 +180,11 @@ class RepoLoader {
return this.#branchPrefSort(this.branches);
}

/**
* Fetches the content of a single file from the repository.
* @param {string} sourceFilePath - The path to the file in the repository.
* @returns {Promise<string|null>} The content of the file, or null if fetching fails.
*/
async fetchSingleFile(sourceFilePath) {
try {
return fetch(
Expand Down Expand Up @@ -182,4 +216,4 @@ class RepoLoader {
}
}

module.exports = RepoLoader;
module.exports = GitHubRepoLoader;
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const { writeToServerDocuments } = require("../../files");
const { tokenizeString } = require("../../tokenizer");
const { writeToServerDocuments } = require("../../../files");
const { tokenizeString } = require("../../../tokenizer");

/**
* Load in a Github Repo recursively or just the top level if no PAT is provided
Expand Down Expand Up @@ -42,7 +42,7 @@ async function loadGithubRepo(args, response) {
process.env.NODE_ENV === "development"
? path.resolve(
__dirname,
`../../../../server/storage/documents/${outFolder}`
`../../../../../server/storage/documents/${outFolder}`
)
: path.resolve(process.env.STORAGE_DIR, `documents/${outFolder}`);

Expand Down
Loading