这是indexloc提供的服务,不要输入任何密码
Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion collector/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"lint": "yarn prettier --ignore-path ../.prettierignore --write ./processSingleFile ./processLink ./utils index.js"
},
"dependencies": {
"@langchain/community": "^0.2.23",
"@xenova/transformers": "^2.11.0",
"bcrypt": "^5.1.0",
"body-parser": "^1.20.2",
Expand Down Expand Up @@ -48,4 +49,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,26 +104,25 @@ class GitHubRepoLoader {
async recursiveLoader() {
if (!this.ready) throw new Error("[Github Loader]: not in ready state!");
const {
GithubRepoLoader: LCGithubLoader,
} = require("langchain/document_loaders/web/github");
GithubRepoLoader,
} = require("@langchain/community/document_loaders/web/github");

if (this.accessToken)
console.log(
`[Github Loader]: Access token set! Recursive loading enabled!`
);

const loader = new LCGithubLoader(this.repo, {
accessToken: this.accessToken,
const loader = new GithubRepoLoader(this.repo, {
branch: this.branch,
recursive: !!this.accessToken, // Recursive will hit rate limits.
maxConcurrency: 5,
unknown: "ignore",
unknown: "warn",
accessToken: this.accessToken,
ignorePaths: this.ignorePaths,
verbose: true,
});

const docs = [];
for await (const doc of loader.loadAsStream()) docs.push(doc);
const docs = await loader.load();
return docs;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const minimatch = require("minimatch");
const ignore = require("ignore");

/**
* @typedef {Object} RepoLoaderArgs
Expand Down Expand Up @@ -34,6 +34,7 @@ class GitLabRepoLoader {
this.branch = args?.branch;
this.accessToken = args?.accessToken || null;
this.ignorePaths = args?.ignorePaths || [];
this.ignoreFilter = ignore().add(this.ignorePaths);
this.withIssues = args?.fetchIssues || false;

this.projectId = null;
Expand Down Expand Up @@ -137,7 +138,7 @@ class GitLabRepoLoader {
console.log(`[Gitlab Loader]: Fetched ${files.length} files.`);

for (const file of files) {
if (this.ignorePaths.some((path) => file.path.includes(path))) continue;
if (this.ignoreFilter.ignores(file.path)) continue;

docs.push({
pageContent: file.content,
Expand Down Expand Up @@ -216,13 +217,8 @@ class GitLabRepoLoader {
// Fetch all the files that are not ignored in parallel.
pagePromises = filesPage
.filter((file) => {
if (file.type !== "blob") {
return false;
}
const isIgnored = this.ignorePaths.some((ignorePattern) =>
minimatch(file.path, ignorePattern, { matchBase: true })
);
return !isIgnored;
if (file.type !== "blob") return false;
return !this.ignoreFilter.ignores(file.path);
})
.map(async (file) => {
const content = await this.fetchSingleFileContents(file.path);
Expand Down
109 changes: 109 additions & 0 deletions collector/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@
resolved "https://registry.yarnpkg.com/@huggingface/jinja/-/jinja-0.2.2.tgz#faeb205a9d6995089bef52655ddd8245d3190627"
integrity sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==

"@langchain/community@^0.2.23":
version "0.2.23"
resolved "https://registry.yarnpkg.com/@langchain/community/-/community-0.2.23.tgz#20560e107bcc8432c42e499f1b9292d41a3732f2"
integrity sha512-p1n/zZ1F+O5l51RzeoUeJyhpzq6Wp11tkqKOj8oThKOQJgLhO7q6iFIvmKThzL7mZCNNuPM5r1OPnU4wO6iF/A==
dependencies:
"@langchain/core" ">=0.2.16 <0.3.0"
"@langchain/openai" ">=0.1.0 <0.3.0"
binary-extensions "^2.2.0"
expr-eval "^2.0.2"
flat "^5.0.2"
js-yaml "^4.1.0"
langchain "~0.2.3"
langsmith "~0.1.30"
uuid "^10.0.0"
zod "^3.22.3"
zod-to-json-schema "^3.22.5"

"@langchain/community@~0.0.47":
version "0.0.53"
resolved "https://registry.yarnpkg.com/@langchain/community/-/community-0.0.53.tgz#a9aaedffa0ed2977e8d302d74e9f90a49a6da037"
Expand All @@ -78,6 +95,23 @@
zod "^3.22.3"
zod-to-json-schema "^3.22.5"

"@langchain/core@>=0.2.11 <0.3.0", "@langchain/core@>=0.2.16 <0.3.0":
version "0.2.20"
resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.2.20.tgz#5115781b0a86db3ce4b697e473405892c09621ca"
integrity sha512-WPBjrzOj79/yqjloDUIw1GDhuRQfHis07TyyDj+qS81nHh0svSasetKcqAZ3L5JoPcBmEL7rRBtM+OcyC3mLVg==
dependencies:
ansi-styles "^5.0.0"
camelcase "6"
decamelize "1.2.0"
js-tiktoken "^1.0.12"
langsmith "~0.1.39"
mustache "^4.2.0"
p-queue "^6.6.2"
p-retry "4"
uuid "^10.0.0"
zod "^3.22.4"
zod-to-json-schema "^3.22.3"

"@langchain/core@~0.1", "@langchain/core@~0.1.56", "@langchain/core@~0.1.60":
version "0.1.61"
resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.1.61.tgz#9313363e04f1c6981a938b2909c44ce6fceb2736"
Expand All @@ -96,6 +130,17 @@
zod "^3.22.4"
zod-to-json-schema "^3.22.3"

"@langchain/openai@>=0.1.0 <0.3.0":
version "0.2.5"
resolved "https://registry.yarnpkg.com/@langchain/openai/-/openai-0.2.5.tgz#e85b983986a7415ea743d4c854bb0674134334d4"
integrity sha512-gQXS5VBFyAco0jgSnUVan6fYVSIxlffmDaeDGpXrAmz2nQPgiN/h24KYOt2NOZ1zRheRzRuO/CfRagMhyVUaFA==
dependencies:
"@langchain/core" ">=0.2.16 <0.3.0"
js-tiktoken "^1.0.12"
openai "^4.49.1"
zod "^3.22.4"
zod-to-json-schema "^3.22.3"

"@langchain/openai@~0.0.28":
version "0.0.28"
resolved "https://registry.yarnpkg.com/@langchain/openai/-/openai-0.0.28.tgz#afaeec61b44816935db9ae937496c964c81ab571"
Expand Down Expand Up @@ -1769,6 +1814,13 @@ js-tiktoken@^1.0.11, js-tiktoken@^1.0.7, js-tiktoken@^1.0.8:
dependencies:
base64-js "^1.5.1"

js-tiktoken@^1.0.12:
version "1.0.12"
resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.12.tgz#af0f5cf58e5e7318240d050c8413234019424211"
integrity sha512-L7wURW1fH9Qaext0VzaUDpFGVQgjkdE3Dgsy9/+yXyGEpBKnylTd0mU0bfbNkKDlXRb6TEsZkwuflu1B8uQbJQ==
dependencies:
base64-js "^1.5.1"

js-tokens@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"
Expand Down Expand Up @@ -1844,6 +1896,28 @@ langchain@0.1.36:
zod "^3.22.4"
zod-to-json-schema "^3.22.3"

langchain@~0.2.3:
version "0.2.12"
resolved "https://registry.yarnpkg.com/langchain/-/langchain-0.2.12.tgz#3fac0b9519a070689b6dd679d5854abc57824dcf"
integrity sha512-ZHtJrHUpridZ7IQu7N/wAQ6iMAAO7VLzkupHqKP79S6p+alrPbn1BjRnh+PeGm92YiY5DafTCuvchmujxx7bCQ==
dependencies:
"@langchain/core" ">=0.2.11 <0.3.0"
"@langchain/openai" ">=0.1.0 <0.3.0"
"@langchain/textsplitters" "~0.0.0"
binary-extensions "^2.2.0"
js-tiktoken "^1.0.12"
js-yaml "^4.1.0"
jsonpointer "^5.0.1"
langchainhub "~0.0.8"
langsmith "~0.1.30"
ml-distance "^4.0.0"
openapi-types "^12.1.3"
p-retry "4"
uuid "^10.0.0"
yaml "^2.2.1"
zod "^3.22.4"
zod-to-json-schema "^3.22.3"

langchainhub@~0.0.8:
version "0.0.8"
resolved "https://registry.yarnpkg.com/langchainhub/-/langchainhub-0.0.8.tgz#fd4b96dc795e22e36c1a20bad31b61b0c33d3110"
Expand All @@ -1860,6 +1934,18 @@ langsmith@~0.1.1, langsmith@~0.1.7:
p-retry "4"
uuid "^9.0.0"

langsmith@~0.1.30, langsmith@~0.1.39:
version "0.1.40"
resolved "https://registry.yarnpkg.com/langsmith/-/langsmith-0.1.40.tgz#9708889386a5b9d0eb43dd3a9eba93513b57101d"
integrity sha512-11E2WLbh/+41+Qc0w8fJJTC/iz91BA+zXRMX/Wz0KSstnfzIPBoiWa++Kp2X8yCIDNywWWLJhy/B8gYzm7VKig==
dependencies:
"@types/uuid" "^9.0.1"
commander "^10.0.1"
p-queue "^6.6.2"
p-retry "4"
semver "^7.6.3"
uuid "^9.0.0"

leac@^0.6.0:
version "0.6.0"
resolved "https://registry.yarnpkg.com/leac/-/leac-0.6.0.tgz#dcf136e382e666bd2475f44a1096061b70dc0912"
Expand Down Expand Up @@ -2417,6 +2503,19 @@ openai@^4.32.1:
node-fetch "^2.6.7"
web-streams-polyfill "^3.2.1"

openai@^4.49.1:
version "4.54.0"
resolved "https://registry.yarnpkg.com/openai/-/openai-4.54.0.tgz#eeb209c6892b997e524181b6ddb7e27bf4d09389"
integrity sha512-e/12BdtTtj+tXs7iHm+Dm7H7WjEWnw7O52B2wSfCQ6lD5F6cvjzo7cANXy5TJ1Q3/qc8YRPT5wBTTFtP5sBp1g==
dependencies:
"@types/node" "^18.11.18"
"@types/node-fetch" "^2.6.4"
abort-controller "^3.0.0"
agentkeepalive "^4.2.1"
form-data-encoder "1.7.2"
formdata-node "^4.3.2"
node-fetch "^2.6.7"

openapi-types@^12.1.3:
version "12.1.3"
resolved "https://registry.yarnpkg.com/openapi-types/-/openapi-types-12.1.3.tgz#471995eb26c4b97b7bd356aacf7b91b73e777dd3"
Expand Down Expand Up @@ -2863,6 +2962,11 @@ semver@^7.3.5, semver@^7.5.4:
dependencies:
lru-cache "^6.0.0"

semver@^7.6.3:
version "7.6.3"
resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143"
integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==

semver@~7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e"
Expand Down Expand Up @@ -3336,6 +3440,11 @@ utils-merge@1.0.1:
resolved "https://registry.yarnpkg.com/utils-merge/-/utils-merge-1.0.1.tgz#9f95710f50a267947b2ccc124741c1028427e713"
integrity sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==

uuid@^10.0.0:
version "10.0.0"
resolved "https://registry.yarnpkg.com/uuid/-/uuid-10.0.0.tgz#5a95aa454e6e002725c79055fd42aaba30ca6294"
integrity sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==

uuid@^9.0.0:
version "9.0.1"
resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.1.tgz#e188d4c8853cc722220392c424cd637f32293f30"
Expand Down