这是indexloc提供的服务，不要输入任何密码
Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 28 additions & 7 deletions .github/workflows/build-and-push-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ jobs:
shell: bash
run: echo "repo=${GITHUB_REPOSITORY,,}" >> $GITHUB_OUTPUT
id: lowercase_repo

- name: Check if DockerHub build needed
shell: bash
run: |
# Check if the secret for USERNAME is set (don't even check for the password)
if [[ -z "${{ secrets.DOCKER_USERNAME }}" ]]; then
echo "DockerHub build not needed"
echo "enabled=false" >> $GITHUB_OUTPUT
else
echo "DockerHub build needed"
echo "enabled=true" >> $GITHUB_OUTPUT
fi
id: dockerhub

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
Expand All @@ -45,6 +58,8 @@ jobs:

- name: Log in to Docker Hub
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
# Only log in to Docker Hub when the DOCKER_USERNAME secret is set (steps.dockerhub.outputs.enabled), so forks without Docker Hub credentials can still build and push to GHCR
if: steps.dockerhub.outputs.enabled == 'true'
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
Expand All @@ -61,18 +76,24 @@ jobs:
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: |
mintplexlabs/anythingllm
${{ steps.dockerhub.outputs.enabled == 'true' && 'mintplexlabs/anythingllm' || '' }}
ghcr.io/${{ github.repository }}

tags: |
type=raw,value=latest,enable={{is_default_branch}}
type=sha
type=ref,event=branch
type=ref,event=tag
type=ref,event=pr


- name: Build and push multi-platform Docker image
uses: docker/build-push-action@v5
with:
context: .
file: ./docker/Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
${{ steps.meta.outputs.tags }}
${{ github.ref_name == 'master' && 'mintplexlabs/anythingllm:latest' || '' }}
${{ github.ref_name == 'master' && format('ghcr.io/{0}:{1}', steps.lowercase_repo.outputs.repo, 'latest') || '' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
4 changes: 2 additions & 2 deletions collector/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ app.post("/process", async function (request, response) {
const targetFilename = path
.normalize(filename)
.replace(/^(\.\.(\/|\\|$))+/, "");
const { success, reason } = await processSingleFile(targetFilename);
response.status(200).json({ filename: targetFilename, success, reason });
const {document, success, reason } = await processSingleFile(targetFilename);
response.status(200).json({ document: document, success, reason });
} catch (e) {
console.error(e);
response.status(200).json({
Expand Down
4 changes: 3 additions & 1 deletion collector/processSingleFile/convert/asAudio.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,14 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content).length,
};

const { pageContent, token_count_estimate, ...responseData } = data;

writeToServerDocuments(data, `${slugify(filename)}-${data.id}`);
trashFile(fullFilePath);
console.log(
`[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`
);
return { success: true, reason: null };
return { success: true, reason: null, document: data };
}

async function convertToWavAudioData(sourcePath) {
Expand Down
12 changes: 7 additions & 5 deletions collector/processSingleFile/convert/asDocx.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
const loader = new DocxLoader(fullFilePath);

console.log(`-- Working ${filename} --`);
let pageContent = [];
let docxPageContent = [];
const docs = await loader.load();
for (const doc of docs) {
console.log(doc.metadata);
console.log(`-- Parsing content from docx page --`);
if (!doc.pageContent.length) continue;
pageContent.push(doc.pageContent);
docxPageContent.push(doc.pageContent);
}

if (!pageContent.length) {
if (!docxPageContent.length) {
console.error(`Resulting text content was empty for ${filename}.`);
trashFile(fullFilePath);
return { success: false, reason: `No text content found in ${filename}.` };
}

const content = pageContent.join("");
const content = docxPageContent.join("");
const data = {
id: v4(),
url: "file://" + fullFilePath,
Expand All @@ -42,10 +42,12 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content).length,
};

const { pageContent, token_count_estimate, ...responseData } = data;

writeToServerDocuments(data, `${slugify(filename)}-${data.id}`);
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null };
return { success: true, reason: null, document: data };
}

module.exports = asDocX;
4 changes: 3 additions & 1 deletion collector/processSingleFile/convert/asMbox.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
writeToServerDocuments(data, `${slugify(filename)}-${data.id}-msg-${item}`);
}

const { pageContent, token_count_estimate, ...responseData } = data;

trashFile(fullFilePath);
console.log(
`[SUCCESS]: ${filename} messages converted & ready for embedding.\n`
);
return { success: true, reason: null };
return { success: true, reason: null, document: data };
}

module.exports = asMbox;
4 changes: 3 additions & 1 deletion collector/processSingleFile/convert/asOfficeMime.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content).length,
};

const { pageContent, token_count_estimate, ...responseData } = data;

writeToServerDocuments(data, `${slugify(filename)}-${data.id}`);
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null };
return { success: true, reason: null, document: data };
}

module.exports = asOfficeMime;
13 changes: 8 additions & 5 deletions collector/processSingleFile/convert/asPDF.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ async function asPDF({ fullFilePath = "", filename = "" }) {
});

console.log(`-- Working ${filename} --`);
const pageContent = [];
const pdfPageContent = [];
const docs = await pdfLoader.load();
for (const doc of docs) {
console.log(
Expand All @@ -23,16 +23,16 @@ async function asPDF({ fullFilePath = "", filename = "" }) {
} --`
);
if (!doc.pageContent.length) continue;
pageContent.push(doc.pageContent);
pdfPageContent.push(doc.pageContent);
}

if (!pageContent.length) {
if (!pdfPageContent.length) {
console.error(`Resulting text content was empty for ${filename}.`);
trashFile(fullFilePath);
return { success: false, reason: `No text content found in ${filename}.` };
}

const content = pageContent.join("");
const content = pdfPageContent.join("");
const data = {
id: v4(),
url: "file://" + fullFilePath,
Expand All @@ -47,10 +47,13 @@ async function asPDF({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content).length,
};

const { pageContent, token_count_estimate, ...responseData } = data;

writeToServerDocuments(data, `${slugify(filename)}-${data.id}`);
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null };

return { success: true, reason: null, document: responseData };
}

module.exports = asPDF;
5 changes: 4 additions & 1 deletion collector/processSingleFile/convert/asTxt.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content).length,
};

const { pageContent, token_count_estimate, ...responseData } = data;

writeToServerDocuments(data, `${slugify(filename)}-${data.id}`);
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null };

return { success: true, reason: null, document: responseData};
}

module.exports = asTxt;
34 changes: 23 additions & 11 deletions server/endpoints/api/document/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function apiDocumentEndpoints(app) {
[validApiKey],
handleUploads.single("file"),
async (request, response) => {
/*
/*
#swagger.tags = ['Documents']
#swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.'

Expand Down Expand Up @@ -47,11 +47,22 @@ function apiDocumentEndpoints(app) {
example: {
success: true,
error: null,
"document": {
"id": "115f2bab-957b-42e7-b5d0-16cac2379bce",
"url": "file:///home/user/Workspace/anything-llm/collector/hotdir/file.txt",
"title": "file.txt",
"docAuthor": "Unknown",
"description": "Unknown",
"docSource": "a text file uploaded by the user.",
"chunkSource": "Bfile.txt",
"published": "11/01/2024, 16:25:09",
"wordCount": 17653
}
}
}
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
Expand All @@ -72,16 +83,17 @@ function apiDocumentEndpoints(app) {
.end();
}

const { success, reason } = await processDocument(originalname);
const { document, success, reason } = await processDocument(originalname);
if (!success) {
response.status(500).json({ success: false, error: reason }).end();
}


console.log(
`Document ${originalname} uploaded processed and successfully. It is now available in documents.`
);
await Telemetry.sendTelemetry("document_uploaded");
response.status(200).json({ success: true, error: null });
response.status(200).json({ success: success, error: null, document: document});
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
Expand All @@ -90,7 +102,7 @@ function apiDocumentEndpoints(app) {
);

app.get("/v1/documents", [validApiKey], async (_, response) => {
/*
/*
#swagger.tags = ['Documents']
#swagger.description = 'List of all locally-stored documents in instance'
#swagger.responses[200] = {
Expand All @@ -115,9 +127,9 @@ function apiDocumentEndpoints(app) {
}
}
}
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
Expand All @@ -137,7 +149,7 @@ function apiDocumentEndpoints(app) {
"/v1/document/accepted-file-types",
[validApiKey],
async (_, response) => {
/*
/*
#swagger.tags = ['Documents']
#swagger.description = 'Check available filetypes and MIMEs that can be uploaded.'
#swagger.responses[200] = {
Expand Down Expand Up @@ -166,9 +178,9 @@ function apiDocumentEndpoints(app) {
}
}
}
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
Expand Down
13 changes: 12 additions & 1 deletion server/swagger/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,18 @@
"type": "object",
"example": {
"success": true,
"error": null
"error": null,
"document": {
"id": "115f2bab-957b-42e7-b5d0-16cac2379bce",
"url": "file:///home/user/Workspace/anything-llm/collector/hotdir/file.txt",
"title": "file.txt",
"docAuthor": "Unknown",
"description": "Unknown",
"docSource": "a text file uploaded by the user.",
"chunkSource": "Bfile.txt",
"published": "11/01/2024, 16:25:09",
"wordCount": 17653
}
}
}
}
Expand Down