这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions collector/processLink/convert/generic.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ async function scrapeGenericUrl({
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`url-${slugify(filename)}-${data.id}`
);
filename: `url-${slugify(filename)}-${data.id}`,
});
console.log(`[SUCCESS]: URL ${link} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
}
Expand Down
6 changes: 3 additions & 3 deletions collector/processRawText/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ async function processRawText(textContent, metadata) {
token_count_estimate: tokenizeString(textContent),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`raw-${stripAndSlug(metadata.title)}-${data.id}`
);
filename: `raw-${stripAndSlug(metadata.title)}-${data.id}`,
});
console.log(`[SUCCESS]: Raw text and metadata saved & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
}
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asAudio.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(
`[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asDocx.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asEPub.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asImage.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asMbox.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
};

item++;
const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}-msg-${item}`
);
filename: `${slugify(filename)}-${data.id}-msg-${item}`,
});
documents.push(document);
}

Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asOfficeMime.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asPDF/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
Expand Down
6 changes: 3 additions & 3 deletions collector/processSingleFile/convert/asTxt.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
const document = writeToServerDocuments({
data,
`${slugify(filename)}-${data.id}`
);
filename: `${slugify(filename)}-${data.id}`,
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] };
Expand Down
10 changes: 5 additions & 5 deletions collector/processSingleFile/convert/asXlsx.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content),
};

const document = writeToServerDocuments(
sheetData,
`sheet-${slugify(name)}`,
outFolderPath
);
const document = writeToServerDocuments({
data: sheetData,
filename: `sheet-${slugify(name)}`,
destinationOverride: outFolderPath,
});
documents.push(document);
console.log(
`[SUCCESS]: Sheet "${name}" converted & ready for embedding.`
Expand Down
6 changes: 5 additions & 1 deletion collector/utils/extensions/Confluence/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,11 @@ async function loadConfluence(
const fileName = sanitizeFileName(
`${slugify(doc.metadata.title)}-${data.id}`
);
writeToServerDocuments(data, fileName, outFolderPath);
writeToServerDocuments({
data,
filename: fileName,
destinationOverride: outFolderPath,
});
});

return {
Expand Down
6 changes: 5 additions & 1 deletion collector/utils/extensions/DrupalWiki/DrupalWiki/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,11 @@ class DrupalWiki {
console.log(
`[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'`
);
writeToServerDocuments(data, fileName, this.storagePath);
writeToServerDocuments({
data,
filename: fileName,
destinationOverride: this.storagePath,
});
}

/**
Expand Down
6 changes: 5 additions & 1 deletion collector/utils/extensions/ObsidianVault/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ async function loadObsidianVault({ files = [] }) {
const targetFileName = sanitizeFileName(
`${slugify(file.name)}-${data.id}`
);
writeToServerDocuments(data, targetFileName, outFolderPath);
writeToServerDocuments({
data,
filename: targetFileName,
destinationOverride: outFolderPath,
});
results.push({ file: file.path, status: "success" });
} catch (e) {
console.error(`Failed to process ${file.path}:`, e);
Expand Down
8 changes: 4 additions & 4 deletions collector/utils/extensions/RepoLoader/GithubRepo/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ async function loadGithubRepo(args, response) {
console.log(
`[GitHub Loader]: Saving ${doc.metadata.source} to ${outFolder}`
);
writeToServerDocuments(
writeToServerDocuments({
data,
`${slugify(doc.metadata.source)}-${data.id}`,
outFolderPath
);
filename: `${slugify(doc.metadata.source)}-${data.id}`,
destinationOverride: outFolderPath,
});
}

return {
Expand Down
8 changes: 4 additions & 4 deletions collector/utils/extensions/RepoLoader/GitlabRepo/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ async function loadGitlabRepo(args, response) {
`[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}`
);

writeToServerDocuments(
writeToServerDocuments({
data,
sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
outFolderPath
);
filename: sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
destinationOverride: outFolderPath,
});
}

return {
Expand Down
6 changes: 5 additions & 1 deletion collector/utils/extensions/WebsiteDepth/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,11 @@ async function bulkScrapePages(links, outFolderPath) {
token_count_estimate: tokenizeString(content),
};

writeToServerDocuments(data, data.title, outFolderPath);
writeToServerDocuments({
data,
filename: data.title,
destinationOverride: outFolderPath,
});
scrapedData.push(data);

console.log(`Successfully scraped ${link}.`);
Expand Down
8 changes: 4 additions & 4 deletions collector/utils/extensions/YoutubeTranscript/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@ async function loadYouTubeTranscript({ url }) {
};

console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
writeToServerDocuments(
writeToServerDocuments({
data,
sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
outFolderPath
);
filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
destinationOverride: outFolderPath,
});

return {
success: true,
Expand Down
18 changes: 14 additions & 4 deletions collector/utils/files/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,21 @@ function createdDate(filepath) {
}
}

function writeToServerDocuments(
/**
* Writes a document to the server documents folder.
* @param {Object} params - The parameters for the function.
* @param {Object} params.data - The data to write to the file. Must look like a document object.
* @param {string} params.filename - The name of the file to write to.
* @param {string|null} params.destinationOverride - A forced destination to write to - will be honored if provided.
* @returns {Object} - The data with the location added.
*/
function writeToServerDocuments({
data = {},
filename,
destinationOverride = null
) {
filename = null,
destinationOverride = null,
}) {
if (!filename) throw new Error("Filename is required!");

let destination = null;
if (destinationOverride) destination = path.resolve(destinationOverride);
else destination = path.resolve(documentsFolder, "custom-documents");
Expand Down