Mintplex-Labs · timothycarambat · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/collector/index.js b/collector/index.js
@@ -25,7 +25,7 @@ app.use(
 );
 
 app.post("/process", async function (request, response) {
-  const { filename } = reqBody(request);
+  const { filename, options = {} } = reqBody(request);
   try {
     const targetFilename = path
       .normalize(filename)
@@ -34,7 +34,7 @@ app.post("/process", async function (request, response) {
       success,
       reason,
       documents = [],
-    } = await processSingleFile(targetFilename);
+    } = await processSingleFile(targetFilename, options);
     response
       .status(200)
       .json({ filename: targetFilename, success, reason, documents });

diff --git a/collector/package.json b/collector/package.json
@@ -33,6 +33,7 @@
     "moment": "^2.29.4",
     "multer": "^1.4.5-lts.1",
     "officeparser": "^4.0.5",
+    "openai": "^3.2.1",
     "pdf-parse": "^1.1.1",
     "puppeteer": "~21.5.2",
     "slugify": "^1.6.6",
@@ -46,4 +47,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^2.4.1"
   }
-}
+}
diff --git a/collector/processSingleFile/convert/asAudio.js b/collector/processSingleFile/convert/asAudio.js
@@ -1,5 +1,3 @@
-const fs = require("fs");
-const path = require("path");
 const { v4 } = require("uuid");
 const {
   createdDate,
@@ -9,39 +7,35 @@ const {
 const { tokenizeString } = require("../../utils/tokenizer");
 const { default: slugify } = require("slugify");
 const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");
+const { OpenAiWhisper } = require("../../utils/WhisperProviders/OpenAiWhisper");
 
-async function asAudio({ fullFilePath = "", filename = "" }) {
-  const whisper = new LocalWhisper();
+// Transcription backends selectable through `options.whisperProvider`;
+// asAudio falls back to `local` when the requested key is not listed here.
+const WHISPER_PROVIDERS = {
+  openai: OpenAiWhisper,
+  local: LocalWhisper,
+};
+
+async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
+  const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
+    options?.whisperProvider
+  )
+    ? WHISPER_PROVIDERS[options?.whisperProvider]
+    : WHISPER_PROVIDERS.local;
 
   console.log(`-- Working ${filename} --`);
-  const transcriberPromise = new Promise((resolve) =>
-    whisper.client().then((client) => resolve(client))
-  );
-  const audioDataPromise = new Promise((resolve) =>
-    convertToWavAudioData(fullFilePath).then((audioData) => resolve(audioData))
-  );
-  const [audioData, transcriber] = await Promise.all([
-    audioDataPromise,
-    transcriberPromise,
-  ]);
+  const whisper = new WhisperProvider({ options });
+  const { content, error } = await whisper.processFile(fullFilePath, filename);
 
-  if (!audioData) {
-    console.error(`Failed to parse content from ${filename}.`);
+  if (!!error) {
+    console.error(`Error encountered for parsing of ${filename}.`);
     trashFile(fullFilePath);
     return {
       success: false,
-      reason: `Failed to parse content from ${filename}.`,
+      reason: error,
       documents: [],
     };
   }
 
-  console.log(`[Model Working]: Transcribing audio data to text`);
-  const { text: content } = await transcriber(audioData, {
-    chunk_length_s: 30,
-    stride_length_s: 5,
-  });
-
-  if (!content.length) {
+  if (!content?.length) {
     console.error(`Resulting text content was empty for ${filename}.`);
     trashFile(fullFilePath);
     return {
@@ -76,79 +70,4 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
   return { success: true, reason: null, documents: [document] };
 }
 
-async function convertToWavAudioData(sourcePath) {
-  try {
-    let buffer;
-    const wavefile = require("wavefile");
-    const ffmpeg = require("fluent-ffmpeg");
-    const outFolder = path.resolve(__dirname, `../../storage/tmp`);
-    if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });
-
-    const fileExtension = path.extname(sourcePath).toLowerCase();
-    if (fileExtension !== ".wav") {
-      console.log(
-        `[Conversion Required] ${fileExtension} file detected - converting to .wav`
-      );
-      const outputFile = path.resolve(outFolder, `${v4()}.wav`);
-      const convert = new Promise((resolve) => {
-        ffmpeg(sourcePath)
-          .toFormat("wav")
-          .on("error", (error) => {
-            console.error(`[Conversion Error] ${error.message}`);
-            resolve(false);
-          })
-          .on("progress", (progress) =>
-            console.log(
-              `[Conversion Processing]: ${progress.targetSize}KB converted`
-            )
-          )
-          .on("end", () => {
-            console.log("[Conversion Complete]: File converted to .wav!");
-            resolve(true);
-          })
-          .save(outputFile);
-      });
-      const success = await convert;
-      if (!success)
-        throw new Error(
-          "[Conversion Failed]: Could not convert file to .wav format!"
-        );
-
-      const chunks = [];
-      const stream = fs.createReadStream(outputFile);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-      fs.rmSync(outputFile);
-    } else {
-      const chunks = [];
-      const stream = fs.createReadStream(sourcePath);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-    }
-
-    const wavFile = new wavefile.WaveFile(buffer);
-    wavFile.toBitDepth("32f");
-    wavFile.toSampleRate(16000);
-
-    let audioData = wavFile.getSamples();
-    if (Array.isArray(audioData)) {
-      if (audioData.length > 1) {
-        const SCALING_FACTOR = Math.sqrt(2);
-
-        // Merge channels into first channel to save memory
-        for (let i = 0; i < audioData[0].length; ++i) {
-          audioData[0][i] =
-            (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
-        }
-      }
-      audioData = audioData[0];
-    }
-
-    return audioData;
-  } catch (error) {
-    console.error(`convertToWavAudioData`, error);
-    return null;
-  }
-}
-
 module.exports = asAudio;
diff --git a/collector/processSingleFile/index.js b/collector/processSingleFile/index.js
@@ -7,7 +7,7 @@ const {
 const { trashFile, isTextType } = require("../utils/files");
 const RESERVED_FILES = ["__HOTDIR__.md"];
 
-async function processSingleFile(targetFilename) {
+async function processSingleFile(targetFilename, options = {}) {
   const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
   if (RESERVED_FILES.includes(targetFilename))
     return {
@@ -54,6 +54,7 @@ async function processSingleFile(targetFilename) {
   return await FileTypeProcessor({
     fullFilePath,
     filename: targetFilename,
+    options,
   });
 }
 

diff --git a/collector/utils/WhisperProviders/OpenAiWhisper.js b/collector/utils/WhisperProviders/OpenAiWhisper.js
@@ -0,0 +1,44 @@
+const fs = require("fs");
+
+/**
+ * Whisper provider that uploads an audio file to OpenAI's transcription
+ * endpoint (`whisper-1` model) and returns the transcript text.
+ */
+class OpenAiWhisper {
+  /**
+   * @param {Object} [params]
+   * @param {Object} [params.options] - provider options; `openAiKey` is required.
+   * @throws {Error} when no `openAiKey` is supplied.
+   */
+  constructor({ options = {} } = {}) {
+    // Required here so the dependency is only loaded when this provider is built.
+    const { Configuration, OpenAIApi } = require("openai");
+    // Optional chaining + defaults make a missing `options` surface as the
+    // intended error below rather than a TypeError.
+    if (!options?.openAiKey) throw new Error("No OpenAI API key was set.");
+
+    const config = new Configuration({
+      apiKey: options.openAiKey,
+    });
+    this.openai = new OpenAIApi(config);
+    this.model = "whisper-1";
+    this.temperature = 0;
+    this.#log("Initialized.");
+  }
+
+  /** Print a message to stdout behind the green `[OpenAiWhisper]` tag. */
+  #log(text, ...args) {
+    console.log(`\x1b[32m[OpenAiWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * Transcribe an audio file through the OpenAI API.
+   * Failures are reported through the `error` field of the result.
+   * @param {string} fullFilePath - path of the audio file to upload.
+   * @returns {Promise<{content: string, error: string|null}>}
+   */
+  async processFile(fullFilePath) {
+    // A single try/catch also covers synchronous throws raised while the
+    // request is being built, which a trailing `.catch()` would not see.
+    try {
+      const res = await this.openai.createTranscription(
+        fs.createReadStream(fullFilePath),
+        this.model,
+        undefined, // no prompt
+        "text", // response format requested from the API
+        this.temperature
+      );
+      if (res.hasOwnProperty("data"))
+        return { content: res.data, error: null };
+      return { content: "", error: "No content was able to be transcribed." };
+    } catch (e) {
+      this.#log(`Could not get any response from openai whisper`, e.message);
+      return { content: "", error: e.message };
+    }
+  }
+}
+
+module.exports = {
+  OpenAiWhisper,
+};
diff --git a/collector/utils/WhisperProviders/localWhisper.js b/collector/utils/WhisperProviders/localWhisper.js
@@ -1,5 +1,6 @@
-const path = require("path");
 const fs = require("fs");
+const path = require("path");
+const { v4 } = require("uuid");
 
 class LocalWhisper {
   constructor() {
@@ -16,12 +17,94 @@ class LocalWhisper {
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir))
       fs.mkdirSync(this.cacheDir, { recursive: true });
+
+    this.#log("Initialized.");
+  }
+
+  /** Print a message to stdout behind the green `[LocalWhisper]` tag. */
+  #log(text, ...args) {
+    const tag = "\x1b[32m[LocalWhisper]\x1b[0m";
+    console.log(`${tag} ${text}`, ...args);
+  }
+
+  /**
+   * Decode an audio file into the sample data handed to the transcriber.
+   * Non-.wav inputs are first converted to .wav with ffmpeg using a
+   * temporary file under storage/tmp. The audio is then converted to 32-bit
+   * float at 16000Hz and reduced to its first channel (with the second
+   * channel mixed in when there is more than one).
+   * @param {string} sourcePath - path of the audio file to decode.
+   * @returns {Promise<*|null>} the sample data, or null when any step fails.
+   */
+  async #convertToWavAudioData(sourcePath) {
+    try {
+      let buffer;
+      const wavefile = require("wavefile");
+      const ffmpeg = require("fluent-ffmpeg");
+      const outFolder = path.resolve(__dirname, `../../storage/tmp`);
+      if (!fs.existsSync(outFolder))
+        fs.mkdirSync(outFolder, { recursive: true });
+
+      const fileExtension = path.extname(sourcePath).toLowerCase();
+      if (fileExtension !== ".wav") {
+        this.#log(
+          `File conversion required! ${fileExtension} file detected - converting to .wav`
+        );
+        const outputFile = path.resolve(outFolder, `${v4()}.wav`);
+        try {
+          // ffmpeg reports through events; adapt them to a promise that
+          // resolves to whether the conversion succeeded.
+          const success = await new Promise((resolve) => {
+            ffmpeg(sourcePath)
+              .toFormat("wav")
+              .on("error", (error) => {
+                this.#log(`Conversion Error! ${error.message}`);
+                resolve(false);
+              })
+              .on("progress", (progress) =>
+                this.#log(
+                  `Conversion Processing! ${progress.targetSize}KB converted`
+                )
+              )
+              .on("end", () => {
+                this.#log(`Conversion Complete! File converted to .wav!`);
+                resolve(true);
+              })
+              .save(outputFile);
+          });
+          if (!success)
+            throw new Error(
+              "[Conversion Failed]: Could not convert file to .wav format!"
+            );
+
+          buffer = await fs.promises.readFile(outputFile);
+        } finally {
+          // Always drop the temporary .wav: a failed conversion or read
+          // would otherwise leave it behind in storage/tmp.
+          fs.rmSync(outputFile, { force: true });
+        }
+      } else {
+        buffer = await fs.promises.readFile(sourcePath);
+      }
+
+      const wavFile = new wavefile.WaveFile(buffer);
+      wavFile.toBitDepth("32f");
+      wavFile.toSampleRate(16000);
+
+      let audioData = wavFile.getSamples();
+      if (Array.isArray(audioData)) {
+        if (audioData.length > 1) {
+          const SCALING_FACTOR = Math.sqrt(2);
+
+          // Merge channels into first channel to save memory
+          for (let i = 0; i < audioData[0].length; ++i) {
+            audioData[0][i] =
+              (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
+          }
+        }
+        audioData = audioData[0];
+      }
+
+      return audioData;
+    } catch (error) {
+      console.error(`convertToWavAudioData`, error);
+      return null;
+    }
   }
 
   async client() {
     if (!fs.existsSync(this.modelPath)) {
-      console.log(
-        "\x1b[34m[INFO]\x1b[0m The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)\n\n"
+      this.#log(
+        `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)`
       );
     }
 
@@ -48,10 +131,45 @@ class LocalWhisper {
           : {}),
       });
     } catch (error) {
-      console.error("Failed to load the native whisper model:", error);
+      this.#log("Failed to load the native whisper model:", error);
       throw error;
     }
   }
+
+  /**
+   * Transcribe an audio file with the local whisper model.
+   * @param {string} fullFilePath - path of the audio file to transcribe.
+   * @param {string} filename - name used in the failure message.
+   * @returns {Promise<{content: string|null, error: string|null}>}
+   */
+  async processFile(fullFilePath, filename) {
+    try {
+      // Pass the promises to Promise.all directly so a failed model load
+      // rejects here and reaches the catch below instead of hanging.
+      const [transcriber, audioData] = await Promise.all([
+        this.client(),
+        this.#convertToWavAudioData(fullFilePath),
+      ]);
+
+      if (!audioData) {
+        this.#log(`Failed to parse content from ${filename}.`);
+        return {
+          content: null,
+          error: `Failed to parse content from ${filename}.`,
+        };
+      }
+
+      this.#log(`Transcribing audio data to text...`);
+      const { text } = await transcriber(audioData, {
+        chunk_length_s: 30,
+        stride_length_s: 5,
+      });
+
+      return { content: text, error: null };
+    } catch (error) {
+      return { content: null, error: error.message };
+    }
+  }
 }
 
 module.exports = {