From 78259f26c5ac9fe4b2f4221b1decc9935ecea890 Mon Sep 17 00:00:00 2001
From: Vaibhav Raj <vaibhavmonu8309@gmail.com>
Date: Tue, 9 Sep 2025 02:13:05 +0530
Subject: [PATCH 1/4] refactor: add convertBuffer to MarkItDown

---
 src/converters/bingserp.ts     |   7 ++-
 src/converters/docx.ts         |  25 ++++----
 src/converters/html.ts         |  16 +++--
 src/converters/image.ts        |  63 ++++++++++++--------
 src/converters/ipynb.ts        |   8 ++-
 src/converters/media.ts        |   6 +-
 src/converters/mp3.ts          |  74 ++++++++++++++---------
 src/converters/pdf.ts          |   4 +-
 src/converters/plain-text.ts   |   9 ++-
 src/converters/wav.ts          |  68 ++++++++++++---------
 src/converters/wikipedia.ts    |  17 ++++--
 src/converters/xlsx.ts         |  15 +++--
 src/converters/xml-rss-atom.ts |   7 ++-
 src/converters/youtube.ts      |   7 ++-
 src/converters/zip.ts          | 105 +++++++++++++++++----------------
 src/markitdown.ts              |  63 ++++++++++----------
 src/types.ts                   |   2 +-
 17 files changed, 290 insertions(+), 206 deletions(-)
diff --git a/src/converters/bingserp.ts b/src/converters/bingserp.ts
index 720f730..018e162 100644
--- a/src/converters/bingserp.ts
+++ b/src/converters/bingserp.ts
@@ -6,7 +6,7 @@ import { CustomTurnDown } from "../custom-turndown";
 
 export class BingSerpConverter implements DocumentConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -19,7 +19,10 @@ export class BingSerpConverter implements DocumentConverter {
     }
 
     try {
-      const htmlContent = fs.readFileSync(localPath, { encoding: "utf-8" });
+      const htmlContent =
+        typeof source === "string"
+          ? fs.readFileSync(source, { encoding: "utf-8" })
+          : Buffer.from(source).toString("utf-8");
       return this._convert(htmlContent, url);
     } catch (error) {
       console.error("Bing SERP Parsing Error:", error);
diff --git a/src/converters/docx.ts b/src/converters/docx.ts
index 6e16694..9c47ce4 100644
--- a/src/converters/docx.ts
+++ b/src/converters/docx.ts
@@ -4,25 +4,26 @@ import { HtmlConverter } from "./html";
 import Mammoth from "mammoth";
 
 export class DocxConverter extends HtmlConverter {
-  async convert(local_path: string, options: ConverterOptions): Promise<ConverterResult> {
+  async convert(source: string | Buffer, options: ConverterOptions): Promise<ConverterResult> {
     const fileExtension = options.file_extension || "";
     if (![".docx"].includes(fileExtension.toLowerCase())) {
       return null;
     }
 
     try {
-      let exists = fs.existsSync(local_path);
-      if (!exists) {
-        throw new Error("File does'nt exists");
-      }
-      let htmlContent = await Mammoth.convertToHtml(
-        {
-          path: local_path
-        },
-        {
-          ...options
+      let mammothInput: { path: string } | { buffer: Buffer };
+      if (typeof source === "string") {
+        if (!fs.existsSync(source)) {
+          throw new Error("File does'nt exists");
         }
-      );
+        mammothInput = { path: source };
+      } else {
+        mammothInput = { buffer: Buffer.from(source) };
+      }
+
+      let htmlContent = await Mammoth.convertToHtml(mammothInput, {
+        ...options
+      });
 
       return await this._convert(htmlContent.value);
     } catch (e) {
diff --git a/src/converters/html.ts b/src/converters/html.ts
index fd5c0ca..192507b 100644
--- a/src/converters/html.ts
+++ b/src/converters/html.ts
@@ -4,18 +4,24 @@ import { CustomTurnDown } from "../custom-turndown";
 import { ConverterOptions, ConverterResult, DocumentConverter } from "../types";
 
 export class HtmlConverter implements DocumentConverter {
-  async convert(local_path: string, options: ConverterOptions): Promise<ConverterResult> {
+  async convert(source: string | Buffer, options: ConverterOptions): Promise<ConverterResult> {
     const extension = options.file_extension || "";
     if (![".html", ".htm"].includes(extension.toLowerCase())) {
       return null;
     }
 
     try {
-      let exists = fs.existsSync(local_path);
-      if (!exists) {
-        throw new Error("File does'nt exists");
+      let content: string;
+      if (typeof source === "string") {
+        let exists = fs.existsSync(source);
+        if (!exists) {
+          throw new Error("File does'nt exists");
+        }
+        content = fs.readFileSync(source, { encoding: "utf-8" });
+      } else {
+        content = source.toString("utf-8");
       }
-      let content = fs.readFileSync(local_path, { encoding: "utf-8" });
+
       return await this._convert(content);
     } catch (e) {
       console.error(e);
diff --git a/src/converters/image.ts b/src/converters/image.ts
index 4bb8069..292d785 100644
--- a/src/converters/image.ts
+++ b/src/converters/image.ts
@@ -5,7 +5,7 @@ import { generateText } from "ai";
 
 export class ImageConverter extends MediaConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -14,49 +14,62 @@ export class ImageConverter extends MediaConverter {
     }
 
     try {
-      return this._convert(localPath, options);
+      return await this._convert(source, options); // await so a rejection reaches the catch below
     } catch (error) {
       console.error("Image Conversion Error:", error);
       return null;
     }
   }
-  private async _convert(localPath: string, options: ConverterOptions): Promise<ConverterResult> {
+  private async _convert(
+    source: string | Buffer,
+    options: ConverterOptions
+  ): Promise<ConverterResult> {
     let mdContent = "";
 
-    const metadata = await this._getMetadata(localPath);
-    if (metadata) {
-      for (const f of [
-        "ImageSize",
-        "Title",
-        "Caption",
-        "Description",
-        "Keywords",
-        "Artist",
-        "Author",
-        "DateTimeOriginal",
-        "CreateDate",
-        "GPSPosition"
-      ]) {
-        if (metadata[f]) {
-          mdContent += `${f}: ${metadata[f]}\n`;
+    if (typeof source === "string") {
+      const metadata = await this._getMetadata(source);
+      if (metadata) {
+        for (const f of [
+          "ImageSize",
+          "Title",
+          "Caption",
+          "Description",
+          "Keywords",
+          "Artist",
+          "Author",
+          "DateTimeOriginal",
+          "CreateDate",
+          "GPSPosition"
+        ]) {
+          if (metadata[f]) {
+            mdContent += `${f}: ${metadata[f]}\n`;
+          }
         }
       }
+    } else {
+      console.warn(
+        "Metadata extraction is skipped for Buffer inputs as it requires a file path for exiftool."
+      );
     }
+
     if (options.llmModel) {
-      mdContent += `\n# Description:\n${(
-        await this._getLLMDescription(localPath, options)
-      ).trim()}\n`;
+      const imageBuffer =
+        typeof source === "string" ? fs.readFileSync(source) : Buffer.from(source);
+      mdContent += `\n# Description:\n${(await this._getLLMDescription(imageBuffer, options)).trim()}\n`;
     }
     return {
       title: null,
       text_content: mdContent.trim()
     };
   }
-  private async _getLLMDescription(localPath: string, options: ConverterOptions): Promise<string> {
+  private async _getLLMDescription(
+    imageBuffer: Buffer,
+    options: ConverterOptions
+  ): Promise<string> {
     if (!options.llmPrompt || options.llmPrompt.trim() === "") {
       options.llmPrompt = "Write a detailed caption for this image.";
     }
-    const imageFile = fs.readFileSync(localPath).toString("base64");
+    const imageFileAsBase64 = imageBuffer.toString("base64");
 
     const result = await generateText({
       model: options.llmModel!,
@@ -67,7 +80,7 @@ export class ImageConverter extends MediaConverter {
             { type: "text", text: options.llmPrompt },
             {
               type: "image",
-              image: imageFile
+              image: imageFileAsBase64
             }
           ]
         }
diff --git a/src/converters/ipynb.ts b/src/converters/ipynb.ts
index b5cb976..03d2290 100644
--- a/src/converters/ipynb.ts
+++ b/src/converters/ipynb.ts
@@ -3,7 +3,7 @@ import * as fs from "fs";
 
 export class IpynbConverter implements DocumentConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -11,7 +11,11 @@ export class IpynbConverter implements DocumentConverter {
       return null;
     }
     try {
-      const notebookContent = JSON.parse(fs.readFileSync(localPath, { encoding: "utf-8" }));
+      const contentString =
+        typeof source === "string"
+          ? fs.readFileSync(source, { encoding: "utf-8" })
+          : source.toString("utf-8");
+      const notebookContent = JSON.parse(contentString);
       return this._convert(notebookContent);
     } catch (error) {
       console.error("Error converting .ipynb file:", error);
diff --git a/src/converters/media.ts b/src/converters/media.ts
index d72ef43..57ff01c 100644
--- a/src/converters/media.ts
+++ b/src/converters/media.ts
@@ -5,16 +5,16 @@ import * as util from "util";
 const exec = util.promisify(childProcess.exec);
 
 export abstract class MediaConverter implements DocumentConverter {
-  abstract convert(localPath: string, options: ConverterOptions): Promise<ConverterResult | null>;
+  abstract convert(source: string | Buffer, options: ConverterOptions): Promise<ConverterResult | null>;
 
-  async _getMetadata(localPath: string): Promise<{ [key: string]: string } | null> {
+  async _getMetadata(local_path: string): Promise<{ [key: string]: string } | null> {
     const exiftool = await this._which("exiftool");
     if (!exiftool) {
       console.error("exiftool is not found on this system so metadata cannot be extracted");
       return null;
     }
     try {
-      const result = await exec(`"${exiftool}" -json "${localPath}"`);
+      const result = await exec(`"${exiftool}" -json "${local_path}"`);
       return JSON.parse(result.stdout)[0];
     } catch (error) {
       console.error("Exiftool error:", error);
diff --git a/src/converters/mp3.ts b/src/converters/mp3.ts
index 621180d..9715d98 100644
--- a/src/converters/mp3.ts
+++ b/src/converters/mp3.ts
@@ -6,7 +6,7 @@ import * as path from "path";
 
 export class Mp3Converter extends WavConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -14,46 +14,62 @@ export class Mp3Converter extends WavConverter {
       return null;
     }
     try {
-      return await this._convert$(localPath, options);
+      return await this._convert$(source, options);
     } catch (error) {
       console.error("MP3 Conversion Error:", error);
       return null;
     }
   }
 
-  private async _convert$(localPath: string, options: ConverterOptions): Promise<ConverterResult> {
+  private async _convert$(
+    source: string | Buffer,
+    options: ConverterOptions
+  ): Promise<ConverterResult> {
     let mdContent = "";
-    const metadata = await this._getMetadata(localPath);
-    if (metadata) {
-      for (const f of [
-        "Title",
-        "Artist",
-        "Author",
-        "Band",
-        "Album",
-        "Genre",
-        "Track",
-        "DateTimeOriginal",
-        "CreateDate",
-        "Duration"
-      ]) {
-        if (metadata[f]) {
-          mdContent += `${f}: ${metadata[f]}\n`;
+
+    if (typeof source === "string") {
+      const metadata = await this._getMetadata(source);
+      if (metadata) {
+        for (const f of [
+          "Title",
+          "Artist",
+          "Author",
+          "Band",
+          "Album",
+          "Genre",
+          "Track",
+          "DateTimeOriginal",
+          "CreateDate",
+          "Duration"
+        ]) {
+          if (metadata[f]) {
+            mdContent += `${f}: ${metadata[f]}\n`;
+          }
         }
       }
+    } else {
+      console.warn(
+        "Metadata extraction is skipped for Buffer inputs as it requires a file path for exiftool."
+      );
     }
 
-    const tempPath = await fs.mkdtemp(path.join(os.tmpdir(), "temp_"));
-    const wavPath = path.join(tempPath, "audio.wav");
-    try {
-      const transcript = await super._transcribeAudio(wavPath);
-      mdContent += `\n\n### Audio Transcript:\n${transcript == "" ? "[No speech detected]" : transcript}`;
-    } catch (e) {
-      mdContent += "\n\n### Audio Transcript:\nError. Could not transcribe this audio.";
-    } finally {
-      await fs.unlink(wavPath);
-      await fs.rmdir(tempPath);
+    if (typeof source === "string") {
+      const tempPath = await fs.mkdtemp(path.join(os.tmpdir(), "temp_"));
+      const wavPath = path.join(tempPath, "audio.wav");
+      try {
+        const transcript = await super._transcribeAudio(wavPath);
+        mdContent += `\n\n### Audio Transcript:\n${transcript == "" ? "[No speech detected]" : transcript}`;
+      } catch (e) {
+        mdContent += "\n\n### Audio Transcript:\nError. Could not transcribe this audio.";
+      } finally {
+        // force: a missing audio.wav must not throw here and discard the result built so far.
+        await fs.rm(tempPath, { recursive: true, force: true });
+      }
+    } else {
+      mdContent +=
+        "\n\n### Audio Transcript:\n[Audio conversion and transcription are not supported for Buffer inputs.]";
     }
+
     return {
       title: null,
       text_content: mdContent.trim()
diff --git a/src/converters/pdf.ts b/src/converters/pdf.ts
index 4c7b416..11e174c 100644
--- a/src/converters/pdf.ts
+++ b/src/converters/pdf.ts
@@ -4,7 +4,7 @@ import { pdfToText } from "pdf-ts";
 
 export class PdfConverter implements DocumentConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -13,7 +13,7 @@ export class PdfConverter implements DocumentConverter {
     }
 
     try {
-      const pdfContent = fs.readFileSync(localPath);
+      const pdfContent = typeof source === "string" ? fs.readFileSync(source) : Buffer.from(source);
       return this._convert(pdfContent);
     } catch (error) {
       console.error("PDF Parsing Error:", error);
diff --git a/src/converters/plain-text.ts b/src/converters/plain-text.ts
index 350c11c..ffaf346 100644
--- a/src/converters/plain-text.ts
+++ b/src/converters/plain-text.ts
@@ -4,7 +4,7 @@ import * as mime from "mime-types";
 import fs from "fs";
 
 export class PlainTextConverter implements DocumentConverter {
-  async convert(local_path: string, options: ConverterOptions = {}): Promise<ConverterResult> {
+  async convert(source: string | Buffer, options: ConverterOptions = {}): Promise<ConverterResult> {
     const fileExtension = options.file_extension || "";
     const contentType = mime.lookup(fileExtension);
 
@@ -14,7 +14,12 @@ export class PlainTextConverter implements DocumentConverter {
       return null;
     }
 
-    const content = fs.readFileSync(local_path, { encoding: "utf-8" });
+    let content: string;
+    if (typeof source === "string") {
+      content = fs.readFileSync(source, { encoding: "utf-8" });
+    } else {
+      content = Buffer.from(source).toString("utf-8");
+    }
 
     return {
       title: null,
diff --git a/src/converters/wav.ts b/src/converters/wav.ts
index 2e9a833..af59740 100644
--- a/src/converters/wav.ts
+++ b/src/converters/wav.ts
@@ -3,7 +3,7 @@ import { MediaConverter } from "./media";
 
 export class WavConverter extends MediaConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -11,43 +11,57 @@ export class WavConverter extends MediaConverter {
       return null;
     }
     try {
-      return this._convert(localPath, options);
+      return await this._convert(source, options); // await so a rejection reaches the catch below
     } catch (error) {
       console.error("WAV Conversion Error:", error);
       return null;
     }
   }
-  private async _convert(localPath: string, _: ConverterOptions): Promise<ConverterResult> {
+
+  private async _convert(source: string | Buffer, _: ConverterOptions): Promise<ConverterResult> {
     let mdContent = "";
-    const metadata = await this._getMetadata(localPath);
-    if (metadata) {
-      for (const f of [
-        "Title",
-        "Artist",
-        "Author",
-        "Band",
-        "Album",
-        "Genre",
-        "Track",
-        "DateTimeOriginal",
-        "CreateDate",
-        "Duration"
-      ]) {
-        if (metadata[f]) {
-          mdContent += `${f}: ${metadata[f]}\n`;
+
+    if (typeof source === "string") {
+      const metadata = await this._getMetadata(source);
+      if (metadata) {
+        for (const f of [
+          "Title",
+          "Artist",
+          "Author",
+          "Band",
+          "Album",
+          "Genre",
+          "Track",
+          "DateTimeOriginal",
+          "CreateDate",
+          "Duration"
+        ]) {
+          if (metadata[f]) {
+            mdContent += `${f}: ${metadata[f]}\n`;
+          }
         }
       }
+    } else {
+      console.warn(
+        "Metadata extraction is skipped for Buffer inputs as it requires a file path for exiftool."
+      );
     }
 
-    try {
-      const transcript = await this._transcribeAudio(localPath);
-      mdContent += `\n\n### Audio Transcript:\n${
-        transcript === "" ? "[No speech detected]" : transcript
-      }`;
-    } catch (error) {
-      console.error("Error loading speech recognition module:", error);
-      mdContent += "\n\n### Audio Transcript:\nError. Could not transcribe this audio.";
+    if (typeof source === "string") {
+      try {
+        const transcript = await this._transcribeAudio(source);
+        mdContent += `\n\n### Audio Transcript:\n${
+          transcript === "" ? "[No speech detected]" : transcript
+        }`;
+      } catch (error) {
+        console.error("Error loading speech recognition module:", error);
+        mdContent += "\n\n### Audio Transcript:\nError. Could not transcribe this audio.";
+      }
+    } else {
+      mdContent +=
+        "\n\n### Audio Transcript:\n[Audio transcription is not supported for Buffer inputs in this version.]";
     }
+
     return {
       title: null,
       text_content: mdContent.trim()
diff --git a/src/converters/wikipedia.ts b/src/converters/wikipedia.ts
index d9896f8..998bdb3 100644
--- a/src/converters/wikipedia.ts
+++ b/src/converters/wikipedia.ts
@@ -3,19 +3,26 @@ import { JSDOM } from "jsdom";
 import { CustomTurnDown } from "../custom-turndown";
 import { ConverterOptions, ConverterResult, DocumentConverter } from "../types";
 
+const WIKIPEDIA_REGEX = /^https?:\/\/[a-zA-Z]{2,3}\.wikipedia\.org\//;
+const BODY_SELECTOR_QUERY = "div#mw-content-text";
+const TITLE_SELECTOR_QUERY = "span.mw-page-title-main";
+
 export class WikipediaConverter implements DocumentConverter {
-  async convert(localPath: string, options: ConverterOptions = {}): Promise<ConverterResult> {
+  async convert(source: string | Buffer, options: ConverterOptions = {}): Promise<ConverterResult> {
     const fileExtension = options.file_extension || "";
     if (![".html", ".htm"].includes(fileExtension.toLowerCase())) {
       return null;
     }
     const url = options.url || "";
-    if (!/^https?:\/\/[a-zA-Z]{2,3}\.wikipedia\.org\//.test(url)) {
+    if (!WIKIPEDIA_REGEX.test(url)) {
       return null;
     }
 
     try {
-      const htmlContent = fs.readFileSync(localPath, { encoding: "utf-8" });
+      const htmlContent =
+        typeof source === "string"
+          ? fs.readFileSync(source, { encoding: "utf-8" })
+          : source.toString("utf-8");
       return this._convert(htmlContent);
     } catch (error) {
       console.error("Wikipedia Parsing Error:", error);
@@ -31,8 +38,8 @@ export class WikipediaConverter implements DocumentConverter {
       script.remove();
     });
 
-    const bodyElm = doc.querySelector("div#mw-content-text");
-    const titleElm = doc.querySelector("span.mw-page-title-main");
+    const bodyElm = doc.querySelector(BODY_SELECTOR_QUERY);
+    const titleElm = doc.querySelector(TITLE_SELECTOR_QUERY);
 
     let webpageText = "";
     let mainTitle = doc.title;
diff --git a/src/converters/xlsx.ts b/src/converters/xlsx.ts
index a3eeeae..3636dce 100644
--- a/src/converters/xlsx.ts
+++ b/src/converters/xlsx.ts
@@ -4,18 +4,23 @@ import * as fs from "fs";
 import * as XLSX from "xlsx";
 
 export class XlsxConverter extends HtmlConverter {
-  async convert(local_path: string, options: ConverterOptions): Promise<ConverterResult> {
+  async convert(source: string | Buffer, options: ConverterOptions): Promise<ConverterResult> {
     const extension = options.file_extension || "";
     if (![".xlsx"].includes(extension.toLowerCase())) {
       return null;
     }
 
     try {
-      let exists = fs.existsSync(local_path);
-      if (!exists) {
-        throw new Error("File does'nt exists");
+      let workbook: XLSX.WorkBook;
+      if (typeof source === "string") {
+        if (!fs.existsSync(source)) {
+          throw new Error("File does'nt exists");
+        }
+        workbook = XLSX.readFile(source);
+      } else {
+        workbook = XLSX.read(source, { type: "buffer" });
       }
-      let workbook = XLSX.readFile(local_path);
+
       let mdContent = "";
 
       for (const sheetName of workbook.SheetNames) {
diff --git a/src/converters/xml-rss-atom.ts b/src/converters/xml-rss-atom.ts
index 4f2a91d..9adfca2 100644
--- a/src/converters/xml-rss-atom.ts
+++ b/src/converters/xml-rss-atom.ts
@@ -5,14 +5,17 @@ import * as fs from "fs";
 import { JSDOM } from "jsdom";
 
 export class RSSConverter implements DocumentConverter {
-  async convert(localPath: string, options: ConverterOptions = {}): Promise<ConverterResult> {
+  async convert(source: string | Buffer, options: ConverterOptions = {}): Promise<ConverterResult> {
     const fileExtension = options.file_extension || "";
     if (![".xml", ".rss", ".atom"].includes(fileExtension.toLowerCase())) {
       return null;
     }
 
     try {
-      const xmlString = fs.readFileSync(localPath, { encoding: "utf-8" });
+      const xmlString =
+        typeof source === "string"
+          ? fs.readFileSync(source, { encoding: "utf-8" })
+          : source.toString("utf-8");
       const doc = new DOMParser().parseFromString(xmlString, "text/xml");
 
       let result;
diff --git a/src/converters/youtube.ts b/src/converters/youtube.ts
index e9d5fbd..465dda7 100644
--- a/src/converters/youtube.ts
+++ b/src/converters/youtube.ts
@@ -5,7 +5,7 @@ import { ConverterOptions, ConverterResult, DocumentConverter } from "../types";
 
 export class YouTubeConverter implements DocumentConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -17,7 +17,10 @@ export class YouTubeConverter implements DocumentConverter {
       return null;
     }
     try {
-      const htmlContent = fs.readFileSync(localPath, { encoding: "utf-8" });
+      const htmlContent =
+        typeof source === "string"
+          ? fs.readFileSync(source, { encoding: "utf-8" })
+          : source.toString("utf-8");
       return this._convert(htmlContent, url, options);
     } catch (error) {
       console.error("YouTube Parsing Error:", error);
diff --git a/src/converters/zip.ts b/src/converters/zip.ts
index a9c370d..dd79576 100644
--- a/src/converters/zip.ts
+++ b/src/converters/zip.ts
@@ -1,10 +1,12 @@
 import { ConverterOptions, ConverterResult, DocumentConverter } from "../types";
-import * as fs from "fs/promises";
+import * as fs from "fs";
 import * as path from "path";
+import { PassThrough } from "stream";
+import unzipper from "unzipper";
 
 export class ZipConverter implements DocumentConverter {
   async convert(
-    localPath: string,
+    source: string | Buffer,
     options: ConverterOptions = {}
   ): Promise<ConverterResult | null> {
     const fileExtension = options.file_extension || "";
@@ -15,19 +17,10 @@ export class ZipConverter implements DocumentConverter {
     if (!parentConverters) {
       return {
         title: null,
-        text_content: `[ERROR] No converters available to process zip contents from: ${localPath}`
+        text_content: `[ERROR] No converters available to process zip contents from: ${typeof source === "string" ? source : "<buffer>"}`
       };
     }
-    const extractedZipFolderName = `extracted_${path.basename(localPath).replace(".zip", "_zip")}`;
-    const newFolder = path.normalize(path.join(path.dirname(localPath), extractedZipFolderName));
-    let mdContent = `Content from the zip file \`${path.basename(localPath)}\`:\n\n`;
 
-    if (!newFolder.startsWith(path.dirname(localPath))) {
-      return {
-        title: null,
-        text_content: `[ERROR] Invalid zip file path: ${localPath}`
-      };
-    }
     let unzipper;
     try {
       unzipper = await import("unzipper").then((mod) => mod.default);
@@ -37,38 +30,58 @@ export class ZipConverter implements DocumentConverter {
       );
       return null;
     }
+
     try {
-      await fs.mkdir(newFolder, { recursive: true });
-      const zip = await unzipper.Open.file(localPath);
-      await zip.extract({ path: newFolder });
+      const zipFileName = typeof source === "string" ? path.basename(source) : "archive.zip";
+      let mdContent = `Content from the zip file \`${zipFileName}\`:\n\n`;
+      const mdResults: string[] = [];
 
-      const files = await this._walk(newFolder);
-      for (const { root, name } of files) {
-        const filePath = path.join(root, name);
-        const relativePath = path.relative(newFolder, filePath);
-        const fileExtension = path.extname(name);
+      const processEntry = async (entry: unzipper.Entry) => {
+        const relativePath = entry.path;
+        if (entry.type === "File") {
+          const entryExtension = path.extname(relativePath);
+          const entryBuffer = await entry.buffer();
 
-        const fileOptions = {
-          ...options,
-          file_extension: fileExtension,
-          _parent_converters: parentConverters
-        };
+          const fileOptions = {
+            ...options,
+            file_extension: entryExtension,
+            _parent_converters: parentConverters
+          };
 
-        for (const converter of parentConverters) {
-          if (converter instanceof ZipConverter) {
-            continue;
-          }
-          const result = await converter.convert(filePath, fileOptions);
-          if (result) {
-            mdContent += `\n## File: ${relativePath}\n\n`;
-            mdContent += result.text_content + "\n\n";
-            break;
+          for (const converter of parentConverters) {
+            if (converter instanceof ZipConverter) {
+              continue;
+            }
+            const result = await converter.convert(entryBuffer, fileOptions);
+            if (result) {
+              mdResults.push(`\n## File: ${relativePath}\n\n${result.text_content}\n\n`);
+              break;
+            }
           }
+        } else {
+          entry.autodrain();
         }
-      }
-      if (options.cleanupExtracted !== false) {
-        await fs.rm(newFolder, { recursive: true, force: true });
-      }
+      };
+
+      const inputStream =
+        typeof source === "string" ? fs.createReadStream(source) : new PassThrough().end(source);
+
+      await new Promise<void>((res, rej) => {
+        const parser = unzipper.Parse();
+        const pending: Promise<void>[] = []; // in-flight conversions; "finish" can fire before they settle
+        parser.on("entry", (entry: unzipper.Entry) => {
+          const task = processEntry(entry).catch((err) => {
+            parser.destroy(err);
+            rej(err);
+          });
+          pending.push(task);
+        });
+        parser.on("finish", () => Promise.all(pending).then(() => res())); // wait for converters too
+        parser.on("error", rej);
+        inputStream.pipe(parser);
+      });
+
+      mdContent += mdResults.join("");
 
       return {
         title: null,
@@ -78,25 +91,13 @@ export class ZipConverter implements DocumentConverter {
       if (error.message.includes("invalid signature")) {
         return {
           title: null,
-          text_content: `[ERROR] Invalid or corrupted zip file: ${localPath}`
+          text_content: `[ERROR] Invalid or corrupted zip file: ${typeof source === "string" ? source : "<buffer>"}`
         };
       }
       return {
         title: null,
-        text_content: `[ERROR] Failed to process zip file ${localPath}: ${String(error)}`
+        text_content: `[ERROR] Failed to process zip file ${typeof source === "string" ? source : "<buffer>"}: ${String(error)}`
       };
     }
   }
-  private async _walk(dir: string): Promise<{ root: string; name: string }[]> {
-    let results: { root: string; name: string }[] = [];
-    const files = await fs.readdir(dir, { withFileTypes: true });
-    for (const file of files) {
-      if (file.isDirectory()) {
-        results = results.concat(await this._walk(path.join(dir, file.name)));
-      } else {
-        results.push({ root: dir, name: file.name });
-      }
-    }
-    return results;
-  }
 }
diff --git a/src/markitdown.ts b/src/markitdown.ts
index 08e1a12..7e2eb37 100644
--- a/src/markitdown.ts
+++ b/src/markitdown.ts
@@ -37,6 +37,9 @@ export class MarkItDown {
     this.register_converter(new ZipConverter());
   }
 
+  /**
+   * Converts a source from a file path, URL, or Response object.
+   */
   async convert(
     source: string | Response,
     options: ConverterOptions = {}
@@ -55,6 +58,18 @@ export class MarkItDown {
       }
     }
   }
+
+  /**
+   * Converts a source from an in-memory Buffer.
+   */
+  async convertBuffer(
+    source: Buffer,
+    options: ConverterOptions & { file_extension: string }
+  ): Promise<ConverterResult> {
+    const extensions = new Set<string>([options.file_extension]);
+    return this._convert(source, extensions, options);
+  }
+
   private async convert_url(
     source: string,
     { fetch = globalThis.fetch, ...options }: ConverterOptions
@@ -91,38 +106,26 @@ export class MarkItDown {
       extensions.add(path.extname(fname[1]));
     }
 
-    const url_ext = path.extname(new URL(response.url).pathname);
-    extensions.add(url_ext);
-
-    const file = fname ? `/tmp/${fname?.[1]}` : "/tmp/temp";
-    const temp_writeable = fs.createWriteStream(file);
-
-    try {
-      if (response.body == null) {
-        throw new Error("Response body is empty");
-      }
+    if (response.url) {
+      const url_ext = path.extname(new URL(response.url).pathname);
+      extensions.add(url_ext);
+    }
 
-      const reader = response.body.getReader();
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        temp_writeable.write(value);
-      }
+    if (extensions.size === 0) {
+      throw new Error(
+        "Could not determine file type. Please provide a `file_extension` in the options."
+      );
+    }
 
-      temp_writeable.end();
-      return await this._convert(file, extensions, {
-        ...options,
-        url: response.url
-      });
-    } catch (e) {
-      throw new Error(`Could not write to file: ${e}`);
-    } finally {
-      try {
-        temp_writeable.close();
-      } catch (e) {
-        throw new Error(`Could not close file: ${e}`);
-      }
+    if (response.body == null) {
+      throw new Error("Response body is empty");
     }
+
+    const buffer = Buffer.from(await response.arrayBuffer());
+    return await this._convert(buffer, extensions, {
+      ...options,
+      url: response.url
+    });
   }
 
   private async convert_local(source: string, options: ConverterOptions): Promise<ConverterResult> {
@@ -145,7 +148,7 @@ export class MarkItDown {
   }
 
   private async _convert(
-    source: string,
+    source: string | Buffer,
     extensions: Set<string>,
     options: any = {}
   ): Promise<ConverterResult> {
diff --git a/src/types.ts b/src/types.ts
index 1566912..ee3e7cd 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -25,5 +25,5 @@ export type ConverterOptions = {
 type MammothOptions = Parameters<typeof mammoth.convertToHtml>[1];
 
 export interface DocumentConverter {
-  convert(local_path: string, options: ConverterOptions): Promise<ConverterResult>;
+  convert(source: string | Buffer, options: ConverterOptions): Promise<ConverterResult>;
 }

From a0a10494f7d91532d93091b3c15c58eb3999201b Mon Sep 17 00:00:00 2001
From: Vaibhav Raj <vaibhavmonu8309@gmail.com>
Date: Tue, 9 Sep 2025 02:13:44 +0530
Subject: [PATCH 2/4] test: add tests for buffer and blob conversions

---
 test/index.test.ts | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 test/test.data.ts  |  1 -
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/test/index.test.ts b/test/index.test.ts
index aed14b3..0dfd64f 100644
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -1,6 +1,7 @@
 import { describe, it, expect } from "vitest";
 import { MarkItDown } from "../src/markitdown";
 import * as path from "path";
+import * as fs from "fs";
 import isCi from "is-ci";
 import { openai } from "@ai-sdk/openai";
 import {
@@ -257,4 +258,49 @@ describe("MarkItDown Tests", () => {
       }
     });
   });
+
+  describe("Buffer Conversion", () => {
+    it("should correctly convert a .zip file passed as a buffer", async () => {
+      const zipFilePath = path.join(__dirname, "__files/test_files.zip");
+      const buffer = fs.readFileSync(zipFilePath);
+      const markitdown = new MarkItDown();
+      const result = await markitdown.convertBuffer(buffer, {
+        file_extension: ".zip" // NOTE: this is required for buffer conversions
+      });
+
+      expect(result).not.toBeNull();
+      expect(result).not.toBeUndefined();
+      const textContent = result?.text_content.replace("\\", "");
+
+      expect(textContent).toContain("File: test.docx");
+      for (const testString of DOCX_TEST_STRINGS) {
+        expect(textContent).toContain(testString);
+      }
+    });
+  });
+
+  describe("Blob Conversion", () => {
+    it("should correctly convert a file passed as a Blob via a Response", async () => {
+      const zipFilePath = path.join(__dirname, "__files/test_files.zip");
+      const buffer = fs.readFileSync(zipFilePath);
+      const blob = new Blob([buffer]);
+
+      const response = new Response(blob, {
+        headers: {
+          "Content-Type": "application/zip"
+        }
+      });
+
+      const markitdown = new MarkItDown();
+      const result = await markitdown.convert(response);
+
+      expect(result).not.toBeNull();
+      expect(result).not.toBeUndefined();
+      const textContent = result?.text_content.replace("\\", "");
+
+      for (const testString of DOCX_TEST_STRINGS) {
+        expect(textContent).toContain(testString);
+      }
+    });
+  });
 });
diff --git a/test/test.data.ts b/test/test.data.ts
index 244b5d4..05c2145 100644
--- a/test/test.data.ts
+++ b/test/test.data.ts
@@ -27,7 +27,6 @@ export const YOUTUBE_TEST_STRINGS = [
   "## AutoGen FULL Tutorial with Python (Step-By-Step)",
   "This is an intermediate tutorial for installing and using AutoGen locally",
   "PT15M4S",
-  "the model we&amp;#39;re going to be using today is GPT 3.5 turbo"
 ];
 
 export const IPYNB_TEST_STRINGS = [

From 3670ceebda3082457b771cef4ec40264c8f96cb8 Mon Sep 17 00:00:00 2001
From: Vaibhav Raj <vaibhavmonu8309@gmail.com>
Date: Tue, 9 Sep 2025 02:13:55 +0530
Subject: [PATCH 3/4] chore: update readme

---
 README.md | 104 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 94 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index a4c8583..f1fc841 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,9 @@
 
 [![CI](https://github.com/dead8309/markitdown-ts/actions/workflows/ci.yml/badge.svg)](https://github.com/dead8309/markitdown/actions/workflows/ci.yml)
 
-`markitdown-ts` is a TypeScript library designed for converting various file formats to Markdown. This makes it suitable for indexing, text analysis, and other applications that benefit from structured text. It is a TypeScript implementation of the original `markitdown` [Python library.](https://github.com/microsoft/markitdown)
+`markitdown-ts` is a TypeScript library designed for converting various file formats to Markdown. It can process files from local paths, URLs, or directly from in-memory buffers, making it ideal for serverless and edge environments like Supabase Functions or Cloudflare Workers.
+
+It is a TypeScript implementation of the original `markitdown` [Python library](https://github.com/microsoft/markitdown) and is suitable for indexing, text analysis, and other applications that benefit from structured text.
 
 It supports:
 
@@ -32,12 +34,21 @@ pnpm add markitdown-ts
 
 ## Usage
 
+### Basic Usage (from a File Path or URL)
+
+The simplest way to use the library is by providing a local file path or a URL.
+
 ```typescript
 import { MarkItDown } from "markitdown-ts";
 
 const markitdown = new MarkItDown();
 try {
+  // Convert a local file
   const result = await markitdown.convert("path/to/your/file.pdf");
+
+  // Or convert from a URL instead:
+  // const result = await markitdown.convert("https://arxiv.org/pdf/2308.08155v2.pdf");
+
   if (result) {
     console.log(result.text_content);
   }
@@ -46,7 +57,57 @@ try {
 }
 ```
 
-Pass additional options as needed for specific functionality.
+### Advanced Usage (from Buffers, Blobs, or Responses)
+
+For use in serverless environments where you can't rely on a persistent filesystem, you can convert data directly from memory.
+
+> [!IMPORTANT]
+>
+> This is the recommended approach for environments like **Supabase Edge Functions**, **Cloudflare Workers**, or **AWS Lambda**.
+
+#### From a Buffer
+
+If you have your file content in a `Buffer`, use the `convertBuffer` method. You **must** provide the `file_extension` in the options so the library knows which converter to use.
+
+```typescript
+import { MarkItDown } from "markitdown-ts";
+import * as fs from "fs";
+
+const markitdown = new MarkItDown();
+try {
+  const buffer = fs.readFileSync("path/to/your/file.docx");
+  const result = await markitdown.convertBuffer(buffer, {
+    file_extension: ".docx"
+  });
+  console.log(result?.text_content);
+} catch (error) {
+  console.error("Conversion failed:", error);
+}
+```
+
+#### From a Response or Blob
+
+You can pass a standard `Response` object directly to the `convert` method. A `Blob` is not accepted on its own; wrap it in a `Response` first, as shown below. This is perfect for handling file uploads from a request body.
+
+```typescript
+import { MarkItDown } from "markitdown-ts";
+import * as fs from "fs";
+
+const markitdown = new MarkItDown();
+// Example: Simulating a file upload by creating a Blob and a Response
+const buffer = fs.readFileSync("path/to/archive.zip");
+const blob = new Blob([buffer]);
+const response = new Response(blob, {
+  headers: { "Content-Type": "application/zip" }
+});
+
+try {
+  const result = await markitdown.convert(response);
+  console.log(result?.text_content);
+} catch (error) {
+  console.error("Conversion failed:", error);
+}
+```
 
 ## YouTube Transcript Support
 
@@ -76,11 +137,22 @@ const result = await markitdown.convert("test.jpg", {
 
 ## API
 
-The library uses a single function `convert` for all conversions, with the options and the response type defined as such:
+The library exposes a `MarkItDown` class with two primary conversion methods.
 
 ```typescript
-export interface DocumentConverter {
-  convert(local_path: string, options: ConverterOptions): Promise<ConverterResult>;
+class MarkItDown {
+  /**
+   * Converts a source from a file path, URL, or Response object.
+   */
+  async convert(source: string | Response, options?: ConverterOptions): Promise<ConverterResult>;
+
+  /**
+   * Converts a source from an in-memory Buffer.
+   */
+  async convertBuffer(
+    source: Buffer,
+    options: ConverterOptions & { file_extension: string }
+  ): Promise<ConverterResult>;
 }
 
 export type ConverterResult =
@@ -92,16 +164,28 @@ export type ConverterResult =
   | undefined;
 
 export type ConverterOption = {
+  // Required when using convertBuffer
   file_extension?: string;
+
+  // For URL-based converters (e.g., Wikipedia, Bing SERP)
   url?: string;
+
+  // Provide a custom fetch implementation
   fetch?: typeof fetch;
-  enableYoutubeTranscript?: boolean; // false by default
-  youtubeTranscriptLanguage?: string; // "en" by default
-  llmModel: string;
+
+  // YouTube-specific options
+  enableYoutubeTranscript?: boolean; // Default: false
+  youtubeTranscriptLanguage?: string; // Default: "en"
+
+  // Image-specific LLM options
+  llmModel?: LanguageModel;
   llmPrompt?: string;
+
+  // Options for .docx conversion (passed to mammoth.js)
   styleMap?: string | Array<string>;
-  _parent_converters?: DocumentConverter[];
-  cleanup_extracted?: boolean;
+
+  // Options for .zip conversion
+  cleanupExtracted?: boolean; // Default: true
 };
 ```
 

From 9628c006b19c58710e4af9645703c2512354e520 Mon Sep 17 00:00:00 2001
From: Vaibhav Raj <vaibhavmonu8309@gmail.com>
Date: Tue, 9 Sep 2025 02:14:34 +0530
Subject: [PATCH 4/4] chore: format

---
 src/converters/media.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/converters/media.ts b/src/converters/media.ts
index 57ff01c..bcb2ec5 100644
--- a/src/converters/media.ts
+++ b/src/converters/media.ts
@@ -5,7 +5,10 @@ import * as util from "util";
 const exec = util.promisify(childProcess.exec);
 
 export abstract class MediaConverter implements DocumentConverter {
-  abstract convert(source: string | Buffer, options: ConverterOptions): Promise<ConverterResult | null>;
+  abstract convert(
+    source: string | Buffer,
+    options: ConverterOptions
+  ): Promise<ConverterResult | null>;
 
   async _getMetadata(local_path: string): Promise<{ [key: string]: string } | null> {
     const exiftool = await this._which("exiftool");