diff --git a/Makefile b/Makefile
index c28dc5c..1071091 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ FOFI_DIR = $(XDIR)fofi/
 GOO_DIR = $(XDIR)goo/
 XPDF_DIR = $(XDIR)xpdf/
 
-PY_SRC = src/xpydf/PdfLoader.cc src/xpydf/ImageInfoDev.cc src/xpydf/ImageDataDev.cc
+PY_SRC = src/xpydf/PdfLoader.cc src/xpydf/ImageInfoDev.cc src/xpydf/ImageDataDev.cc src/xpydf/FontOutputDev.cc
 SPLASH_SRC = $(wildcard $(SPLASH_DIR)*.cc)
 FOFI_SRC = $(wildcard $(FOFI_DIR)*.cc)
 
diff --git a/src/xpdf-4.04/xpdf/GfxFont.cc b/src/xpdf-4.04/xpdf/GfxFont.cc
index 30e8bd1..75913bc 100644
--- a/src/xpdf-4.04/xpdf/GfxFont.cc
+++ b/src/xpdf-4.04/xpdf/GfxFont.cc
@@ -1283,7 +1283,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
       }
     }
   }
-
+  
   // construct the char code -> Unicode mapping object
   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 
@@ -1291,7 +1291,9 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
   // existing entries in ctu, i.e., the ToUnicode CMap takes
   // precedence, but the other encoding info is allowed to fill in any
   // holes
-  readToUnicodeCMap(fontDict, 8, ctu);
+  if (globalParams->getReadUnicodeCMap()) {
+    readToUnicodeCMap(fontDict, 8, ctu);
+  }
 
   // look for a Unicode-to-Unicode mapping
   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
diff --git a/src/xpdf-4.04/xpdf/GlobalParams.cc b/src/xpdf-4.04/xpdf/GlobalParams.cc
index 5fd4393..64deb76 100644
--- a/src/xpdf-4.04/xpdf/GlobalParams.cc
+++ b/src/xpdf-4.04/xpdf/GlobalParams.cc
@@ -749,6 +749,7 @@ GlobalParams::GlobalParams(const char *cfgFileName) {
   defaultPrinter = NULL;
   mapNumericCharNames = gTrue;
   mapUnknownCharNames = gFalse;
+  readUnicodeCMap = gTrue;
   mapExtTrueTypeFontsViaUnicode = gTrue;
   useTrueTypeUnicodeMapping = gFalse;
   droppedFonts = new GHash(gTrue);
@@ -3268,6 +3269,16 @@ GBool GlobalParams::getMapUnknownCharNames() {
   return map;
 }
 
+GBool GlobalParams::getReadUnicodeCMap() {
+  GBool read;
+  // Returns the readUnicodeCMap flag, copied between the lock/unlock macros.
+  lockGlobalParams;
+  read = readUnicodeCMap;  // gTrue (the constructor default) lets Gfx8BitFont call readToUnicodeCMap()
+  unlockGlobalParams;
+
+  return read;
+}
+
 GBool GlobalParams::getMapExtTrueTypeFontsViaUnicode() {
   GBool map;
 
@@ -3793,6 +3804,12 @@ void GlobalParams::setMapUnknownCharNames(GBool map) {
   unlockGlobalParams;
 }
 
+void GlobalParams::setReadUnicodeCMap(GBool read) {  // gFalse makes Gfx8BitFont skip readToUnicodeCMap()
+  lockGlobalParams;
+  readUnicodeCMap = read;
+  unlockGlobalParams;
+}
+
 void GlobalParams::setMapExtTrueTypeFontsViaUnicode(GBool map) {
   lockGlobalParams;
   mapExtTrueTypeFontsViaUnicode = map;
diff --git a/src/xpdf-4.04/xpdf/GlobalParams.h b/src/xpdf-4.04/xpdf/GlobalParams.h
index b8299fc..fadac85 100644
--- a/src/xpdf-4.04/xpdf/GlobalParams.h
+++ b/src/xpdf-4.04/xpdf/GlobalParams.h
@@ -338,6 +338,7 @@ class GlobalParams {
   GString *getDefaultPrinter();
   GBool getMapNumericCharNames();
   GBool getMapUnknownCharNames();
+  GBool getReadUnicodeCMap();
   GBool getMapExtTrueTypeFontsViaUnicode();
   GBool getUseTrueTypeUnicodeMapping();
   GBool isDroppedFont(const char *fontName);
@@ -400,6 +401,7 @@ class GlobalParams {
   void setOverprintPreview(GBool preview);
   void setMapNumericCharNames(GBool map);
   void setMapUnknownCharNames(GBool map);
+  void setReadUnicodeCMap(GBool map);
   void setMapExtTrueTypeFontsViaUnicode(GBool map);
   void setTabStateFile(char *tabStateFileA);
   void setPrintCommands(GBool printCommandsA);
@@ -589,6 +591,7 @@ class GlobalParams {
 				//   from the viewer)
   GBool mapNumericCharNames;	// map numeric char names (from font subsets)?
   GBool mapUnknownCharNames;	// map unknown char names?
+  GBool readUnicodeCMap;	// Read the unicode c map?
   GBool mapExtTrueTypeFontsViaUnicode;  // map char codes to GID via Unicode
 				        //   for external TrueType fonts?
   GBool useTrueTypeUnicodeMapping;	// use the Unicode cmaps in TrueType
diff --git a/src/xpdf-4.04/xpdf/TextOutputDev.cc b/src/xpdf-4.04/xpdf/TextOutputDev.cc
index 0411db8..9a59da5 100644
--- a/src/xpdf-4.04/xpdf/TextOutputDev.cc
+++ b/src/xpdf-4.04/xpdf/TextOutputDev.cc
@@ -203,38 +203,6 @@ static inline double dmax(double x, double y) {
 // TextChar
 //------------------------------------------------------------------------
 
-class TextChar {
-public:
-
-  TextChar(Unicode cA, int charPosA, int charLenA,
-	   double xMinA, double yMinA, double xMaxA, double yMaxA,
-	   int rotA, GBool rotatedA, GBool clippedA, GBool invisibleA,
-	   TextFontInfo *fontA, double fontSizeA,
-	   double colorRA, double colorGA, double colorBA);
-
-  static int cmpX(const void *p1, const void *p2);
-  static int cmpY(const void *p1, const void *p2);
-  static int cmpCharPos(const void *p1, const void *p2);
-
-  Unicode c;
-  int charPos;
-  int charLen;
-  double xMin, yMin, xMax, yMax;
-  TextFontInfo *font;
-  double fontSize;
-  double colorR,
-         colorG,
-         colorB;
-
-  // group the byte-size fields to minimize object size
-  Guchar rot;
-  char rotated;
-  char clipped;
-  char invisible;
-  char spaceAfter;
-  char overlap;
-};
-
 TextChar::TextChar(Unicode cA, int charPosA, int charLenA,
 		   double xMinA, double yMinA, double xMaxA, double yMaxA,
 		   int rotA, GBool rotatedA, GBool clippedA, GBool invisibleA,
@@ -1559,13 +1527,13 @@ void TextPage::addChar(GfxState *state, double x, double y,
       } else {
 	j = i;
       }
-      chars->append(new TextChar(uBuf[j], charPos, nBytes,
-				 xMin, yMin, xMax, yMax,
-				 curRot, rotated, clipped,
-				 state->getRender() == 3 || alpha < 0.001,
-				 curFont, curFontSize,
-				 colToDbl(rgb.r), colToDbl(rgb.g),
-				 colToDbl(rgb.b)));
+      chars->append(textCharType(uBuf[j], charPos, nBytes,
+        xMin, yMin, xMax, yMax,
+        curRot, rotated, clipped,
+        state->getRender() == 3 || alpha < 0.001,
+        curFont, curFontSize,
+        colToDbl(rgb.r), colToDbl(rgb.g),
+        colToDbl(rgb.b)));
     }
   }
 
diff --git a/src/xpdf-4.04/xpdf/TextOutputDev.h b/src/xpdf-4.04/xpdf/TextOutputDev.h
index 302975c..5413b1c 100644
--- a/src/xpdf-4.04/xpdf/TextOutputDev.h
+++ b/src/xpdf-4.04/xpdf/TextOutputDev.h
@@ -137,6 +137,42 @@ class TextFontInfo {
   friend class TextWord;
 };
 
+//------------------------------------------------------------------------
+// TextChar
+//------------------------------------------------------------------------
+
+class TextChar {
+public:
+  // Declared in this header so TextPage::textCharType() and its overrides can construct it.
+  TextChar(Unicode cA, int charPosA, int charLenA,
+    double xMinA, double yMinA, double xMaxA, double yMaxA,
+    int rotA, GBool rotatedA, GBool clippedA, GBool invisibleA,
+    TextFontInfo *fontA, double fontSizeA,
+    double colorRA, double colorGA, double colorBA);
+  // Comparison callbacks with untyped-pointer signatures (presumably sort comparators; confirm in TextOutputDev.cc).
+  static int cmpX(const void *p1, const void *p2);
+  static int cmpY(const void *p1, const void *p2);
+  static int cmpCharPos(const void *p1, const void *p2);
+
+  Unicode c;
+  int charPos;
+  int charLen;
+  double xMin, yMin, xMax, yMax;
+  TextFontInfo *font;
+  double fontSize;
+  double colorR,
+         colorG,
+         colorB;
+
+  // group the byte-size fields to minimize object size
+  Guchar rot;
+  char rotated;
+  char clipped;
+  char invisible;
+  char spaceAfter;
+  char overlap;
+};
+
 //------------------------------------------------------------------------
 // TextWord
 //------------------------------------------------------------------------
@@ -235,7 +271,7 @@ class TextLine {
   double getEdge(int idx) { return edge[idx]; }
   GBool getHyphenated() { return hyphenated; }
 
-private:
+public:
 
   static int cmpX(const void *p1, const void *p2);
 
@@ -386,7 +422,7 @@ class TextPage {
 public:
 
   TextPage(TextOutputControl *controlA);
-  ~TextPage();
+  virtual ~TextPage();
 
   // Write contents of page to a stream.
   void write(void *outputStream, TextOutputFunc outputFunc);
@@ -492,20 +528,25 @@ class TextPage {
   void removeChars(double xMin, double yMin, double xMax, double yMax,
 		   double xOverlapThresh, double yOverlapThresh);
 
-private:
+public:
+  virtual TextChar *textCharType(Unicode cA, int charPosA, int charLenA,
+    double xMinA, double yMinA, double xMaxA, double yMaxA,
+    int rotA, GBool rotatedA, GBool clippedA, GBool invisibleA,
+    TextFontInfo *fontA, double fontSizeA,
+    double colorRA, double colorGA, double colorBA
+  ) {  // factory hook called by addChar(); the default forwards every argument unchanged
+    return new TextChar(cA, charPosA, charLenA, xMinA, yMinA, xMaxA, yMaxA,
+      rotA, rotatedA, clippedA, invisibleA, fontA, fontSizeA,
+      colorRA, colorGA, colorBA);  // caller receives a heap-allocated TextChar
+  }
+  virtual void encodeFragment(Unicode *text, int len, UnicodeMap *uMap,
+		      GBool primaryLR, GString *s);
+
+  virtual void computeLinePhysWidth(TextLine *line, UnicodeMap *uMap);
 
-  void startPage(GfxState *state);
-  void clear();
-  void updateFont(GfxState *state);
   void addChar(GfxState *state, double x, double y,
 	       double dx, double dy,
 	       CharCode c, int nBytes, Unicode *u, int uLen);
-  void incCharCount(int nChars);
-  void beginActualText(GfxState *state, Unicode *u, int uLen);
-  void endActualText(GfxState *state);
-  void addUnderline(double x0, double y0, double x1, double y1);
-  void addLink(double xMin, double yMin, double xMax, double yMax,
-	       Link *link);
 
   // output
   void writeReadingOrder(void *outputStream,
@@ -538,8 +579,18 @@ class TextPage {
 		UnicodeMap *uMap,
 		char *space, int spaceLen,
 		char *eol, int eolLen);
-  void encodeFragment(Unicode *text, int len, UnicodeMap *uMap,
-		      GBool primaryLR, GString *s);
+
+private:
+
+  void startPage(GfxState *state);
+  void clear();
+  void updateFont(GfxState *state);
+  void incCharCount(int nChars);
+  void beginActualText(GfxState *state, Unicode *u, int uLen);
+  void endActualText(GfxState *state);
+  void addUnderline(double x0, double y0, double x1, double y1);
+  void addLink(double xMin, double yMin, double xMax, double yMax,
+	       Link *link);
   GBool unicodeEffectiveTypeLOrNum(Unicode u, Unicode left, Unicode right);
   GBool unicodeEffectiveTypeR(Unicode u, Unicode left, Unicode right);
 
@@ -593,7 +644,6 @@ class TextPage {
   int getCharDirection(TextChar *ch, TextChar *left, TextChar *right);
   int assignPhysLayoutPositions(GList *columns);
   void assignLinePhysPositions(GList *columns);
-  void computeLinePhysWidth(TextLine *line, UnicodeMap *uMap);
   int assignColumnPhysPositions(GList *columns);
   void buildSuperLines(TextBlock *blk, GList *superLines);
   void assignSimpleLayoutPositions(GList *superLines, UnicodeMap *uMap);
@@ -784,6 +834,10 @@ class TextOutputDev: public OutputDev {
   // Turn extra processing for HTML conversion on or off.
   void enableHTMLExtras(GBool html) { control.html = html; }
 
+protected:
+  TextPage *text;		// text for the current page
+  TextOutputControl control;	// formatting parameters
+
 private:
 
   void generateBOM();
@@ -792,8 +846,6 @@ class TextOutputDev: public OutputDev {
   void *outputStream;		// output stream
   GBool needClose;		// need to close the output file?
 				//   (only if outputStream is a FILE*)
-  TextPage *text;		// text for the current page
-  TextOutputControl control;	// formatting parameters
   GBool ok;			// set up ok?
 };
 
diff --git a/src/xpydf/FontOutputDev.cc b/src/xpydf/FontOutputDev.cc
new file mode 100644
index 0000000..d78981c
--- /dev/null
+++ b/src/xpydf/FontOutputDev.cc
@@ -0,0 +1,163 @@
+#include <math.h>
+
+#include "Error.h"
+#include "GList.h"
+#include "GlobalParams.h"
+#include "UnicodeMap.h"
+#include "UnicodeRemapping.h"
+#include "UnicodeTypeTable.h"
+#include "GfxState.h"
+
+#include "FontOutputDev.h"
+
+
+// Ordering used for std::map keys: compares fontNameId, then fontTypeId, then
+// fontSize, moving on to the next field only when the earlier ones are equal.
+bool operator<(const FontSpec& l, const FontSpec& r) {
+  if (l.fontNameId != r.fontNameId) return l.fontNameId < r.fontNameId;
+  if (l.fontTypeId != r.fontTypeId) return l.fontTypeId < r.fontTypeId;
+  return l.fontSize < r.fontSize;
+}
+
+bool operator==(const FontSpec& l, const FontSpec& r) {  // equal only when all three fields match
+  return !(l.fontNameId != r.fontNameId || l.fontTypeId != r.fontTypeId || l.fontSize != r.fontSize);
+}
+
+bool operator!=(const FontSpec& l, const FontSpec& r) {  // true as soon as any field differs
+  return !(l.fontNameId == r.fontNameId && l.fontTypeId == r.fontTypeId && l.fontSize == r.fontSize);
+}
+
+// Caches the space/EOL bytes of the configured text encoding, registers the
+// reserved pseudo-font ids, and queues the byte values left for real fonts.
+TextPageFont::TextPageFont(TextOutputControl *controlA) : TextPage(controlA) {
+  // Define the delimiter members first so the early return cannot leave them unset.
+  spaceLen = eolLen = 0;
+  space[0] = eol[0] = '\0';
+  if (!(uMap = globalParams->getTextEncoding())) {
+    fprintf(stderr, "WARNING: Encoding not found\n");
+    return;
+  }
+  spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
+  switch (globalParams->getTextEOL()) {
+  case eolUnix:
+    eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
+    break;
+  case eolDOS:
+    eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
+    eolLen += uMap->mapUnicode(0x0a, eol + eolLen, (int)sizeof(eol) - eolLen);
+    break;
+  case eolMac:
+    eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
+    break;
+  }
+
+  if (eolLen != 1 || spaceLen != 1) {
+    fprintf(stderr, "WARNING: Unexpected delimiter lengths: [spacelen] = %d, [eollen] = %d\n", spaceLen, eolLen);
+  }
+
+  // Reserved entries: the first space and EOL bytes are used as their own ids.
+  fontNameIds["__space__"] = -1U;
+  fontTypeIds["__space__"] = -1U;
+  FontSpec dummy = {-1U, -1U, 0};
+  fontSpecIds[dummy] = space[0];
+  fontNameIds["__eol__"] = -2U;
+  fontTypeIds["__eol__"] = -2U;
+  dummy = FontSpec{-2U, -2U, 0};
+  fontSpecIds[dummy] = eol[0];
+  fontNameIds["__invalid__"] = -3U;
+  fontTypeIds["__invalid__"] = -3U;
+  dummy = FontSpec{-3U, -3U, 0};
+  fontSpecIds[dummy] = FONT_INVALID;
+  fontNameIds["__unknown__"] = -4U;
+  fontTypeIds["__unknown__"] = -4U;
+  dummy = FontSpec{-4U, -4U, 0};
+  fontSpecIds[dummy] = FONT_UNKNOWN;
+
+  // Every other value in 0..253 can be handed out to a real font later.
+  for (int i = 0; i <= 253; i++) {
+    if (i != space[0] && i != eol[0] && i != FONT_UNKNOWN) {
+      availableIds.push(i);
+    }
+  }
+}
+
+// Overrides TextPage::textCharType so that each TextChar stores a font id in
+// place of its Unicode value: the low 16 bits hold the id assigned to the
+// (name, type, size) triple, the bits above hold the encoded byte length.
+TextChar *TextPageFont::textCharType(Unicode cA, int charPosA, int charLenA,
+  double xMinA, double yMinA, double xMaxA, double yMaxA,
+  int rotA, GBool rotatedA, GBool clippedA, GBool invisibleA,
+  TextFontInfo *fontA, double fontSizeA,
+  double colorRA, double colorGA, double colorBA)
+{
+  GString *name = fontA->getFontName();
+  Unicode fontId = FONT_UNKNOWN;
+  char buf[8];
+  // uMap is NULL when the constructor found no text encoding: map nothing.
+  int n = uMap ? uMap->mapUnicode(cA, buf, sizeof(buf)) : 0;
+
+  if (n == 0) {
+    fontId = FONT_INVALID;
+  } else if (name) {
+    // Zero-filled and width-limited: a partial match can no longer leave the
+    // buffers uninitialized, and an over-long name can no longer overflow them.
+    char fontCode[1000] = "", fontName[1000] = "", fontType[1000] = "";
+
+    if (sscanf(name->getCString(), "%999[^+]+%999[^-]-%999s", fontCode, fontName, fontType) != EOF) {
+      // emplace keeps an existing entry and reads size() before inserting.
+      fontNameIds.emplace(fontName, fontNameIds.size());
+      fontTypeIds.emplace(fontType, fontTypeIds.size());
+      FontSpec spec = {fontNameIds[fontName], fontTypeIds[fontType], (unsigned int)fontSizeA};
+
+      std::map<FontSpec, unsigned int>::iterator it = fontSpecIds.find(spec);
+      if (it != fontSpecIds.end()) {
+        fontId = it->second;
+      } else if (availableIds.empty()) {
+        // Id space exhausted: keep FONT_UNKNOWN instead of reading an empty queue.
+        fprintf(stderr, "ERROR: Font id overflow\n");
+        error(errInternal, -1, "ERROR: Font id overflow");
+      } else {
+        fontId = fontSpecIds[spec] = availableIds.front();
+        availableIds.pop();
+      }
+    }
+  }
+
+  fontId = fontId ^ (n << 16);
+
+  return new TextChar(fontId, charPosA, charLenA, xMinA, yMinA, xMaxA, yMaxA,
+    rotA, rotatedA, clippedA, invisibleA, fontA, fontSizeA,
+    colorRA, colorGA, colorBA);
+}
+
+// Appends the low byte of each packed font id once per encoded byte (at least once).
+void TextPageFont::encodeFragment(Unicode *text, int len, UnicodeMap *uMap, GBool primaryLR, GString *s) {
+  for (int idx = 0; idx < len; ++idx) {
+    const Unicode packed = text[idx];
+    if (packed == FONT_INVALID) {
+      continue;  // nothing is emitted for FONT_INVALID entries
+    }
+    const char idByte = (char)packed;
+    int repeat = (int)(packed >> 16);
+    if (repeat == 0) repeat = 1;
+    while (repeat-- > 0) {
+      s->append(&idByte, 1);
+    }
+  }
+}
+
+// Sets line->pw: line->len when uMap->isUnicode(), else the summed packed byte counts.
+void TextPageFont::computeLinePhysWidth(TextLine *line, UnicodeMap *uMap) {
+  if (uMap->isUnicode()) {
+    line->pw = line->len;
+    return;
+  }
+  int width = 0;
+  for (int i = 0; i < line->len; ++i) {
+    const Unicode packed = line->text[i];
+    if (packed == FONT_INVALID) continue;
+    const int count = (int)(packed >> 16);
+    width += (count == 0) ? 1 : count;
+  }
+  line->pw = width;
+}
diff --git a/src/xpydf/FontOutputDev.h b/src/xpydf/FontOutputDev.h
new file mode 100644
index 0000000..6a728aa
--- /dev/null
+++ b/src/xpydf/FontOutputDev.h
@@ -0,0 +1,92 @@
+#ifndef FONT_OUTPUT_DEV_H
+#define FONT_OUTPUT_DEV_H
+
+#include <map>
+#include <queue>
+#include <string>
+
+#include "UnicodeMap.h"
+#include "TextOutputDev.h"
+
+#define FONT_INVALID 256
+#define FONT_UNKNOWN 255
+
+typedef struct FontSpec {  // numeric key for one (font name id, font type id, size) combination
+  unsigned int fontNameId, fontTypeId, fontSize;
+} FontSpec;
+
+typedef struct NamedFontSpec {  // FontSpec with the two ids resolved back to strings (built by getFontSpecs)
+  std::string fontName, fontType;
+  unsigned int fontSize;
+} NamedFontSpec;
+
+bool operator<(const FontSpec& l, const FontSpec& r);
+bool operator==(const FontSpec& l, const FontSpec& r);
+bool operator!=(const FontSpec& l, const FontSpec& r);
+
+// TextPage whose characters carry font ids; getFontSpecs() decodes those ids.
+class TextPageFont: public TextPage {
+public:
+  TextPageFont(TextOutputControl *controlA);
+  // Releases the encoding reference obtained from getTextEncoding() in the constructor.
+  ~TextPageFont() { if (uMap) uMap->decRefCnt(); }
+
+  // Returns font id -> (name, type, size), built by inverting the id tables.
+  std::map<unsigned int, NamedFontSpec> getFontSpecs() {
+    std::map<unsigned int, NamedFontSpec> result;
+    std::map<unsigned int, std::string> fontNames;
+    std::map<unsigned int, std::string> fontTypes;
+
+    // const references avoid copying a std::string-keyed pair per iteration.
+    for (const auto &pair : fontNameIds) {
+      fontNames[pair.second] = pair.first;
+    }
+    for (const auto &pair : fontTypeIds) {
+      fontTypes[pair.second] = pair.first;
+    }
+
+    for (const auto &pair : fontSpecIds) {
+      // List-initialization replaces the non-standard compound literal.
+      result[pair.second] = NamedFontSpec{
+        fontNames[pair.first.fontNameId],
+        fontTypes[pair.first.fontTypeId],
+        pair.first.fontSize
+      };
+    }
+
+    return result;
+  }
+
+protected:
+  TextChar *textCharType(Unicode cA, int charPosA, int charLenA,
+    double xMinA, double yMinA, double xMaxA, double yMaxA,
+    int rotA, GBool rotatedA, GBool clippedA, GBool invisibleA,
+    TextFontInfo *fontA, double fontSizeA,
+    double colorRA, double colorGA, double colorBA) override;
+  void encodeFragment(Unicode *text, int len, UnicodeMap *uMap,
+    GBool primaryLR, GString *s) override;
+  void computeLinePhysWidth(TextLine *line, UnicodeMap *uMap) override;
+private:
+  std::map<std::string, unsigned int> fontNameIds;
+  std::map<std::string, unsigned int> fontTypeIds;
+  std::map<FontSpec, unsigned int> fontSpecIds;
+  std::queue<unsigned int> availableIds;  // ids not yet handed to a font
+  char space[8], eol[16];
+  int spaceLen, eolLen;
+  UnicodeMap *uMap;  // set by the constructor; NULL when no text encoding was found
+};
+
+// TextOutputDev whose page object is a TextPageFont.
+class FontOutputDev: public TextOutputDev {
+public:
+  FontOutputDev(TextOutputFunc func, void *stream, TextOutputControl *controlA)
+    : TextOutputDev(func, stream, controlA) {
+    delete text;  // release whatever page object `text` holds after the base constructor
+    text = new TextPageFont(&control);
+  }
+  std::map<unsigned int, NamedFontSpec> getFontSpecs() {
+    return static_cast<TextPageFont *>(text)->getFontSpecs();
+  }
+};
+
+#endif
diff --git a/src/xpydf/PdfLoader.cc b/src/xpydf/PdfLoader.cc
index e3a248e..0fc766c 100644
--- a/src/xpydf/PdfLoader.cc
+++ b/src/xpydf/PdfLoader.cc
@@ -1,4 +1,6 @@
 #include <aconf.h>
+#include <map>
+#include <set>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
@@ -6,6 +8,8 @@
 #include <sstream>
 #include <vector>
 
+#include <math.h>
+
 #include "gmem.h"
 #include "gmempp.h"
 #include "parseargs.h"
@@ -28,13 +32,39 @@
 #include "config.h"
 #include "SplashOutputDev.h"
 #include "SplashBitmap.h"
+#include "Annot.h"
+#include "AcroForm.h"
 
 #include "PdfLoader.h"
+#include "FontOutputDev.h"
 #include "ImageDataDev.h"
 #include "ImageInfoDev.h"
 
+#define PAGES 1062
+#define PAGEE 1062
+
 
 static void outputToStringStream(void *stream, const char *text, int len) {
+  // fprintf(stderr, "Adding %d characters\n", len);
+  // if (len > 1)
+  //   fprintf(stderr, "(%.3d) %s\n", len, text);
+  ((std::stringstream *)stream)->write(text, len);
+}
+
+static void outputToStringStream2(void *stream, const char *text, int len) {  // callback passed to FontOutputDev; NOTE(review): live body is identical to outputToStringStream
+  // fprintf(stderr, "Adding %d characters\n", len);
+  // if (len > 1) {
+  //   fprintf(stderr, "(%.3d) ", len);
+  //   for (int i = 0; i < len; i++) {
+  //     if (text[i] == ' ') {
+  //       fprintf(stderr, " ");
+  //     } else {
+  //       fprintf(stderr, "x");
+  //     }
+  //   }
+
+  //   fprintf(stderr, "\n");
+  // }
   ((std::stringstream *)stream)->write(text, len);
 }
 
@@ -48,6 +78,7 @@ PdfLoader::PdfLoader(LoaderConfig config, char *fileName, char *ownerPw, char *u
   globalParams->setErrQuiet(config.quiet);
   globalParams->setMapNumericCharNames(config.mapNumericCharNames);
   globalParams->setMapUnknownCharNames(config.mapUnknownCharNames);
+  globalParams->setReadUnicodeCMap(config.readUnicodeCMap);
   globalParams->setupBaseFonts(NULL);
 
   switch (config.mode) {
@@ -111,6 +142,8 @@ std::vector<std::string> PdfLoader::extractText() {
     goto err;
   }
 
+  firstPage = PAGES;
+  lastPage = PAGEE;
   firstPage = 1;
   lastPage = doc->getNumPages();
   
@@ -118,6 +151,7 @@ std::vector<std::string> PdfLoader::extractText() {
 
   if (textOut->isOk()) {
     for (int page = firstPage; page <= lastPage; page++) {
+      // fprintf(stderr, "Processing page %d\n", page);
       stream->str("");
       doc->displayPages(textOut, page, page, 72, 72, 0, gFalse, gTrue, gFalse);
       pages.push_back(stream->str());
@@ -134,6 +168,44 @@ std::vector<std::string> PdfLoader::extractText() {
   return pages;
 }
 
+// Renders every page through a FontOutputDev and returns one output string per
+// page; fontSpecs is overwritten with the id -> font description table it built.
+std::vector<std::string> PdfLoader::extractFontMap(std::map<unsigned int, NamedFontSpec> &fontSpecs) {
+  FontOutputDev *fontOut;
+  std::stringstream *stream = new std::stringstream();
+  std::vector<std::string> pages;
+  int firstPage, lastPage;
+
+  if (!doc->isOk()) {
+    goto err;
+  }
+
+  // Whole document; the PAGES/PAGEE debug range was a dead store and is dropped.
+  firstPage = 1;
+  lastPage = doc->getNumPages();
+
+  fontOut = new FontOutputDev(&outputToStringStream2, stream, &textOutControl);
+
+  if (fontOut->isOk()) {
+    for (int page = firstPage; page <= lastPage; page++) {
+      stream->str("");  // start each page with an empty buffer
+      doc->displayPages(fontOut, page, page, 72, 72, 0, gFalse, gTrue, gFalse);
+      pages.push_back(stream->str());
+    }
+  }
+
+  fontSpecs = fontOut->getFontSpecs();
+  delete fontOut;
+err:
+  // Also reached directly when the document is not ok (fontOut never created).
+  delete stream;
+
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return pages;
+}
+
 std::vector<PageImageInfo> PdfLoader::extractPageInfo() {
   ImageInfoDev *imageOut;
   int firstPage, lastPage;
@@ -170,7 +242,7 @@ std::vector<PageImageInfo> PdfLoader::extractPageInfo() {
   }
 
   delete imageOut;
- err:
+err:
 
   Object::memCheck(stderr);
   gMemReport(stderr);
@@ -178,6 +250,266 @@ std::vector<PageImageInfo> PdfLoader::extractPageInfo() {
   return pagesInfo;
 }
 
+static Ref *fonts;
+static int fontsLen;
+static int fontsSize;
+
+static char *seenObjs;
+static int numObjects;
+
+// Resolves obj through checkFontObject() and, if that yields a dictionary,
+// scans it as a resource dictionary; the resolved copy is always freed.
+void PdfLoader::scanFonts(Object *obj) {
+  Object resolved;
+  const GBool usable = checkFontObject(obj, &resolved);
+  if (usable && resolved.isDict()) scanFonts(resolved.getDict());
+  resolved.free();
+}
+
+void PdfLoader::scanFonts(Dict *resDict) {
+  Object fontDict1, fontDict2, xObjDict1, xObjDict2, xObj1, xObj2;
+  Object patternDict1, patternDict2, pattern1, pattern2;
+  Object gsDict1, gsDict2, gs1, gs2, smask1, smask2, smaskGroup1, smaskGroup2;
+  Object resObj;
+  Ref r;
+  GfxFontDict *gfxFontDict;
+  GfxFont *font;
+  int i;
+  // Collects fonts from resDict, then recurses into the Resources of its XObjects, Patterns and ExtGState soft masks.
+  // scan the fonts in this resource dictionary
+  gfxFontDict = NULL;
+  resDict->lookupNF("Font", &fontDict1);
+  if (checkFontObject(&fontDict1, &fontDict2) && fontDict2.isDict()) {
+    if (fontDict1.isRef()) {  // hand the dictionary's own reference to GfxFontDict when it has one
+      r = fontDict1.getRef();
+      gfxFontDict = new GfxFontDict(doc->getXRef(), &r, fontDict2.getDict());
+    } else {
+      gfxFontDict = new GfxFontDict(doc->getXRef(), NULL, fontDict2.getDict());
+    }
+    if (gfxFontDict) {
+      for (i = 0; i < gfxFontDict->getNumFonts(); ++i) {
+        if ((font = gfxFontDict->getFont(i))) {  // entries for which getFont() returns NULL are skipped
+          scanFont(font);
+        }
+      }
+      delete gfxFontDict;
+    }
+  }
+
+  fontDict2.free();
+  fontDict1.free();
+
+  // recursively scan any resource dictionaries in XObjects in this
+  // resource dictionary
+  resDict->lookupNF("XObject", &xObjDict1);
+  if (checkFontObject(&xObjDict1, &xObjDict2) && xObjDict2.isDict()) {
+    for (i = 0; i < xObjDict2.dictGetLength(); ++i) {
+      xObjDict2.dictGetValNF(i, &xObj1);
+      if (checkFontObject(&xObj1, &xObj2) && xObj2.isStream()) {  // only stream XObjects are inspected
+        xObj2.streamGetDict()->lookupNF("Resources", &resObj);
+        scanFonts(&resObj);
+        resObj.free();
+      }
+      xObj2.free();
+      xObj1.free();
+    }
+  }
+  xObjDict2.free();
+  xObjDict1.free();
+
+  // recursively scan any resource dictionaries in Patterns in this
+  // resource dictionary
+  resDict->lookupNF("Pattern", &patternDict1);
+  if (checkFontObject(&patternDict1, &patternDict2) && patternDict2.isDict()) {
+    for (i = 0; i < patternDict2.dictGetLength(); ++i) {
+      patternDict2.dictGetValNF(i, &pattern1);
+      if (checkFontObject(&pattern1, &pattern2) && pattern2.isStream()) {  // only stream patterns are inspected
+        pattern2.streamGetDict()->lookupNF("Resources", &resObj);
+        scanFonts(&resObj);
+        resObj.free();
+      }
+      pattern2.free();
+      pattern1.free();
+    }
+  }
+  patternDict2.free();
+  patternDict1.free();
+
+  // recursively scan any resource dictionaries in ExtGStates in this
+  // resource dictionary
+  resDict->lookupNF("ExtGState", &gsDict1);
+  if (checkFontObject(&gsDict1, &gsDict2) && gsDict2.isDict()) {
+    for (i = 0; i < gsDict2.dictGetLength(); ++i) {
+      gsDict2.dictGetValNF(i, &gs1);
+      if (checkFontObject(&gs1, &gs2) && gs2.isDict()) {
+        gs2.dictLookupNF("SMask", &smask1);
+        if (checkFontObject(&smask1, &smask2) && smask2.isDict()) {
+          smask2.dictLookupNF("G", &smaskGroup1);  // the soft mask's "G" entry must be a stream to be scanned
+          if (checkFontObject(&smaskGroup1, &smaskGroup2) &&
+              smaskGroup2.isStream()) {
+            smaskGroup2.streamGetDict()->lookupNF("Resources", &resObj);
+            scanFonts(&resObj);
+            resObj.free();
+          }
+          smaskGroup2.free();
+          smaskGroup1.free();
+        }
+        smask2.free();
+        smask1.free();
+      }
+      gs2.free();
+      gs1.free();
+    }
+  }
+  gsDict2.free();
+  gsDict1.free();
+}
+
+std::map<std::string, std::set<std::string>> fontDict;
+
+// Records one font: skips ids already in `fonts`, files the parsed name/type
+// under the file-level fontDict, then appends the id to the seen list.
+void PdfLoader::scanFont(GfxFont *font) {
+  Ref fontRef;
+  GString *name;
+  int i;
+
+  fontRef = *font->getID();
+
+  // check for an already-seen font
+  for (i = 0; i < fontsLen; ++i) {
+    if (fontRef.num == fonts[i].num && fontRef.gen == fonts[i].gen) {
+      return;
+    }
+  }
+
+  // font name
+  name = font->getName();
+
+  // split "<code>+<name>-<type>" and remember the type under the name
+  if (name) {
+    // Zero-filled and width-limited: a partial match can no longer leave the
+    // buffers uninitialized, and an over-long name can no longer overflow them.
+    char fontCode[1000] = "", fontName[1000] = "", fontType[1000] = "";
+
+    if (sscanf(name->getCString(), "%999[^+]+%999[^-]-%999s", fontCode, fontName, fontType) != EOF) {
+      // operator[] creates the empty set on first use
+      fontDict[fontName].insert(fontType);
+    }
+  }
+
+  // add this font to the list
+  if (fontsLen == fontsSize) {
+    if (fontsSize <= INT_MAX - 32) {
+      fontsSize += 32;
+    } else {
+      // let greallocn throw an exception
+      fontsSize = -1;
+    }
+    fonts = (Ref *)greallocn(fonts, fontsSize, sizeof(Ref));
+  }
+  fonts[fontsLen++] = fontRef;
+}
+
+// Resolves `in` into `out`; gFalse only for an in-range reference seen before.
+GBool PdfLoader::checkFontObject(Object *in, Object *out) {
+  if (!in->isRef()) {
+    in->copy(out);  // direct objects are copied as they are
+    return gTrue;
+  }
+
+  const int objNum = in->getRefNum();
+  const bool tracked = objNum >= 0 && objNum < numObjects;
+  if (tracked && !seenObjs[objNum]) {
+    seenObjs[objNum] = (char)1;  // first visit: remember it, then load it
+    in->fetch(doc->getXRef(), out);
+    return gTrue;
+  }
+
+  // Either out of range (reported as usable) or already visited (not usable);
+  // in both cases the caller receives a null object.
+  out->initNull();
+  return tracked ? gFalse : gTrue;
+}
+
+// Scans page resources, annotation appearances and form-field resources for
+// fonts and prints each parsed font name with its types to stderr.
+// NOTE(review): fontInfo is returned empty; nothing in this function fills it.
+std::vector<std::string> PdfLoader::extractFonts() {
+  int firstPage, lastPage;
+  std::vector<std::string> fontInfo;
+
+  Dict *resDict;
+  Annots *annots;
+  AcroForm *form;
+  Object obj1, obj2;
+
+  if (!doc->isOk()) {
+    goto err;
+  }
+
+  firstPage = 1;
+  lastPage = doc->getNumPages();
+  // Start from a clean slate so a repeated call does not report stale entries.
+  fontDict.clear();
+  fonts = NULL;
+  fontsLen = fontsSize = 0;
+  numObjects = doc->getXRef()->getNumObjects();
+  seenObjs = (char *)gmalloc(numObjects);
+  memset(seenObjs, 0, numObjects);
+
+  for (int page = firstPage; page <= lastPage; page++) {
+    Page *pdfPage = doc->getCatalog()->getPage(page);
+    if ((resDict = pdfPage->getResourceDict())) {
+      scanFonts(resDict);
+    }
+
+    annots = new Annots(doc, pdfPage->getAnnots(&obj1));
+    obj1.free();
+    for (int i = 0; i < annots->getNumAnnots(); i++) {
+      if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+        obj1.streamGetDict()->lookupNF("Resources", &obj2);
+        scanFonts(&obj2);
+        obj2.free();
+      }
+      obj1.free();
+    }
+    delete annots;
+  }
+  if ((form = doc->getCatalog()->getForm())) {
+    for (int i = 0; i < form->getNumFields(); ++i) {
+      form->getField(i)->getResources(&obj1);
+      if (obj1.isArray()) {
+        for (int j = 0; j < obj1.arrayGetLength(); ++j) {
+          obj1.arrayGetNF(j, &obj2);
+          scanFonts(&obj2);
+          obj2.free();
+        }
+      } else if (obj1.isDict()) {
+        scanFonts(obj1.getDict());
+      }
+      obj1.free();
+    }
+  }
+
+  for (const auto &pair : fontDict) {
+    fprintf(stderr, "%s has types:\n", pair.first.c_str());
+    for (const auto &ft : pair.second) {
+      fprintf(stderr, " - %s\n", ft.c_str());
+    }
+  }
+  // fontsLen counts the fonts recorded; fontsSize is only the array capacity.
+  fprintf(stderr, "Found %d fonts\n", fontsLen);
+  gfree(fonts);
+  gfree(seenObjs);
+
+err:
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return fontInfo;
+}
+
 std::vector<Image> PdfLoader::extractImages(int pageNum) {
   ImageDataDev *imageOut;
   std::vector<Image> images;
@@ -237,7 +569,7 @@ Image PdfLoader::pageToImage(int pageNum, int dpi) {
   memcpy(pageImage.data, bitmap->getDataPtr(), pageImage.size);
 
   delete splashOut;
- err:
+err:
 
   Object::memCheck(stderr);
   gMemReport(stderr);
@@ -255,3 +587,35 @@ bool PdfLoader::isOk() {
 int PdfLoader::getErrorCode() {
   return (int)doc->getErrorCode();
 }
+
+#include <iostream>
+
+using namespace std;
+
+// Ad-hoc debugging driver: loads a hard-coded file and reports pages whose
+// text and font-map outputs differ in length.
+int main() {
+  LoaderConfig config;
+  map<unsigned int, NamedFontSpec> fontSpecs;
+  char fileName[] = "skf.pdf";  // PdfLoader takes a non-const char *
+
+  PdfLoader *l = new PdfLoader(config, fileName);
+  vector<string> pageText = l->extractText();
+  vector<string> fontMap = l->extractFontMap(fontSpecs);
+  delete l;
+
+  // Compare only pages present in both results so neither vector is overrun.
+  const size_t numPages = pageText.size() < fontMap.size() ? pageText.size() : fontMap.size();
+  size_t diff = 0;
+  for (size_t i = 0; i < numPages; i++) {
+    const size_t textLen = pageText[i].length();
+    const size_t fontLen = fontMap[i].length();
+    if (textLen != fontLen) {
+      diff += textLen > fontLen ? textLen - fontLen : fontLen - textLen;
+      fprintf(stderr, "Page %zu mismatch: %zu text, %zu font\n", i, textLen, fontLen);
+    }
+  }
+
+  fprintf(stderr, "Total diff %zu\n", diff);
+  return 0;
+}
diff --git a/src/xpydf/PdfLoader.h b/src/xpydf/PdfLoader.h
index ac583e6..e898dd3 100644
--- a/src/xpydf/PdfLoader.h
+++ b/src/xpydf/PdfLoader.h
@@ -9,6 +9,7 @@
 #include "PDFDoc.h"
 #include "TextOutputDev.h"
 
+#include "FontOutputDev.h"
 #include "ImageDataDev.h"
 #include "ImageInfoDev.h"
 
@@ -22,6 +23,7 @@ typedef struct LoaderConfig {
   GBool quiet = gTrue;
   GBool mapNumericCharNames = gFalse;
   GBool mapUnknownCharNames = gTrue;
+  GBool readUnicodeCMap = gTrue;
   unsigned int mode = 0;
 } LoaderConfig;
 
@@ -36,7 +38,9 @@ class PdfLoader {
     PdfLoader(LoaderConfig config, char *fileName, char *ownerPw = NULL, char *userPw = NULL);
     ~PdfLoader();
     std::vector<std::string> extractText();
+    std::vector<std::string> extractFontMap(std::map<unsigned int, NamedFontSpec> &fontSpecs);
     std::vector<PageImageInfo> extractPageInfo();
+    std::vector<std::string> extractFonts();
     std::vector<Image> extractImages(int pageNum);
     Image pageToImage(int pageNum, int dpi);
     bool isOk();
@@ -45,6 +49,11 @@ class PdfLoader {
   TextOutputControl textOutControl;
   PDFDoc *doc;
   GString *textFileName;
+  
+  GBool checkFontObject(Object *in, Object *out);
+  void scanFont(GfxFont *font);
+  void scanFonts(Object *obj);
+  void scanFonts(Dict *resDict);
 };
 
 #endif
diff --git a/src/xpydf/PdfLoaderWrapper.cc b/src/xpydf/PdfLoaderWrapper.cc
index c1f3bf2..06b8ceb 100644
--- a/src/xpydf/PdfLoaderWrapper.cc
+++ b/src/xpydf/PdfLoaderWrapper.cc
@@ -23,15 +23,16 @@ PyObject *construct(PyObject *self, PyObject *args) {
     char *ownerPw = NULL;
     char *userPw = NULL;
 
-    PyArg_ParseTuple(args, "Opppppppbzz", &pobj0,
+    PyArg_ParseTuple(args, "OppppppppIzz", &pobj0,
         &(config.clipText),
         &(config.discardDiag),
         &(config.discardRotatedText),
         &(config.verbose),
         &(config.quiet),
-        &(config.mode),
         &(config.mapNumericCharNames),
         &(config.mapUnknownCharNames),
+        &(config.readUnicodeCMap),
+        &(config.mode),
         &ownerPw,
         &userPw
     );
@@ -112,6 +113,22 @@ PyObject *extractText(PyObject *self, PyObject *args) {
     return Py_BuildValue("O", converted);
 }
 
+PyObject *extractFontMap(PyObject *self, PyObject *args) {
+    // Returns a (font map list, font spec dict) tuple for the loader stored in the capsule.
+    PyObject *loaderCapsule;
+    if (!PyArg_ParseTuple(args, "O", &loaderCapsule)) return NULL;
+
+    PdfLoader *loader = (PdfLoader *)PyCapsule_GetPointer(loaderCapsule, "loaderPtr");
+    if (!loader) return NULL;  // PyCapsule_GetPointer has already set the Python exception
+
+    map<unsigned int, NamedFontSpec> fontSpecs;
+    vector<string> result = loader->extractFontMap(fontSpecs);
+    PyObject *fontMap = vectorStringToList(result);
+    PyObject *fontDict = mapFontSpecsToDict(fontSpecs);
+    // "N" hands our references to the tuple; "O" would add extra ones and leak both objects.
+    return Py_BuildValue("NN", fontMap, fontDict);
+}
+
 PyObject *extractPageInfo(PyObject *self, PyObject *args) {
     vector<string> res;
     
@@ -125,6 +142,19 @@ PyObject *extractPageInfo(PyObject *self, PyObject *args) {
     return Py_BuildValue("O", converted);
 }
 
+PyObject *extractFonts(PyObject *self, PyObject *args) {
+    // Returns the loader's extractFonts() strings converted by vectorStringToList.
+    PyObject *loaderCapsule;
+    if (!PyArg_ParseTuple(args, "O", &loaderCapsule)) return NULL;
+
+    PdfLoader *loader = (PdfLoader *)PyCapsule_GetPointer(loaderCapsule, "loaderPtr");
+    if (!loader) return NULL;  // PyCapsule_GetPointer has already set the Python exception
+
+    vector<string> result = loader->extractFonts();
+    // Return the converted object directly; wrapping it in Py_BuildValue("O") added a leaked reference.
+    return vectorStringToList(result);
+}
+
 PyObject *extractImages(PyObject *self, PyObject *args) {
     vector<string> res;
     
@@ -210,10 +240,18 @@ PyMethodDef cXpdfPythonFunctions[] = {
       extractText, METH_VARARGS,
      "Extract text as bytes"},
     
+    {"extractFontMap",
+      extractFontMap, METH_VARARGS,
+     "Extract font map as bytes"},
+    
     {"extractPageInfo",
       extractPageInfo, METH_VARARGS,
      "Extract image metadata"},
     
+    {"extractFonts",
+      extractFonts, METH_VARARGS,
+     "Extract font metadata"},
+    
     {"extractImages",
       extractImages, METH_VARARGS,
      "Extract images"},
diff --git a/src/xpydf/PyCppConversion.cc b/src/xpydf/PyCppConversion.cc
index 540d6b4..3e824a7 100644
--- a/src/xpydf/PyCppConversion.cc
+++ b/src/xpydf/PyCppConversion.cc
@@ -64,3 +64,25 @@ PyObject *vectorImagesToList(const std::vector<ImageInfo> &data) {
 
     return listObj;
 }
+
+PyObject *mapFontSpecsToDict(const std::map<unsigned int, NamedFontSpec> &data) {
+    // Builds {font id: {"name", "type", "size"}}; throws logic_error if any Python call fails.
+    PyObject *dict = PyDict_New();
+    if (!dict) throw logic_error("Unable to allocate memory for Python dict");
+
+    for (const auto &pair : data) {
+        PyObject *item = PyDict_New();
+        PyObject *id = PyLong_FromUnsignedLong(pair.first);
+        PyObject *name = PyUnicode_FromString(pair.second.fontName.c_str());
+        PyObject *type = PyUnicode_FromString(pair.second.fontType.c_str());
+        PyObject *size = PyLong_FromLong(pair.second.fontSize);
+        bool ok = item && id && name && type && size && PyDict_SetItemString(item, "name", name) == 0 &&
+                  PyDict_SetItemString(item, "type", type) == 0 &&
+                  PyDict_SetItemString(item, "size", size) == 0 && PyDict_SetItem(dict, id, item) == 0;
+        // The dict setters take their own references, so ours are released on success and on failure.
+        Py_XDECREF(item); Py_XDECREF(id); Py_XDECREF(name); Py_XDECREF(type); Py_XDECREF(size);
+        if (!ok) { Py_DECREF(dict); throw logic_error("Unable to convert font spec to Python dict"); }
+    }
+
+    return dict;
+}
diff --git a/src/xpydf/PyCppConversion.h b/src/xpydf/PyCppConversion.h
index 169523c..6e6f48c 100644
--- a/src/xpydf/PyCppConversion.h
+++ b/src/xpydf/PyCppConversion.h
@@ -1,6 +1,7 @@
 #ifndef PY_CPP_CONVERSION_H
 #define PY_CPP_CONVERSION_H
 
+#include <map>
 #include <string>
 #include <vector>
 
@@ -16,4 +17,6 @@ PyObject *vectorPagesToList(const std::vector<PageImageInfo> &data);
 
 PyObject *vectorImagesToList(const std::vector<ImageInfo> &data);
 
+PyObject *mapFontSpecsToDict(const std::map<unsigned int, NamedFontSpec> &data);
+
 #endif
\ No newline at end of file
diff --git a/src/xpydf/cXpdfPython.pyi b/src/xpydf/cXpdfPython.pyi
index a2766d7..0eccc99 100644
--- a/src/xpydf/cXpdfPython.pyi
+++ b/src/xpydf/cXpdfPython.pyi
@@ -1,11 +1,16 @@
 
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional, Tuple, TypedDict
 import numpy.typing as npt
 
 from xpydf.pdf_loader import PageInfo
 
 class XpdfPythonCapsule: ...
 
+class Font(TypedDict):  # one entry of the font dict returned by extractFontMap
+    name: str
+    type: str
+    size: int  # stored with PyLong_FromLong in mapFontSpecsToDict, so an int, not a str
+
 def construct(
     filename: str,
     cliptext: bool,
@@ -13,14 +18,17 @@ def construct(
     discard_rotated_text: bool,
     verbose: bool,
     quiet: bool,
-    mode: int,
     mapNumericCharNames: bool = False,
     mapUnknownCharNames: bool = True,
+    readUnicodeCMap: bool = True,
+    mode: int = 0,
     ownerPw: Optional[str] = None,
     userPw: Optional[str] = None,
 ) -> XpdfPythonCapsule: ...
 def extractText(capsule: XpdfPythonCapsule) -> List[bytes]: ...
+def extractFontMap(capsule: XpdfPythonCapsule) -> Tuple[List[bytes], Dict[int, Font]]: ...
 def extractPageInfo(capsule: XpdfPythonCapsule) -> List[PageInfo]: ...
+def extractFonts(capsule: XpdfPythonCapsule) -> List[str]: ...
 def extractImages(capsule: XpdfPythonCapsule, page_number: int) -> List[npt.NDArray[Any]]: ...
 def pageToImage(capsule: XpdfPythonCapsule, page_number: int, dpi: int) -> npt.NDArray[Any]: ...
 def deleteObject(capsule: XpdfPythonCapsule) -> None: ...
diff --git a/src/xpydf/pdf_loader.py b/src/xpydf/pdf_loader.py
index 76c9748..f6a8a2e 100644
--- a/src/xpydf/pdf_loader.py
+++ b/src/xpydf/pdf_loader.py
@@ -1,8 +1,12 @@
-from typing import Any, List, Optional, TypedDict
+from typing import Any, Dict, List, Optional, Tuple, TypedDict
 
 import cXpdfPython
 import numpy.typing as npt
 
+class Font(TypedDict):  # one entry of the font dict returned by extract_font_map
+    name: str
+    type: str
+    size: int  # stored with PyLong_FromLong in mapFontSpecsToDict, so an int, not a str
 
 class ImageInfo(TypedDict):
     """Container for image metadata
@@ -60,9 +64,10 @@ def __init__(
         discard_rotated_text: bool = True,
         verbose: bool = False,
         quiet: bool = True,
-        mode: str = "table",
         map_numeric_char_names: bool = False,
         map_unknown_char_names: bool = True,
+        read_unicode_cmap: bool = True,
+        mode: str = "table",
         owner_password: Optional[str] = None,
         user_password: Optional[str] = None,
     ):
@@ -105,7 +110,18 @@ def __init__(
 
         self.filename = filename
         self.capsule = cXpdfPython.construct(
-            filename, cliptext, discard_diag, discard_rotated_text, verbose, quiet, xpdf_mode, map_numeric_char_names, map_unknown_char_names, owner_password, user_password
+            filename,
+            cliptext,
+            discard_diag,
+            discard_rotated_text,
+            verbose,
+            quiet,
+            map_numeric_char_names,
+            map_unknown_char_names,
+            read_unicode_cmap,
+            xpdf_mode,
+            owner_password,
+            user_password
         )
 
     def extract_bytes(self) -> List[bytes]:
@@ -121,6 +137,12 @@ def extract_bytes(self) -> List[bytes]:
             pages = cXpdfPython.extractText(self.capsule)
 
         return pages
+    
+    def extract_font_map(self) -> Tuple[List[bytes], Dict[int, Font]]:
+        """Return the font-map pages and font specs; both are empty when no capsule is loaded."""
+        if self.capsule is None:
+            return [], {}
+        return cXpdfPython.extractFontMap(self.capsule)
 
     def extract_strings(self) -> List[str]:
         """Extract and decode text from the pdf
@@ -147,6 +169,20 @@ def extract_page_info(self) -> List[PageInfo]:
 
         return images
 
+    def extract_fonts(self) -> List[str]:
+        """Return font related metadata from the pdf
+
+        Returns
+        -------
+        List[str]
+            The strings produced by cXpdfPython.extractFonts; empty when no capsule is loaded
+        """
+        fonts: List[str] = []
+        if self.capsule is not None:
+            fonts = cXpdfPython.extractFonts(self.capsule)
+
+        return fonts
+
     def extract_images(self, page_number: int) -> List[npt.NDArray[Any]]:
         """Extract raw image data from a page, as a numpy array.
 
diff --git a/src/xpydf/pdf_loader.pyi b/src/xpydf/pdf_loader.pyi
index ec3c7c4..c594491 100644
--- a/src/xpydf/pdf_loader.pyi
+++ b/src/xpydf/pdf_loader.pyi
@@ -1,4 +1,4 @@
-from typing import Any, List, Optional, TypedDict
+from typing import Any, Dict, List, Optional, Tuple, TypedDict
 
 import numpy.typing as npt
 
@@ -14,6 +14,11 @@ class PageInfo(TypedDict):
     height: float
     images: List[ImageInfo]
 
+class Font(TypedDict):  # one entry of the font dict returned by extract_font_map
+    name: str
+    type: str
+    size: int  # stored with PyLong_FromLong in mapFontSpecsToDict, so an int, not a str
+
 class PdfLoader:
     filename: str
     capsule: Optional[XpdfPythonCapsule] = None
@@ -28,15 +33,18 @@ class PdfLoader:
         insert_bom: bool = False,
         verbose: bool = False,
         quiet: bool = True,
-        mode: str = "table",
         map_numeric_char_names: bool = False,
         map_unknown_char_names: bool = True,
+        read_unicode_cmap: bool = True,
+        mode: str = "table",
         owner_password: Optional[str] = None,
         user_password: Optional[str] = None,
     ) -> None: ...
     def extract_bytes(self) -> List[bytes]: ...
+    def extract_font_map(self) -> Tuple[List[bytes], Dict[int, Font]]: ...
     def extract_strings(self) -> List[str]: ...
     def extract_page_info(self) -> List[PageInfo]: ...
+    def extract_fonts(self) -> List[str]: ...
     def extract_images(self, page_number: int) -> List[npt.NDArray[Any]]: ...
     def page_to_image(self, page_number: int, dpi: int = 150) -> npt.NDArray[Any]: ...
     def __del__(self) -> None: ...