ngx-scanner-text

This library provides optical character recognition (OCR) for images supplied as URLs.
Its core is based on Tesseract, which supports more than 100 languages.
Demos are available on CodeSandbox and GitHub.

Installation ⚙️

npm install ngx-scanner-text@<version> --save

Usage 💉

import { NgxScannerTextModule } from "ngx-scanner-text";

/**
 * Example application module. It lists NgxScannerTextModule in `imports`,
 * which is the only wiring this snippet shows for using the library.
 */
@NgModule({
    imports: [NgxScannerTextModule],
})
export class AppModule {}

AppComponent 🛠️

<ngx-scanner-text #scanner="scanner" [configs]="configs" (event)="onData($event)"></ngx-scanner-text>
<button (click)="onScanOCR(scanner)">Scan</button>

<textarea>{{ scanner?.logger$ | async | json }}</textarea>
<textarea>{{ text }}</textarea>

import { ChangeDetectorRef, Component } from "@angular/core";
import { NgxScannerTextComponent, Configs, Page } from "ngx-scanner-text";

/**
 * Example host component: holds the scanner configuration, stores the text of
 * the latest result, and exposes a handler that starts a scan on demand.
 */
@Component({
  selector: "app-root",
  templateUrl: "./app.component.html",
  styleUrls: ["./app.component.scss"]
})
export class AppComponent {
  /**
   * Text of the most recent result delivered to `onData`.
   * Initialized to an empty string so the class compiles under
   * `strictPropertyInitialization`.
   */
  public text = '';

  /** Configuration handed to the scanner, both via binding and via `scanOCR()`. */
  public configs: Configs = {
    src: 'https://raw.githubusercontent.com/id1945/ngx-scanner-text/master/ngx-scanner-text-origin.png',
    languages: ['eng'],
    color: 'red',
    isAuto: true,
    isImage: false,
    options: {
      rectangle: {
        left: 70,
        top: 100,
        width: 700,
        height: 200
      }
    }
  };

  constructor(private cdf: ChangeDetectorRef) {}

  /**
   * Stores the text of a result page and triggers change detection explicitly.
   * @param data Result page whose `text` is copied into `this.text`.
   */
  onData(data: Page): void {
    this.text = data.text;
    // Explicit detectChanges() call kept from the original example.
    this.cdf.detectChanges();
  }

  /**
   * Starts a scan with the current `configs` and logs whatever the returned
   * stream emits to the console.
   * @param scanner Scanner component instance (the `#scanner` template reference).
   */
  onScanOCR(scanner: NgxScannerTextComponent): void {
    scanner.scanOCR(this.configs).subscribe(console.log);
  }
}

Models 📡

/**
 * Settings accepted by the scanner component.
 * NOTE(review): the exact effect of `color`, `isAuto` and `isImage` is not
 * visible in this document; the descriptions below are limited to what the
 * usage example shows.
 */
export interface Configs {
  /** Image location; the usage example passes an image URL. */
  src: string;
  /** A color name such as `'red'` (presumably used for highlighting; confirm). */
  color: string;
  /** Boolean flag; `true` in the usage example. */
  isAuto: boolean;
  /** Boolean flag; `false` in the usage example. */
  isImage: boolean;
  /** Language codes to recognize, e.g. `['eng']`. */
  languages: string[];
  /** Optional job identifier. */
  jobId?: string;
  /** Optional subset of output formats to request. */
  output?: Partial<OutputFormats>;
  /** Optional recognition options, e.g. a `rectangle` region. */
  options?: Partial<RecognizeOptions>;
}

/**
 * Scheduler API: workers are registered with `addWorker` and jobs are
 * submitted with `addJob`.
 * NOTE(review): how jobs are assigned to workers is not visible here.
 */
export interface Scheduler {
  /** Registers a worker; returns a string (presumably the worker id; confirm). */
  addWorker(worker: Worker): string;
  /** Submits a `recognize` job; the remaining arguments are those of `Worker.recognize`. */
  addJob(
    action: 'recognize',
    ...args: Parameters<Worker['recognize']>
  ): Promise<RecognizeResult>;
  /** Submits a `detect` job; the remaining arguments are those of `Worker.detect`. */
  addJob(
    action: 'detect',
    ...args: Parameters<Worker['detect']>
  ): Promise<DetectResult>;
  /** Terminates the scheduler; the resolved value is untyped. */
  terminate(): Promise<any>;
  /** Returns a number (per the name, the queue length). */
  getQueueLen(): number;
  /** Returns a number (per the name, the count of registered workers). */
  getNumWorkers(): number;
}

/**
 * A single OCR worker. Every promise-returning method takes an optional
 * trailing `jobId`; `getImage` is the only synchronous member.
 */
export interface Worker {
  /** Loads the worker. NOTE(review): what is loaded is not visible here. */
  load(jobId?: string): Promise<ConfigResult>;
  /** Writes `text` at `path` (presumably in the worker's file system; confirm). */
  writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>;
  /** Reads the text stored at `path`. */
  readText(path: string, jobId?: string): Promise<ConfigResult>;
  /** Removes the text stored at `path`. */
  removeText(path: string, jobId?: string): Promise<ConfigResult>;
  /** Invokes the file-system method named `method` with `args`. */
  FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>;
  /** Loads language data given a string or a list of `Lang` entries. */
  loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult>;
  /** Initializes the engine for the given languages, engine mode and config. */
  initialize(
    langs?: string | Lang[],
    oem?: OEM,
    config?: string | Partial<InitOptions>,
    jobId?: string
  ): Promise<ConfigResult>;
  /** Applies any subset of `WorkerParams`. */
  setParameters(
    params: Partial<WorkerParams>,
    jobId?: string
  ): Promise<ConfigResult>;
  /** Returns a string for the requested image type (encoding not shown here). */
  getImage(type: imageType): string;
  /** Runs recognition on `image`; resolves with a `Page` in `data`. */
  recognize(
    image: ImageLike,
    options?: Partial<RecognizeOptions>,
    output?: Partial<OutputFormats>,
    jobId?: string
  ): Promise<RecognizeResult>;
  /** Runs detection on `image`; resolves with `DetectData` in `data`. */
  detect(image: ImageLike, jobId?: string): Promise<DetectResult>;
  /** Terminates the worker. */
  terminate(jobId?: string): Promise<ConfigResult>;
  /** Produces a PDF; resolves with a `number[]` in `data`. */
  getPDF(
    title?: string,
    textonly?: boolean,
    jobId?: string
  ): Promise<GetPDFResult>;
}

/** A language entry that pairs a language code with its data payload. */
export interface Lang {
  /** Language code string. */
  code: string;
  /** Language data; typed `unknown`, so its shape is not specified here. */
  data: unknown;
}

/**
 * Options that can be passed as `config` to `Worker.initialize`.
 * Each field is a string named after a `load_*_dawg` setting.
 * NOTE(review): accepted values (presumably '0' / '1') are not shown here.
 */
export interface InitOptions {
  load_system_dawg: string;
  load_freq_dawg: string;
  load_unambig_dawg: string;
  load_punc_dawg: string;
  load_number_dawg: string;
  load_bigram_dawg: string;
}

/** Message object passed to the `logger` callback of `WorkerOptions`. */
export type LoggerMessage = {
  /** Identifier of the job the message refers to. */
  jobId: string;
  /** Numeric progress value (scale not shown here; presumably 0..1, confirm). */
  progress: number;
  /** Status text. */
  status: string;
  /** Job identifier string, distinct from `jobId` (per the name, caller-supplied). */
  userJobId: string;
  /** Identifier of the worker that produced the message. */
  workerId: string;
};

/** Options for a worker: resource paths, caching/loading switches, and callbacks. */
export interface WorkerOptions {
  corePath: string;
  langPath: string;
  cachePath: string;
  dataPath: string;
  workerPath: string;
  /** Cache strategy name; accepted values are not shown here. */
  cacheMethod: string;
  workerBlobURL: boolean;
  gzip: boolean;
  /** Callback invoked with a `LoggerMessage`. */
  logger: (arg: LoggerMessage) => void;
  /** Callback invoked with an untyped error value. */
  errorHandler: (arg: any) => void;
}

/**
 * Parameters accepted by `Worker.setParameters`. Apart from the two
 * enum-typed mode fields, every value is a string.
 */
export interface WorkerParams {
  /** OCR engine mode (`OEM`). */
  tessedit_ocr_engine_mode: OEM;
  /** Page segmentation mode (`PSM`). */
  tessedit_pageseg_mode: PSM;
  tessedit_char_whitelist: string;
  preserve_interword_spaces: string;
  user_defined_dpi: string;
  tessjs_create_hocr: string;
  tessjs_create_tsv: string;
  tessjs_create_box: string;
  tessjs_create_unlv: string;
  tessjs_create_osd: string;
}

/**
 * Boolean switches, one per output format. Passed (as a `Partial`) to
 * `Worker.recognize` and to `Configs.output`.
 */
export interface OutputFormats {
  text: boolean;
  blocks: boolean;
  layoutBlocks: boolean;
  hocr: boolean;
  tsv: boolean;
  box: boolean;
  unlv: boolean;
  osd: boolean;
  pdf: boolean;
  imageColor: boolean;
  imageGrey: boolean;
  imageBinary: boolean;
  debug: boolean;
}

/** Options for a recognition call; used as a `Partial` by `Worker.recognize` and `Configs.options`. */
export interface RecognizeOptions {
  /** Region given as left/top/width/height. */
  rectangle: Rectangle;
  pdfTitle: string;
  pdfTextOnly: boolean;
  rotateAuto: boolean;
  /** Rotation amount; radians, per the field name. */
  rotateRadians: number;
}

/** Result of the worker's configuration and file operations; the payload is untyped. */
export interface ConfigResult {
  jobId: string;
  data: any;
}

/** Result of a `recognize` call: the job id plus the recognized `Page`. */
export interface RecognizeResult {
  jobId: string;
  data: Page;
}

/** Result of `Worker.getPDF`: the job id plus a numeric array (presumably the PDF bytes; confirm). */
export interface GetPDFResult {
  jobId: string;
  data: number[];
}

/** Result of a `detect` call: the job id plus the detection data. */
export interface DetectResult {
  jobId: string;
  data: DetectData;
}

/** Script and orientation detection output; every field may be `null`. */
export interface DetectData {
  tesseract_script_id: number | null;
  script: string | null;
  script_confidence: number | null;
  /** Orientation in degrees, per the field name. */
  orientation_degrees: number | null;
  orientation_confidence: number | null;
}

/**
 * Rectangular region given by its left/top offset and its width/height.
 * NOTE(review): units are not stated here (presumably image pixels; confirm).
 */
export interface Rectangle {
  left: number;
  top: number;
  width: number;
  height: number;
}

/**
 * OCR engine modes. The numeric values are written out explicitly and match
 * declaration order (0 through 3).
 */
export enum OEM {
  TESSERACT_ONLY = 0,
  LSTM_ONLY = 1,
  TESSERACT_LSTM_COMBINED = 2,
  DEFAULT = 3,
}

/**
 * Page segmentation modes. Each member's value is the mode number written as
 * a string ('0' through '13').
 */
export enum PSM {
  OSD_ONLY = '0',
  AUTO_OSD = '1',
  AUTO_ONLY = '2',
  AUTO = '3',
  SINGLE_COLUMN = '4',
  SINGLE_BLOCK_VERT_TEXT = '5',
  SINGLE_BLOCK = '6',
  SINGLE_LINE = '7',
  SINGLE_WORD = '8',
  CIRCLE_WORD = '9',
  SINGLE_CHAR = '10',
  SPARSE_TEXT = '11',
  SPARSE_TEXT_OSD = '12',
  RAW_LINE = '13',
}

/**
 * Image variants that can be requested from `Worker.getImage`.
 * NOTE(review): this is a `const enum`, so it is subject to the usual
 * `isolatedModules` restrictions on ambient const enums.
 */
export const enum imageType {
  COLOR = 0,
  GREY = 1,
  BINARY = 2,
}

/**
 * Image inputs accepted by `Worker.recognize` and `Worker.detect`.
 * NOTE(review): the trailing `| any` makes the whole union resolve to `any`,
 * so the members listed above it serve as documentation only and are not
 * enforced by the type checker.
 */
export type ImageLike =
  | string
  | HTMLImageElement
  | HTMLCanvasElement
  | HTMLVideoElement
  | CanvasRenderingContext2D
  | File
  | Blob
  | ImageData
  | any;

/**
 * A block of recognized content. It holds its paragraphs plus flattened lists
 * of the lines, words and symbols it contains, and a reference to its page.
 */
export interface Block {
  paragraphs: Paragraph[];
  text: string;
  /** Recognition confidence (scale not shown here). */
  confidence: number;
  baseline: Baseline;
  bbox: Bbox;
  blocktype: string;
  /** Untyped polygon data. */
  polygon: any;
  /** Page this block belongs to. */
  page: Page;
  lines: Line[];
  words: Word[];
  symbols: Symbol[];
}

/**
 * Baseline described by two points, (x0, y0) and (x1, y1), plus a flag that
 * says whether a baseline is present.
 */
export interface Baseline {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
  has_baseline: boolean;
}

/**
 * Bounding box given by two points, (x0, y0) and (x1, y1).
 * NOTE(review): which corners they denote and the units are not shown here
 * (presumably top-left / bottom-right in pixels; confirm).
 */
export interface Bbox {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
}

/**
 * A recognized text line. It holds its words and symbols, plus references to
 * the paragraph, block and page that contain it.
 */
export interface Line {
  words: Word[];
  text: string;
  /** Recognition confidence (scale not shown here). */
  confidence: number;
  baseline: Baseline;
  bbox: Bbox;
  paragraph: Paragraph;
  block: Block;
  page: Page;
  symbols: Symbol[];
}

/**
 * A recognized paragraph. It holds its lines, words and symbols, plus
 * references to the block and page that contain it.
 */
export interface Paragraph {
  lines: Line[];
  text: string;
  /** Recognition confidence (scale not shown here). */
  confidence: number;
  baseline: Baseline;
  bbox: Bbox;
  /** Whether the paragraph is left-to-right, per the field name. */
  is_ltr: boolean;
  block: Block;
  page: Page;
  words: Word[];
  symbols: Symbol[];
}

/**
 * A single recognized symbol, with alternative choices and references to the
 * word, line, paragraph, block and page that contain it.
 * NOTE(review): within this module the name shadows the built-in global
 * `Symbol` type.
 */
export interface Symbol {
  /** Alternative readings, each with its own confidence. */
  choices: Choice[];
  /** Untyped image data. */
  image: any;
  text: string;
  /** Recognition confidence (scale not shown here). */
  confidence: number;
  baseline: Baseline;
  bbox: Bbox;
  is_superscript: boolean;
  is_subscript: boolean;
  is_dropcap: boolean;
  word: Word;
  line: Line;
  paragraph: Paragraph;
  block: Block;
  page: Page;
}

/** One candidate reading for a word or symbol, with its confidence. */
export interface Choice {
  text: string;
  confidence: number;
}

/**
 * A recognized word. It holds its symbols and alternative choices, a set of
 * boolean/font attributes, and references to the line, paragraph, block and
 * page that contain it.
 */
export interface Word {
  symbols: Symbol[];
  /** Alternative readings, each with its own confidence. */
  choices: Choice[];
  text: string;
  /** Recognition confidence (scale not shown here). */
  confidence: number;
  baseline: Baseline;
  bbox: Bbox;
  is_numeric: boolean;
  in_dictionary: boolean;
  /** Direction as a string; accepted values are not shown here. */
  direction: string;
  language: string;
  is_bold: boolean;
  is_italic: boolean;
  is_underlined: boolean;
  is_monospace: boolean;
  is_serif: boolean;
  is_smallcaps: boolean;
  font_size: number;
  font_id: number;
  font_name: string;
  line: Line;
  paragraph: Paragraph;
  block: Block;
  page: Page;
}

/**
 * Top-level recognition result (the `data` of a `RecognizeResult`).
 * It carries the recognized text, flattened lists of lines, paragraphs, words
 * and symbols, and a set of nullable per-format outputs.
 */
export interface Page {
  blocks: Block[] | null;
  /** Recognition confidence (scale not shown here). */
  confidence: number;
  lines: Line[];
  oem: string;
  osd: string;
  paragraphs: Paragraph[];
  psm: string;
  symbols: Symbol[];
  /** Full recognized text. */
  text: string;
  version: string;
  words: Word[];
  hocr: string | null;
  tsv: string | null;
  box: string | null;
  unlv: string | null;
  // NOTE(review): `sd` sits where `OutputFormats` has `osd`, and `osd` is
  // already declared above; confirm the intended name against upstream types.
  sd: string | null;
  imageColor: string | null;
  imageGrey: string | null;
  imageBinary: string | null;
  /** Rotation in radians, per the field name; may be null. */
  rotateRadians: number | null;
  /** Numeric array (presumably the PDF bytes; confirm); may be null. */
  pdf: number[] | null;
}

Language support 📡

[
  {
    "name": "Afrikaans",
    "code": "afr",
    "dir": "ltr"
  },
  {
    "name": "Albanian",
    "code": "sqi",
    "dir": "ltr"
  },
  {
    "name": "Amharic",
    "code": "amh",
    "dir": "ltr"
  },
  {
    "name": "Arabic",
    "code": "ara",
    "dir": "rtl"
  },
  {
    "name": "Armenian",
    "code": "hye",
    "dir": "ltr"
  },
  {
    "name": "Azerbaijani",
    "code": "aze",
    "dir": "ltr"
  },
  {
    "name": "Basque",
    "code": "eus",
    "dir": "ltr"
  },
  {
    "name": "Belarusian",
    "code": "bel",
    "dir": "ltr"
  },
  {
    "name": "Bengali",
    "code": "ben",
    "dir": "ltr"
  },
  {
    "name": "Bosnian",
    "code": "bos",
    "dir": "ltr"
  },
  {
    "name": "Bulgarian",
    "code": "bul",
    "dir": "ltr"
  },
  {
    "name": "Catalan",
    "code": "cat",
    "dir": "ltr"
  },
  {
    "name": "Cebuano",
    "code": "ceb",
    "dir": "ltr"
  },
  {
    "name": "Cherokee",
    "code": "chr",
    "dir": "ltr"
  },
  {
    "name": "Chinese (Simplified)",
    "code": "chi_sim",
    "dir": "ltr"
  },
  {
    "name": "Chinese (Traditional)",
    "code": "chi_tra",
    "dir": "ltr"
  },
  {
    "name": "Corsican",
    "code": "cos",
    "dir": "ltr"
  },
  {
    "name": "Croatian",
    "code": "hrv",
    "dir": "ltr"
  },
  {
    "name": "Czech",
    "code": "ces",
    "dir": "ltr"
  },
  {
    "name": "Danish",
    "code": "dan",
    "dir": "ltr"
  },
  {
    "name": "Dutch",
    "code": "nld",
    "dir": "ltr"
  },
  {
    "name": "English",
    "code": "eng",
    "dir": "ltr"
  },
  {
    "name": "Esperanto",
    "code": "epo",
    "dir": "ltr"
  },
  {
    "name": "Estonian",
    "code": "est",
    "dir": "ltr"
  },
  {
    "name": "Finnish",
    "code": "fin",
    "dir": "ltr"
  },
  {
    "name": "French",
    "code": "fra",
    "dir": "ltr"
  },
  {
    "name": "Frisian",
    "code": "fry",
    "dir": "ltr"
  },
  {
    "name": "Galician",
    "code": "glg",
    "dir": "ltr"
  },
  {
    "name": "Georgian",
    "code": "kat",
    "dir": "ltr"
  },
  {
    "name": "German",
    "code": "deu",
    "dir": "ltr"
  },
  {
    "name": "Greek",
    "code": "ell",
    "dir": "ltr"
  },
  {
    "name": "Gujarati",
    "code": "guj",
    "dir": "ltr"
  },
  {
    "name": "Haitian Creole",
    "code": "hat",
    "dir": "ltr"
  },
  {
    "name": "Hausa",
    "code": "hau",
    "dir": "rtl"
  },
  {
    "name": "Hebrew",
    "code": "heb",
    "dir": "rtl"
  },
  {
    "name": "Hindi",
    "code": "hin",
    "dir": "ltr"
  },
  {
    "name": "Hungarian",
    "code": "hun",
    "dir": "ltr"
  },
  {
    "name": "Icelandic",
    "code": "isl",
    "dir": "ltr"
  },
  {
    "name": "Igbo",
    "code": "ibo",
    "dir": "ltr"
  },
  {
    "name": "Indonesian",
    "code": "ind",
    "dir": "ltr"
  },
  {
    "name": "Irish",
    "code": "gle",
    "dir": "ltr"
  },
  {
    "name": "Italian",
    "code": "ita",
    "dir": "ltr"
  },
  {
    "name": "Japanese",
    "code": "jpn",
    "dir": "ltr"
  },
  {
    "name": "Javanese",
    "code": "jav",
    "dir": "ltr"
  },
  {
    "name": "Kannada",
    "code": "kan",
    "dir": "ltr"
  },
  {
    "name": "Kazakh",
    "code": "kaz",
    "dir": "ltr"
  },
  {
    "name": "Khmer",
    "code": "khm",
    "dir": "ltr"
  },
  {
    "name": "Kinyarwanda",
    "code": "kin",
    "dir": "ltr"
  },
  {
    "name": "Korean",
    "code": "kor",
    "dir": "ltr"
  },
  {
    "name": "Kurdish (Kurmanji)",
    "code": "kur_ara",
    "dir": "rtl"
  },
  {
    "name": "Kyrgyz",
    "code": "kir",
    "dir": "ltr"
  },
  {
    "name": "Lao",
    "code": "lao",
    "dir": "ltr"
  },
  {
    "name": "Latin",
    "code": "lat",
    "dir": "ltr"
  },
  {
    "name": "Latvian",
    "code": "lav",
    "dir": "ltr"
  },
  {
    "name": "Lithuanian",
    "code": "lit",
    "dir": "ltr"
  },
  {
    "name": "Luxembourgish",
    "code": "ltz",
    "dir": "ltr"
  },
  {
    "name": "Macedonian",
    "code": "mkd",
    "dir": "ltr"
  },
  {
    "name": "Malagasy",
    "code": "mlg",
    "dir": "ltr"
  },
  {
    "name": "Malay",
    "code": "msa",
    "dir": "ltr"
  },
  {
    "name": "Malayalam",
    "code": "mal",
    "dir": "ltr"
  },
  {
    "name": "Maltese",
    "code": "mlt",
    "dir": "ltr"
  },
  {
    "name": "Maori",
    "code": "mri",
    "dir": "ltr"
  },
  {
    "name": "Marathi",
    "code": "mar",
    "dir": "ltr"
  },
  {
    "name": "Mongolian",
    "code": "mon",
    "dir": "ltr"
  },
  {
    "name": "Myanmar (Burmese)",
    "code": "mya",
    "dir": "ltr"
  },
  {
    "name": "Nepali",
    "code": "nep",
    "dir": "ltr"
  },
  {
    "name": "Norwegian",
    "code": "nor",
    "dir": "ltr"
  },
  {
    "name": "Odia (Oriya)",
    "code": "ori",
    "dir": "ltr"
  },
  {
    "name": "Pashto",
    "code": "pus",
    "dir": "rtl"
  },
  {
    "name": "Persian",
    "code": "fas",
    "dir": "rtl"
  },
  {
    "name": "Polish",
    "code": "pol",
    "dir": "ltr"
  },
  {
    "name": "Portuguese",
    "code": "por",
    "dir": "ltr"
  },
  {
    "name": "Punjabi",
    "code": "pan",
    "dir": "ltr"
  },
  {
    "name": "Romanian",
    "code": "ron",
    "dir": "ltr"
  },
  {
    "name": "Russian",
    "code": "rus",
    "dir": "ltr"
  },
  {
    "name": "Samoan",
    "code": "smo",
    "dir": "ltr"
  },
  {
    "name": "Scots Gaelic",
    "code": "gla",
    "dir": "ltr"
  },
  {
    "name": "Serbian",
    "code": "srp",
    "dir": "ltr"
  },
  {
    "name": "Sesotho",
    "code": "sot",
    "dir": "ltr"
  },
  {
    "name": "Shona",
    "code": "sna",
    "dir": "ltr"
  },
  {
    "name": "Sindhi",
    "code": "snd",
    "dir": "rtl"
  },
  {
    "name": "Sinhala (Sinhalese)",
    "code": "sin",
    "dir": "ltr"
  },
  {
    "name": "Slovak",
    "code": "slk",
    "dir": "ltr"
  },
  {
    "name": "Slovenian",
    "code": "slv",
    "dir": "ltr"
  },
  {
    "name": "Somali",
    "code": "som",
    "dir": "ltr"
  },
  {
    "name": "Spanish",
    "code": "spa",
    "dir": "ltr"
  },
  {
    "name": "Sundanese",
    "code": "sun",
    "dir": "ltr"
  },
  {
    "name": "Swahili",
    "code": "swa",
    "dir": "ltr"
  },
  {
    "name": "Swedish",
    "code": "swe",
    "dir": "ltr"
  },
  {
    "name": "Tajik",
    "code": "tgk",
    "dir": "ltr"
  },
  {
    "name": "Tamil",
    "code": "tam",
    "dir": "ltr"
  },
  {
    "name": "Tatar",
    "code": "tat",
    "dir": "ltr"
  },
  {
    "name": "Telugu",
    "code": "tel",
    "dir": "ltr"
  },
  {
    "name": "Thai",
    "code": "tha",
    "dir": "ltr"
  },
  {
    "name": "Turkish",
    "code": "tur",
    "dir": "ltr"
  },
  {
    "name": "Ukrainian",
    "code": "ukr",
    "dir": "ltr"
  },
  {
    "name": "Urdu",
    "code": "urd",
    "dir": "rtl"
  },
  {
    "name": "Uzbek",
    "code": "uzb",
    "dir": "ltr"
  },
  {
    "name": "Vietnamese",
    "code": "vie",
    "dir": "ltr"
  },
  {
    "name": "Welsh",
    "code": "cym",
    "dir": "ltr"
  },
  {
    "name": "Xhosa",
    "code": "xho",
    "dir": "ltr"
  },
  {
    "name": "Yiddish",
    "code": "yid",
    "dir": "rtl"
  },
  {
    "name": "Yoruba",
    "code": "yor",
    "dir": "ltr"
  },
  {
    "name": "Zulu",
    "code": "zul",
    "dir": "ltr"
  }
]

Note that the `dir` property indicates the direction of the language,
where `ltr` means left-to-right and `rtl` means right-to-left.

API Documentation ⛑️

Input 🔌

Field	Description	Type	Default
[configs]	config	Configs	{}

Output 🔌

Field	Description	Type	Default
(event)	result data	BehaviorSubject	{}

Component exports 🔌

Field	Description	Type	Default
data$	result data	BehaviorSubject	{}
logger$	status	BehaviorSubject	{}
image$	actual photo size	BehaviorSubject	{width: 0, height: 0}

Support versions

Support versions
Angular 16	1.1.0
Angular 14	1.0.9

Author Information

Author Information
Author	DaiDH
Phone	+84845882882
Country	Vietnam

If you would like to donate to me! 💰

Bitcoin

Name		Name	Last commit message	Last commit date
Latest commit History 1 Commit
.github/workflows		.github/workflows
.vscode		.vscode
src		src
.editorconfig		.editorconfig
.gitignore		.gitignore
LICENSE		LICENSE
README.md		README.md
angular.json		angular.json
ngx-scanner-text-mark.png		ngx-scanner-text-mark.png
ngx-scanner-text-origin.png		ngx-scanner-text-origin.png
package-lock.json		package-lock.json
package.json		package.json
tsconfig.app.json		tsconfig.app.json
tsconfig.json		tsconfig.json
tsconfig.spec.json		tsconfig.spec.json

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

ngx-scanner-text

Installation ⚙️

Usage 💉

API Documentation ⛑️

Input 🔌

Ouput 🔌

Component exports 🔌

Support versions

Author Information

If you want donate for me! 💰

About

Packages

Languages

License

id1945/ngx-scanner-text

Folders and files

Latest commit

History

Repository files navigation

ngx-scanner-text

Installation ⚙️

Usage 💉

API Documentation ⛑️

Input 🔌

Ouput 🔌

Component exports 🔌

Support versions

Author Information

If you want donate for me! 💰

About

Topics

Resources

License

Stars

Watchers

Forks

Packages 0

Languages

Packages