-
Notifications
You must be signed in to change notification settings - Fork 25
examples.md
Kees edited this page Jan 19, 2022
·
11 revisions
- You need trained data for each language you want to recognize, placed in the `tessdata` directory.
- You can download the trained data from https://github.com/tesseract-ocr/tessdata or https://github.com/tesseract-ocr/tessdata_fast
// Example: run OCR on an image file and print the recognized text together
// with the mean confidence reported for the page.
// `testImagePath` is expected to be supplied by the surrounding code.
using (var ocrEngine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
using (var image = Pix.LoadFromFile(testImagePath))
using (var ocrPage = ocrEngine.Process(image))
{
    // Fetch the text first, then the confidence (same call order as before).
    var recognizedText = ocrPage.GetText();
    var meanConfidence = ocrPage.GetMeanConfidence();

    Console.WriteLine("Mean confidence: {0}", meanConfidence);
    Console.WriteLine("Text (GetText): \r\n{0}", recognizedText);
    Console.WriteLine("Text (iterator):");
}
// Example: read an image file into a byte array and run OCR on the in-memory
// copy instead of letting the library open the file itself.
// `filename` is expected to be supplied by the surrounding code.
//
// Note: there must be NO ';' after the first using (...) header. A trailing
// semicolon turns it into an empty statement, so the stream would be disposed
// (and out of scope) before CopyTo runs.
using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
using (var ms = new MemoryStream())
{
    // Copy the whole file into memory and take a snapshot of the bytes.
    fs.CopyTo(ms);
    byte[] fileBytes = ms.ToArray();

    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    using (var img = Pix.LoadFromMemory(fileBytes))
    using (var page = engine.Process(img))
    {
        // Recognized text for the image; use it as needed.
        var txt = page.GetText();
    }
}
// Example: render a single image into a searchable PDF on disk.
//
// The first argument of CreatePdfRenderer is the output path WITHOUT the file
// extension; the renderer appends ".pdf" itself. Passing "test" therefore
// produces "test.pdf" (passing "test.pdf" would produce "test.pdf.pdf").
// The second argument is the directory holding the PDF font data, and the
// third argument is the text-only flag (false here).
using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(@"test", @"./tessdata", false))
{
    // PDF Title
    using (renderer.BeginDocument("Searchablepdftest"))
    {
        string configurationFilePath = @"C:\tessdata";
        using (TesseractEngine engine = new TesseractEngine(configurationFilePath, "eng", EngineMode.TesseractAndLstm))
        using (var img = Pix.LoadFromFile(@"C:\file-page1.jpg"))
        // The second argument is the name attached to the processed input;
        // it is kept identical to the document title.
        using (var page = engine.Process(img, "Searchablepdftest"))
        {
            renderer.AddPage(page);
        }
    }
}
// The PDF is complete once the document scope and the renderer are disposed.
// Example: render an image into a searchable PDF through a temporary file and
// load the finished PDF into memory as a byte array.
var tmpPdfLocation = "./tessdata/pdf";

// Make sure the target directory exists (no-op when it already does).
Directory.CreateDirectory(tmpPdfLocation);

// Build the output path with Path.Combine so the correct directory separator
// is used. Path.PathSeparator is the ';' / ':' character that separates
// entries in PATH-style lists and is not valid here. Path.GetTempFileName()
// is avoided as well: it returns a full path and creates an empty file.
// The renderer expects the path WITHOUT extension and appends ".pdf" itself.
var outputBase = Path.Combine(tmpPdfLocation, Guid.NewGuid().ToString("N"));
var tmpFile = outputBase + ".pdf";

string configurationFilePath = @"C:\tessdata";
byte[] fileBytes;

try
{
    using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(outputBase, @"./tessdata", false))
    using (renderer.BeginDocument("Searchablepdftest"))
    using (TesseractEngine engine = new TesseractEngine(configurationFilePath, "eng", EngineMode.TesseractAndLstm))
    using (var img = Pix.LoadFromFile(@"C:\file-page1.jpg"))
    using (var page = engine.Process(img, "Searchablepdftest"))
    {
        renderer.AddPage(page);
    }

    // The renderer has been disposed at this point, so the PDF file is
    // complete on disk and can be read back in one go.
    fileBytes = File.ReadAllBytes(tmpFile);
}
finally
{
    // Always remove the temporary file, even when rendering or reading failed.
    // File.Delete does not throw when the file does not exist.
    File.Delete(tmpFile);
}