Skip to content

examples.md

Kees edited this page Jan 19, 2022 · 11 revisions

Examples

Notes

Basic text from an image loaded from a file path

// Recognise the text in an image stored on disk and print it together with the
// mean confidence reported for the page.
// The engine, the loaded image and the processed page are all disposable; the
// stacked using statements release them in reverse order (page, image, engine).
using (var ocrEngine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
using (var sourceImage = Pix.LoadFromFile(testImagePath))
using (var ocrPage = ocrEngine.Process(sourceImage))
{
    // Fetch the recognised text first, then report the confidence and the text.
    var recognisedText = ocrPage.GetText();
    var meanConfidence = ocrPage.GetMeanConfidence();

    Console.WriteLine("Mean confidence: {0}", meanConfidence);
    Console.WriteLine("Text (GetText): \r\n{0}", recognisedText);
    Console.WriteLine("Text (iterator):");
}

Basic Text from Image bytes

// Recognise the text in an image that is supplied as an in-memory byte array.
//
// Step 1: read the whole file into a byte array. Both streams are disposed as
// soon as the bytes have been copied, so the file handle is not held open while
// OCR runs.
byte[] fileBytes;
using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
using (var ms = new MemoryStream())
{
    fs.CopyTo(ms);
    fileBytes = ms.ToArray();
}

// Step 2: decode the bytes into an image and run OCR on it.
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
using (var img = Pix.LoadFromMemory(fileBytes))
using (var page = engine.Process(img))
{
    // The recognised text of the page.
    var txt = page.GetText();
}

Image to text-searchable PDF using file paths

// Render a single image into a text-searchable PDF written to disk.

// PDF title; also passed as the input name of the processed page.
const string documentTitle = "Searchablepdftest";

// Directory handed to the OCR engine as its data path.
string configurationFilePath = @"C:\tessdata";

// The output name is passed WITHOUT a file extension: the PDF renderer appends
// ".pdf" itself, so "test" produces "test.pdf" (passing "test.pdf" would
// produce "test.pdf.pdf").
// NOTE(review): the second argument is the renderer's font directory; here it
// differs from the engine's data path above - confirm both exist on your machine.
using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(@"test", @"./tessdata", false))
using (renderer.BeginDocument(documentTitle))
using (TesseractEngine engine = new TesseractEngine(configurationFilePath, "eng", EngineMode.TesseractAndLstm))
using (var img = Pix.LoadFromFile(@"C:\file-page1.jpg"))
using (var page = engine.Process(img, documentTitle))
{
    // Append the recognised page to the open document.
    renderer.AddPage(page);
}

Image to PDF, returning the file bytes

// Render an image into a text-searchable PDF and return the PDF as a byte array.
// The PDF is written to a uniquely named temporary file, read back into memory,
// and the temporary file is removed again even if rendering fails.

// Working directory for the intermediate PDF; created if it does not exist yet.
var tmpPdfLocation = "./tessdata/pdf";
Directory.CreateDirectory(tmpPdfLocation);

// Unique output base name. Path.Combine inserts the directory separator
// (Path.PathSeparator is the PATH-list separator, ';' or ':', not a directory
// separator), and a GUID is used because Path.GetTempFileName() returns an
// absolute path to an already-created file, which cannot be appended to a
// directory.
var tmpFileBase = Path.Combine(tmpPdfLocation, Guid.NewGuid().ToString("N"));

// The PDF renderer appends ".pdf" to the base name it is given, so this is the
// file that actually ends up on disk.
var tmpFile = tmpFileBase + ".pdf";
byte[] fileBytes;

// PDF title; also passed as the input name of the processed page.
const string documentTitle = "Searchablepdftest";

// Directory handed to the OCR engine as its data path.
string configurationFilePath = @"C:\tessdata";

try
{
    using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(tmpFileBase, @"./tessdata", false))
    using (renderer.BeginDocument(documentTitle))
    using (TesseractEngine engine = new TesseractEngine(configurationFilePath, "eng", EngineMode.TesseractAndLstm))
    using (var img = Pix.LoadFromFile(@"C:\file-page1.jpg"))
    using (var page = engine.Process(img, documentTitle))
    {
        // Append the recognised page to the open document.
        renderer.AddPage(page);
    }

    // Read the file only after the renderer has been disposed, which is when
    // the file should be complete on disk.
    fileBytes = File.ReadAllBytes(tmpFile);
}
finally
{
    // Delete the temporary file; File.Delete does nothing if the file was never created.
    File.Delete(tmpFile);
}
Clone this wiki locally