Skip to content

examples.md

Kees edited this page Jan 19, 2022 · 11 revisions

Examples

Notes

Basic text from an image loaded from a file path

// Load an image from disk, run it through the OCR engine and print the
// recognized text together with the page's mean confidence value.
// The engine, image and page are all disposable; stacked using statements
// release them in reverse order of creation.
using (var ocrEngine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
using (var sourceImage = Pix.LoadFromFile(testImagePath))
using (var ocrPage = ocrEngine.Process(sourceImage))
{
    var recognizedText = ocrPage.GetText();
    Console.WriteLine("Mean confidence: {0}", ocrPage.GetMeanConfidence());
    Console.WriteLine("Text (GetText): \r\n{0}", recognizedText);
    Console.WriteLine("Text (iterator):");
}

Basic Text from Image bytes

// Read the whole image file into memory. File.ReadAllBytes opens the file for
// reading and always closes it, even when the read throws, so no manual
// FileStream -> MemoryStream copy (and no manual Close calls) is needed.
byte[] fileBytes = File.ReadAllBytes(filename);

// Decode the in-memory image and run OCR on it. The engine, image and page
// are disposable and are released in reverse order when the block exits.
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
using (var img = Pix.LoadFromMemory(fileBytes))
using (var page = engine.Process(img))
{
    // Text recognized on the page.
    var txt = page.GetText();
}

Image to text-searchable PDF using file paths

// Output base name WITHOUT extension: the PDF renderer appends ".pdf" itself,
// so this produces "test.pdf" (passing "test.pdf" would yield "test.pdf.pdf").
string outputBaseName = @"test";
string configurationFilePath = @"C:\tessdata";

// The renderer, the open document, the engine, the image and the processed
// page are all disposable; stacked using statements release them in reverse
// order, so the document is closed before the renderer is disposed.
using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(outputBaseName, @"./tessdata", false))
// PDF Title
using (renderer.BeginDocument("Searchablepdftest"))
using (TesseractEngine engine = new TesseractEngine(configurationFilePath, "eng", EngineMode.TesseractAndLstm))
using (var img = Pix.LoadFromFile(@"C:\file-page1.jpg"))
using (var page = engine.Process(img, "Searchablepdftest"))
{
    // Append the recognized page to the PDF being rendered.
    renderer.AddPage(page);
}

Image to pdf returning file bytes

// Directory that receives the temporary PDF; created up front so the renderer
// can write into it (CreateDirectory is a no-op when it already exists).
var tmpPdfLocation = "./tessdata/pdf";
Directory.CreateDirectory(tmpPdfLocation);

// Unique output base name inside that directory. Path.Combine inserts the
// directory separator (Path.PathSeparator is the ';'/':' list separator), and
// a GUID is used instead of Path.GetTempFileName, which returns an absolute
// path and creates an empty file as a side effect.
var outputBase = Path.Combine(tmpPdfLocation, Guid.NewGuid().ToString("N"));

// The PDF renderer appends ".pdf" to the base name, so this is the file that
// must be read back and deleted afterwards.
var tmpFile = outputBase + ".pdf";

string configurationFilePath = @"C:\tessdata";
byte[] fileBytes = null;
try
{
    using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(outputBase, @"./tessdata", false))
    // PDF Title
    using (renderer.BeginDocument("Searchablepdftest"))
    using (TesseractEngine engine = new TesseractEngine(configurationFilePath, "eng", EngineMode.TesseractAndLstm))
    using (var img = Pix.LoadFromFile(@"C:\file-page1.jpg"))
    using (var page = engine.Process(img, "Searchablepdftest"))
    {
        renderer.AddPage(page);
    }

    // on dispose file should be created, so it is only read after the using
    // block above has released the document and the renderer.
    fileBytes = File.ReadAllBytes(tmpFile);
}
finally
{
    // delete tmp file, also when rendering or reading failed
    // (File.Delete does not throw when the file is missing).
    File.Delete(tmpFile);
}
Clone this wiki locally