Skip to content

Commit cdd7980

Browse files
committed
Revert "DocClassification samples"
This reverts commit 05314b0.
1 parent 05314b0 commit cdd7980

File tree

4 files changed

+1
-216
lines changed

4 files changed

+1
-216
lines changed

Samples/DataExtractionTest/GO/DataExtraction_test.go

Lines changed: 0 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -302,60 +302,6 @@ func GenericKeyValueTest() (err error) {
302302
return nil
303303
}
304304

305-
//---------------------------------------------------------------------------------------
306-
// The following sample illustrates how to extract document classes from PDF documents.
307-
//---------------------------------------------------------------------------------------
308-
309-
func DocClassifierTest() (err error) {
310-
defer catch(&err)
311-
312-
// Test if the add-on is installed
313-
if !DataExtractionModuleIsModuleAvailable(DataExtractionModuleE_DocClassification) {
314-
fmt.Println("")
315-
fmt.Println("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.")
316-
fmt.Println("-----------------------------------------------------------------------------")
317-
fmt.Println("The Data Extraction suite is an optional add-on, available for download")
318-
fmt.Println("at https://docs.apryse.com/documentation/core/info/modules/. If you have already")
319-
fmt.Println("downloaded this module, ensure that the SDK is able to find the required files")
320-
fmt.Println("using the PDFNetAddResourceSearchPath() function.")
321-
fmt.Println("")
322-
return nil
323-
}
324-
325-
// Simple example: classify pages as a JSON file
326-
fmt.Println("Classify pages as a JSON file")
327-
328-
inputFile := inputPath + "Invoice.pdf"
329-
outputFile := outputPath + "Invoice_Classified.json"
330-
DataExtractionModuleExtractData(inputFile, outputFile, DataExtractionModuleE_DocClassification)
331-
332-
fmt.Println("Result saved in " + outputFile)
333-
334-
// Classify pages as a JSON string
335-
fmt.Println("Classify pages as a JSON string")
336-
337-
inputFile = inputPath + "Scientific_Publication.pdf"
338-
outputFile = outputPath + "Scientific_Publication_Classified.json"
339-
json := DataExtractionModuleExtractData(inputFile, DataExtractionModuleE_DocClassification).(string)
340-
WriteTextToFile(outputFile, json)
341-
342-
fmt.Println("Result saved in " + outputFile)
343-
344-
// Example with customized options:
345-
fmt.Println("Classify pages with customized options")
346-
347-
inputFile = inputPath + "Email.pdf"
348-
outputFile = outputPath + "Email_Classified.json"
349-
options := NewDataExtractionOptions()
350-
// Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
351-
options.SetMinimumConfidenceThreshold(0.7)
352-
DataExtractionModuleExtractData(inputFile, outputFile, DataExtractionModuleE_DocClassification, options)
353-
354-
fmt.Println("Result saved in " + outputFile)
355-
356-
return nil
357-
}
358-
359305
//---------------------------------------------------------------------------------------
360306

361307
func TestDataExtraction(t *testing.T) {
@@ -389,22 +335,13 @@ func TestDataExtraction(t *testing.T) {
389335
fmt.Println(fmt.Errorf("Unable to extract form fields data, error: %s", err))
390336
}
391337

392-
//-----------------------------------------------------------------------------------
393-
394338
err = GenericKeyValueTest()
395339
if err != nil {
396340
fmt.Println(fmt.Errorf("Unable to extract key-value pairs, error: %s", err))
397341
}
398342

399343
//-----------------------------------------------------------------------------------
400344

401-
err = DocClassifierTest()
402-
if err != nil {
403-
fmt.Println(fmt.Errorf("Unable to extract document classifications, error: %s", err))
404-
}
405-
406-
//-----------------------------------------------------------------------------------
407-
408345
PDFNetTerminate()
409346
fmt.Println("Done.")
410347
}

Samples/DataExtractionTest/PHP/DataExtractionTest.php

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ function main()
239239
// Example with customized options:
240240
// Extract Keys & Values from pages 2-4, excluding ads
241241
$options = new DataExtractionOptions();
242-
$options->SetPages("2-4");
242+
$options->setPages("2-4");
243243

244244
$p2ExclusionZones = new RectCollection();
245245
// Exclude the ad on page 2
@@ -267,58 +267,6 @@ function main()
267267
}
268268
}
269269

270-
//////////////////////////////////////////////////////////////////////////
271-
// The following sample illustrates how to extract document classes from PDF documents.
272-
//////////////////////////////////////////////////////////////////////////
273-
274-
// Test if the add-on is installed
275-
if (!DataExtractionModule::IsModuleAvailable(DataExtractionModule::e_DocClassification)) {
276-
echo(nl2br("\n"));
277-
echo(nl2br("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.\n"));
278-
echo(nl2br("-----------------------------------------------------------------------------\n"));
279-
echo(nl2br("The Data Extraction suite is an optional add-on, available for download\n"));
280-
echo(nl2br("at https://docs.apryse.com/documentation/core/info/modules/. If you have already\n"));
281-
echo(nl2br("downloaded this module, ensure that the SDK is able to find the required files\n"));
282-
echo(nl2br("using the PDFNet::AddResourceSearchPath() function.\n"));
283-
echo(nl2br("\n"));
284-
}
285-
else {
286-
try {
287-
// Simple example: classify pages as a JSON file
288-
echo(nl2br("Classify pages as a JSON file\n"));
289-
290-
$outputFile = $outputPath."Invoice_Classified.json";
291-
DataExtractionModule::ExtractData($inputPath."Invoice.pdf", $outputFile, DataExtractionModule::e_DocClassification);
292-
293-
echo(nl2br("Result saved in " . $outputFile . "\n"));
294-
295-
///////////////////////////////////////////////////////
296-
// Classify pages as a JSON string
297-
echo(nl2br("Classify pages as a JSON string\n"));
298-
299-
$outputFile = $outputPath."Scientific_Publication_Classified.json";
300-
$json = DataExtractionModule::ExtractData($inputPath."Scientific_Publication.pdf", DataExtractionModule::e_DocClassification);
301-
WriteTextToFile($outputFile, $json);
302-
303-
echo(nl2br("Result saved in " . $outputFile . "\n"));
304-
305-
///////////////////////////////////////////////////////
306-
// Example with customized options:
307-
echo(nl2br("Classify pages with customized options\n"));
308-
309-
$options = new DataExtractionOptions();
310-
// Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
311-
$options->SetMinimumConfidenceThreshold(0.7);
312-
$outputFile = $outputPath."Email_Classified.json";
313-
DataExtractionModule::ExtractData($inputPath."Email.pdf", $outputFile, DataExtractionModule::e_DocClassification, $options);
314-
315-
echo(nl2br("Result saved in " . $outputFile . "\n"));
316-
}
317-
catch(Exception $e) {
318-
echo(nl2br("Unable to extract document structure data, error: " . $e->getMessage() . "\n"));
319-
}
320-
}
321-
322270
//-----------------------------------------------------------------------------------
323271

324272
PDFNet::Terminate();

Samples/DataExtractionTest/PYTHON/DataExtractionTest.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -252,55 +252,6 @@ def main():
252252
print("Unable to extract key-value data, error: " + str(e))
253253

254254

255-
#-----------------------------------------------------------------------------------
256-
# The following sample illustrates how to extract document classes from PDF documents.
257-
#-----------------------------------------------------------------------------------
258-
259-
# Test if the add-on is installed
260-
if not DataExtractionModule.IsModuleAvailable(DataExtractionModule.e_DocClassification):
261-
print("")
262-
print("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.")
263-
print("-----------------------------------------------------------------------------")
264-
print("The Data Extraction suite is an optional add-on, available for download")
265-
print("at https://docs.apryse.com/documentation/core/info/modules/. If you have already")
266-
print("downloaded this module, ensure that the SDK is able to find the required files")
267-
print("using the PDFNet.AddResourceSearchPath() function.")
268-
print("")
269-
else:
270-
try:
271-
# Simple example: classify pages as a JSON file
272-
print("Classify pages as a JSON file")
273-
274-
outputFile = outputPath + "Invoice_Classified.json"
275-
DataExtractionModule.ExtractData(inputPath + "Invoice.pdf", outputFile, DataExtractionModule.e_DocClassification)
276-
277-
print("Result saved in " + outputFile)
278-
279-
#------------------------------------------------------
280-
# Classify pages as a JSON string
281-
print("Classify pages as a JSON string")
282-
283-
outputFile = outputPath + "Scientific_Publication_Classified.json"
284-
json = DataExtractionModule.ExtractData(inputPath + "Scientific_Publication.pdf", DataExtractionModule.e_DocClassification)
285-
WriteTextToFile(outputFile, json)
286-
287-
print("Result saved in " + outputFile)
288-
289-
#------------------------------------------------------
290-
# Example with customized options:
291-
print("Classify pages with customized options")
292-
293-
options = DataExtractionOptions()
294-
# Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
295-
options.SetMinimumConfidenceThreshold(0.7)
296-
outputFile = outputPath + "Email_Classified.json"
297-
DataExtractionModule.ExtractData(inputPath + "Email.pdf", outputFile, DataExtractionModule.e_DocClassification, options)
298-
299-
print("Result saved in " + outputFile)
300-
301-
except Exception as e:
302-
print("Unable to extract document structure data, error: " + str(e))
303-
304255
PDFNet.Terminate()
305256
print("Done.")
306257

Samples/DataExtractionTest/RUBY/DataExtractionTest.rb

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -244,57 +244,6 @@ def main()
244244
end
245245
end
246246

247-
#-----------------------------------------------------------------------------------
248-
# The following sample illustrates how to extract document classes from PDF documents.
249-
#-----------------------------------------------------------------------------------
250-
251-
# Test if the add-on is installed
252-
if !DataExtractionModule.IsModuleAvailable(DataExtractionModule::E_DocClassification) then
253-
puts ""
254-
puts "Unable to run Data Extraction: PDFTron SDK Structured Output module not available."
255-
puts "-----------------------------------------------------------------------------"
256-
puts "The Data Extraction suite is an optional add-on, available for download"
257-
puts "at https://docs.apryse.com/documentation/core/info/modules/. If you have already"
258-
puts "downloaded this module, ensure that the SDK is able to find the required files"
259-
puts "using the PDFNet.AddResourceSearchPath() function."
260-
puts ""
261-
else
262-
begin
263-
# Simple example: classify pages as a JSON file
264-
puts "Classify pages as a JSON file"
265-
266-
outputFile = $outputPath + "Invoice_Classified.json"
267-
DataExtractionModule.ExtractData($inputPath + "Invoice.pdf", outputFile, DataExtractionModule::E_DocClassification)
268-
269-
puts "Result saved in " + outputFile
270-
271-
#------------------------------------------------------
272-
# Classify pages as a JSON string
273-
puts "Classify pages as a JSON string"
274-
275-
outputFile = $outputPath + "Scientific_Publication_Classified.json"
276-
json = DataExtractionModule.ExtractData($inputPath + "Scientific_Publication.pdf", DataExtractionModule::E_DocClassification)
277-
File.open(outputFile, 'w') { |file| file.write(json) }
278-
279-
puts "Result saved in " + outputFile
280-
281-
#------------------------------------------------------
282-
# Example with customized options:
283-
puts "Classify pages with customized options"
284-
285-
options = DataExtractionOptions.new()
286-
# Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
287-
options.SetMinimumConfidenceThreshold(0.7)
288-
outputFile = $outputPath + "Email_Classified.json"
289-
DataExtractionModule.ExtractData($inputPath + "Email.pdf", outputFile, DataExtractionModule::E_DocClassification, options)
290-
291-
puts "Result saved in " + outputFile
292-
293-
rescue => error
294-
puts "Unable to extract document structure data, error: " + error.message
295-
end
296-
end
297-
298247
#-----------------------------------------------------------------------------------
299248

300249
PDFNet.Terminate

0 commit comments

Comments
 (0)