Revert "DocClassification samples"

bcldemjen · bcldemjen · commit cdd798069fab · 2025-09-19T11:30:29.000-07:00
This reverts commit 05314b0.
diff --git a/Samples/DataExtractionTest/GO/DataExtraction_test.go b/Samples/DataExtractionTest/GO/DataExtraction_test.go
@@ -302,60 +302,6 @@ func GenericKeyValueTest() (err error) {
 	return nil
 }
 
-//---------------------------------------------------------------------------------------
-// The following sample illustrates how to extract document classes from PDF documents.
-//---------------------------------------------------------------------------------------
-
-func DocClassifierTest() (err error) {
-	defer catch(&err)
-
-	// Test if the add-on is installed
-	if !DataExtractionModuleIsModuleAvailable(DataExtractionModuleE_DocClassification) {
-		fmt.Println("")
-		fmt.Println("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.")
-		fmt.Println("-----------------------------------------------------------------------------")
-		fmt.Println("The Data Extraction suite is an optional add-on, available for download")
-		fmt.Println("at https://docs.apryse.com/documentation/core/info/modules/. If you have already")
-		fmt.Println("downloaded this module, ensure that the SDK is able to find the required files")
-		fmt.Println("using the PDFNetAddResourceSearchPath() function.")
-		fmt.Println("")
-		return nil
-	}
-
-	// Simple example: classify pages as a JSON file
-	fmt.Println("Classify pages as a JSON file")
-
-	inputFile := inputPath + "Invoice.pdf"
-	outputFile := outputPath + "Invoice_Classified.json"
-	DataExtractionModuleExtractData(inputFile, outputFile, DataExtractionModuleE_DocClassification)
-
-	fmt.Println("Result saved in " + outputFile)
-
-	// Classify pages as a JSON string
-	fmt.Println("Classify pages as a JSON string")
-
-	inputFile = inputPath + "Scientific_Publication.pdf"
-	outputFile = outputPath + "Scientific_Publication_Classified.json"
-	json := DataExtractionModuleExtractData(inputFile, DataExtractionModuleE_DocClassification).(string)
-	WriteTextToFile(outputFile, json)
-
-	fmt.Println("Result saved in " + outputFile)
-
-	// Example with customized options:
-	fmt.Println("Classify pages with customized options")
-
-	inputFile = inputPath + "Email.pdf"
-	outputFile = outputPath + "Email_Classified.json"
-	options := NewDataExtractionOptions()
-	// Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
-	options.SetMinimumConfidenceThreshold(0.7)
-	DataExtractionModuleExtractData(inputFile, outputFile, DataExtractionModuleE_DocClassification, options)
-
-	fmt.Println("Result saved in " + outputFile)
-
-	return nil
-}
-
 //---------------------------------------------------------------------------------------
 
 func TestDataExtraction(t *testing.T) {
@@ -389,22 +335,13 @@ func TestDataExtraction(t *testing.T) {
 		fmt.Println(fmt.Errorf("Unable to extract form fields data, error: %s", err))
 	}
 
-	//-----------------------------------------------------------------------------------
-
 	err = GenericKeyValueTest()
 	if err != nil {
 		fmt.Println(fmt.Errorf("Unable to extract key-value pairs, error: %s", err))
 	}
 
 	//-----------------------------------------------------------------------------------
 
-	err = DocClassifierTest()
-	if err != nil {
-		fmt.Println(fmt.Errorf("Unable to extract document classifications, error: %s", err))
-	}
-
-	//-----------------------------------------------------------------------------------
-
 	PDFNetTerminate()
 	fmt.Println("Done.")
 }
diff --git a/Samples/DataExtractionTest/PHP/DataExtractionTest.php b/Samples/DataExtractionTest/PHP/DataExtractionTest.php
@@ -239,7 +239,7 @@ function main()
 			// Example with customized options:
 			// Extract Keys & Values from pages 2-4, excluding ads
 			$options = new DataExtractionOptions();
-			$options->SetPages("2-4");
+			$options->setPages("2-4");
 
 			$p2ExclusionZones = new RectCollection();
 			// Exclude the ad on page 2
@@ -267,58 +267,6 @@ function main()
 		}
 	}
 
-	//////////////////////////////////////////////////////////////////////////
-	// The following sample illustrates how to extract document classes from PDF documents.
-	//////////////////////////////////////////////////////////////////////////
-
-	// Test if the add-on is installed
-	if (!DataExtractionModule::IsModuleAvailable(DataExtractionModule::e_DocClassification)) {
-		echo(nl2br("\n"));
-		echo(nl2br("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.\n"));
-		echo(nl2br("-----------------------------------------------------------------------------\n"));
-		echo(nl2br("The Data Extraction suite is an optional add-on, available for download\n"));
-		echo(nl2br("at https://docs.apryse.com/documentation/core/info/modules/. If you have already\n"));
-		echo(nl2br("downloaded this module, ensure that the SDK is able to find the required files\n"));
-		echo(nl2br("using the PDFNet::AddResourceSearchPath() function.\n"));
-		echo(nl2br("\n"));
-	}
-	else {
-		try {
-			// Simple example: classify pages as a JSON file
-			echo(nl2br("Classify pages as a JSON file\n"));
-
-			$outputFile = $outputPath."Invoice_Classified.json";
-			DataExtractionModule::ExtractData($inputPath."Invoice.pdf", $outputFile, DataExtractionModule::e_DocClassification);
-
-			echo(nl2br("Result saved in " . $outputFile . "\n"));
-
-			///////////////////////////////////////////////////////
-			// Classify pages as a JSON string
-			echo(nl2br("Classify pages as a JSON string\n"));
-
-			$outputFile = $outputPath."Scientific_Publication_Classified.json";
-			$json = DataExtractionModule::ExtractData($inputPath."Scientific_Publication.pdf", DataExtractionModule::e_DocClassification);
-			WriteTextToFile($outputFile, $json);
-
-			echo(nl2br("Result saved in " . $outputFile . "\n"));
-
-			///////////////////////////////////////////////////////
-			// Example with customized options:
-			echo(nl2br("Classify pages with customized options\n"));
-
-			$options = new DataExtractionOptions();
-			// Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
-			$options->SetMinimumConfidenceThreshold(0.7);
-			$outputFile = $outputPath."Email_Classified.json";
-			DataExtractionModule::ExtractData($inputPath."Email.pdf", $outputFile, DataExtractionModule::e_DocClassification, $options);
-
-			echo(nl2br("Result saved in " . $outputFile . "\n"));
-		}
-		catch(Exception $e) {
-			echo(nl2br("Unable to extract document structure data, error: " . $e->getMessage() . "\n"));
-		}
-	}
-
 	//-----------------------------------------------------------------------------------
 
 	PDFNet::Terminate();
diff --git a/Samples/DataExtractionTest/PYTHON/DataExtractionTest.py b/Samples/DataExtractionTest/PYTHON/DataExtractionTest.py
@@ -252,55 +252,6 @@ def main():
                 print("Unable to extract key-value data, error: " + str(e))
 
 
-    #-----------------------------------------------------------------------------------
-    # The following sample illustrates how to extract document classes from PDF documents.
-    #-----------------------------------------------------------------------------------
-
-    # Test if the add-on is installed
-    if not DataExtractionModule.IsModuleAvailable(DataExtractionModule.e_DocClassification):
-        print("")
-        print("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.")
-        print("-----------------------------------------------------------------------------")
-        print("The Data Extraction suite is an optional add-on, available for download")
-        print("at https://docs.apryse.com/documentation/core/info/modules/. If you have already")
-        print("downloaded this module, ensure that the SDK is able to find the required files")
-        print("using the PDFNet.AddResourceSearchPath() function.")
-        print("")
-    else:
-        try:
-            # Simple example: classify pages as a JSON file
-            print("Classify pages as a JSON file")
-
-            outputFile = outputPath + "Invoice_Classified.json"
-            DataExtractionModule.ExtractData(inputPath + "Invoice.pdf", outputFile, DataExtractionModule.e_DocClassification)
-
-            print("Result saved in " + outputFile)
-
-            #------------------------------------------------------
-            # Classify pages as a JSON string
-            print("Classify pages as a JSON string")
-
-            outputFile = outputPath + "Scientific_Publication_Classified.json"
-            json = DataExtractionModule.ExtractData(inputPath + "Scientific_Publication.pdf", DataExtractionModule.e_DocClassification)
-            WriteTextToFile(outputFile, json)
-
-            print("Result saved in " + outputFile)
-
-            #------------------------------------------------------
-            # Example with customized options:
-            print("Classify pages with customized options")
-
-            options = DataExtractionOptions()
-            # Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
-            options.SetMinimumConfidenceThreshold(0.7)
-            outputFile = outputPath + "Email_Classified.json"
-            DataExtractionModule.ExtractData(inputPath + "Email.pdf", outputFile, DataExtractionModule.e_DocClassification, options)
-
-            print("Result saved in " + outputFile)
-
-        except Exception as e:
-            print("Unable to extract document structure data, error: " + str(e))
-
     PDFNet.Terminate()
     print("Done.")
     
diff --git a/Samples/DataExtractionTest/RUBY/DataExtractionTest.rb b/Samples/DataExtractionTest/RUBY/DataExtractionTest.rb
@@ -244,57 +244,6 @@ def main()
 		end
 	end
 
-	#-----------------------------------------------------------------------------------
-	# The following sample illustrates how to extract document classes from PDF documents.
-	#-----------------------------------------------------------------------------------
-
-	# Test if the add-on is installed
-	if !DataExtractionModule.IsModuleAvailable(DataExtractionModule::E_DocClassification) then
-		puts ""
-		puts "Unable to run Data Extraction: PDFTron SDK Structured Output module not available."
-		puts "-----------------------------------------------------------------------------"
-		puts "The Data Extraction suite is an optional add-on, available for download"
-		puts "at https://docs.apryse.com/documentation/core/info/modules/. If you have already"
-		puts "downloaded this module, ensure that the SDK is able to find the required files"
-		puts "using the PDFNet.AddResourceSearchPath() function."
-		puts ""
-	else
-		begin
-			# Simple example: classify pages as a JSON file
-			puts "Classify pages as a JSON file"
-	
-			outputFile = $outputPath + "Invoice_Classified.json"
-			DataExtractionModule.ExtractData($inputPath + "Invoice.pdf", outputFile, DataExtractionModule::E_DocClassification)
-
-			puts "Result saved in " + outputFile
-
-			#------------------------------------------------------
-			# Classify pages as a JSON string
-			puts "Classify pages as a JSON string"
-	
-			outputFile = $outputPath + "Scientific_Publication_Classified.json"
-			json = DataExtractionModule.ExtractData($inputPath + "Scientific_Publication.pdf", DataExtractionModule::E_DocClassification)
-			File.open(outputFile, 'w') { |file| file.write(json) }
-	
-			puts "Result saved in " + outputFile
-
-			#------------------------------------------------------
-			# Example with customized options:
-			puts "Classify pages with customized options"
-	
-			options = DataExtractionOptions.new()
-			# Classes that don't meet the minimum confidence threshold of 70% will not be listed in the output JSON
-			options.SetMinimumConfidenceThreshold(0.7)
-			outputFile = $outputPath + "Email_Classified.json"
-			DataExtractionModule.ExtractData($inputPath + "Email.pdf", outputFile, DataExtractionModule::E_DocClassification, options)
-
-			puts "Result saved in " + outputFile
-			
-		rescue => error
-			puts "Unable to extract document structure data, error: " + error.message
-		end
-	end
-
 	#-----------------------------------------------------------------------------------
 
 	PDFNet.Terminate