diff --git a/.gitignore b/.gitignore index 30c2dd6..d8423d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .DS_Store obj +.idea + #Python __pycache__/ *.py[cod] @@ -26,3 +28,20 @@ MANIFEST .venv .venv/ .vscode/ + +#Go +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ diff --git a/README.md b/README.md index 80147b0..15c05c7 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,10 @@ For documentation on usage see: [src-python/README.md](src-python/README.md) For documentation on usage see: [src-js/README.md](src-js/README.md) +## Go Usage + +For documentation on usage see: [src-go/README.md](src-go/README.md) + ## C# Usage ### Forms diff --git a/src-go/README.md b/src-go/README.md new file mode 100644 index 0000000..2b2686c --- /dev/null +++ b/src-go/README.md @@ -0,0 +1,55 @@ +# Usage + +## Forms + +```go +bucket := "BUCKET" +key := "KEY" +feature := "FORMS" +jobId := StartDocumentAnalysis(&bucket, &key, &feature) + if *jobId == "" { + t.Fail() + } + + output := GetJobResults(jobId) + if *output.JobStatus != "SUCCEEDED" { + t.Fail() + } +``` + +## Tables + +```go +bucket := "BUCKET" +key := "KEY" +feature := "TABLES" +jobId := StartDocumentAnalysis(&bucket, &key, &feature) + if *jobId == "" { + t.Fail() + } + + output := GetJobResults(jobId) + if *output.JobStatus != "SUCCEEDED" { + t.Fail() + } +``` + +# Test + +## Prerequisites + +- [Install](https://go.dev/doc/install) GoLang +- [Install](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html) + and + [Configure](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) + AWS CLI + +Then + +- Download source code to your local machine +- Populate the `test/testConfig.yaml` file with the proper configuration settings +- Run the following at a command line inside the source code folder to execute + +``` +go test +``` \ No newline at end of file diff --git a/src-go/go.mod b/src-go/go.mod new file mode 100644 index 0000000..ebcc40d --- /dev/null +++ b/src-go/go.mod @@ -0,0 +1,10 @@ +module trp + +go 1.17 + +require ( + github.com/aws/aws-sdk-go v1.42.16 + gopkg.in/yaml.v2 v2.2.8 +) + +require github.com/jmespath/go-jmespath v0.4.0 // indirect diff --git a/src-go/go.sum b/src-go/go.sum new file mode 100644 index 0000000..d119447 --- /dev/null +++ b/src-go/go.sum @@ -0,0 +1,18 @@ +github.com/aws/aws-sdk-go v1.42.16 h1:jOUmYYpC77NZYQVHTOTFT4lwFBT1u3s8ETKciU4l6gQ= +github.com/aws/aws-sdk-go v1.42.16/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/src-go/test/testConfig.yaml b/src-go/test/testConfig.yaml new file mode 100644 index 0000000..b939258 --- /dev/null +++ b/src-go/test/testConfig.yaml @@ -0,0 +1,5 @@ +region: "us-east-2" +profile: "test-profile" +bucketToTest: "frgud-ml-tfc-bucket" +keyToTest: "textractor/3MSDS.pdf" +featuresListItem: "FORMS" #FORMS or TABLES \ No newline at end of file diff --git a/src-go/textractService.go b/src-go/textractService.go new file mode 100644 index 0000000..243bb35 --- /dev/null +++ b/src-go/textractService.go @@ -0,0 +1,94 @@ +package trp + +import ( + "context" + "fmt" + "github.com/aws/aws-sdk-go/service/textract" + "github.com/aws/aws-sdk-go/service/textract/textractiface" + "time" +) + +var svc textractiface.TextractAPI + +func TextractAnalysisService(service textractiface.TextractAPI) { + svc = service +} + +func GetJobResults(jobId *string) *textract.GetDocumentAnalysisOutput { + ctx, cancelFunction := context.WithTimeout(context.Background(), 500) + + defer func() { + fmt.Println("Document Analysis Results Collected.") + cancelFunction() + }() + + ch := make(chan bool) + go IsJobComplete(ctx, jobId, ch) + + var output *textract.GetDocumentAnalysisOutput + var err error + + if <-ch { + ctx.Done() + output, err = svc.GetDocumentAnalysis(&textract.GetDocumentAnalysisInput{ + JobId: jobId, + }) + if err != nil { + _ = fmt.Errorf("encountered an error %s", err) + return nil + } + return output + } else { + return nil + } +} + +func IsJobComplete(context context.Context, jobId *string, ch chan bool) { + + isComplete := false + defer func() { + fmt.Println("go subroutine IsJobComplete complete") + }() + + if *jobId == "" { + context.Done() + } + + for !isComplete { + fmt.Println("checking status of job " + *jobId) + response, err := svc.GetDocumentAnalysis(&textract.GetDocumentAnalysisInput{ + JobId: jobId, + }) + if err != nil { + _ = fmt.Errorf("encountered an error %s", err) + context.Done() + } + if *response.JobStatus == "SUCCEEDED" { + isComplete = true + } else { + time.Sleep(5000000000) // 5 seconds + } + } + ch <- true +} + +func StartDocumentAnalysis(bucketName *string, key *string, featureType *string) *string{ + var featureList []*string + featureList = append(featureList, featureType) + request := &textract.StartDocumentAnalysisInput{ + DocumentLocation: &textract.DocumentLocation{ + S3Object: &textract.S3Object{ + Bucket: bucketName, + Name: key, + }, + }, + FeatureTypes: featureList, + } + + response, err := svc.StartDocumentAnalysis(request) + if err != nil { + _ = fmt.Errorf("encountered an error %s", err) + return nil + } + return response.JobId +} \ No newline at end of file diff --git a/src-go/textractService_test.go b/src-go/textractService_test.go new file mode 100644 index 0000000..be21799 --- /dev/null +++ b/src-go/textractService_test.go @@ -0,0 +1,79 @@ +package trp + +import ( + "fmt" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/textract" + "gopkg.in/yaml.v2" + "io/ioutil" + "log" + "path/filepath" + "runtime" + "testing" +) + +var c testConfig +var testConfigFile = "test/testConfig.yaml" + +type testConfig struct { + Region string `yaml:"region"` + Profile string `yaml:"profile"` + BucketToTest string `yaml:"bucketToTest"` + KeyToTest string `yaml:"keyToTest"` + FeaturesListItem string `yaml:"featuresListItem"` +} + +func (c *testConfig) getConfig(fileName string) *testConfig { + _, b, _, _ := runtime.Caller(0) + file, err := ioutil.ReadFile(filepath.Dir(b) + "/" + fileName) + if err != nil { + log.Fatal(err) + } + + fmt.Println("printing test config...") + fmt.Printf("\t%#v\n\n", string(file)) + err = yaml.Unmarshal(file, c) + if err != nil { + return nil + } + + return c +} + +func connectToTextract(config *testConfig) { + localSession := session.Must( + session.NewSession(&aws.Config{ + Credentials: credentials.NewSharedCredentials("", config.Profile), + Region: &config.Region, + }), + ) + mockSvc := textract.New(localSession) + TextractAnalysisService(mockSvc) +} + +func TestStartDocumentAnalysis(t *testing.T) { + config := c.getConfig(testConfigFile) + connectToTextract(config) + + jobId := StartDocumentAnalysis(&config.BucketToTest, &config.KeyToTest, &config.FeaturesListItem) + if jobId == nil || *jobId == "" { + t.Fail() + } +} + +func TestGetJobResultsAsync(t *testing.T) { + config := c.getConfig(testConfigFile) + connectToTextract(config) + + jobId := StartDocumentAnalysis(&config.BucketToTest, &config.KeyToTest, &config.FeaturesListItem) + if jobId == nil || *jobId == "" { + t.Fail() + } + + output := GetJobResults(jobId) + if *output.JobStatus != "SUCCEEDED" { + t.Fail() + } +} \ No newline at end of file