Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
.DS_Store
obj

.idea

#Python
__pycache__/
*.py[cod]
Expand All @@ -26,3 +28,20 @@ MANIFEST
.venv
.venv/
.vscode/

#Go
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ For documentation on usage see: [src-python/README.md](src-python/README.md)

For documentation on usage see: [src-js/README.md](src-js/README.md)

## Go Usage

For documentation on usage see: [src-go/README.md](src-go/README.md)

## C# Usage

### Forms
Expand Down
55 changes: 55 additions & 0 deletions src-go/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Usage

## Forms

```go
bucket := "BUCKET"
key := "KEY"
feature := "FORMS"
jobId := StartDocumentAnalysis(&bucket, &key, &feature)
if *jobId == "" {
t.Fail()
}

output := GetJobResults(jobId)
if *output.JobStatus != "SUCCEEDED" {
t.Fail()
}
```

## Tables

```go
bucket := "BUCKET"
key := "KEY"
feature := "TABLES"
jobId := StartDocumentAnalysis(&bucket, &key, &feature)
if *jobId == "" {
t.Fail()
}

output := GetJobResults(jobId)
if *output.JobStatus != "SUCCEEDED" {
t.Fail()
}
```

# Test

## Prerequisites

- [Install](https://go.dev/doc/install) Go
- [Install](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html)
and
[Configure](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html)
AWS CLI

Then

- Download source code to your local machine
- Populate the `test/testConfig.yaml` file with the proper configuration settings
- Run the following at a command line inside the source code folder to execute the tests:

```
go test
```
10 changes: 10 additions & 0 deletions src-go/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module trp

go 1.17

require (
github.com/aws/aws-sdk-go v1.42.16
gopkg.in/yaml.v2 v2.2.8
)

require github.com/jmespath/go-jmespath v0.4.0 // indirect
18 changes: 18 additions & 0 deletions src-go/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
github.com/aws/aws-sdk-go v1.42.16 h1:jOUmYYpC77NZYQVHTOTFT4lwFBT1u3s8ETKciU4l6gQ=
github.com/aws/aws-sdk-go v1.42.16/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
5 changes: 5 additions & 0 deletions src-go/test/testConfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
region: "us-east-2"
profile: "test-profile"
bucketToTest: "frgud-ml-tfc-bucket"
keyToTest: "textractor/3MSDS.pdf"
featuresListItem: "FORMS" #FORMS or TABLES
94 changes: 94 additions & 0 deletions src-go/textractService.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package trp

import (
"context"
"fmt"
"github.com/aws/aws-sdk-go/service/textract"
"github.com/aws/aws-sdk-go/service/textract/textractiface"
"time"
)

// svc is the package-level Textract client that GetJobResults, IsJobComplete
// and StartDocumentAnalysis call. It is declared without an initial value and
// is assigned by TextractAnalysisService.
var svc textractiface.TextractAPI

// TextractAnalysisService stores service as the package-level Textract client.
// The parameter is the textractiface.TextractAPI interface, so any
// implementation of that interface can be supplied.
func TextractAnalysisService(service textractiface.TextractAPI) {
	svc = service
}

const (
	// jobResultsTimeout bounds how long GetJobResults waits for a job to
	// finish. The previous code passed a bare 500 to context.WithTimeout, which
	// is 500 nanoseconds; seconds are presumably what was intended.
	// NOTE(review): confirm the desired upper bound.
	jobResultsTimeout = 500 * time.Second

	// jobPollInterval is the pause between two status checks of a running job.
	jobPollInterval = 5 * time.Second
)

// GetJobResults waits for the document-analysis job identified by jobId to
// finish and then returns the output of a GetDocumentAnalysis call for it.
//
// It returns nil when jobId is nil or empty, when the job does not finish
// within jobResultsTimeout, when the job fails, or when the Textract client
// reports an error. Errors are printed to standard output because the
// signature has no error result.
//
// Only a single GetDocumentAnalysis call is made for the final output and no
// NextToken is supplied, so following additional result pages is left to the
// caller.
func GetJobResults(jobId *string) *textract.GetDocumentAnalysisOutput {
	if jobId == nil || *jobId == "" {
		fmt.Println("encountered an error: job id is missing")
		return nil
	}

	ctx, cancelFunction := context.WithTimeout(context.Background(), jobResultsTimeout)
	defer cancelFunction()

	// Buffered so the poller can always deliver its verdict and exit, even if
	// this function has already stopped waiting because of the timeout.
	ch := make(chan bool, 1)
	go IsJobComplete(ctx, jobId, ch)

	select {
	case finished := <-ch:
		if !finished {
			return nil
		}
	case <-ctx.Done():
		fmt.Printf("encountered an error %s\n", ctx.Err())
		return nil
	}

	output, err := svc.GetDocumentAnalysis(&textract.GetDocumentAnalysisInput{
		JobId: jobId,
	})
	if err != nil {
		fmt.Printf("encountered an error %s\n", err)
		return nil
	}

	fmt.Println("Document Analysis Results Collected.")
	return output
}

// IsJobComplete polls the status of the job identified by jobId and sends
// exactly one value on ch: true once the job has results to fetch, false when
// polling stopped for any other reason (missing job id, client error, failed
// job, or ctx cancelled/expired).
//
// The send on ch blocks until a receiver is ready unless ch is buffered, so
// callers that may stop listening should pass a channel with capacity 1.
func IsJobComplete(ctx context.Context, jobId *string, ch chan bool) {
	defer fmt.Println("go subroutine IsJobComplete complete")
	ch <- pollUntilJobDone(ctx, jobId)
}

// pollUntilJobDone checks the job status every jobPollInterval until the job
// reaches a terminal state or ctx is done, and reports whether results are
// available.
func pollUntilJobDone(ctx context.Context, jobId *string) bool {
	if jobId == nil || *jobId == "" {
		fmt.Println("encountered an error: job id is missing")
		return false
	}

	for {
		// Stop before issuing another request if the caller gave up.
		if err := ctx.Err(); err != nil {
			fmt.Printf("encountered an error %s\n", err)
			return false
		}

		fmt.Println("checking status of job " + *jobId)
		response, err := svc.GetDocumentAnalysis(&textract.GetDocumentAnalysisInput{
			JobId: jobId,
		})
		if err != nil {
			fmt.Printf("encountered an error %s\n", err)
			return false
		}

		// Guard the dereference: a response without a status is treated as
		// "not finished yet" and polled again.
		status := ""
		if response != nil && response.JobStatus != nil {
			status = *response.JobStatus
		}

		switch status {
		case textract.JobStatusSucceeded, textract.JobStatusPartialSuccess:
			// Partial success is terminal too; the caller can inspect
			// JobStatus on the returned output to tell the two apart.
			return true
		case textract.JobStatusFailed:
			fmt.Println("encountered an error: job " + *jobId + " failed")
			return false
		}

		// Wait for the next poll, but wake up early if ctx is done.
		select {
		case <-ctx.Done():
			fmt.Printf("encountered an error %s\n", ctx.Err())
			return false
		case <-time.After(jobPollInterval):
		}
	}
}

// StartDocumentAnalysis asks Textract to start an asynchronous analysis of the
// S3 object at bucketName/key with the single feature type featureType, and
// returns the id of the started job.
//
// It returns nil when featureType is nil, when the client reports an error, or
// when the client returns no response. Errors are printed to standard output
// because the signature has no error result.
func StartDocumentAnalysis(bucketName *string, key *string, featureType *string) *string {
	if featureType == nil {
		fmt.Println("encountered an error: feature type is missing")
		return nil
	}

	request := &textract.StartDocumentAnalysisInput{
		DocumentLocation: &textract.DocumentLocation{
			S3Object: &textract.S3Object{
				Bucket: bucketName,
				Name:   key,
			},
		},
		FeatureTypes: []*string{featureType},
	}

	response, err := svc.StartDocumentAnalysis(request)
	if err != nil {
		// Previously the error was built with fmt.Errorf and discarded, so
		// failures were invisible; report it instead.
		fmt.Printf("encountered an error %s\n", err)
		return nil
	}
	if response == nil {
		return nil
	}
	return response.JobId
}
79 changes: 79 additions & 0 deletions src-go/textractService_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package trp

import (
"fmt"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/textract"
"gopkg.in/yaml.v2"
"io/ioutil"
"log"
"path/filepath"
"runtime"
"testing"
)

// c is the shared configuration value on which the tests call getConfig.
var c testConfig

// testConfigFile is the path of the YAML settings file that the tests pass to
// getConfig, which resolves it against the directory of this source file.
var testConfigFile = "test/testConfig.yaml"

// testConfig mirrors the keys of the YAML settings file used by the tests.
type testConfig struct {
	// Region is passed as the AWS region when the session is created.
	Region string `yaml:"region"`
	// Profile is the shared-credentials profile name used for the session.
	Profile string `yaml:"profile"`
	// BucketToTest is passed as the bucket name to StartDocumentAnalysis.
	BucketToTest string `yaml:"bucketToTest"`
	// KeyToTest is passed as the object key to StartDocumentAnalysis.
	KeyToTest string `yaml:"keyToTest"`
	// FeaturesListItem is passed as the feature type to StartDocumentAnalysis.
	FeaturesListItem string `yaml:"featuresListItem"`
}

// getConfig reads the YAML file fileName, resolved relative to the directory
// of this source file, decodes it into c and returns c.
//
// Any failure (source location unknown, file unreadable, invalid YAML) ends
// the process via log.Fatal, so the returned pointer is never nil. Previously
// a YAML error returned nil, which the callers dereferenced.
func (c *testConfig) getConfig(fileName string) *testConfig {
	_, thisFile, _, ok := runtime.Caller(0)
	if !ok {
		log.Fatal("unable to determine the location of the test source file")
	}

	path := filepath.Join(filepath.Dir(thisFile), fileName)
	file, err := ioutil.ReadFile(path)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("printing test config...")
	fmt.Printf("\t%#v\n\n", string(file))
	if err := yaml.Unmarshal(file, c); err != nil {
		log.Fatalf("parsing %s: %v", path, err)
	}

	return c
}

// connectToTextract creates an AWS session from the profile and region in
// config, builds a Textract client on it and registers that client with
// TextractAnalysisService. session.Must panics if the session cannot be
// created.
func connectToTextract(config *testConfig) {
	awsConfig := aws.Config{
		Credentials: credentials.NewSharedCredentials("", config.Profile),
		Region:      &config.Region,
	}
	client := textract.New(session.Must(session.NewSession(&awsConfig)))
	TextractAnalysisService(client)
}

// TestStartDocumentAnalysis starts an analysis job for the configured S3
// object and fails when no job id comes back.
func TestStartDocumentAnalysis(t *testing.T) {
	cfg := c.getConfig(testConfigFile)
	connectToTextract(cfg)

	id := StartDocumentAnalysis(&cfg.BucketToTest, &cfg.KeyToTest, &cfg.FeaturesListItem)

	// The nil check must come first so the dereference is never reached on nil.
	hasID := id != nil && *id != ""
	if !hasID {
		t.Fail()
	}
}

// TestGetJobResultsAsync starts an analysis job for the configured S3 object,
// waits for its results through GetJobResults and expects the reported job
// status to be "SUCCEEDED".
func TestGetJobResultsAsync(t *testing.T) {
	config := c.getConfig(testConfigFile)
	connectToTextract(config)

	jobId := StartDocumentAnalysis(&config.BucketToTest, &config.KeyToTest, &config.FeaturesListItem)
	if jobId == nil || *jobId == "" {
		// Stop here: continuing would hand a nil/empty id to GetJobResults.
		t.Fatal("StartDocumentAnalysis returned no job id")
	}

	output := GetJobResults(jobId)
	if output == nil || output.JobStatus == nil {
		// GetJobResults returns nil on failure; dereferencing it would panic.
		t.Fatal("GetJobResults returned no job status")
	}
	if *output.JobStatus != "SUCCEEDED" {
		t.Fatalf("job status = %q, want %q", *output.JobStatus, "SUCCEEDED")
	}
}