Skip to content

Latest commit

 

History

History
70 lines (55 loc) · 2.19 KB

README.md

File metadata and controls

70 lines (55 loc) · 2.19 KB

GoDoc Build Status Report

Gopostagger

Hidden Markov Models (HMM) applied to part-of-speech tagging in Go. An implementation of "Part-of-Speech Tagging with Hidden Markov Models" by Graham Neubig.

Installation

go get github.com/lucasmenendez/gopostagger

Tested corpus

Name Language Size Link corpus
Brown en 11.6 Mb Link
AnCora es 0.54 Mb Link

Examples

Tag sentence

    package main

    import (
        "github.com/lucasmenendez/gotokenizer"
        "github.com/lucasmenendez/gopostagger"
        "fmt"
    )

    // main loads a previously stored model from ./models/es, tags a sample
    // Spanish sentence with it, and prints every tagged token on one line.
    // If the model cannot be loaded, the error is printed and nothing else runs.
    func main() {
        model, err := gopostagger.LoadModel("./models/es")
        if err != nil {
            fmt.Println(err)
            return
        }

        sentence := "El mundo del tatuaje es la forma de representación artística más expresiva que puede existir para un artista, puesto que su obra permanece inalterable de por vida."

        // NOTE(review): gotokenizer.Words presumably splits the sentence into
        // word tokens — confirm against the gotokenizer documentation.
        words := gotokenizer.Words(sentence)
        tagged := gopostagger.NewTagger(model).Tag(words)

        // Each entry of the result is printed with %q, followed by a space.
        for i := range tagged {
            fmt.Printf("%q ", tagged[i])
        }
    }

Train corpus

IMPORTANT: All datasets must have the following format: raw_word/tag_proposed

    package main

    import (
        "github.com/lucasmenendez/gopostagger"
        "fmt"
    )

    // main trains a model from the corpus at ./es and stores it under
    // ./models/es. The first error encountered (training or storing) is
    // printed and ends the program; otherwise "Trained!" is printed.
    func main() {
        model, err := gopostagger.Train("./es")
        if err != nil {
            fmt.Println(err)
            return
        }

        // Only report success once the trained model has been stored.
        if err = model.Store("./models/es"); err != nil {
            fmt.Println(err)
            return
        }

        fmt.Println("Trained!")
    }