Skip to content

Commit f443468

Browse files
author
thc
committed
v0.1 working draft
0 parents  commit f443468

File tree

7 files changed

+196
-0
lines changed

7 files changed

+196
-0
lines changed

README.md

Whitespace-only changes.

go.mod

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
module projects/subayes
2+
3+
go 1.20
4+
5+
require github.com/navossoc/bayesian v0.0.0-20230423142728-ab66f8feaf97

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
github.com/navossoc/bayesian v0.0.0-20230423142728-ab66f8feaf97 h1:6CBjPos6l0GnoMCMiQPbhPRhHXFaa7RFAGwpRwveVlI=
2+
github.com/navossoc/bayesian v0.0.0-20230423142728-ab66f8feaf97/go.mod h1:P1c1lcW3JeYIRbVw98K6qNHJq/3hX4ru5SCQc84ZbZo=

main.go

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package main
2+
3+
import (
4+
"bufio"
5+
"errors"
6+
"flag"
7+
"fmt"
8+
"os"
9+
"regexp"
10+
11+
"github.com/navossoc/bayesian"
12+
)
13+
14+
var (
15+
db, data string
16+
learnSpam, learnHam bool
17+
Spam bayesian.Class = "Spam"
18+
Ham bayesian.Class = "Ham"
19+
rxp = regexp.MustCompile(" |'|,|\t|\n")
20+
)
21+
22+
func main() {
23+
24+
// db is path for storing data classes
25+
flag.StringVar(&db, "db", "db", " db path")
26+
// data is the file to be read
27+
flag.StringVar(&data, "d", "data", "data filename")
28+
// choosing between learning Spam or Han (write db/classes files)
29+
flag.BoolVar(&learnSpam, "learnSpam", false, "Learn Spam subjects")
30+
flag.BoolVar(&learnHam, "learnHam", false, "Learn Ham subjects")
31+
// Default is to read stdin line per line for classification
32+
flag.Parse()
33+
34+
K := bayesian.NewClassifier(Ham, Spam)
35+
36+
switch {
37+
38+
case learnHam && learnSpam:
39+
errcheck(errors.New("Please choose learn Ham or Spam, not Both !"))
40+
41+
case learnHam:
42+
errcheck(learn(K, db, data, Ham))
43+
showClassesCount(K)
44+
45+
case learnSpam:
46+
errcheck(learn(K, db, data, Spam))
47+
showClassesCount(K)
48+
49+
case !learnHam && !learnSpam:
50+
errcheck(K.ReadClassFromFile(Spam, db))
51+
errcheck(K.ReadClassFromFile(Ham, db))
52+
showClassesCount(K)
53+
54+
scanner := bufio.NewScanner(os.Stdin)
55+
56+
for scanner.Scan() { // read line per line
57+
text := scanner.Text()
58+
59+
if len(text) > 3 { // Minimum 'Re:'
60+
61+
spl := rxp.Split(text, -1)
62+
63+
fmt.Printf("%v: %s\n",
64+
classify(K, spl),
65+
text)
66+
}
67+
}
68+
if err := scanner.Err(); err != nil {
69+
errcheck(err)
70+
}
71+
}
72+
}
73+
74+
func learn(c *bayesian.Classifier, xdb string, input string, class bayesian.Class) (err error) {
75+
76+
err = c.ReadClassFromFile(class, xdb)
77+
// if db/class don't exist, we will create it, so any err is acceptable
78+
// errcheck(err)
79+
80+
in, err := os.ReadFile(input) // in type is []byte
81+
errcheck(err)
82+
83+
ins := string(in) // ins type is string
84+
85+
indata := rxp.Split(ins, -1) // indata is []string
86+
c.Learn(indata, class)
87+
88+
err = c.WriteClassToFile(class, xdb)
89+
errcheck(err)
90+
91+
return nil
92+
}
93+
func classify(c *bayesian.Classifier, pattern []string) bayesian.Class {
94+
// ProbScores return scores ([]float64), indexofclass, strict(?)
95+
_, likelyb, _ := c.ProbScores(pattern)
96+
return c.Classes[likelyb]
97+
}
98+
99+
func showClassesCount(c *bayesian.Classifier) {
100+
fmt.Printf("INFO classifier corpus : ")
101+
for i := 0; i < len(c.Classes); i++ {
102+
fmt.Printf(" [ %v -> %d items ]",
103+
c.Classes[i],
104+
c.WordCount()[i])
105+
106+
}
107+
fmt.Println()
108+
}
109+
110+
// errcheck stops the program when e is non-nil: the error is written to
// standard error, so it does not mix with the results printed on standard
// output, and the process exits with status 1. A nil error is a no-op.
func errcheck(e error) {
	if e == nil {
		return
	}
	fmt.Fprintln(os.Stderr, e)
	os.Exit(1)
}

test/go.mod

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
module projects/subayes/test
2+
3+
go 1.20
4+
5+
require github.com/navossoc/bayesian v0.0.0-20230423142728-ab66f8feaf97

test/go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
github.com/navossoc/bayesian v0.0.0-20230423142728-ab66f8feaf97 h1:6CBjPos6l0GnoMCMiQPbhPRhHXFaa7RFAGwpRwveVlI=
2+
github.com/navossoc/bayesian v0.0.0-20230423142728-ab66f8feaf97/go.mod h1:P1c1lcW3JeYIRbVw98K6qNHJq/3hX4ru5SCQc84ZbZo=

test/main.go

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/navossoc/bayesian"
7+
)
8+
9+
const (
10+
Good bayesian.Class = "Good"
11+
Bad bayesian.Class = "Bad"
12+
// Grey bayesian.Class = "Grey"
13+
)
14+
15+
func main() {
16+
17+
classifier := bayesian.NewClassifier(Good, Bad)
18+
19+
goodStuff := []string{"tall", "rich", "handsome", "tennis", "chicken", "theatre", "reading", "books", "money", "suits", "golf", "eating", "wine"}
20+
badStuff := []string{"poor", "smelly", "ugly", "playing", "games", "burgers", "tv", "magazines", "debts", "jogging", "foot", "drinking", "beer", "Zdob<C4><85>d<C5><BA>pobli\xc5\xbcu"}
21+
greyStuff := []string{"man", "girl", "young", "citizen"}
22+
23+
classifier.Learn(goodStuff, Good)
24+
classifier.Learn(badStuff, Bad)
25+
classifier.Learn(greyStuff, Bad)
26+
27+
// classifier.Learn(greyStuff, Grey)
28+
29+
//classifier.WriteToFile("data")
30+
31+
fmt.Printf("classifier learned :\n")
32+
for i := 0; i < len(classifier.Classes); i++ {
33+
fmt.Printf(" %d words for classes %v \n",
34+
classifier.WordCount()[i],
35+
classifier.Classes[i])
36+
}
37+
fmt.Println()
38+
39+
tests := [][]string{
40+
{"tall", "girl", "watching", "tv", "eating", "chicken", "wearing", "jogging"},
41+
{"small", "handsome", "man", "playing", "games", "before", "tv"},
42+
{"unknows", "words"},
43+
}
44+
45+
for _, pattern := range tests {
46+
47+
fmt.Printf("test : %v\n", pattern)
48+
49+
probs, likelyb, _ := classifier.ProbScores(pattern)
50+
fmt.Printf("probs ")
51+
for i := 0; i < len(classifier.Classes); i++ {
52+
fmt.Printf("%s(%.2f) ",
53+
classifier.Classes[i], probs[i])
54+
}
55+
fmt.Printf("=> Class : %s\n\n",
56+
classifier.Classes[likelyb])
57+
}
58+
}
59+
60+
// func round(num float64) int {
61+
// return int(num + math.Copysign(0.5, num))
62+
// }
63+
64+
// func toFixed(num float64, precision int) float64 {
65+
// output := math.Pow(10, float64(precision))
66+
// return float64(round(num*output)) / output
67+
// }

0 commit comments

Comments
 (0)