This repository has been archived by the owner on Sep 14, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
problem.go
184 lines (146 loc) · 4.77 KB
/
problem.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright 2015 The golinear Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file.
package golinear
/*
#cgo CFLAGS: -Wall -Wconversion -O3
#cgo LDFLAGS: -llinear -lstdc++ -lm
#include <stddef.h>
#include "wrap.h"
*/
import "C"
import (
"fmt"
"runtime"
"sort"
)
// FeatureValue represents a feature and its value. The Index of a feature
// is used to uniquely identify the feature, and should start at 1.
type FeatureValue struct {
// Index identifies the feature. Problem.Add rejects feature vectors that
// contain an index smaller than 1.
Index int
// Value is the value of the feature identified by Index.
Value float64
}
// A FeatureVector is a sparse feature vector, represented as a
// slice of non-zero features.
//
// The features do not have to be ordered by index: Problem.Add sorts a
// copy of the vector before handing it to the C library.
type FeatureVector []FeatureValue
// byIndex wraps a FeatureVector so that it can be sorted by ascending
// feature index through the sort.Interface methods defined on it
// (Len, Swap and Less).
type byIndex struct{ FeatureVector }
// A TrainingInstance consists of the label of the instance and
// its feature vector. In classification, the label is an integer
// indicating the class label. In regression, the label is the
// target value, which can be any real number. The label is not used
// for one-class SVMs.
type TrainingInstance struct {
// Label is the class label (classification) or target value (regression).
Label float64
// Features is the sparse feature vector of the instance.
Features FeatureVector
}
// A Problem is a set of instances and corresponding labels.
//
// A Problem refers to memory allocated on the C side. NewProblem registers
// a finalizer that frees this memory once the Problem is garbage collected.
type Problem struct {
// problem is the C-side problem structure that the instances are added to.
problem *C.problem_t
// insts records the C node array allocated by Add for every instance, so
// that the finalizer can free each of them.
insts []*C.feature_node_t
}
// NewProblem constructs a new problem instance. Problems are used
// to store training instances.
//
// The returned Problem wraps the C-side problem obtained from newProblem
// and starts without any recorded instance arrays. A finalizer is attached
// so that the C memory is released when the Problem is garbage collected.
func NewProblem() *Problem {
	p := &Problem{
		problem: newProblem(),
		insts:   nil,
	}

	runtime.SetFinalizer(p, finalizeProblem)

	return p
}
// finalizeProblem is the finalizer installed by NewProblem. It frees every
// C node array recorded in p.insts, drops the Go references to them, and
// finally frees the C-side problem itself.
func finalizeProblem(p *Problem) {
	for i := range p.insts {
		C.nodes_free(p.insts[i])
	}
	p.insts = nil

	C.problem_free(p.problem)
}
// FromDenseVector converts a dense feature vector, represented as a slice
// of feature values, to the sparse representation used by this package.
// The features are numbered 1..len(denseVector); every entry of the dense
// vector (including zeros) yields one feature. The following vectors are
// equal:
//
//	golinear.FromDenseVector([]float64{0.2, 0.1, 0.3, 0.6})
//	golinear.FeatureVector{{1, 0.2}, {2, 0.1}, {3, 0.3}, {4, 0.6}}
func FromDenseVector(denseVector []float64) FeatureVector {
	sparse := make(FeatureVector, 0, len(denseVector))
	for i := range denseVector {
		// Feature indices are one-based, slice positions zero-based.
		sparse = append(sparse, FeatureValue{Index: i + 1, Value: denseVector[i]})
	}
	return sparse
}
// cNodes copies nodes, in the order given, into a C node array obtained
// from newNodes and returns that array. The indices are neither validated
// nor sorted here.
func cNodes(nodes []FeatureValue) *C.feature_node_t {
	cArr := newNodes(C.size_t(len(nodes)))

	for i := range nodes {
		fv := nodes[i]
		C.nodes_put(cArr, C.size_t(i), C.int(fv.Index), C.double(fv.Value))
	}

	return cArr
}
// Add adds a training instance to the problem.
//
// The feature indices are verified first; if verification fails the error
// is returned and the problem is left unchanged. Otherwise a copy of the
// feature vector, sorted by index, is written to a new C node array that
// is added to the C-side problem together with the instance's label. The
// caller's feature vector is not modified.
func (problem *Problem) Add(trainInst TrainingInstance) error {
	err := verifyFeatureIndices(trainInst.Features)
	if err != nil {
		return err
	}

	sorted := sortedFeatureVector(trainInst.Features)
	cArr := newNodes(C.size_t(len(sorted)))

	// Record the array so that the finalizer can free it later.
	problem.insts = append(problem.insts, cArr)

	for i := range sorted {
		C.nodes_put(cArr, C.size_t(i), C.int(sorted[i].Index), C.double(sorted[i].Value))
	}

	C.problem_add_train_inst(problem.problem, cArr, C.double(trainInst.Label))

	return nil
}
// Bias returns the bias term of the problem, as reported by the C-side
// problem structure.
func (problem *Problem) Bias() float64 {
	bias := C.problem_bias(problem.problem)
	return float64(bias)
}
// SetBias sets the bias term of the problem. Setting the bias to a
// non-zero value amounts to adding an extra feature to each instance,
// with the bias as its value.
func (problem *Problem) SetBias(bias float64) {
	cBias := C.double(bias)
	C.set_problem_bias(problem.problem, cBias)
}
// ProblemIterFunc is the function prototype for iteration over problems.
// The function should return 'true' if the iteration should continue or
// 'false' otherwise.
//
// Problem.Iterate passes a pointer to a TrainingInstance that it builds
// anew for every call.
type ProblemIterFunc func(instance *TrainingInstance) bool
// Iterate calls fun for the training instances in the problem, in the
// order of their position in the C-side problem, and stops as soon as fun
// returns false.
//
// For every instance a fresh TrainingInstance is built: the label is read
// from the problem's label array and the features are copied out of the
// instance's C node array up to (not including) the node whose index is -1.
// The feature vector handed to fun is therefore a Go-side copy and is never
// nil, even when the instance has no features.
func (problem *Problem) Iterate(fun ProblemIterFunc) {
	// The instance count is re-read on every pass, exactly as a plain
	// counting loop over problem.problem.l would do.
	for i := 0; i < int(problem.problem.l); i++ {
		label := float64(C.get_double_idx(problem.problem.y, C.int(i)))
		nodes := C.nodes_vector_get(problem.problem, C.size_t(i))

		features := make(FeatureVector, 0)
		for j := C.size_t(0); ; j++ {
			// Fetch each node once: every C call crosses the Go/C
			// boundary, so the result is reused for both the terminator
			// check and the copy.
			node := C.nodes_get(nodes, j)
			if node.index == -1 {
				break
			}
			features = append(features, FeatureValue{
				Index: int(node.index),
				Value: float64(node.value),
			})
		}

		if !fun(&TrainingInstance{Label: label, Features: features}) {
			return
		}
	}
}
// Helper functions

// sortedFeatureVector returns a copy of fv whose features are ordered by
// ascending feature index. fv itself is left untouched. The result is
// never nil, even for a nil or empty input.
func sortedFeatureVector(fv FeatureVector) FeatureVector {
	dup := append(make(FeatureVector, 0, len(fv)), fv...)
	sort.Sort(byIndex{FeatureVector: dup})
	return dup
}
// verifyFeatureIndices checks that every feature index in featureVector can
// be passed to the C library unchanged. It returns nil if all indices are
// acceptable and an error describing the first offending feature otherwise.
//
// An index is rejected when it is smaller than 1, or when it is larger than
// the largest value of a 32-bit signed integer.
func verifyFeatureIndices(featureVector FeatureVector) error {
	// Indices are later converted with C.int(...), which cgo maps to a
	// 32-bit signed integer. Go integer conversions truncate silently, so a
	// larger index would be stored as a different feature, or even as -1,
	// the value that terminates a node array.
	const maxIndex = 1<<31 - 1

	for _, fv := range featureVector {
		if fv.Index < 1 {
			return fmt.Errorf("Feature index should be at least one: %d:%f",
				fv.Index, fv.Value)
		}
		if fv.Index > maxIndex {
			return fmt.Errorf("Feature index should be at most %d: %d:%f",
				maxIndex, fv.Index, fv.Value)
		}
	}

	return nil
}
// Interface for sorting of feature vectors by feature index.

// Len returns the number of features in the wrapped vector.
func (fv byIndex) Len() int {
	n := len(fv.FeatureVector)
	return n
}
// Swap exchanges the features at positions i and j of the wrapped vector.
func (fv byIndex) Swap(i, j int) {
	vec := fv.FeatureVector
	vec[i], vec[j] = vec[j], vec[i]
}
// Less reports whether the feature at position i has a smaller feature
// index than the feature at position j. Feature values are not compared.
func (fv byIndex) Less(i, j int) bool {
	left, right := fv.FeatureVector[i], fv.FeatureVector[j]
	return left.Index < right.Index
}