-
Notifications
You must be signed in to change notification settings - Fork 0
/
rocketDataFrame.go
158 lines (144 loc) · 3.69 KB
/
rocketDataFrame.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// Package rocketc is a fast, simple and lightweight library for CSV data manipulation and mathematical computation involving 2D matrices.
package rocketc
import (
"fmt"
"strconv"
)
// DataFrame : Basic data container, stores tabular data as a 2D slice of strings.
// The outer slice holds the rows and each inner slice holds the cells of one row.
// Rows are independent slices, so the type itself does not force them to share a
// length; Cols reports the length of row 0 only, and Headers treats row 0 as the
// header row.
type DataFrame [][]string
// Rows : Reports how many rows the DataFrame holds, i.e. the length of its
// outer slice. A nil or empty DataFrame yields 0.
func (d DataFrame) Rows() int {
	count := len(d)
	return count
}
// Cols : Reports the number of columns, measured as the length of row 0.
// Only the first row is inspected, so the result is accurate for every row
// only when the DataFrame is uniform. An empty DataFrame yields 0.
func (d DataFrame) Cols() int {
	if len(d) == 0 {
		return 0
	}
	first := d[0]
	return len(first)
}
// Shape : Returns the dimensions of the DataFrame as a freshly allocated
// slice of length 2: index 0 is the row count (Rows) and index 1 is the
// column count (Cols).
func (d DataFrame) Shape() []int {
	return []int{d.Rows(), d.Cols()}
}
// Headers : Returns the header of the DataFrame, i.e. row 0.
// The returned slice is row 0 itself, not a copy, so writes through it are
// visible in the DataFrame. An empty or nil DataFrame has no header row and
// yields nil instead of panicking.
func (d DataFrame) Headers() []string {
	if len(d) == 0 {
		return nil
	}
	return d[0]
}
// Head : Returns the first n rows of the DataFrame, including the header row.
// The result is a sub-slice of d and shares its rows; nothing is copied.
// If n exceeds the number of rows the whole DataFrame is returned, and a
// zero or negative n yields an empty DataFrame instead of panicking.
func (d DataFrame) Head(n int) DataFrame {
	switch {
	case n <= 0:
		// Clamp: a negative bound would be an out-of-range slice expression.
		return d[:0]
	case n < len(d):
		return d[:n]
	default:
		return d
	}
}
// SetHeaders : Set custom column names on a DataFrame.
// Takes a slice of strings containing the column names and inserts it as a
// new row 0; every existing row moves down by one, so the row count grows
// by one. The header slice and the existing rows are stored as-is (not
// copied). Only the outer slice is reallocated.
func (d *DataFrame) SetHeaders(header []string) {
	old := *d
	// One outer allocation is enough: every slot is filled right below, so
	// there is no need to pre-allocate inner rows.
	updated := make(DataFrame, len(old)+1)
	updated[0] = header
	copy(updated[1:], old)
	*d = updated
}
// Allocate : Builds a blank DataFrame with row rows and col columns.
// Every row is its own freshly made slice, and every cell is the empty
// string.
func Allocate(row, col int) DataFrame {
	frame := make(DataFrame, row)
	for r := range frame {
		frame[r] = make([]string, col)
	}
	return frame
}
// WipeDown : Returns a uniform DataFrame made of only those rows of m whose
// length is exactly l, in their original order. Takes a DataFrame and an
// integer as arguments. Kept rows are shared with m rather than copied, and
// the result is nil when no row matches.
func WipeDown(m DataFrame, l int) DataFrame {
	var kept DataFrame
	for _, row := range m {
		if len(row) != l {
			continue
		}
		kept = append(kept, row)
	}
	return kept
}
// DropColumn : Drops columns from a DataFrame. Takes a DataFrame and a
// variable number of arguments which are the indexes of the columns to be
// dropped, and returns a new DataFrame with one row per input row holding
// the remaining cells in their original order. The input is not modified.
//
// Indexes that match no column (negative or past the end of a row) are
// ignored, duplicates are harmless, and passing no indexes returns a copy of
// d. Each row is filtered on its own, so empty and ragged DataFrames are
// handled without panicking.
func DropColumn(d DataFrame, i ...int) DataFrame {
	// Set of column indexes to skip. A set places no constraint on the
	// relation between the largest index and the actual row width.
	drop := make(map[int]bool, len(i))
	for _, idx := range i {
		drop[idx] = true
	}

	result := make(DataFrame, len(d))
	for r, row := range d {
		kept := make([]string, 0, len(row))
		for c, cell := range row {
			if !drop[c] {
				kept = append(kept, cell)
			}
		}
		result[r] = kept
	}
	return result
}
// ConvMatrix : Converts a numerical DataFrame into a Matrix. Every cell is
// parsed with strconv.ParseFloat at 64-bit precision and then narrowed to
// float32 before being stored.
//
// The result is sized from d.Rows() and d.Cols(), so the width of row 0
// decides how many cells are read from each row. Cells beyond that width are
// ignored. Returns a nil Matrix and an error if a cell cannot be parsed as a
// float (the strconv error is returned unchanged) or if a row is shorter than
// row 0 (reported as an error rather than an index-out-of-range panic).
//
// NOTE(review): Matrix and Zeros are declared outside this block; Zeros is
// presumably a rows x cols zero-filled constructor. Confirm against its definition.
func ConvMatrix(d DataFrame) (Matrix, error) {
	rows, cols := d.Rows(), d.Cols()
	m := Zeros(rows, cols)
	for i := 0; i < rows; i++ {
		if len(d[i]) < cols {
			return nil, fmt.Errorf("rocketc: row %d has %d columns, want %d", i, len(d[i]), cols)
		}
		for j := 0; j < cols; j++ {
			v, err := strconv.ParseFloat(d[i][j], 64)
			if err != nil {
				return nil, err
			}
			m[i][j] = float32(v)
		}
	}
	return m, nil
}
// PrintDataframe : Pretty-prints one or more DataFrames to standard output.
// Each row starts with its index right-aligned in 3 characters followed by
// " |". Cells are left-aligned and padded to 15 characters, separated by
// ", ", and the last cell of a row is followed by a space and a newline.
// A blank line is printed after every DataFrame.
//
// NOTE: a row with no cells prints only its index prefix, without a newline.
func PrintDataframe(d ...DataFrame) {
	for _, frame := range d {
		for rowIdx, cells := range frame {
			fmt.Printf("%3d |", rowIdx)
			last := len(cells) - 1
			for colIdx, cell := range cells {
				if colIdx == last {
					fmt.Printf("%-15s \n", cell)
					continue
				}
				fmt.Printf("%-15s, ", cell)
			}
		}
		fmt.Println()
	}
}
// GetColumnsDataFrame : Returns a new DataFrame containing only the columns
// whose indexes are passed as arguments, in the order given. Takes a
// DataFrame and a variadic number of integers which are column indexes.
// The same index may be repeated to duplicate a column. Cells are copied
// into freshly allocated rows, so the result does not share rows with d.
// An index that is out of range for any row causes a panic.
func GetColumnsDataFrame(d DataFrame, i ...int) DataFrame {
	picked := make(DataFrame, len(d))
	for r := range picked {
		picked[r] = make([]string, len(i))
	}
	for r := range d {
		for k := range i {
			picked[r][k] = d[r][i[k]]
		}
	}
	return picked
}