forked from sibbr/tableconverter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reshape.go
123 lines (107 loc) · 2.6 KB
/
reshape.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package main
import (
"encoding/csv"
"fmt"
"io"
"strconv"
"strings"
)
// reshapeError is the error type returned when a table cannot be reshaped.
// prob holds the complete, human-readable description of the problem.
type reshapeError struct {
	prob string
}

// Error implements the error interface and returns the stored description
// unchanged.
func (e *reshapeError) Error() string {
	message := fmt.Sprint(e.prob)
	return message
}
// Melt reshapes a delimited table from wide to long format.
//
// It reads a header row followed by data rows from input and writes CSV to
// output. sep selects the input field delimiter: the literal string "tab"
// means a tab character, otherwise the first character of sep is used.
//
// Columns named in fixed are repeated on every output row, in the order given
// in fixed. Every other column produces one output row per input row, holding
// the column name and the cell value. The output header is
//
//	conversion_ID, <fixed...>, Variable_Name, Varaible_Value
//
// where conversion_ID is the 1-based index of the input data row. Header
// labels and the names in fixed are compared after trimming surrounding white
// space; the caller's fixed slice is not modified.
//
// Rows are processed one at a time, so memory use does not grow with the
// number of rows. A row shorter than the header yields empty strings for
// missing fixed columns and no output rows for missing variable columns.
//
// Melt returns an error when sep is empty or not a valid delimiter, when the
// header cannot be read (io.EOF for empty input), when the header has
// duplicated labels, when a name in fixed is not a header label, when a data
// row has more fields than the header, or when reading or writing fails.
func Melt(input io.Reader, output io.Writer, fixed []string, sep string) error {
	if sep == "" {
		return &reshapeError{"Error: field separator must not be empty"}
	}

	reader := csv.NewReader(input)
	if sep == "tab" {
		reader.Comma = '\t'
	} else {
		// Take the first rune, not the first byte, so a multi-byte UTF-8
		// separator is not truncated into a different character.
		reader.Comma = []rune(sep)[0]
	}
	reader.FieldsPerRecord = -1
	reader.LazyQuotes = true

	labels, err := reader.Read()
	if err != nil {
		return err
	}

	// Data cleaning: remove leading and trailing white space. The fixed names
	// are trimmed into a private copy so the caller's slice is left untouched.
	for i, label := range labels {
		labels[i] = strings.TrimSpace(label)
	}
	fixedNames := make([]string, len(fixed))
	for i, name := range fixed {
		fixedNames[i] = strings.TrimSpace(name)
	}

	// Stop if duplicated labels are found; every repeated occurrence is
	// reported.
	seen := make(map[string]struct{}, len(labels))
	var duplicates []string
	for _, label := range labels {
		if _, ok := seen[label]; ok {
			duplicates = append(duplicates, label)
		}
		seen[label] = struct{}{}
	}
	if len(duplicates) > 0 {
		return &reshapeError{"Error: Duplicated columns not allowed: " + strings.Join(duplicates, ", ")}
	}

	// Resolve the fixed columns in the order of fixedNames so the values
	// written on each row line up with the output header.
	fixedPos := make([]int, 0, len(fixedNames))
	for _, name := range fixedNames {
		pos := indexContains(name, &labels)
		if pos < 0 {
			return &reshapeError{"Selected fixed column(s) was not found in table file"}
		}
		fixedPos = append(fixedPos, pos)
	}

	// Decide once per column whether it is fixed, instead of once per cell.
	isFixed := make([]bool, len(labels))
	for i, label := range labels {
		isFixed[i] = contains(label, &fixedNames)
	}

	writer := csv.NewWriter(output)
	header := make([]string, 0, len(fixedNames)+3)
	header = append(header, "conversion_ID")
	header = append(header, fixedNames...)
	// NOTE: "Varaible_Value" is misspelled but kept as is, because consumers
	// of the output may already depend on this column name.
	header = append(header, "Variable_Name", "Varaible_Value")
	if err := writer.Write(header); err != nil {
		return err
	}

	// For each line do a rotation and write, so nothing accumulates in memory.
	// Number of resulting lines = (ncol - fixed) * nrow.
	// record is reused for every output row; csv.Writer does not retain it.
	record := make([]string, 0, len(fixedPos)+3)
	for eventID := 1; ; eventID++ {
		line, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if len(line) > len(labels) {
			// There is no label to name the extra cells, so refuse the row
			// instead of indexing past the header.
			return &reshapeError{"Error: data row " + strconv.Itoa(eventID) + " has more fields than the header"}
		}

		// Build the part shared by all output rows of this input row.
		record = append(record[:0], strconv.Itoa(eventID))
		for _, pos := range fixedPos {
			value := ""
			if pos < len(line) {
				value = line[pos]
			}
			record = append(record, value)
		}
		prefixLen := len(record)

		for elem, value := range line {
			if isFixed[elem] {
				continue
			}
			record = append(record[:prefixLen], labels[elem], value)
			if err := writer.Write(record); err != nil {
				return err
			}
		}
	}

	// csv.Writer buffers its output, so a failing destination may only show
	// up on Flush; report that instead of claiming success.
	writer.Flush()
	return writer.Error()
}
// contains reports whether element is equal to any entry of the slice that
// elements points to.
func contains(element string, elements *[]string) bool {
	items := *elements
	for i := 0; i < len(items); i++ {
		if items[i] == element {
			return true
		}
	}
	return false
}
// indexContains returns the index of the first entry equal to element in the
// slice that elements points to, or -1 when no entry matches.
func indexContains(element string, elements *[]string) int {
	position := -1
	for i, candidate := range *elements {
		if candidate == element {
			position = i
			break
		}
	}
	return position
}