This repository has been archived by the owner on Dec 17, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
read.go
252 lines (183 loc) · 5.57 KB
/
read.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
package main
import (
"encoding/binary"
"io"
"log"
"os"
"runtime"
"time"
wp "github.com/gammazero/workerpool"
"github.com/itzmeanjan/pproto/pb"
"google.golang.org/protobuf/proto"
)
// SequentialReadFromFile - Given path to protocol buffer encoded data file
// attempting to read deserialised content of file in-memory, sequentially
// but in buffer fashion, so that memory footprint stays low
// SequentialReadFromFile - Given path to protocol buffer encoded data file
// attempting to read deserialised content of file in-memory, sequentially
// but in buffer fashion, so that memory footprint stays low
//
// Returns whether whole file was processed successfully, along with
// count of entries decoded before stopping
func SequentialReadFromFile(file string) (bool, int) {
	// Opening file in read only mode
	fd, err := os.OpenFile(file, os.O_RDONLY, 0644)
	if err != nil {

		log.Printf("[!] Error : %s\n", err.Error())
		return false, 0

	}

	// file handle to be closed when whole file is read i.e.
	// EOF reached
	defer fd.Close()

	// count of entries read back from file
	var count int

	for {

		buf := make([]byte, 4)

		// reading size of next protocol buffer encoded
		// data chunk
		//
		// io.ReadFull is used instead of a bare fd.Read, because Read
		// is allowed to return fewer than len(buf) bytes without error,
		// which would silently corrupt the length-prefix framing
		if _, err := io.ReadFull(fd, buf); err != nil {

			// reached EOF, good to get out of loop
			if err == io.EOF {
				break
			}

			log.Printf("[!] Error : %s\n", err.Error())
			return false, count

		}

		// converting size of next data chunk to `uint`
		// so that memory allocation can be performed
		// for next read
		size := binary.LittleEndian.Uint32(buf)

		// allocated buffer where to read next protocol buffer
		// serialized data chunk
		//
		// again io.ReadFull, so a partial read of the payload is
		// reported as an error ( io.ErrUnexpectedEOF ) instead of
		// being decoded as truncated garbage
		data := make([]byte, size)
		if _, err = io.ReadFull(fd, data); err != nil {

			log.Printf("[!] Error : %s\n", err.Error())
			return false, count

		}

		// Synthetic delay to emulate interaction with DB
		time.Sleep(time.Duration(1) * time.Microsecond)

		// attempting to deserialize protocol buffer encoded
		// data into something meaningful
		cpu := &pb.CPU{}
		if err := proto.Unmarshal(data, cpu); err != nil {

			log.Printf("[!] Error : %s\n", err.Error())
			return false, count

		}

		count++

	}

	return true, count
}
// ConcurrentReadFromFile - Reading content from file and letting
// workers process those content concurrently
// ConcurrentReadFromFile - Reading content from file and letting
// workers process those content concurrently
//
// Returns whether reading completed without error, along with
// count of entries handed over to workers
func ConcurrentReadFromFile(file string) (bool, int) {
	// Opening file in read only mode
	fd, err := os.OpenFile(file, os.O_RDONLY, 0644)
	if err != nil {

		log.Printf("[!] Error : %s\n", err.Error())
		return false, 0

	}

	// file handle to be closed when whole file is read i.e.
	// EOF reached
	defer fd.Close()

	// count of entries read back from file
	var count uint64
	// set when reading stops due to an error, so caller gets
	// `false` instead of the unconditional `true` the original
	// version returned even after a mid-file failure
	var failed bool

	pool := wp.New(runtime.NumCPU())
	control := make(chan bool, 1000)
	entryCount := make(chan uint64)
	done := make(chan bool)

	go UnmarshalCoordinator(control, entryCount, done)

	for {

		buf := make([]byte, 4)

		// reading size of next protocol buffer encoded
		// data chunk
		//
		// io.ReadFull guards against short reads — a bare Read may
		// deliver fewer than 4 bytes without error, corrupting the
		// length-prefix framing
		if _, err := io.ReadFull(fd, buf); err != nil {

			// reached EOF, good to get out of loop
			if err == io.EOF {
				break
			}

			log.Printf("[!] Error : %s\n", err.Error())
			failed = true
			break

		}

		// converting size of next data chunk to `uint`
		// so that memory allocation can be performed
		// for next read
		size := binary.LittleEndian.Uint32(buf)

		// allocated buffer where to read next protocol buffer
		// serialized data chunk
		data := make([]byte, size)
		if _, err = io.ReadFull(fd, data); err != nil {

			log.Printf("[!] Error : %s\n", err.Error())
			failed = true
			break

		}

		count++

		// Submitting job to worker pool
		//
		// `data` is freshly declared by `:=` on each iteration, so the
		// closure safely captures this iteration's buffer — the
		// immediately-invoked wrapper function the original used here
		// was redundant
		pool.Submit(func() {
			UnmarshalData(data, control)
		})

	}

	// letting coordinator know that `count` many workers
	// should let it know about their respective status of job
	entryCount <- count
	// waiting for coordinator to let us know
	// that all workers have completed their job
	<-done

	// no more jobs to be submitted to pool
	// but all existing one to be completed
	//
	// this call is redundant here, but still made
	pool.StopWait()

	return !failed, int(count)
}
// UnmarshalData - Given byte array read from file, attempting
// to unmarshall it into structured data, with synthetic delay
//
// Also letting coordinator go routine know that this worker
// has completed its job
// UnmarshalData - Given byte array read from file, attempting
// to unmarshall it into structured data, with synthetic delay
//
// Also letting coordinator go routine know that this worker
// has completed its job
func UnmarshalData(data []byte, control chan bool) {
	// synthetic delay to emulate database interaction
	time.Sleep(time.Duration(1) * time.Microsecond)

	record := &pb.CPU{}
	err := proto.Unmarshal(data, record)
	if err != nil {
		log.Printf("[!] Error : %s\n", err.Error())
	}

	// single status send: true on successful decode, false otherwise
	control <- err == nil
}
// UnmarshalCoordinator - Given a lot of unmarshal workers to be
// created for processing i.e. deserialize & put into DB, more entries
// in smaller amount of time, they need to be synchronized properly
//
// That's all this go routine does
// UnmarshalCoordinator - Given a lot of unmarshal workers to be
// created for processing i.e. deserialize & put into DB, more entries
// in smaller amount of time, they need to be synchronized properly
//
// That's all this go routine does
//
// `control` carries one status message per worker, `count` carries
// ( exactly once, after the whole file is read ) the total number of
// workers to wait for, `done` signals the caller that all finished
func UnmarshalCoordinator(control <-chan bool, count <-chan uint64, done chan bool) {
	// letting main go routine know reading & processing all entries
	// done successfully, while getting out of this execution context
	defer func() {
		done <- true
	}()

	// received processing done count from worker go routines
	var success uint64
	// how many workers should actually let this go routine know
	// their status i.e. how long is this go routine supposed to
	// wait for all of them to finish
	var expected uint64
	// whether `expected` has been received yet — both counters start
	// at zero, which must not be mistaken for "all workers done"
	var expectedKnown bool

	for {

		select {

		case c := <-control:
			if !c {
				// NOTE : log.Fatalf terminates the whole process,
				// not just this goroutine
				log.Fatalf("[!] Error received by unmarshal coordinator\n")
			}

			// some worker just let us know it completed
			// its job successfully
			success++

			// If this satisfies, it's time to exit from loop
			// i.e. all workers have completed their job
			if expectedKnown && success == expected {
				return
			}

		case v := <-count:
			expected = v
			expectedKnown = true

			// BUG FIX : if every worker already reported before the
			// expected count arrived ( or zero entries were read from
			// file ), the original version looped forever because the
			// completion condition was only checked in the `control`
			// branch — so check it here as well
			if success == expected {
				return
			}

		}

	}
}