This repository has been archived by the owner on Oct 8, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloccounter.go
275 lines (253 loc) · 9.58 KB
/
loccounter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
// Copyright 2018 Christos Katsakioris
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package glocc
import (
"bufio"
"fmt"
"os"
"strings"
"unicode/utf8"
)
// stateInitial and stateCode carry no per-counter data, so a single shared
// instance of each is enough for every LocCounter.
var (
	globalStateInitial = new(stateInitial)
	globalStateCode    = new(stateCode)
)
// LocCounter is the core entity of the package, which initiates and later
// holds the state of the counting for a single file.
// It is associated to the counting of a single file, and created in the
// goroutine that is assigned to count the file.
type LocCounter struct {
// language is the language definition selected from the file extension; it
// supplies the inline and multi-line comment tokens the states look for.
language language
// loc is the number of lines counted as code so far; Count returns it.
loc int
// file is the file whose lines are being counted.
file *os.File
// currLine is the not-yet-processed remainder of the line being handled,
// with leading spaces and tabs trimmed. States cut consumed prefixes off it.
currLine string
// currLineCounted is reset for every line by Count and set by the states
// once the line is found to contain code.
currLineCounted bool
// fileLinesCnt is the number of lines read from file so far, i.e. the
// 1-based number of the current line; it is used in log messages.
fileLinesCnt int
// state is the state that processes the next piece of input.
state loccState
// stateMultiLineComment is this counter's own multi-line comment state; it
// is per-counter because it stores the opening token of the comment.
stateMultiLineComment *stateMultiLineComment
}
// NewLocCounter creates a LocCounter set up to count the lines of code of
// file, using the language registered for the extension ext.
// The counter starts in the initial state with its own multi-line comment
// state. If no supported language is registered for ext, a nil counter and an
// error are returned.
func NewLocCounter(file *os.File, ext string) (lc *LocCounter, err error) {
	lang, known := languages[ext]
	if !known {
		return nil, fmt.Errorf("Cannot deduce a supported language from extension %q.", ext)
	}
	return &LocCounter{
		language:              lang,
		file:                  file,
		state:                 globalStateInitial,
		stateMultiLineComment: new(stateMultiLineComment),
	}, nil
}
// Count is the only exported method of LocCounter. It reads the file
// associated with the LocCounter line by line and feeds every line to the
// current state (State design pattern) until that state reports the line as
// fully processed.
//
// It returns the number of lines counted as code. If reading the file fails,
// the error is logged and returned together with the count reached so far.
func (lc *LocCounter) Count() (int, error) {
	// bufio.Scanner refuses lines longer than bufio.MaxScanTokenSize (64 KiB)
	// by default and stops with bufio.ErrTooLong, which makes minified or
	// generated sources impossible to count. Let the buffer grow on demand up
	// to maxLineBytes instead; the small initial buffer keeps the common case
	// as cheap as before.
	const (
		initialBufBytes = 4096
		maxLineBytes    = 16 * 1024 * 1024
	)

	name := lc.file.Name()
	logger.Printf("DEBUG LocCounter.Count() for file %q: Starting...\n", name)

	fsc := bufio.NewScanner(lc.file)
	fsc.Buffer(make([]byte, 0, initialBufBytes), maxLineBytes)
	for fsc.Scan() {
		lc.fileLinesCnt++
		// Leading whitespace never matters to the states, so drop it once here.
		lc.currLine = strings.TrimLeft(fsc.Text(), " \t")
		lc.currLineCounted = false

		// A single line may move the counter through several states (e.g.
		// code, then a block comment, then code again); keep going until the
		// active state reports that the line is exhausted.
		for !lc.state.process(lc) {
		}

		if lc.currLineCounted {
			logger.Printf("DEBUG %q:%d --> Counted\n", name, lc.fileLinesCnt)
			lc.loc++
		} else {
			logger.Printf("DEBUG %q:%d --> Discarded\n", name, lc.fileLinesCnt)
		}
	}
	if err := fsc.Err(); err != nil {
		logger.Println("ERROR", err)
		return lc.loc, err
	}

	logger.Printf("DEBUG LocCounter.Count() for file %q: Finished.\n", name)
	return lc.loc, nil
}
// setState makes next the state that handles the following call to process.
func (lc *LocCounter) setState(next loccState) {
	lc.state = next
}
// lineIsEmpty reports whether nothing is left of the current line.
func (lc *LocCounter) lineIsEmpty() bool {
	return lc.currLine == ""
}
// inlineCommentIndex returns the byte offset of the earliest inline comment
// token of the counter's language within the current line. If the line holds
// no such token, the length of the line is returned instead. A debug message
// is logged whenever a token is found.
func (lc *LocCounter) inlineCommentIndex() int {
	lineLen := len(lc.currLine)
	earliest := lineLen
	for _, tok := range lc.language.inlineCommentTokens {
		if pos := strings.Index(lc.currLine, tok); pos >= 0 && pos < earliest {
			earliest = pos
		}
	}
	if earliest != lineLen {
		logger.Printf("DEBUG Inline comment token found at %q:%d\n", lc.file.Name(), lc.fileLinesCnt)
	}
	return earliest
}
// loccState is the current state of a LocCounter. The state may change zero
// or more times while a single line is being processed.
// It is the State interface of the State design pattern implementation.
type loccState interface {
// process handles (the remainder of) the current line of the given
// LocCounter. It returns true when the line is fully processed, which ends
// the loop in LocCounter.Count that keeps calling process; it returns false
// when the state has changed and the new state must continue with the line.
process(*LocCounter) bool
}
// stateInitial is the state every LocCounter starts in. It holds no data.
type stateInitial struct{}
// process handles a line while the counter is still in its initial state.
//
// Empty lines and lines that start with an inline comment token are consumed
// without leaving the state. Any other line makes the counter leave
// stateInitial: for stateMultiLineComment if a multi-line comment starts
// before any inline comment token, for stateCode otherwise. In both of those
// cases false is returned so that the new state continues with the line.
func (s *stateInitial) process(lc *LocCounter) bool {
	inlineAt := lc.inlineCommentIndex()
	if inlineAt == 0 || lc.lineIsEmpty() {
		return true
	}

	// Locate the earliest multi-line comment opener on the line, if any.
	blockAt, opener := len(lc.currLine), ""
	for _, tok := range lc.language.multiLineCommentStartingTokens {
		if pos := strings.Index(lc.currLine, tok); pos >= 0 && pos < blockAt {
			blockAt, opener = pos, tok
		}
	}

	// No opener ahead of the first inline comment token: the line is code.
	if blockAt >= inlineAt {
		lc.setState(globalStateCode)
		return false
	}

	logger.Printf("DEBUG Multi-line comment starting at %q:%d\n", lc.file.Name(), lc.fileLinesCnt)
	// Anything in front of the opener makes this a line to be counted.
	if blockAt != 0 {
		lc.currLineCounted = true
	}
	// Hand what follows the opener to stateMultiLineComment right away, as
	// the comment may end (and the state change again) on this very line.
	afterOpener := lc.currLine[blockAt+len(opener):]
	lc.currLine = strings.TrimLeft(afterOpener, " \t")
	lc.stateMultiLineComment.setToken(opener)
	lc.setState(lc.stateMultiLineComment)
	return false
}
// stateMultiLineComment is the state of a LocCounter that is currently inside
// a multi-line comment.
type stateMultiLineComment struct {
// token is the token that opened the comment being processed. It is kept
// so that the matching closing token can be looked for, which is needed
// for Python (and any similar language), where e.g. `'''` may occur
// inside a `"""` multi-line comment, and `"""` inside a `'''` one,
// without ending it.
token string
}
// process handles a line while the counter is inside a multi-line comment.
//
// Supported languages mostly close a block comment with the opening token
// reversed, so when the reversed opener is one of the language's ending
// tokens it is the only token that can end the comment. Otherwise (for now:
// Ruby, and Java/PHP docstrings) any ending token of the language does.
//
// If the comment ends on this line, the text after the closing token is
// handed to stateCode and false is returned; otherwise the whole line belongs
// to the comment and true is returned.
func (s *stateMultiLineComment) process(lc *LocCounter) bool {
	// Select the tokens that are allowed to end this comment.
	closers := lc.language.multiLineCommentEndingTokens
	mirrored := reversed(lc.stateMultiLineComment.token)
	for _, tok := range closers {
		if tok == mirrored {
			closers = []string{mirrored}
			break
		}
	}

	// Locate the earliest closing token on the line, if any.
	endAt, closer := len(lc.currLine), ""
	for _, tok := range closers {
		if pos := strings.Index(lc.currLine, tok); pos >= 0 && pos < endAt {
			endAt, closer = pos, tok
		}
	}

	// Still inside the comment: nothing more to do with this line.
	if endAt == len(lc.currLine) {
		return true
	}

	logger.Printf("DEBUG Multi-line comment ending at %q:%d\n", lc.file.Name(), lc.fileLinesCnt)
	s.token = ""
	lc.currLine = strings.TrimLeft(lc.currLine[endAt+len(closer):], " \t")
	lc.setState(globalStateCode)
	return false
}
// setToken records opener as the token that opened the multi-line comment
// about to be processed. It returns nothing.
func (s *stateMultiLineComment) setToken(opener string) {
	s.token = opener
}
// stateCode is the state of a LocCounter that is currently processing code,
// i.e. lines that need to be counted in. It holds no data.
type stateCode struct{}
// process handles a line while the counter is in code.
//
// Empty lines and lines that start with an inline comment token are consumed
// without being counted. If a multi-line comment starts before any inline
// comment token, the line is counted only when something precedes the opener,
// and the rest of the line is handed to stateMultiLineComment (false is
// returned). Every other line is counted as code.
func (s *stateCode) process(lc *LocCounter) bool {
	inlineAt := lc.inlineCommentIndex()
	if inlineAt == 0 || lc.lineIsEmpty() {
		return true
	}

	// Locate the earliest multi-line comment opener on the line, if any.
	blockAt, opener := len(lc.currLine), ""
	for _, tok := range lc.language.multiLineCommentStartingTokens {
		if pos := strings.Index(lc.currLine, tok); pos >= 0 && pos < blockAt {
			blockAt, opener = pos, tok
		}
	}

	// No opener ahead of the first inline comment token: plain code line.
	if blockAt >= inlineAt {
		lc.currLineCounted = true
		return true
	}

	logger.Printf("DEBUG Multi-line comment start found at %q:%d\n", lc.file.Name(), lc.fileLinesCnt)
	// Anything in front of the opener makes this a line to be counted.
	if blockAt != 0 {
		lc.currLineCounted = true
	}
	// Hand what follows the opener to stateMultiLineComment right away, as
	// the comment may end (and the state change again) on this very line.
	afterOpener := lc.currLine[blockAt+len(opener):]
	lc.currLine = strings.TrimLeft(afterOpener, " \t")
	lc.stateMultiLineComment.setToken(opener)
	lc.setState(lc.stateMultiLineComment)
	return false
}
// reversed returns s with the order of its runes reversed; the bytes of each
// multi-byte rune keep their order, so valid UTF-8 stays valid.
//
// Bytes that do not form valid UTF-8 are treated as one-byte units and are
// carried over unchanged. They are deliberately not re-encoded: the
// replacement rune takes three bytes, which would not fit in the space of the
// single byte it stands for.
func reversed(s string) string {
	size := len(s)
	buf := make([]byte, size)
	for i := 0; i < size; {
		// Only the width is needed; for an invalid byte it is 1.
		_, n := utf8.DecodeRuneInString(s[i:])
		// The unit occupying s[i:i+n] ends up mirrored at the other end.
		copy(buf[size-i-n:], s[i:i+n])
		i += n
	}
	return string(buf)
}