-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetBins.py
59 lines (46 loc) · 1.3 KB
/
getBins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding: utf-8 -*-
import Utils
import pandas as pd
from numpy import log2, floor
def getNamesOfBins(qnt: int):
    """Build ``qnt`` distinct two-character labels for bins.

    The first character cycles through the lowercase alphabet and the second
    character advances after every full pass, so the sequence is
    ``aa, ba, ..., za, ab, bb, ...``.

    Args:
        qnt: Number of labels to produce.

    Returns:
        A list of ``qnt`` label strings; empty when ``qnt`` is not positive.
        Past 26 * 26 labels the second character leaves the ``a``-``z`` range
        (``chr(123)`` is ``'{'``), but labels remain unique.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    letLen = len(letters)
    labels = []
    position = 0
    while len(labels) < qnt:
        # Wrap the first letter around the alphabet; without the modulo the
        # 27th label would index past the end of ``letters``.
        first = letters[position % letLen]
        # 97 is ord('a'): the second letter counts completed alphabet passes.
        second = chr((position // letLen) + 97)
        labels.append(first + second)
        position += 1
    return labels
def getNumberOfBins(qnt: int):
    """Return the number of bins for ``qnt`` observations.

    Computes ``1 + floor(log2(qnt))`` (a floor variant of Sturges' formula)
    and returns it as a built-in ``int`` so the value can be used directly
    wherever an integer count is required.

    Args:
        qnt: Number of observations; must be at least 1.

    Returns:
        The bin count as an ``int`` (always >= 1).

    Raises:
        ValueError: If ``qnt`` is less than 1, where the logarithm is
            undefined or infinite.
    """
    if qnt < 1:
        raise ValueError("qnt must be a positive number of observations")
    # numpy's floor returns a float64; convert so callers get a real integer.
    return 1 + int(floor(log2(qnt)))
def transformToRows(columns):
    """Transpose a sequence of columns into a list of rows.

    Row ``i`` of the result holds the ``i``-th element of every column, in
    column order. The number of rows is taken from the first column.

    Args:
        columns: Sequence of indexable columns.

    Returns:
        A list of rows (each a list); empty when ``columns`` is empty.

    Raises:
        IndexError: If a later column is shorter than the first one.
    """
    # Nothing to transpose; also avoids indexing columns[0] on empty input.
    if len(columns) == 0:
        return []
    rowCount = len(columns[0])
    return [[column[i] for column in columns] for i in range(rowCount)]
def prepareData(columns):
    """Discretize each column into labelled bins and return the data row-wise.

    For every column the bin count is derived from the number of values via
    ``getNumberOfBins``, the values are cut into that many equal-width bins
    with ``pd.cut``, and each value is replaced by its bin label from
    ``getNamesOfBins``. The binned columns are then transposed into rows
    ("transactions") with ``transformToRows``.

    Args:
        columns: Iterable of column objects exposing their values through a
            ``_data`` attribute.
            NOTE(review): presumably the objects returned by
            ``Utils.readExcelData`` - confirm against that module.

    Returns:
        A list of rows, each a list with one bin label per column.
    """
    binned = []
    # Categorize each column.
    for col in columns:
        values = col._data
        # pd.cut needs an integer bin count; getNumberOfBins is built on
        # numpy's floor, which yields a float, so convert explicitly.
        k = int(getNumberOfBins(len(values)))
        binned.append(
            list(
                pd.cut(values,
                       bins=k,
                       labels=getNamesOfBins(k))
            )
        )
    # Switch from columns to transactions (rows).
    return transformToRows(binned)
if __name__ == '__main__':
    # Manual run: load the sample workbook through the project's Utils module.
    param = Utils.readExcelData('data/CarData1Lab.xlsx')
    # Debug aids (disabled): print(len(param[0])) shows the first column's
    # length, and print(prepareData(param)) dumps the binned rows.