pydriller_commit.py
from pydriller import Repository
import json
import numpy as np
### csv output plan: rows = no. of developers, columns = months/yr.
### link to apache mailing list: http://mail-archives.apache.org/mod_mbox/
### for each commit, add its modified files to a map keyed by filename.
file_to_users = {}  ## for each file, the set of users who have committed to it
file_to_id = {}     ## maps each filename to an integer id (matrix index)
i = 0
url = "E:\\MS\\Qtr1\\ecs 260 se\\project\\tvm\\.git"
for commit in Repository(url).traverse_commits():
    print(
        'The commit has been modified by {} and files changed are {}\n'.format(
            commit.author.email,
            [x.filename for x in commit.modified_files]
        )
    )
    for x in commit.modified_files:
        if x.filename not in file_to_users:
            file_to_users[x.filename] = set()
        file_to_users[x.filename].add(str(commit.author.email))
        if x.filename not in file_to_id:
            file_to_id[x.filename] = i
            i = i + 1
### Identifying unique users across all files and assigning each an index
unique_users_dict = {}
unique_users = set()
i = 0
for x in file_to_users.keys():
    file_to_users[x] = list(file_to_users[x])
    for user in file_to_users[x]:
        if user not in unique_users_dict:
            unique_users_dict[user] = i
            i = i + 1
        unique_users.add(user)
### Calculation of Actual Coordination - fct (files changed together)
## Assumption: if n developers have all changed the same file, they must have
## communicated with each other at some point.
list_unique_users = list(unique_users)
AC = [[0]*len(list_unique_users) for x in range(0, len(list_unique_users))]
for key in file_to_users.keys():
    users = file_to_users[key]
    for i in range(0, len(users)):
        for j in range(i+1, len(users)):
            AC[unique_users_dict[users[i]]][unique_users_dict[users[j]]] = 1
            AC[unique_users_dict[users[j]]][unique_users_dict[users[i]]] = 1
### Calculation of Coordination needs matrix: CR = TA * TD * TA^T (TA transpose)
TA = [[0]*len(file_to_users.keys()) for x in range(0, len(list_unique_users))]
TD = [[0]*len(file_to_users.keys()) for x in range(0, len(file_to_users.keys()))]
### TA - n * m; n = number of developers, m = number of files; TA[i][j] = 1 if developer i changed file j.
### TD - m * m; m = number of files; TD[i][j] = 1 if the ith and jth files depend on each other.
### If two files are changed together in a commit, they are dependent, so we
### assume an undirected dependency graph.
for key in file_to_users.keys():
    list_of_users = file_to_users[key]
    for x in list_of_users:
        TA[unique_users_dict[x]][file_to_id[key]] = 1
for commit in Repository(url).traverse_commits():
    modified_files_in_commit = commit.modified_files
    for i in range(0, len(modified_files_in_commit)):
        for j in range(i, len(modified_files_in_commit)):
            TD[file_to_id[modified_files_in_commit[i].filename]][file_to_id[modified_files_in_commit[j].filename]] = 1
            TD[file_to_id[modified_files_in_commit[j].filename]][file_to_id[modified_files_in_commit[i].filename]] = 1
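### Coordination Requirement matrix. The original file declared CR and dumped it
### but never computed it; the line below is a minimal sketch that follows the
### CR = TA * TD * TA^T formula stated above, assuming numpy (imported above)
### is the intended way to take the matrix products.
CR = (np.array(TA) @ np.array(TD) @ np.array(TA).T).tolist()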
### dump results to json files
with open('data.json', 'w') as fp:
    json.dump(file_to_users, fp, indent=4)
with open('unique_users.json', 'w') as fp:
    json.dump(unique_users_dict, fp, indent=4)
with open('ActualCoordination.json', 'w') as filepointer:
    json.dump(AC, filepointer)
with open('TaskAssignment.json', 'w') as filepointer:
    json.dump(TA, filepointer)
with open('TaskDependency.json', 'w') as filepointer:
    json.dump(TD, filepointer)
with open('CoordinationRequirement.json', 'w') as filepointer:
    json.dump(CR, filepointer)