Skip to content

Commit 59fc7d6

Browse files
committed
Updated Readme and structure
1 parent 99af983 commit 59fc7d6

File tree

6 files changed

+84
-108
lines changed

6 files changed

+84
-108
lines changed

Data_Collection_Tools/APIConsumer.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,20 +86,4 @@ def fullConsume(self, leagues):
8686
for matchID in self.getMatchList(league._leagueID, league._startDate, league._endDate):
8787
toBeInserted.append(self.getMatch(matchID).getMongoObj())
8888
print('Inserted match: ' + str(matchID))
89-
self.dbc.insertMany(toBeInserted, databaseCollections.MATCHES)
90-
91-
92-
myAPIC = APIConsumer()
93-
94-
# matches = myAPIC.getMatchList("9633", datetime.datetime.fromtimestamp(1519102800), datetime.datetime.fromtimestamp(1519534800))
95-
# pprint.pprint(matches)
96-
# pprint.pprint(len(matches))
97-
98-
# myMatch = myAPIC.getMatch(3752505318)
99-
# pprint.pprint(myMatch.players)
100-
myDBC = DatabaseConnector(os.environ['LOCALMONGOSTR'])
101-
leagueList = []
102-
for item in myDBC.makeQuery({}, databaseCollections.LEAGUES):
103-
leagueList.append(league.league.from_dict(item))
104-
105-
myAPIC.fullConsume(leagueList)
89+
self.dbc.insertMany(toBeInserted, databaseCollections.MATCHES)

Data_Collection_Tools/dataScraper.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,4 @@ def whatsIn(self):
202202
fullList = self.getTourneyList()
203203
for item in inDB:
204204
fullList.remove(item['url'])
205-
return fullList
206-
207-
myScraper = dataScraper()
208-
# to Run the script uncomment the following line and run dataScraper.py in the command line.
209-
# myScraper.fullScrape()
205+
return fullList

DatabaseConnector.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,4 @@ class databaseCollections(Enum):
9090
TEAMS = 'Teams'
9191
LEAGUES = 'Leagues'
9292
HEROES = 'Heroes'
93-
REGRESSIONS = 'Regressions'
94-
95-
96-
# client = MongoClient(os.environ['LOCALMONGOSTR'])
97-
# db = client.Dota2ProMatches
98-
# tbu = db.Matches.find({})
99-
# for iter in tbu:
100-
# if(not iter['matchID'] > 0 ):
101-
# print(type(iter['matchID']))
102-
# # db.Matches.update_one(
103-
# # {'_id': iter['_id']},
104-
# # {"$set": {'matchID': iter['matchID'][0]}})
105-
# client.close()
93+
REGRESSIONS = 'Regressions'

README.md

Lines changed: 79 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -3,50 +3,82 @@
33
The goal of this project was to make a public dataset of professional Dota leagues and matches.
44
I included events from this [list](https://liquipedia.net/dota2/Tier_1_Tournaments)
55

6-
Database Specification:
7-
8-
**Leagues**
9-
10-
| Field | type | Description |
11-
| ---------- | ------- | ----------------------------------------- |
12-
| leagueName | String | The name of the league |
13-
| leagueID | String | The league's dotaTV id |
14-
| prizePool | Int | The league's prizepool in USD |
15-
| startDate | Date | The league's first main event day |
16-
| endDate | Date | The league's last main event day |
17-
| teams | Array | This contains json team objects |
18-
| url | String | This string is the url source for this data |
19-
20-
21-
To use the steam api you must generate a key and set the environmental variable 'STEAM_API_KEY' to its value.
22-
23-
LOCALMONGOSTR
24-
Notes:
25-
26-
Some teams have a name mismatch between the actual name and the name on the results notably: VG.r (Vici Gaming Reborn), 4 Anchors + Sea Captain, RoX, Relax, they have not been processed.
27-
28-
Summit 5, 6 and 7 have a duplicate eventID so I have not included those teams in the team collection, the results are still intact however
29-
30-
Events without event IDs (There will be no match data for these events):
31-
32-
The International 2011
33-
34-
ASUS Open 2012 Finals
35-
36-
Thor Open 2012
37-
38-
World Cyber Games 2012
39-
40-
Electronic Sports World Cup 2012
41-
42-
The Premier League: Season 2
43-
44-
DreamHack Summer 2012
45-
46-
StarLadder StarSeries Season 1
47-
48-
The Premier League: Season 1
49-
50-
The Premier League: Season 1
51-
52-
Dota2 Star Championship
6+
## Database Specification
7+
8+
### Leagues
9+
10+
| Field | Type | Description |
11+
| ---------- | ------- | ------------------------ |
12+
| leagueName | String | The name of the league. |
13+
| leagueID | String | The league's dotaTV id. |
14+
| prizePool | Int | The league's prize pool in USD. |
15+
| startDate | Date | The league's first main event day. |
16+
| endDate | Date | The league's last main event day. |
17+
| teams | Array | This contains json team objects. |
18+
| url | String | This string is the url source for this data. |
19+
20+
### Teams
21+
22+
| Field | Type | Description |
23+
| ---------- | ------- | ------------------------ |
24+
| leagueName | String | The name of the league. |
25+
| result | String | The place the team got. Ties are rounded up.|
26+
| org | String | The team's organization. |
27+
| players | Array | An array of {role: \<the role that player had\>, player: \<playerObject\>} |
28+
29+
### Players
30+
31+
| Field | Type | Description |
32+
| ---------- | ------- | ------------------------ |
33+
| tag | String | The player's tag. |
34+
| playerName | String | The player's name (romanized in some cases). |
35+
| birthday | Date | The player's birthday. |
36+
| playerID | Int | The player's ID. |
37+
| country | Array | An array of strings with the names of the player's nations. |
38+
| roles | String | The roles this player has played. |
39+
| url | String | The url source of this data. |
40+
41+
### Matches
42+
43+
| Field | Type | Description |
44+
| ---------- | ------- | ------------------------ |
45+
| players | Array | Array of players that contains their stats for this match. |
46+
| radiantWin | Boolean | True if radiant won. |
47+
| duration | Int | Duration of the match in seconds. |
48+
| matchID | Int | The ID of the match. |
49+
| leagueID | Int | The ID of the league. |
50+
51+
## Dataset Notes
52+
53+
- Some teams have a mismatch between their actual name and the name shown in the results, notably VG.r (Vici Gaming Reborn), 4 Anchors + Sea Captain, RoX, and Relax; these teams have not been processed.
54+
- Summit 5, 6 and 7 have a duplicate eventID.
55+
- Players with no page on Liquipedia have not been processed.
56+
- Some events do not have an eventID; they are:
57+
- The International 2011
58+
- ASUS Open 2012 Finals
59+
- World Cyber Games 2012
60+
- Electronic Sports World Cup 2012
61+
- The Premier League: Season 2
62+
- DreamHack Summer 2012
63+
- StarLadder StarSeries Season 1
64+
- The Premier League: Season 1
65+
- The Premier League: Season 1
66+
- Dota2 Star Championship
67+
68+
## Additional Resources
69+
70+
In addition to the dataset, I also made some Python classes for data collection and statistical operations. They are located in the source folder:
71+
72+
/Dota2EsportDataset/Database_Objects/
73+
I have also made the data collection tools available; feel free to use them to update the dataset or scan a different list of tournaments.
74+
75+
In order to use them you will need to set a few environment variables:
76+
77+
- To use the Steam API you must generate a key and set the environment variable 'STEAM_API_KEY' to its value.
78+
- I was using MongoDB locally, so I set the environment variable LOCALMONGOSTR to my MongoDB connection string.
79+
80+
For more details on the Steam API, see [here](https://wiki.teamfortress.com/wiki/WebAPI).
81+
82+
Thanks to the volunteers / editors of [Liquipedia](https://liquipedia.net/dota2/Main_Page) without whom this project would not have been possible.
83+
84+
Shoutout to [DatDota](http://www.datdota.com/) and Noxville for helping me out.

Stat_Operations/doStats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import Database_Objects.match as dotaMatch
33
import Database_Objects.player as player
44
import Database_Objects.team as team
5-
import Database_Objects.regression as regression
5+
import regression as regression
66
from DatabaseConnector import DatabaseConnector, databaseCollections
77
import os
88
from dateutil.relativedelta import relativedelta

Database_Objects/regression.py renamed to Stat_Operations/regression.py

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,9 @@ def __init__(self, dependent, independents, groupType, description, xLabels, yLa
2222
self.yLabel = yLabel
2323
self.title = title
2424

25-
26-
self.beta = None
27-
self.alpha = None
28-
self.rSq = None
29-
self.arSq = None
30-
self.n = None
3125
self.linearRegression()
32-
# self.logitRegression()
26+
3327

34-
@classmethod
35-
def fromDict():
36-
pass
3728

3829
def linearRegression(self):
3930
dataFrameArgs = {
@@ -56,27 +47,12 @@ def linearRegression(self):
5647
print(mod.summary())
5748
fig = plt.figure(figsize=(12,6))
5849
fig = sm.graphics.plot_regress_exog(mod, (self.xLabels[0]), fig=fig)
59-
# plt.plot(df[self.xLabels[0]], df[self.yLabel], 'o')
60-
# plt.plot(df[self.xLabels[0]], mod.predict(), 'r', linewidth=2)
61-
# plt.xlabel = self.xLabels[0]
62-
# plt.ylabel = self.yLabel
63-
# plt.title = self.title
64-
# fig.show()
6550
plt.show()
66-
pass
6751

6852

69-
class regressionAttributes(Enum):
70-
pass
71-
7253
#Roster, role, individual, team
7354
class regressionGroupType(Enum):
7455
ROSTER = 'roster'
7556
TEAM = 'team'
7657
INDIVIDUAL = 'individual'
7758
ROLE = 'role'
78-
79-
# class regressionType(Enum):
80-
# LINEAR = True
81-
# NONLINEAR = False
82-

0 commit comments

Comments
 (0)