-
Notifications
You must be signed in to change notification settings - Fork 0
/
seanstclair_analytics.py
210 lines (168 loc) · 7.25 KB
/
seanstclair_analytics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# Project Module 3
"""Python module that demonstrates skills in fetching data from the web,
processing it using Python collections, and writing the processed data
to different file formats."""
# Standard library imports
import csv
import json
import pathlib
from io import StringIO
from pathlib import Path
import io
# External import
import requests
import pandas as pd
import xlrd
from collections import Counter
# Import local modules
import Sean_StClair_utils
import seanstclairprojsetup
#Data acquisition
def fetch_and_write_txt_data(folder_name, filename, url):
response = requests.get(url)
if response.status_code == 200:
write_txt_file(folder_name, filename, response.text)
else:
print(f"Failed to fetch data: {response.status_code}")
def fetch_and_write_csv_data(folder_name, filename, url):
response = requests.get(url)
if response.status_code == 200:
write_csv_file(folder_name, filename, response.text)
else:
print(f"Failed to fetch data: {response.status_code}")
def fetch_and_write_excel_data(folder_name, filename, url):
response = requests.get(url)
if response.status_code == 200:
write_excel_file(folder_name, filename, response.content)
else:
print(f"Failed to fetch Excel data: {response.status_code}")
def fetch_and_write_json_data(folder_name, filename, url):
response = requests.get(url)
if response.status_code == 200:
write_json_file(folder_name, filename, response.content)
else:
print(f"Failed to fetch json data: {response.status_code}")
#Write Data
def write_txt_file(folder_name, filename, data):
file_path = Path(folder_name).joinpath(filename) # use pathlib to join paths
# Create the folder if it doesn't exist
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open('w', encoding='utf-8-sig') as file:
file.write(data)
print(f"Text data saved to {file_path}")
def write_csv_file(folder_name, filename, data):
file_path = pathlib.Path(folder_name, filename)
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open('w', newline='', encoding='utf-8') as file:
file.write(data)
print(f"CSV data saved to {file_path}")
def write_excel_file(folder_name, filename, data):
file_path = pathlib.Path(folder_name, filename)
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'wb') as file:
file.write(data)
print(f"Excel data saved to {file_path}")
def write_json_file(folder_name, filename, data):
file_path = pathlib.Path(folder_name, filename)
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open('w') as file:
file.write(data)
print(f"JSON data saved to {file_path}")
#Function 1: Process text data
def process_txt_file(input_file, output_file):
#get count of words in text file
try:
with open(input_file, 'r') as file:
text = file.read()
words = text.split()
word_count = Counter(words)
with open(output_file, 'w') as file:
for word, count in word_count.items():
file.write(f"{word}: {count}\n")
print(f"Word count saved to {output_file}")
except Exception as e:
print(f"Error processing text data: {e}")
#Function 2. Process CSV Data
def process_csv_file(input_file, output_file):
# Calculate the average and median amount of global sales for video games.
try:
df = pd.read_csv(input_file)
average_global_sales = df['Global_sales'].mean()
median_global_sales = df['Global_sales'].mean()
with open(output_file, 'w') as file:
file.write(f"Average Global Sales: {average_global_sales}\n")
file.write(f"Median Global Sales: {median_global_sales}\n")
print(f"Averages saved to {output_file}")
except Exception as e:
print(f"Error processing CSV data: {e}")
#Function 3: Process Excel Data
def process_excel_file(input_file, output_file):
try:
df = pd.read_excel(input_file)
# Sort list of sample cases from highest to lowest
sorted_df = df.sort_values('cases', ascending=False)
sorted_output_file = output_file.replace('.txt', '.xlsx')
sorted_df.to_excel(sorted_output_file, index=False)
print(f"Data sorted by cases and saved to {sorted_output_file}")
except Exception as e:
print(f"Error processing Excel data: {e}")
#Function 4: Process Json Data
def process_json_file(input_file, output_file):
try:
with open(input_file, 'r') as file:
data = dict(json.load(file))
#Parsed Song Name of random songs data set
with open(output_file, 'w') as file:
for title in data:
songName = title.get('songName', [])
file.write(f"{title['title']}: {', '.join(songName)}\n")
print(f"Song name extracted and saved to {output_file}")
except Exception as e:
print(f"Error processing JSON data: {e}")
# Exception reporting
def fetch_txt_data(folder_name, url):
try:
response = requests.get(url)
response.raise_for_status()
# Will raise an HTTPError
# if the HTTP request returns an unsuccessful status code
# Assuming the response content is text data
file_path = Path(folder_name) / 'data.txt'
with open(file_path, 'w') as file:
file.write(response.text)
print(f"Text data saved to {file_path}")
except requests.exceptions.HTTPError as errh:
print(f"Http Error: {errh}")
except requests.exceptions.ConnectionError as errc:
print(f"Error Connecting: {errc}")
except requests.exceptions.Timeout as errt:
print(f"Timeout Error: {errt}")
except requests.exceptions.RequestException as err:
print(f"Oops: Something Else: {err}")
except IOError as e:
print(f"I/O error({e.errno}): {e.strerror}")
def main():
''' Main function to demonstrate module capabilities. '''
print(f"Name: {'datafun-03-analytics-SeanStClair'}")
txt_url = 'https://github.com/tadzik/5e-spells/blob/master/spelllist.txt'
csv_url = 'https://github.com/ValdisW/datasets/blob/master/video-game-sales.csv'
excel_url = 'https://github.com/bharathirajatut/sample-excel-dataset/blob/master/fullmoon.xls'
json_url = 'https://gist.github.com/Jetrom17/380adb1d9d7b6f23a948759b48fce64e#file-music-info-json'
txt_folder_name = 'data-txt'
csv_folder_name = 'data-csv'
excel_folder_name = 'data-excel'
json_folder_name = 'data-json'
txt_filename = 'data.txt'
csv_filename = 'data.csv'
excel_filename = 'data.xls'
json_filename = 'data.json'
fetch_and_write_txt_data(txt_folder_name, txt_filename, txt_url)
fetch_and_write_csv_data(csv_folder_name, csv_filename, csv_url)
fetch_and_write_excel_data(excel_folder_name, excel_filename, excel_url)
fetch_and_write_json_data(json_folder_name, json_filename, json_url)
process_txt_file(txt_folder_name,'data.txt', 'results_txt.txt')
process_csv_file(csv_folder_name,'data.csv', 'results_csv.txt')
process_excel_file(excel_folder_name,'data.xls', 'results_xls.txt')
process_json_file(json_folder_name,'data.json', 'results_json.txt')
if __name__ == "__main__":
main()