From 6fdcc115285a513739c1dcd737bf6e9692382ee2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zoufin=C3=A9=20Lauer-Bar=C3=A9?=
<82505312+zolabar@users.noreply.github.com>
Date: Wed, 20 Sep 2023 11:18:28 +0200
Subject: [PATCH] Update webApp.py
corrected/simplified pandas reading of uploaded file
---
TrendPy/webApp.py | 672 +++++++++++++++++++++++-----------------------
1 file changed, 336 insertions(+), 336 deletions(-)
diff --git a/TrendPy/webApp.py b/TrendPy/webApp.py
index 74dabf9..4959aac 100644
--- a/TrendPy/webApp.py
+++ b/TrendPy/webApp.py
@@ -1,336 +1,336 @@
-# -*- coding: utf-8 -*-
-
-
-import pandas as pd
-import ipywidgets as widgets
-from ipywidgets import interact, interactive
-import plotly as plt
-import plotly.express as px
-import plotly.graph_objects as go
-import io
-from IPython.display import display, clear_output
-from traitlets import traitlets
-import TrendPy.methods as tm
-import numpy as np
-import warnings
-import sympy as sym
-from sympy import atan as arctan
-from sympy import sqrt, sin, cos, tan, exp, log, ln
-a, b, c, x = sym.symbols('a, b, c, x', real=True)
-
-warnings.filterwarnings('ignore')
-
-version = 'v1.0.1'
-
-class LoadedButton(widgets.Button):
-
- def __init__(self, value=None, *args, **kwargs):
- super(LoadedButton, self).__init__(*args, **kwargs)
- self.add_traits(value=traitlets.Any(value))
-
-
-class App():
-
- def __init__(self):
-
- self.box_layout = widgets.Layout(display='flex',
- justify_content='center')
- header = widgets.HTML(
- value=f'
TrendPy Webapp\
- Visualization of trends in (time series) data {version}')
-
- logo = widgets.HTML(
- value='')
-
- logo_container = widgets.HBox([logo, header],layout=self.box_layout)
- display(logo_container)
-
-
- # In[3]:
-
-
- header2 = widgets.HTML(
- value='Upload a .csv file here')
-
- display(header2)
-
-
- # In[4]:
-
-
- self.uploaded_excel_file = widgets.FileUpload(accept=".csv", multiple=False)
- container = widgets.HBox([self.uploaded_excel_file],layout=self.box_layout)
-
- display(container)
-
- self.input_dropdown = widgets.Dropdown(
- options=[''],
- value='',
- description='Input (X):',
- disabled=False,
- )
-
- self.output_dropdown = widgets.Dropdown(
- options=[''],
- value='',
- description='Target (Y):',
- disabled=False,
- )
-
- self.trend_dropdown = widgets.Dropdown(
- options=['No trendline',
- 'linear',
- 'polynomial',
- 'trigonometric',
- 'exponential',
- 'manual'],
- value='No trendline',
- description='trendline:',
- disabled=False,
- )
-
- self.polynomial_deg_selection = widgets.Dropdown(
- options=[2,3,4,5,6],
- value=2,
- description='deg (polyn.)',
- disabled=True,
- )
-
- self.freeReg_ansatz_input = widgets.Text(
- value=' ',
- description='expression',
- disabled=True,
- )
-
-
- self.r2_checkbox = widgets.Checkbox(
- value=False,
- description='Show R2 score',
- disabled=False,
- indent=True
- )
-
- # arrangement of dropdown menus
- dropdown_elements = widgets.HBox([self.input_dropdown, self.output_dropdown], layout=self.box_layout)
- trend_selection = widgets.HBox([self.trend_dropdown, self.polynomial_deg_selection, self.freeReg_ansatz_input], layout=self.box_layout)
- r2_container = widgets.HBox([self.r2_checkbox],layout=self.box_layout)
-
-
- # making all the widgets interactive, so no button needs to be pressed when changing a dropdown value
- self.out2 = widgets.interactive_output(self.create_graphics, {'values_in': self.input_dropdown,
- 'values_out': self.output_dropdown,
- 'trend': self.trend_dropdown,
- 'deg': self.polynomial_deg_selection,
- 'expression': self.freeReg_ansatz_input,
- 'r2': self.r2_checkbox})
-
- self.out = widgets.Output()
-
- out2_container = widgets.HBox([self.out2],layout=self.box_layout)
-
-
- self.button = LoadedButton(description='Start calculation',
- disable=False,
- tooltip='Click to start calculation',
- icon='check',
- button_style='')
-
- self.button.on_click(self.create_pandas_dataframe)
-
-
- container2 = widgets.HBox([self.button],layout=self.box_layout)
- container_out = widgets.HBox([self.out],layout=self.box_layout)
-
- display(container2,container_out)
-
-
- display(dropdown_elements, trend_selection, r2_container, out2_container)
-
-
-
-
-
- # In[22]:
-
-
- # defining a figure widget, that is created in the beginning and remains static, only graphs are updated
- # it is needed to define all needed graphs as empty ones here, so that you can use update_traces
- self.fig_widget = go.FigureWidget()
- self.fig_widget.layout.margin=dict(l=120, r=120, b=25, t=25)
- self.fig_widget.layout.height=400
- self.fig_widget.add_scatter(mode='markers', name='datapoints')
- self.fig_widget.add_scatter(mode='lines', name='trendline', line=dict(shape='spline'))
- self.fig_widget.update_layout(title_text="Time Series Regression", template='plotly')
-
- return
-
- def create_pandas_dataframe(self, b):
-
- # resetting everything for a new calculation
- with self.out:
- clear_output()
- with self.out2:
- clear_output()
- self.input_dropdown.options = []
- self.output_dropdown.options = []
- self.fig_widget.update_traces(x=[],y=[],selector=({'name':'datapoints'}))
- self.fig_widget.update_traces(x=[],y=[],selector=({'name':'trendline'}))
- self.trend_dropdown.value='No trendline'
- self.polynomial_deg_selection.disabled=True
- self.freeReg_ansatz_input.disabled = True
- self.r2_checkbox.value=False
- self.r2_checkbox.disabled=True
-
- #checking whether data has been uploaded succesfully
- if (self.uploaded_excel_file.value=={}):
- self.button.button_style='warning'
- with self.out:
- print("No data entered. Please upload a .csv file.")
-
- else:
- try:
-
-
- self.button.button_style='success'
-
- #transforming the uploaded .csv file back to the type .csv in order to then create a pandas dataframe
- time_series_file = list(self.uploaded_excel_file.value.values())[0]
- content=time_series_file['content']
- content=io.StringIO(content.decode('utf-8'))
- time_series_data = pd.read_csv(content)
-
- #filling the drop down menus with the columns of the dataframe as options
- self.input_dropdown.options = time_series_data.select_dtypes(include='number').columns
- self.output_dropdown.options = time_series_data.select_dtypes(include='number').columns
- b.value = time_series_data
-
- with self.out:
- print("Data entered. Please select the X and Y values now. (Only works if header is in the first row of your file.)")
- except:
- self.button.button_style='warning'
- with self.out:
- print('Invalid input! Make sure that header of the file is in first row and general format is correct!')
-
- return
-
- def create_graphics(self, values_in,values_out,trend,deg=2, expression='x', r2=False):
- if trend == 'polynomial':
- self.polynomial_deg_selection.disabled=False
- else:
- self.polynomial_deg_selection.disabled=True
-
- if trend == 'manual':
- self.freeReg_ansatz_input.disabled=False
- else:
- self.freeReg_ansatz_input.disabled=True
-
- if values_in == '' or values_out=='':
- print("Please select your X and Y values in the dropdown menus above.")
- else:
- try: # sometimes an error is occuring when user hasn't changed the standard dropdown X and Y values yet
- # through try/except the user does not see the error and won't notice it, because standard values are
- # usually not going to be used
-
- # updateing the plot based on the input data
- self.fig_widget.layout.xaxis.title = values_in
- self.fig_widget.layout.yaxis.title = values_out
- self.fig_widget.update_traces(x=self.button.value[values_in],
- y=self.button.value[values_out],
- selector=({'name':'datapoints'}))
- if trend!="No trendline":
- with self.out2:
- clear_output()
- self.r2_checkbox.disabled=False
- self.calculate_trendline(values_in, values_out, trend, deg, expression, r2) # function that calculates and draws trendline is called
- else:
- self.fig_widget.update_traces(x=[],y=[],selector=({'name':'trendline'}))
- self.r2_checkbox.disabled=True
- except:
- pass
-
- return
-
-
- def calculate_trendline(self, values_in, values_out, trend, deg, expression, r2):
- sorted_df = self.button.value.sort_values(by=values_in) # without sorting trendlines can not be plotted correctly
- x_values=sorted_df[values_in].to_numpy()
- y_values=sorted_df[values_out].to_numpy()
-
- # in all cases coefficients are calculated first by calling function from methods.py
- # after that the predicted y-values are calculated by calling function from methods.py
- # with the data from y_pred-function it is then possible to plot the trendline
- # at last r2-score is calculated
- if trend=='linear':
- coefs=tm.linReg(x_values,y_values)
- print('coefficients: ', coefs)
- y_pred=tm.pred('linReg',coefs, x_values)
- self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
- self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot x+c_1$")
- if r2==True:
- r2_score = tm.r2(y_values, y_pred)
- print('R2-score: ', r2_score)
-
- elif trend=='polynomial':
- try:
- coefs = tm.polReg(x_values,y_values,deg)
- print('coefficients: ', coefs)
- y_pred = tm.pred('polReg', coefs, x_values)
- self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}), line=dict(shape='spline'))
- self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot x^N+c_1 x^{N-1}+...+c_N$")
- if r2==True:
- r2_score = tm.r2(y_values, y_pred)
- print('R2-score: ', r2_score)
- except:
- print('Selected regression might not be a good fit for the entered values! Please lower degree or choose other regression!')
- elif trend=='trigonometric':
- try:
- coefs = tm.trigReg(x_values,y_values)
- print('coefficients: ', coefs)
- y_pred = tm.pred('trigReg', coefs, x_values)
- self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
- self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot \text(\cos(2\pi\cdot c_1+c_2))$")
- if r2==True:
- r2_score = tm.r2(y_values, y_pred)
- print('R2-score: ', r2_score)
- except:
- print('Selected regression might not be a good fit for the entered values! Please choose other regression!')
- elif trend=='exponential':
- try:
- coefs = tm.expReg(x_values,y_values)
- print('coefficients: ', coefs)
- y_pred = tm.pred('expReg', coefs, x_values)
- self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
- self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot e^{c_1\cdot x}$")
- if r2==True:
- r2_score = tm.r2(y_values, y_pred)
- print('R2-score: ', r2_score)
- except:
- print('Selected regression might not be a good fit for the entered values! Please choose other regression!')
-
- elif trend=='manual':
- try:
- coefs = tm.freeReg(x_values,y_values, expression)
- print('coefficients: ', coefs)
- y_pred = tm.pred('freeReg', coefs, x_values, freeRegAnsatz=expression)
- self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
- self.fig_widget.update_layout(title_text=r"$f(x)=%s$" % sym.latex(eval(expression)))
- if r2==True:
- r2_score = tm.r2(y_values, y_pred)
- print('R2-score: ', r2_score)
- except:
- print('Selected regression might not be a good fit for the entered values! Please choose other regression!')
-
- return
-
-
-
-
- def show(self):
-
- display(self.fig_widget)
-
- return
-
-
-
-
+# -*- coding: utf-8 -*-
+
+
+import pandas as pd
+import ipywidgets as widgets
+from ipywidgets import interact, interactive
+import plotly as plt
+import plotly.express as px
+import plotly.graph_objects as go
+import io
+from IPython.display import display, clear_output
+from traitlets import traitlets
+import TrendPy.methods as tm
+import numpy as np
+import warnings
+import sympy as sym
+from sympy import atan as arctan
+from sympy import sqrt, sin, cos, tan, exp, log, ln
+a, b, c, x = sym.symbols('a, b, c, x', real=True)
+
+warnings.filterwarnings('ignore')
+
+version = 'v1.0.1'
+
+class LoadedButton(widgets.Button):
+
+ def __init__(self, value=None, *args, **kwargs):
+ super(LoadedButton, self).__init__(*args, **kwargs)
+ self.add_traits(value=traitlets.Any(value))
+
+
+class App():
+
+ def __init__(self):
+
+ self.box_layout = widgets.Layout(display='flex',
+ justify_content='center')
+ header = widgets.HTML(
+ value=f'TrendPy Webapp\
+ Visualization of trends in (time series) data {version}')
+
+ logo = widgets.HTML(
+ value='')
+
+ logo_container = widgets.HBox([logo, header],layout=self.box_layout)
+ display(logo_container)
+
+
+ # In[3]:
+
+
+ header2 = widgets.HTML(
+ value='Upload a .csv file here')
+
+ display(header2)
+
+
+ # In[4]:
+
+
+ self.uploaded_excel_file = widgets.FileUpload(accept=".csv", multiple=False)
+ container = widgets.HBox([self.uploaded_excel_file],layout=self.box_layout)
+
+ display(container)
+
+ self.input_dropdown = widgets.Dropdown(
+ options=[''],
+ value='',
+ description='Input (X):',
+ disabled=False,
+ )
+
+ self.output_dropdown = widgets.Dropdown(
+ options=[''],
+ value='',
+ description='Target (Y):',
+ disabled=False,
+ )
+
+ self.trend_dropdown = widgets.Dropdown(
+ options=['No trendline',
+ 'linear',
+ 'polynomial',
+ 'trigonometric',
+ 'exponential',
+ 'manual'],
+ value='No trendline',
+ description='trendline:',
+ disabled=False,
+ )
+
+ self.polynomial_deg_selection = widgets.Dropdown(
+ options=[2,3,4,5,6],
+ value=2,
+ description='deg (polyn.)',
+ disabled=True,
+ )
+
+ self.freeReg_ansatz_input = widgets.Text(
+ value=' ',
+ description='expression',
+ disabled=True,
+ )
+
+
+ self.r2_checkbox = widgets.Checkbox(
+ value=False,
+ description='Show R2 score',
+ disabled=False,
+ indent=True
+ )
+
+ # arrangement of dropdown menus
+ dropdown_elements = widgets.HBox([self.input_dropdown, self.output_dropdown], layout=self.box_layout)
+ trend_selection = widgets.HBox([self.trend_dropdown, self.polynomial_deg_selection, self.freeReg_ansatz_input], layout=self.box_layout)
+ r2_container = widgets.HBox([self.r2_checkbox],layout=self.box_layout)
+
+
+ # making all the widgets interactive, so no button needs to be pressed when changing a dropdown value
+ self.out2 = widgets.interactive_output(self.create_graphics, {'values_in': self.input_dropdown,
+ 'values_out': self.output_dropdown,
+ 'trend': self.trend_dropdown,
+ 'deg': self.polynomial_deg_selection,
+ 'expression': self.freeReg_ansatz_input,
+ 'r2': self.r2_checkbox})
+
+ self.out = widgets.Output()
+
+ out2_container = widgets.HBox([self.out2],layout=self.box_layout)
+
+
+ self.button = LoadedButton(description='Start calculation',
+ disable=False,
+ tooltip='Click to start calculation',
+ icon='check',
+ button_style='')
+
+ self.button.on_click(self.create_pandas_dataframe)
+
+
+ container2 = widgets.HBox([self.button],layout=self.box_layout)
+ container_out = widgets.HBox([self.out],layout=self.box_layout)
+
+ display(container2,container_out)
+
+
+ display(dropdown_elements, trend_selection, r2_container, out2_container)
+
+
+
+
+
+ # In[22]:
+
+
+ # defining a figure widget, that is created in the beginning and remains static, only graphs are updated
+ # it is needed to define all needed graphs as empty ones here, so that you can use update_traces
+ self.fig_widget = go.FigureWidget()
+ self.fig_widget.layout.margin=dict(l=120, r=120, b=25, t=25)
+ self.fig_widget.layout.height=400
+ self.fig_widget.add_scatter(mode='markers', name='datapoints')
+ self.fig_widget.add_scatter(mode='lines', name='trendline', line=dict(shape='spline'))
+ self.fig_widget.update_layout(title_text="Time Series Regression", template='plotly')
+
+ return
+
+ def create_pandas_dataframe(self, b):
+
+ # resetting everything for a new calculation
+ with self.out:
+ clear_output()
+ with self.out2:
+ clear_output()
+ self.input_dropdown.options = []
+ self.output_dropdown.options = []
+ self.fig_widget.update_traces(x=[],y=[],selector=({'name':'datapoints'}))
+ self.fig_widget.update_traces(x=[],y=[],selector=({'name':'trendline'}))
+ self.trend_dropdown.value='No trendline'
+ self.polynomial_deg_selection.disabled=True
+ self.freeReg_ansatz_input.disabled = True
+ self.r2_checkbox.value=False
+ self.r2_checkbox.disabled=True
+
+ #checking whether data has been uploaded succesfully
+ if (self.uploaded_excel_file.value=={}):
+ self.button.button_style='warning'
+ with self.out:
+ print("No data entered. Please upload a .csv file.")
+
+ else:
+ try:
+
+
+ #transforming the uploaded .csv file back to the type .csv in order to then create a pandas dataframe
+
+ time_series_file = self.uploaded_excel_file.value[0].content
+
+ time_series_data = pd.read_csv(io.BytesIO(time_series_file))
+
+ #filling the drop down menus with the columns of the dataframe as options
+ self.input_dropdown.options = time_series_data.select_dtypes(include='number').columns
+ self.output_dropdown.options = time_series_data.select_dtypes(include='number').columns
+ b.value = time_series_data
+
+ self.button.button_style='success'
+
+ with self.out:
+ print("Data entered. Please select the X and Y values now. (Only works if header is in the first row of your file.)")
+ except:
+ self.button.button_style='warning'
+ with self.out:
+ print('Invalid input! Make sure that header of the file is in first row and general format is correct!')
+
+ return
+
+ def create_graphics(self, values_in,values_out,trend,deg=2, expression='x', r2=False):
+ if trend == 'polynomial':
+ self.polynomial_deg_selection.disabled=False
+ else:
+ self.polynomial_deg_selection.disabled=True
+
+ if trend == 'manual':
+ self.freeReg_ansatz_input.disabled=False
+ else:
+ self.freeReg_ansatz_input.disabled=True
+
+ if values_in == '' or values_out=='':
+ print("Please select your X and Y values in the dropdown menus above.")
+ else:
+ try: # sometimes an error is occuring when user hasn't changed the standard dropdown X and Y values yet
+ # through try/except the user does not see the error and won't notice it, because standard values are
+ # usually not going to be used
+
+ # updateing the plot based on the input data
+ self.fig_widget.layout.xaxis.title = values_in
+ self.fig_widget.layout.yaxis.title = values_out
+ self.fig_widget.update_traces(x=self.button.value[values_in],
+ y=self.button.value[values_out],
+ selector=({'name':'datapoints'}))
+ if trend!="No trendline":
+ with self.out2:
+ clear_output()
+ self.r2_checkbox.disabled=False
+ self.calculate_trendline(values_in, values_out, trend, deg, expression, r2) # function that calculates and draws trendline is called
+ else:
+ self.fig_widget.update_traces(x=[],y=[],selector=({'name':'trendline'}))
+ self.r2_checkbox.disabled=True
+ except:
+ pass
+
+ return
+
+
+ def calculate_trendline(self, values_in, values_out, trend, deg, expression, r2):
+ sorted_df = self.button.value.sort_values(by=values_in) # without sorting trendlines can not be plotted correctly
+ x_values=sorted_df[values_in].to_numpy()
+ y_values=sorted_df[values_out].to_numpy()
+
+ # in all cases coefficients are calculated first by calling function from methods.py
+ # after that the predicted y-values are calculated by calling function from methods.py
+ # with the data from y_pred-function it is then possible to plot the trendline
+ # at last r2-score is calculated
+ if trend=='linear':
+ coefs=tm.linReg(x_values,y_values)
+ print('coefficients: ', coefs)
+ y_pred=tm.pred('linReg',coefs, x_values)
+ self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
+ self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot x+c_1$")
+ if r2==True:
+ r2_score = tm.r2(y_values, y_pred)
+ print('R2-score: ', r2_score)
+
+ elif trend=='polynomial':
+ try:
+ coefs = tm.polReg(x_values,y_values,deg)
+ print('coefficients: ', coefs)
+ y_pred = tm.pred('polReg', coefs, x_values)
+ self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}), line=dict(shape='spline'))
+ self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot x^N+c_1 x^{N-1}+...+c_N$")
+ if r2==True:
+ r2_score = tm.r2(y_values, y_pred)
+ print('R2-score: ', r2_score)
+ except:
+ print('Selected regression might not be a good fit for the entered values! Please lower degree or choose other regression!')
+ elif trend=='trigonometric':
+ try:
+ coefs = tm.trigReg(x_values,y_values)
+ print('coefficients: ', coefs)
+ y_pred = tm.pred('trigReg', coefs, x_values)
+ self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
+ self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot \text(\cos(2\pi\cdot c_1+c_2))$")
+ if r2==True:
+ r2_score = tm.r2(y_values, y_pred)
+ print('R2-score: ', r2_score)
+ except:
+ print('Selected regression might not be a good fit for the entered values! Please choose other regression!')
+ elif trend=='exponential':
+ try:
+ coefs = tm.expReg(x_values,y_values)
+ print('coefficients: ', coefs)
+ y_pred = tm.pred('expReg', coefs, x_values)
+ self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
+ self.fig_widget.update_layout(title_text=r"$f(x)=c_0\cdot e^{c_1\cdot x}$")
+ if r2==True:
+ r2_score = tm.r2(y_values, y_pred)
+ print('R2-score: ', r2_score)
+ except:
+ print('Selected regression might not be a good fit for the entered values! Please choose other regression!')
+
+ elif trend=='manual':
+ try:
+ coefs = tm.freeReg(x_values,y_values, expression)
+ print('coefficients: ', coefs)
+ y_pred = tm.pred('freeReg', coefs, x_values, freeRegAnsatz=expression)
+ self.fig_widget.update_traces(x=sorted_df[values_in], y=y_pred, selector=({'name':'trendline'}))
+ self.fig_widget.update_layout(title_text=r"$f(x)=%s$" % sym.latex(eval(expression)))
+ if r2==True:
+ r2_score = tm.r2(y_values, y_pred)
+ print('R2-score: ', r2_score)
+ except:
+ print('Selected regression might not be a good fit for the entered values! Please choose other regression!')
+
+ return
+
+
+
+
+ def show(self):
+
+ display(self.fig_widget)
+
+ return
+
+
+
+