diff --git a/AUTHORS.md b/AUTHORS.md index 51dfc88..27f64dc 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -25,4 +25,6 @@ Please share scripts or how-to guides if you have built an integration with a ne * Minze Tolsman (https://github.com/miezie) * Implementation: VanOns * Slyoldfox (https://github.com/slyoldfox) - * Implementation: Fluvius \ No newline at end of file + * Implementation: Fluvius +* TylonHH (https://github.com/TylonHH) + * Implementation: EnergyControl (app) \ No newline at end of file diff --git a/Datasources/EnergyControl/EnergyControlDataPrepare.py b/Datasources/EnergyControl/EnergyControlDataPrepare.py new file mode 100644 index 0000000..4938582 --- /dev/null +++ b/Datasources/EnergyControl/EnergyControlDataPrepare.py @@ -0,0 +1,330 @@ +import datetime +import glob +import json +import math +import os +import sys +from collections import namedtuple +from typing import List + +import pandas as pd + +# DataFilter named tuple definition +# column: The name of the column on which the filter should be applied +# value: The value on which should be filtered (regular expressions can be used) +# equal: Boolean value indicating whether the filter should be inclusive or exclusive (True/False) +DataFilter = namedtuple("DataFilter", ["column", "value", "equal"]) + +# OutputFileDefinition named tuple definition +# outputFileName: The name of the output file +# valueColumnName: The name of the column holding the value +# dataFilters: A list of datafilters (see above the definition of a datafilter) +# recalculate: Boolean value indicating whether the data should be recalculated, +# because the source is not an increasing value +OutputFileDefinition = namedtuple( +    "OutputFileDefinition", +    ["outputFileName", "valueColumnName", "dataFilters", "recalculate"], +) + +# --------------------------------------------------------------------------------------------------------------------- +# TEMPLATE SETUP +# 
--------------------------------------------------------------------------------------------------------------------- + +# Name of the energy provider +energyProviderName = "EnergyControl" + +# Inputfile(s): filename extension +inputFileNameExtension = ".csv" +# Inputfile(s): Name of the column containing the date of the reading. +# Use this in case date and time is combined in one field. +inputFileDateColumnName = "Datum" +# Inputfile(s): Name of the column containing the time of the reading. +# Leave empty in case date and time is combined in one field. +inputFileTimeColumnName = "Zeit" +# Inputfile(s): Date/time format used in the datacolumn. +# Combine the format of the date and time in case date and time are two separate fields. +inputFileDateTimeColumnFormat = "%d.%m.%y %H:%M" +# Inputfile(s): Data separator being used in the .csv input file +inputFileDataSeperator = ";" +# Inputfile(s): Decimal token being used in the input file +inputFileDataDecimal = "," +# Inputfile(s): Number of header rows in the input file +inputFileNumHeaderRows = 2 +# Inputfile(s): Number of footer rows in the input file +inputFileNumFooterRows = 6 +# Inputfile(s): Json path of the records (only needed for json files) +# Example: inputFileJsonPath: List[str] = ['energy', 'values'] +inputFileJsonPath: List[str] = [] +# Inputfile(s): Name or index of the excel sheet (only needed for excel files containing more sheets, +# leave at 0 for the first sheet) +inputFileExcelSheetName = 0 + +# Name used for the temporary date/time field. +# This normally needs no change; adjust it only when it conflicts with existing columns. 
+dateTimeColumnName = "_DateTime" + +# Provide any data preparation code (if needed) +# Example: dataPreparation = "df["Energy Produced (Wh)"] = +# df["Energy Produced (Wh)"].str.replace(',', '').replace('\"', '').astype(int)" +dataPreparation = "" + +# List of one or more output file definitions +outputFiles = [ + OutputFileDefinition( + "water_high_resolution.csv", + "Zählerstand", + [], + False, + ), +] + +# --------------------------------------------------------------------------------------------------------------------- + + +# Prepare the input data +def prepareData(dataFrame: pd.DataFrame) -> pd.DataFrame: + print("Preparing data") + + # Check if we have to combine a date and time field + if inputFileTimeColumnName != "": + # Take note that the format is changed in case the column was parsed as date. + # For excel change the type of the cell to text or adjust the format accordingly, + # use statement print(dataFrame) to get information about the used format. + dataFrame[dateTimeColumnName] = pd.to_datetime( + dataFrame[inputFileDateColumnName].astype(str) + + " " + + dataFrame[inputFileTimeColumnName].astype(str), + format=inputFileDateTimeColumnFormat, + utc=True, + ) + else: + dataFrame[dateTimeColumnName] = pd.to_datetime( + dataFrame[inputFileDateColumnName], + format=inputFileDateTimeColumnFormat, + utc=True, + ) + # Remove the timezone (if it exists) + dataFrame[dateTimeColumnName] = dataFrame[dateTimeColumnName].dt.tz_localize(None) + + # Select only correct dates + df = dataFrame.loc[ + ( + dataFrame[dateTimeColumnName] + >= datetime.datetime.strptime("01-01-1970", "%d-%m-%Y") + ) + & ( + dataFrame[dateTimeColumnName] + <= datetime.datetime.strptime("31-12-2099", "%d-%m-%Y") + ) + ] + + # Make sure that the data is correctly sorted + df.sort_values(by=dateTimeColumnName, ascending=True, inplace=True) + + # Transform the date into unix timestamp for Home-Assistant + df[dateTimeColumnName] = ( + df[dateTimeColumnName].astype("int64") / 1000000000 + 
).astype("int64") + + # Execute any datapreparation code if provided + exec(dataPreparation) + + return df + + +# Filter the data based on the provided dataFilter(s) +def filterData(dataFrame: pd.DataFrame, filters: List[DataFilter]) -> pd.DataFrame: + df = dataFrame + # Iterate all the provided filters + for dataFilter in filters: + # Determine the subset based on the provided filter (regular expression) + series = ( + df[dataFilter.column].astype(str).str.contains(dataFilter.value, regex=True) + ) + + # Validate whether the data is included or excluded + if not dataFilter.equal: + series = ~series + + df = df[series] + + return df + + +# Recalculate the data so that the value increases +def recalculateData(dataFrame: pd.DataFrame, dataColumnName: str) -> pd.DataFrame: + df = dataFrame + + # Make the value column increasing (skip first row) + previousRowIndex = -1 + for index, _ in df.iterrows(): + # Check if the current row contains a valid value + if math.isnan(df.at[index, dataColumnName]): + df.at[index, dataColumnName] = 0.0 + + if previousRowIndex > -1: + # Add the value of the previous row to the current row + df.at[index, dataColumnName] = round( + df.at[index, dataColumnName] + df.at[previousRowIndex, dataColumnName], + 3, + ) + previousRowIndex = index + + return df + + +# Generate the datafile which can be imported +def generateImportDataFile( + dataFrame: pd.DataFrame, + outputFile: str, + dataColumnName: str, + filters: list[DataFilter], + recalculate: bool, +): + # Check if the column exists + if dataColumnName in dataFrame.columns: + print("Creating file: " + outputFile) + dataFrameFiltered = filterData(dataFrame, filters) + + # Check if we have to recalculate the data + if recalculate: + dataFrameFiltered = recalculateData(dataFrameFiltered, dataColumnName) + + # Select only the needed data + dataFrameFiltered = dataFrameFiltered.filter( + [dateTimeColumnName, dataColumnName] + ) + + # Create the output file + dataFrameFiltered.to_csv( + 
outputFile, sep=",", decimal=".", header=False, index=False + ) + else: + print( + "Could not create file: " + + outputFile + + " because column: " + + dataColumnName + + " does not exist" + ) + + +# Read the inputfile +def readInputFile(inputFileName: str) -> pd.DataFrame: + # Read the specified file + print("Loading data: " + inputFileName) + + # Check if we have a supported extension + if inputFileNameExtension == ".csv": + # Read the CSV file + df = pd.read_csv( + inputFileName, + sep=inputFileDataSeperator, + decimal=inputFileDataDecimal, + skiprows=inputFileNumHeaderRows, + skipfooter=inputFileNumFooterRows, + engine="python", + ) + elif (inputFileNameExtension == ".xlsx") or (inputFileNameExtension == ".xls"): + # Read the XLSX/XLS file + df = pd.read_excel( + inputFileName, + sheet_name=inputFileExcelSheetName, + decimal=inputFileDataDecimal, + skiprows=inputFileNumHeaderRows, + skipfooter=inputFileNumFooterRows, + ) + elif inputFileNameExtension == ".json": + # Read the JSON file + jsonData = json.load(open(inputFileName)) + df = pd.json_normalize(jsonData, record_path=inputFileJsonPath) + else: + raise Exception("Unsupported extension: " + inputFileNameExtension) + + return df + + +# Check if all the provided files have the correct extension +def correctFileExtensions(fileNames: list[str]) -> bool: + # Check all filenames for the right extension + for fileName in fileNames: + _, fileNameExtension = os.path.splitext(fileName) + if fileNameExtension != inputFileNameExtension: + return False + return True + + +# Generate the datafiles which can be imported +def generateImportDataFiles(inputFileNames: str): + # Find the file(s) + fileNames = glob.glob(inputFileNames) + if len(fileNames) > 0: + print("Found files based on: " + inputFileNames) + + # Check if all the found files are of the correct type + if correctFileExtensions(fileNames): + # Read all the found files and concat the data + dataFrame = pd.concat( + map(readInputFile, fileNames), 
ignore_index=True, sort=True + ) + + # Prepare the data + dataFrame = prepareData(dataFrame) + + # Create the output files + for outputFile in outputFiles: + generateImportDataFile( + dataFrame, + outputFile.outputFileName, + outputFile.valueColumnName, + outputFile.dataFilters, + outputFile.recalculate, + ) + + print("Done") + else: + print("Only " + inputFileNameExtension + " datafiles are allowed") + else: + print("No files found based on : " + inputFileNames) + + +# Validate that the script is started from the command prompt +if __name__ == "__main__": + print(energyProviderName + " Data Prepare") + print("") + print( + "This python script prepares " + + energyProviderName + + " data for import into Home Assistant." + ) + print( + "The files will be prepared in the current directory any previous files will be overwritten!" + ) + print("") + if len(sys.argv) == 2: + if ( + input("Are you sure you want to continue [Y/N]?: ").lower().strip()[:1] + == "y" + ): + generateImportDataFiles(sys.argv[1]) + else: + print(energyProviderName + "PrepareData usage:") + print( + energyProviderName + + "PrepareData <" + + energyProviderName + + " " + + inputFileNameExtension + + " filename (wildcard)>" + ) + print() + print( + "Enclose the path/filename in quotes in case wildcards are being used on Linux based systems." + ) + print( + "Example: " + + energyProviderName + + 'PrepareData "*' + + inputFileNameExtension + + '"' + ) diff --git a/Datasources/EnergyControl/README.md b/Datasources/EnergyControl/README.md new file mode 100644 index 0000000..804ec53 --- /dev/null +++ b/Datasources/EnergyControl/README.md @@ -0,0 +1,21 @@ +# Energy provider: EnergyControl + +[EnergyControl](https://www.steige-solutions.de/energy-control/) offers the option to import various types of data, such as water, solar, energy and more. This data can be transformed and used to import into Home Assistant. 
+ +**Data provided** +- Electricity consumption - Tariff 1 - High resolution (day interval) - kWh +- Electricity consumption - Tariff 2 - High resolution (day interval) - kWh +- Electricity production - Tariff 1 - High resolution (day interval) - kWh +- Electricity production - Tariff 2 - High resolution (day interval) - kWh +- Gas consumption - High resolution (day interval) - m³ +- Water consumption - High resolution (day interval) - m³ + +**Tooling needed** +- Python 3 +- Pandas python library ```pip install pandas``` + +**How-to** +- Export data from the [EnergyControl](https://www.steige-solutions.de/energy-control/) app. +- Download the ```EnergyControlDataPrepare.py``` file and place it in the same directory as the exported EnergyControl data. +- Execute the Python script with the exported data file as a parameter: ```python EnergyControlDataPrepare.py data_file.csv```. The python script creates the needed file for the generic import script. +- Follow the steps in the overall how-to diff --git a/Datasources/EnergyControl/Sample files/Water.csv b/Datasources/EnergyControl/Sample files/Water.csv new file mode 100644 index 0000000..3024c71 --- /dev/null +++ b/Datasources/EnergyControl/Sample files/Water.csv @@ -0,0 +1,19 @@ +Wasser;19.09.24; +;; +Datum;Zeit;Zählerstand +29.10.22;11:39;381,000 +21.12.22;16:55;389,000 +23.12.22;19:53;390,000 +24.12.22;22:20;390,000 +12.08.24;05:40;515,980 +19.08.24;08:27;516,780 +26.08.24;08:03;519,340 +02.09.24;09:49;520,400 +09.09.24;07:59;522,140 +16.09.24;21:37;522,940 +;; +Created with EnergyControl v1.4.2(#923);; +Wenn dir unsere App gefällt, freuen wir uns über eine Bewertung im AppStore.;; +https://itunes.apple.com/app/id1478467447?action=write-review;; +;; +Made with love in Cologne, Germany;; \ No newline at end of file diff --git a/Datasources/EnergyControl/Sample files/water_high_resolution.csv b/Datasources/EnergyControl/Sample files/water_high_resolution.csv new file mode 100644 index 0000000..b193996 --- /dev/null 
+++ b/Datasources/EnergyControl/Sample files/water_high_resolution.csv @@ -0,0 +1,10 @@ +1667043540,381.0 +1671641700,389.0 +1671825180,390.0 +1671920400,390.0 +1723441200,515.98 +1724056020,516.78 +1724659380,519.34 +1725270540,520.4 +1725868740,522.14 +1726522620,522.94 diff --git a/README.md b/README.md index 16cd661..0776124 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,8 @@ If you want to contribute to this please read the [Contribution guidelines](CONT * Implementation: VanOns * Slyoldfox (https://github.com/slyoldfox) * Implementation: Fluvius +* TylonHH (https://github.com/TylonHH) + * Implementation: EnergyControl (app)

(back to top)