From 3b43427886564f090937f6e944573597a9ccb944 Mon Sep 17 00:00:00 2001 From: Carlos Borrajo Gomez Date: Wed, 23 Apr 2025 11:24:14 +0200 Subject: [PATCH 1/3] Remove related code to cron_eos_usage cronjob --- scripts/cron4eos_usage.sh | 60 ------- src/python/CMSMonitoring/eos_path_size.py | 194 ---------------------- 2 files changed, 254 deletions(-) delete mode 100755 scripts/cron4eos_usage.sh delete mode 100644 src/python/CMSMonitoring/eos_path_size.py diff --git a/scripts/cron4eos_usage.sh b/scripts/cron4eos_usage.sh deleted file mode 100755 index b651a6fd..00000000 --- a/scripts/cron4eos_usage.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -##H Script to create CMS Eos path sizes with conditions -##H CMSVOC and CMSMONITORING groups are responsible for this script. -set -e -TZ=UTC -myname=$(basename "$0") -script_dir="$(cd "$(dirname "$0")" && pwd)" -# Get nice util functions -. "${script_dir}"/utils.sh - -# Do not change the order of "--output_file"([0],[1]) which is replaced in K8s run -py_input_args=( - --output_file "/eos/user/c/cmsmonit/www/eos-path-size/size.html" - --non_ec_json "/eos/cms/store/accounting/eos_non_ec_accounting.json" - --ec_json "/eos/cms/store/accounting/eos_ec_accounting.json" - --summary_json "/eos/cms/store/accounting/eos_accounting_summary.json" - --static_html_dir "${script_dir}/../src/html/eos_path_size" -) - -# ---------------------------------------------------------------------------------------------------------- Run in K8S -if [ -n "$K8S_ENV" ]; then - # $1: output - # Replace static output file with user arg for testability. - py_input_args[1]=$1 - - util4logi "${myname} is starting.." - util_cron_send_start "$myname" "1h" - - util_kerberos_auth_with_keytab /etc/secrets/keytab - python3 "${script_dir}"/../src/python/CMSMonitoring/eos_path_size.py "${py_input_args[@]}" 2>&1 - - util_cron_send_end "$myname" "1h" "$?" - util4logi "${myname} successfully finished." - exit 0 - # break -fi -# Run in LxPlus for test ---------------------------------------------------------------------------------------------- - -. /cvmfs/sft.cern.ch/lcg/views/LCG_101/x86_64-centos7-gcc8-opt/setup.sh - -# Catch output to not print successful jobs stdout to email, print when failed -output=$(pip install --user schema 2>&1) -ec=$? -if [ $ec -ne 0 ]; then - echo "$output" - exit code: $ec - exit $ec -fi - -if ! [ "$(python -c 'import sys; print(sys.version_info.major)')" = 3 ]; then - echo "It seem python version is not 3.X! Exiting..." - exit 1 -fi - -# Catch output to not print successful jobs stdout to email, print when failed -output=$(python "$HOME"/CMSMonitoring/src/python/CMSMonitoring/eos_path_size.py "${py_input_args[@]}" 2>&1) -ec=$? -if [ $ec -ne 0 ]; then - echo "$output" - exit code: $ec - exit $ec -fi diff --git a/src/python/CMSMonitoring/eos_path_size.py b/src/python/CMSMonitoring/eos_path_size.py deleted file mode 100644 index 2101f68e..00000000 --- a/src/python/CMSMonitoring/eos_path_size.py +++ /dev/null @@ -1,194 +0,0 @@ -# !/usr/bin/env python -# -*- coding: utf-8 -*- -# Author: Ceyhun Uzunoglu -# Create html table for EOS paths' size -# -# acronjob: $HOME/CMSMonitoring/scripts/eos_path_size.sh -# - -import json -import os -import sys -from datetime import datetime - -import click -import pandas as pd -from schema import Schema, Use, SchemaError, Or - -pd.options.display.float_format = "{:,.2f}".format -pd.set_option("display.max_colwidth", None) - -SUMMARY_SCHEMA = Schema([{'path': str, - 'usedterabytes': Use(float), - 'usedlogicalterabytes': Use(float), - 'maxphysicalterabytes': Use(float), - 'maxlogicalterabytes': Use(float), - 'used_logical_space_percentage': Or(float, int, None), - 'used_logical_over_used_raw_percentage': Or(float, int, None), }]) - -SUMMARY_COL_ORDER = {'path': 'Path', - 'usedlogicalterabytes': 'Used [TB] logical ', - 'usedterabytes': 'Used [TB] physical', - 'maxlogicalterabytes': 'Logical quota [TB] ', - 'maxphysicalterabytes': 'Physical quota [TB]', - 'used_logical_space_percentage': '% Logical used', - 'used_logical_over_used_raw_percentage': '% Used logical / Used physical'} - -NON_EC_SCHEMA = Schema([{'path': str, - 'usedfiles': Use(int), - 'usedterabytes': Use(float), - 'usedlogicalterabytes': Use(float), - 'maxterabytes': Use(float), - 'maxlogicalterabytes': Use(float), - 'percentageusedterabytes': Use(float), - 'used_logical_over_used_raw_percentage': Or(float, int, None), - 'quota': str, - 'gid': Use(str), - 'maxfiles': Use(int), - 'statusbytes': str, - 'statusfiles': str, }]) - -NON_EC_COL_ORDER = {'path': 'Path', - 'usedfiles': 'Used Files', - 'usedterabytes': 'Used TB', - 'usedlogicalterabytes': 'Used Logical TB', - 'maxterabytes': 'Max TB', - 'maxlogicalterabytes': 'Max Logical TB', - 'percentageusedterabytes': 'Percentage Used %', - 'used_logical_over_used_raw_percentage': '% Used Logical / Used Raw', - 'quota': 'Quota', - 'gid': 'GID', - 'maxfiles': 'Max Files', - 'statusbytes': 'Status Bytes', - 'statusfiles': 'Status Files'} - -EC_SCHEMA = Schema([{'quota_node': str, - 'max_logical_quota': Use(float), - 'free_physical': Use(float), - 'free_physical_for_ec': Use(float), - 'free_physical_for_rep': Use(float), - 'free_logical': Use(float), - 'total_used_logical_terabytes': Use(float), - 'used_logical_over_used_raw_percentage': Or(float, int, None), - 'logical_rep_terabytes': Use(float), - 'logical_ec_terabytes': Use(float), - 'max_physical_quota': Use(float), - 'total_used_physical_terabytes': Use(float), - 'physical_rep_terabytes': Use(float), - 'physical_ec_terabytes': Use(float), }]) - -EC_COL_ORDER = {'quota_node': 'Path', - 'free_logical': 'Free Logical', - 'total_used_logical_terabytes': 'Total Used Logical TB', - 'max_logical_quota': 'Logical Quota', - 'max_physical_quota': 'Physical Quota', - 'free_physical': 'Free Physical', - 'total_used_physical_terabytes': 'Total Used Physical TB', - 'used_logical_over_used_raw_percentage': 'Used Logical / Used Raw %', - 'free_physical_for_ec': 'Free Physical EC', - 'free_physical_for_rep': 'Free Physical Rep', - 'logical_rep_terabytes': 'Logical Rep TB', - 'logical_ec_terabytes': 'Logical EC TB', - 'physical_rep_terabytes': 'Physical Rep TB', - 'physical_ec_terabytes': 'Physical EC TB', } - - -def tstamp(): - """Return timestamp for logging""" - return datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') - - -def get_update_time_of_file(ec_file, non_ec_file, summary_file): - """Create update time depending on reading EOS results from file or directly from command""" - if ec_file and non_ec_file and summary_file: - # Set update time to eos file modification time, minimum of 2 - ec_ts = os.path.getmtime(ec_file) - non_ec_ts = os.path.getmtime(non_ec_file) - summary_ts = os.path.getmtime(summary_file) - try: - return datetime.utcfromtimestamp(min(ec_ts, non_ec_ts, summary_ts)).strftime('%Y-%m-%d %H:%M:%S') - except OSError as e: - print(tstamp(), "ERROR: could not get last modification time of file:", str(e)) - else: - # !! means time did not come from file but cron job time - return "!!" + datetime.utcnow().strftime("%Y-%m-%d H:%M:%S") - - -def get_df_with_validation(json_file, schema, column_order): - """Read json file, validate, cast types and convert to pandas dataframe - """ - try: - with open(json_file) as f: - json_arr = json.load(f) - - json_arr = schema.validate(json_arr) - - # orient values reads json array - return pd.DataFrame(json_arr, columns=column_order.keys()).rename(columns=column_order) - except SchemaError as e: - print(tstamp(), "Data not exist or not valid:", str(e)) - sys.exit(1) - - -def get_html_template(base_html_directory=None): - """ Reads partial html file and return it as strings - """ - if base_html_directory is None: - base_html_directory = os.getcwd() - with open(os.path.join(base_html_directory, "main.html")) as f: - main_html = f.read() - return main_html - - -def prepare_html(df): - html = df.to_html(escape=False, index=False) - # cleanup of the default dump - html = html.replace( - 'table border="1" class="dataframe"', - 'table id="" class="display compact" style="width:90%;"', - ) - html = html.replace('style="text-align: right;"', "") - return html - - -def create_main_html(df_non_ec, df_ec, df_summary, update_time, base_html_directory): - """Create html page with given dataframe - """ - df_non_ec_html = prepare_html(df_non_ec) - df_ec_html = prepare_html(df_ec) - df_summary_html = prepare_html(df_summary) - - # Get main html - main_html = get_html_template(base_html_directory=base_html_directory) - main_html = main_html.replace("__UPDATE_TIME__", update_time) - - # Add pandas dataframe html to main body - main_html = main_html.replace('____NON_EC_BLOCK____', df_non_ec_html) - main_html = main_html.replace('____EC_BLOCK____', df_ec_html) - main_html = main_html.replace('____SUMMARY_BLOCK____', df_summary_html) - return main_html - - -@click.command() -@click.option("--output_file", default=None, required=True, help="For example: /eos/.../www/test/test.html") -@click.option("--non_ec_json", required=True, help="/eos/cms/store/accounting/eos_non_ec_accounting.json") -@click.option("--ec_json", required=True, help="/eos/cms/store/accounting/eos_ec_accounting.json") -@click.option("--summary_json", required=True, help="/eos/cms/store/eos_accounting_summary.json") -@click.option("--static_html_dir", default=None, required=True, - help="Html directory for main html template. For example: ~/CMSMonitoring/src/html/eos_path_size") -def main(output_file=None, non_ec_json=None, ec_json=None, summary_json=None, static_html_dir=None): - """Main function combines xrdcp and EOS results then creates HTML page - """ - joint_update_time = get_update_time_of_file(non_ec_json, ec_json, summary_json) - print("[INFO] Update time of input files:", joint_update_time) - - df_non_ec = get_df_with_validation(non_ec_json, NON_EC_SCHEMA, NON_EC_COL_ORDER) - df_ec = get_df_with_validation(ec_json, EC_SCHEMA, EC_COL_ORDER) - df_summary = get_df_with_validation(summary_json, SUMMARY_SCHEMA, SUMMARY_COL_ORDER) - main_html = create_main_html(df_non_ec, df_ec, df_summary, joint_update_time, static_html_dir) - with open(output_file, "w+") as f: - f.write(main_html) - - -if __name__ == "__main__": - main() From 0ffc73ecf0ad1637b4bc8879041cf135c9d516df Mon Sep 17 00:00:00 2001 From: Carlos Borrajo Gomez Date: Wed, 23 Apr 2025 11:58:51 +0200 Subject: [PATCH 2/3] Remove related code to cron_eos_usage_es cronjob --- scripts/cron4eos_usage_es.sh | 34 ---------- src/python/CMSMonitoring/eos_usage_es.py | 82 ------------------------ 2 files changed, 116 deletions(-) delete mode 100755 scripts/cron4eos_usage_es.sh delete mode 100644 src/python/CMSMonitoring/eos_usage_es.py diff --git a/scripts/cron4eos_usage_es.sh b/scripts/cron4eos_usage_es.sh deleted file mode 100755 index 589c94f0..00000000 --- a/scripts/cron4eos_usage_es.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -##H Script to send CMS EOS usage summary data to MONIT -##H CMS-VOC and CMSMONITORING groups are responsible for this script. -##H Arguments: -##H $1: AMQ credentials json file path (secrets/cms-eos-mon/amq-broker.json) -##H -set -e -TZ=UTC -myname=$(basename "$0") -script_dir="$(cd "$(dirname "$0")" && pwd)" -# Get nice util functions -. "${script_dir}"/utils.sh - -if [ "$1" == "" ] || [ "$1" == "-h" ] || [ "$1" == "--help" ] || [ "$1" == "-help" ]; then - util_usage_help - exit 0 -fi - -py_input_args=( - --creds "$1" - --summary_json "/eos/cms/store/accounting/eos_accounting_summary.json" -) -# ---------------------------------------------------------------------------------------------------------- Run in K8S -if [ -n "$K8S_ENV" ]; then - util4logi "${myname} is starting.." - util_cron_send_start "$myname" "1h" - - util_kerberos_auth_with_keytab /etc/secrets/keytab - python3 "${script_dir}"/../src/python/CMSMonitoring/eos_usage_es.py "${py_input_args[@]}" 2>&1 - - util_cron_send_end "$myname" "1h" "$?" - util4logi "${myname} successfully finished." - exit 0 -fi diff --git a/src/python/CMSMonitoring/eos_usage_es.py b/src/python/CMSMonitoring/eos_usage_es.py deleted file mode 100644 index 73e06cf1..00000000 --- a/src/python/CMSMonitoring/eos_usage_es.py +++ /dev/null @@ -1,82 +0,0 @@ -# !/usr/bin/env python -# -*- coding: utf-8 -*- -# Author: Ceyhun Uzunoglu -# Sends CMS EOS summary calculations to MONIT in each 10 minute -# -# Cron script CMSMOnitoring/scripts/cron4eos_usage_es.sh -import json -import logging -import os -import sys -import time -import click - -# CMSMonitoring modules -try: - from CMSMonitoring.StompAMQ7 import StompAMQ7 -except ImportError: - print("ERROR: Could not import StompAMQ") - sys.exit(1) - - -def credentials(f_name): - if os.path.exists(f_name): - return json.load(open(f_name)) - return {} - - -def to_chunks(data, samples=1000): - length = len(data) - for i in range(0, length, samples): - yield data[i:i + samples] - - -def special_send_to_amq(data, confs, batch_size): - """Sends list of dictionary in chunks""" - ts = int(time.time()) * 1000 - wait_seconds = 0.001 - if confs: - username = confs.get('username', '') - password = confs.get('password', '') - producer = confs.get('producer') - topic = confs.get('topic') - doc_type = confs.get('type', None) - host = confs.get('host') - port = int(confs.get('port')) - cert = confs.get('cert', None) - ckey = confs.get('ckey', None) - for chunk in to_chunks(data, batch_size): - # After each stomp_amq.send, we need to reconnect with this way. - stomp_amq = StompAMQ7(username=username, password=password, producer=producer, topic=topic, - key=ckey, cert=cert, validation_schema=None, host_and_ports=[(host, port)], - loglevel=logging.WARNING) - messages = [] - for msg in chunk: - # Set metadata.timestamp as tstamp_hour of the old data - notif, _, _ = stomp_amq.make_notification(payload=msg, doc_type=doc_type, - producer=producer, ts=ts) - messages.append(notif) - if messages: - stomp_amq.send(messages) - time.sleep(wait_seconds) - time.sleep(1) - print("Message sending is finished") - - -def get_data(json_file): - with open(json_file) as f: - return json.loads(f.read()) - - -@click.command() -@click.option("--creds", required=True, help="secret file path: secrets/cms-eos-mon/amq_broker.json") -@click.option("--summary_json", required=True, help="/eos/cms/store/eos_accounting_summary.json") -def main(creds, summary_json): - """Main function that sends data to MONIT - """ - creds_json = credentials(f_name=creds) - special_send_to_amq(data=get_data(summary_json), confs=creds_json, batch_size=10000) - - -if __name__ == "__main__": - main() From bc0d31a7d538a1395756cf4703a7aa2555c388bc Mon Sep 17 00:00:00 2001 From: Carlos Borrajo Gomez Date: Wed, 23 Apr 2025 13:44:31 +0200 Subject: [PATCH 3/3] Remove code by VOC for archiving purposes --- .../CMSMonitoring/eos_accounting_cmsvoc.py | 299 ------------------ 1 file changed, 299 deletions(-) delete mode 100644 src/python/CMSMonitoring/eos_accounting_cmsvoc.py diff --git a/src/python/CMSMonitoring/eos_accounting_cmsvoc.py b/src/python/CMSMonitoring/eos_accounting_cmsvoc.py deleted file mode 100644 index fed08df1..00000000 --- a/src/python/CMSMonitoring/eos_accounting_cmsvoc.py +++ /dev/null @@ -1,299 +0,0 @@ -# Germano Massullo - germano.massullo@cern.ch -# Dario Mapelli - dario.mapelli@cern.ch - -# This script is running in acrontab in lxplus under the personal account (not 'cmsvoc' service account) of the CMS VOC, because such account has full permissions. This python script cannot be run directly on lxplus, the acrontab must run it from the following bash script -""" -#!bin/bash - -# This script is run by acrontab on lxplus -source /etc/profile -python3 eos_accounting_cmsvoc.py -""" -# A very extensive explanation of this script is available at -# https://its.cern.ch/jira/browse/CMSMONIT-521?focusedId=4724501&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-4724501 -# Its text cannot be pasted here for both reserved comments and lack of layout capabilities in a Python file compared to a comment in a Jira ticket. -# A copy of this script is hosted for archival purposes at https://github.com/dmwm/CMSMonitoring - -import os -import json -import logging -import tempfile - - -EXCLUDED_PATHS = ["/eos/cms/store/cmst3", "/eos/recovered", "/eos/totem"] - -# many numbers comes from EOS tools in form of string. We need to convert them -# to numbers (integer or float) -# to be better handled by the CMS monitoring web page -def convert_to_terabytes_and_cast_from_string_to_number__non_ec(dictionary_list): - for item in dictionary_list: - item['usedbytes'] = float(item['usedbytes']) / (10**12) - item['usedlogicalbytes'] = float(item['usedlogicalbytes']) / (10**12) - item['maxbytes'] = float(item['maxbytes']) / (10**12) - item['maxlogicalbytes'] = float(item['maxlogicalbytes']) / (10**12) - try: - item['used_logical_over_used_raw_percentage'] = item['usedlogicalbytes'] / item['usedbytes'] * 100 - except: - item['used_logical_over_used_raw_percentage'] = None - item['usedterabytes'] = item.pop('usedbytes') - item['usedlogicalterabytes'] = item.pop('usedlogicalbytes') - item['maxterabytes'] = item.pop('maxbytes') - item['maxlogicalterabytes'] = item.pop('maxlogicalbytes') - item['percentageusedbytes'] = float(item['percentageusedbytes']) - item['percentageusedterabytes'] = item.pop('percentageusedbytes') - item['usedfiles'] = int(item['usedfiles']) - item['maxfiles'] = int(item['maxfiles']) - return dictionary_list - -# many numbers comes from EOS tools in form of string. We need to convert them -# to numbers (integer or float) -# to be better handled by the CMS monitoring web page -# ===== -# the reason why following key names do not contain the word "terabytes" -# instead of "bytes" is # because the frontend will break as it expects -# the key names that are currently present in this function -def convert_to_terabytes_and_cast_from_string_to_number__ec(dictionary_list): - for item in dictionary_list: - item['max_logical_quota'] = float(item['max_logical_quota']) / (10**12) - item['max_physical_quota'] = item['max_logical_quota'] * 12 / 10 - item['total_used_logical_bytes'] = float(item['total_used_logical_bytes']) / (10**12) - item['logical_rep_bytes'] = float(item['logical_rep_bytes']) / (10**12) - item['logical_ec_bytes'] = float(item['logical_ec_bytes']) / (10**12) - item['total_used_physical_bytes'] = float(item['total_used_physical_bytes']) / (10**12) - item['physical_rep_bytes'] = float(item['physical_rep_bytes']) / (10**12) - item['physical_ec_bytes'] = float(item['physical_ec_bytes']) / (10**12) - item['free_physical'] = float(item['free_physical']) / (10**12) - item['free_physical_for_ec'] = float(item['free_physical_for_ec']) / (10**12) - item['free_physical_for_rep'] = float(item['free_physical_for_rep']) / (10**12) - item['free_logical'] = float(item['free_logical']) / (10**12) - try: - item['used_logical_over_used_raw_percentage'] = item['total_used_logical_bytes'] / item['total_used_physical_bytes'] * 100 - except: - item['used_logical_over_used_raw_percentage'] = None - item['total_used_logical_terabytes'] = item.pop('total_used_logical_bytes') - item['logical_rep_terabytes'] = item.pop('logical_rep_bytes') - item['logical_ec_terabytes'] = item.pop('logical_ec_bytes') - item['total_used_physical_terabytes'] = item.pop('total_used_physical_bytes') - item['physical_rep_terabytes'] = item.pop('physical_rep_bytes') - item['physical_ec_terabytes'] = item.pop('physical_ec_bytes') - return dictionary_list - - -def get_eos_ec_quota_dump(): - try: - accounting_file = open("/eos/cms/store/accounting/cms_quota_dump.txt", "r") - - except: - logging.exception('Cannot get the eos quota ls output from EOS') - - with open(accounting_file.name) as file: - lines = file.readlines() - dictionary_list = [] - for line in lines: - line = line.strip() - line = line.split(' ') - keys_values_single_line = dict(s.split('=') for s in line) - dictionary_list.append(keys_values_single_line) - accounting_file.close() - return dictionary_list - - -def get_eos_quota_ls_output(): - dictionary_list_temp = [] - dictionary_list = [] - accounting_file = tempfile.NamedTemporaryFile() - try: - # export EOSHOME="" is needed to avoid getting the following two messages everytime the command is run - # ===== - # pre-configuring default route to /eos/user/c/cmsvoc/ - # -use $EOSHOME variable to override - # ===== - os.system('export EOSHOME="" && eos -r 103074 1399 quota ls -m > %s' % accounting_file.name) - - except: - logging.exception('Cannot get the eos quota ls output from EOS') - - with open(accounting_file.name) as file: - lines = file.readlines() - for line in lines: - line = line.strip() - line = line.split(' ') - keys_values_single_line = dict(s.split('=') for s in line) - dictionary_list_temp.append(keys_values_single_line) - accounting_file.close() - i = 0 - """ - each xrdcp entry, when in eos quota ls, it either has gid=all or gid=project, attribute not both of them - Concerning this, Jaroslav Guenther said: - "That is expected, it is a special quota type which does not - allow any other quota node to be defined on the same path. Project - quota books all volume/inode usage under the project subtree to a single - project account (gid 99). E.g. the recycle bin uses this quota type." - """ - while i < len(dictionary_list_temp): - if( ("gid" in dictionary_list_temp[i]) and ( (dictionary_list_temp[i]["gid"] == "ALL") or (dictionary_list_temp[i]["gid"] == "project") ) ): - dictionary_list.append(dictionary_list_temp[i]) - i = i + 1 - - # "eos quota ls" uses 'space' instead of 'path' as attribute name for folders. The following cycle changes this to 'path', so that later is - # easier to write code to compare various outputs - for x in dictionary_list: - x['path'] = x['space'] - del x['space'] - return dictionary_list - - -def get_xrdcp_output(): - accounting_file = tempfile.NamedTemporaryFile() - try: - # export EOSHOME="" is needed to avoid getting the following two messages everytime the command is run - # ===== - # pre-configuring default route to /eos/user/c/cmsvoc/ - # -use $EOSHOME variable to override - # ===== - - #os.system('export EOSHOME="" && xrdcp root://eoscms.cern.ch//eos/cms/proc/accounting - > %s' % accounting_file.name) - os.system('XRD_CPUSEPGWRTRD=0 xrdcp --nopbar root://eoscms.cern.ch//eos/cms/proc/accounting - > %s' % accounting_file.name) - - except: - logging.exception('Cannot get the xrdcp output from EOS') - - with open(accounting_file.name) as json_file: - json_data_temp = json.load(json_file) - accounting_file.close() - data = json_data_temp['storageservice']['storageshares'] - """ - Due how EOS returns JSON output, each data element contains a 'path' key - which value is in form of - ["foo"] (so a list) instead of "foo" (so a string). - This adds useless complexity, so must be removed. - item['path'][0] returns "foo", instead item['path'] returns ["foo"] - That's why [0] is used - """ - for item in data: - item['path'] = item['path'][0] - return data - - -def match_xrdcp_and_eos_quota_ls_entries(): - xrdcp_data = get_xrdcp_output() - eos_quota_ls_data = get_eos_quota_ls_output() - eos_ec_quota = get_eos_ec_quota_dump() - - xrdcp_data_paths = [ i['path'] for i in xrdcp_data] - eos_quota_ls_data_paths = [ j['path'] for j in eos_quota_ls_data] - eos_ec_quota_paths = [ k['quota_node'] for k in eos_ec_quota] - - xrdcp_paths_set = set(xrdcp_data_paths) - eos_quota_ls_paths_set = set(eos_quota_ls_data_paths) - eos_ec_quota_paths_set = set(eos_ec_quota_paths) - - print(eos_quota_ls_paths_set - xrdcp_paths_set) - print("eos_quota_ls_paths_set length is",len(eos_quota_ls_paths_set)) - print("xrdcp_paths_set length is",len(xrdcp_paths_set)) - print("eos_ec_quota_paths_set length is",len(eos_ec_quota_paths_set)) - print("xrdcp & eos_ec_quota_paths_set length is",len(xrdcp_paths_set & eos_ec_quota_paths_set)) - - -def get_non_ec_statistics(): - xrdcp_data = get_xrdcp_output() - eos_quota_ls_data = get_eos_quota_ls_output() - eos_ec_quota = get_eos_ec_quota_dump() - - xrdcp_data_paths = [ i['path'] for i in xrdcp_data] - eos_quota_ls_data_paths = [ j['path'] for j in eos_quota_ls_data] - eos_ec_quota_paths = [ k['quota_node'] for k in eos_ec_quota] - - xrdcp_paths_set = set(xrdcp_data_paths) - eos_quota_ls_paths_set = set(eos_quota_ls_data_paths) - eos_ec_quota_paths_set = set(eos_ec_quota_paths) - paths = xrdcp_paths_set - eos_ec_quota_paths_set - - results = list(filter(lambda x: x['path'] in paths, eos_quota_ls_data)) - return convert_to_terabytes_and_cast_from_string_to_number__non_ec(results) - - -def get_ec_statistics(): - results = get_eos_ec_quota_dump() - return convert_to_terabytes_and_cast_from_string_to_number__ec(results) - -# this function uses the nomenclature of non EC JSON -def produce_summary(): - ec_statistics = get_ec_statistics() - non_ec_statistics = get_non_ec_statistics() - total = {'path' : 'TOTAL', 'usedterabytes' : 0.0, 'usedlogicalterabytes' : 0.0, 'maxlogicalterabytes' : 0.0, 'maxphysicalterabytes' : 0.0, 'used_logical_over_used_raw_percentage' : 0.0, 'used_logical_space_percentage' : 0.0} - - for item in non_ec_statistics: - if (any(item['path'].startswith(x) for x in EXCLUDED_PATHS)): - del non_ec_statistics[non_ec_statistics.index(item)] - continue - del item['gid'] - del item['maxfiles'] - del item['quota'] - del item['usedfiles'] - del item['statusbytes'] - del item['statusfiles'] - item['used_logical_space_percentage'] = item.pop('percentageusedterabytes') - item['maxphysicalterabytes'] = item.pop('maxterabytes') - total['usedterabytes'] += item['usedterabytes'] - total['usedlogicalterabytes'] += item['usedlogicalterabytes'] - total['maxlogicalterabytes'] += item['maxlogicalterabytes'] - total['maxphysicalterabytes'] += item['maxphysicalterabytes'] - for item in ec_statistics: - item['path'] = item.pop('quota_node') - if (any(item['path'].startswith(x) for x in EXCLUDED_PATHS)): - del ec_statistics[ec_statistics.index(item)] - continue - item['maxlogicalterabytes'] = item.pop('max_logical_quota') - item['maxphysicalterabytes'] = item.pop('max_physical_quota') - item['usedlogicalterabytes'] = item.pop('total_used_logical_terabytes') - del item['logical_rep_terabytes'] - del item['logical_ec_terabytes'] - item['usedterabytes'] = item.pop('total_used_physical_terabytes') - del item['physical_rep_terabytes'] - del item['physical_ec_terabytes'] - del item['free_physical'] - del item['free_physical_for_ec'] - del item['free_physical_for_rep'] - del item['free_logical'] - try: - item['used_logical_space_percentage'] = item['usedlogicalterabytes'] * 100 / item['maxlogicalterabytes'] - except: - item['used_logical_space_percentage'] = None - total['usedterabytes'] += item['usedterabytes'] - total['usedlogicalterabytes'] += item['usedlogicalterabytes'] - total['maxlogicalterabytes'] += item['maxlogicalterabytes'] - total['maxphysicalterabytes'] += item['maxphysicalterabytes'] - try: - total['used_logical_over_used_raw_percentage'] = total['usedlogicalterabytes'] / total['usedterabytes'] * 100 - except: - total['used_logical_over_used_raw_percentage'] = None - try: - total['used_logical_space_percentage'] = total['usedlogicalterabytes'] * 100 / total['maxlogicalterabytes'] - except: - total['used_logical_space_percentage'] = None - - for item in non_ec_statistics: - if (any(item['path'].startswith(x) for x in EXCLUDED_PATHS)): - del non_ec_statistics[non_ec_statistics.index(item)] - for item in non_ec_statistics: - if (any(item['path'].startswith(x) for x in EXCLUDED_PATHS)): - del non_ec_statistics[non_ec_statistics.index(item)] - for item in non_ec_statistics: - if (any(item['path'].startswith(x) for x in EXCLUDED_PATHS)): - del non_ec_statistics[non_ec_statistics.index(item)] - for item in non_ec_statistics: - if (any(item['path'].startswith(x) for x in EXCLUDED_PATHS)): - del non_ec_statistics[non_ec_statistics.index(item)] - # the [] brackets around total are needed to convert it into a list, - # otherwise the addition operator will not work among a list and a dictionary - return non_ec_statistics + ec_statistics + [total] - -with open('/eos/cms/store/accounting/eos_ec_accounting.json', 'w') as json_ec_output: - json.dump(get_ec_statistics(), json_ec_output, indent=4) - -with open('/eos/cms/store/accounting/eos_non_ec_accounting.json', 'w') as json_non_ec_output: - json.dump(get_non_ec_statistics(), json_non_ec_output, indent=4) - -with open('/eos/cms/store/accounting/eos_accounting_summary.json', 'w') as json_summary_output: - json.dump(produce_summary(), json_summary_output, indent=4)