Skip to content

Commit

Permalink
Merge branch 'main' of github.com:DE-RSE/reports-matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
Git Bot committed Mar 12, 2024
2 parents 66090b8 + 46614c5 commit 2af0cbb
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
.matrixcounter.status
matrixcounter.pdf
285 changes: 285 additions & 0 deletions matrixcounter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
#!/usr/bin/env python3
""" Keep track of matrix room user counts over time
Help is available with the "--help" option.
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

__author__ = "Frank Löffler"
__contact__ = "frank.loeffler@uni-jena.de"
__copyright__ = "Copyright 2024, Frank Löffler; 2024 Friedrich-Schiller-Universität Jena"
__date__ = "2024-02-22"
__email__ = "frank.loeffler@uni-jena.de"
__license__ = "AGPLv3"
__maintainer__ = "frank.loeffler@uni-jena.de"
__status__ = "Development"
__version__ = "0.0.1"

import sys, os
import argparse
import json
from pprint import pprint
from datetime import datetime

import requests

# Skeleton of the data structure used for the internal state.
# 'matrix_access_tokens' is keyed by '<matrix user>@<matrix host>' (see
# load_status()); 'rooms' is keyed by room id plus the special key 'total'.
status_empty = {
'matrix_access_tokens': {},
'rooms': {},
}
# Global state, to be saved between invocations. It starts out as None and is
# assigned from the return value of load_status() further down.
status = None

# Runtime configuration; replaced by the result of parse_commandline() later.
config = {}


def log(msg):
    """Print ``msg`` to stdout, but only when verbose mode is enabled.

    The module-level ``config`` dict is consulted at call time. A missing
    'verbose' entry (for instance while ``config`` is still the empty dict
    because the command line has not been parsed yet) is treated as
    "not verbose" instead of raising a KeyError.
    """
    if config.get('verbose'):
        print(msg)

def parse_commandline():
    """Parse ``sys.argv`` and return the settings as a plain dict.

    Returns:
        dict with the keys 'matrix_host', 'matrix_user', 'matrix_pass',
        'statusfile_name', 'counterfile_name', 'matrix_always_logout' and
        'verbose'.

    If ``--matrixpass`` is given as "-", the password is taken from the first
    line of stdin with the line terminator removed. argparse terminates the
    program (SystemExit) on ``--help`` or on invalid/missing arguments.
    """
    parser = argparse.ArgumentParser(
        description="Collect user count statistics of rooms a given matrix user has joined.",
        epilog='Example usage (replace the echo with the respective command for your '
               "password manager):\n"
               'echo matrix_passwd | ./matrixcounter.py '
               '--matrixhost synapse.mymatrix.nowhere '
               '--matrixuser matrixuser '
               '--matrixpass - '
               "\n",
        add_help=False)
    # Separate groups so that the help output lists required options first;
    # this is also why the default -h/--help is disabled and re-added by hand.
    req = parser.add_argument_group('required arguments')
    opt = parser.add_argument_group('optional arguments')
    opt.add_argument("-h", "--help", action="help", help="show this help message and exit")
    req.add_argument('--matrixhost', metavar='host', required=True,
                     help='hostname of the matrix server (not including https://)')
    req.add_argument('--matrixuser', metavar='user', required=True,
                     help='user name for the matrix server')
    req.add_argument('--matrixpass', metavar='password', required=True,
                     help='password for the user on matrix, read from stdin if == "-". ')
    opt.add_argument('--statusfile', metavar='filename', required=False,
                     default='.matrixcounter.status',
                     help='filename to use to save state, e.g., to be able to use a '
                          'longer-living matrix access token, as well as past user counts. '
                          'NEVER share this file, as it does contain the matrix access token. '
                          'See --counterfile for a shareable file. '
                          'This file can be shared between invocations of this script with '
                          'different matrix options (e.g. different matrix users that may '
                          'share joined rooms). '
                          'If this file does not exist or cannot be (properly) read, '
                          'it will be created/overwritten. Default: .matrixcounter.status')
    opt.add_argument('--counterfile', metavar='filename', required=False,
                     default='matrixcounter.json',
                     help='filename to use to save counter data. Note that this will contain '
                          'all room data also contained in --statusfile (but no authentication '
                          'data), i.e., will contain data of all rooms of a possibly shared '
                          # the trailing space was missing, gluing two sentences together
                          '--statusfile. If you do not want this, use separate --statusfile. '
                          'If this file does not exist or cannot be (properly) read, '
                          'it will be created/overwritten. Default: matrixcounter.json')
    opt.add_argument('--matrix-always-logout', action='store_true',
                     help='logout of a possibly opened matrix session at the end of operations. '
                          'This also invalidates the access token, so a new login will be '
                          'necessary in the future. This is not intended for regular usage, '
                          'but to close sessions from within the script if it is known that '
                          'the access token will not be used in the future.')
    opt.add_argument('--verbose', action='store_true',
                     help='Be verbose. By default nothing will be printed if everything works '
                          'as planned.')
    args = vars(parser.parse_args())

    # Use a local name that does not shadow the module-level ``config``.
    parsed = {
        'matrix_host': args['matrixhost'],
        'matrix_user': args['matrixuser'],
        'statusfile_name': args['statusfile'],
        'counterfile_name': args['counterfile'],
        'matrix_always_logout': args['matrix_always_logout'],
        'verbose': args['verbose'],
    }
    if args['matrixpass'] == '-':
        # Strip both LF and CRLF line endings so that a password piped from a
        # tool emitting Windows line endings does not keep a trailing '\r'.
        parsed['matrix_pass'] = sys.stdin.readline().rstrip('\r\n')
    else:
        parsed['matrix_pass'] = args['matrixpass']
    return parsed

def load_status(statusfile_name):
    """Load the saved state from ``statusfile_name`` and sanity-check it.

    A fresh copy of the ``status_empty`` template is used when the file is
    missing, unreadable, not valid JSON, not a JSON object, or lacks one of
    the template's top-level keys. Top-level entries that are not dicts are
    reset to empty dicts.

    The token id is built from the module-level names ``matrix_user`` and
    ``matrix_host``, which therefore have to exist before this is called.

    Returns:
        A tuple ``(status, matrix_access_token_id)``. The entry
        ``status['matrix_access_tokens'][matrix_access_token_id]`` is
        guaranteed to exist and is either a str or None.
    """
    import copy  # only needed here; keeps the file-level imports untouched

    status = None
    try:
        # The context manager closes the file even if parsing fails.
        with open(statusfile_name, 'r') as statusfile:
            status = json.load(statusfile)
    except (OSError, ValueError):
        # Missing/unreadable file or broken JSON (JSONDecodeError and
        # UnicodeDecodeError are ValueErrors): fall back to the template, as
        # described in the --statusfile help text.
        status = None

    if not isinstance(status, dict) or any(key not in status for key in status_empty):
        # Deep copy, so that later modifications of the state never leak into
        # the module-level template.
        status = copy.deepcopy(status_empty)
    for key in ('matrix_access_tokens', 'rooms'):
        if not isinstance(status[key], dict):
            status[key] = {}

    matrix_access_token_id = f'{matrix_user}@{matrix_host}'
    tokens = status['matrix_access_tokens']
    if not isinstance(tokens.get(matrix_access_token_id), str):
        tokens[matrix_access_token_id] = None
    return status, matrix_access_token_id

# parse the command line options
config = parse_commandline()
# Use the returned dict to set up variables: every key of config (matrix_host,
# matrix_user, matrix_pass, statusfile_name, counterfile_name, ...) becomes a
# name of its own, which the code below and load_status() refer to directly.
# NOTE(review): this relies on locals() being the module namespace, which only
# holds while this statement runs at module level -- confirm before moving it
# into a function.
locals().update(config)

# load status information from last invocation from file
status, matrix_access_token_id = load_status(statusfile_name)

class Matrix:
    """Simple class to encapsulate a set-up Matrix configuration.

    Attributes:
        host: host name of the Matrix server (without scheme).
        s: the ``requests.Session`` used for all calls made by this class.
        access_token: the token in use after a successful construction, or
            None (class default) if no login has happened.
    """
    access_token = None

    @staticmethod
    def _auth_header(token):
        """Return the HTTP header that authenticates a request with ``token``.

        The token is sent as a header rather than as an ``access_token`` query
        parameter so that it does not end up in URLs (and thereby in logs).
        """
        return {'Authorization': f'Bearer {token}'}

    def __init__(self, host, user, password, token=None):
        """Initialize the class and login.

        This requires host, user and password. Optionally, a pre-existing
        access token can be passed in and will be used instead of user and
        password. However, the latter two are still required, as it will fall
        back to them in case the token does not work (to obtain a new token).

        Terminates the program with exit code 1 if the password login fails.
        """
        self.host = host
        self.s = requests.Session()
        if token is not None:
            # Check if the passed token works. Any API call that requires
            # authentication and should always work is fine here. We request
            # the list of joined rooms, but do not use that information later.
            r = self.s.get(f'https://{self.host}/_matrix/client/v3/joined_rooms',
                           headers=self._auth_header(token))
            if r.status_code != 200:
                token = None
                log('matrix: existing token invalid, about to obtain a new one')
        self.access_token = token
        if token is not None:
            log("matrix: already logged in")
            return

        # No token was passed or the one that was did not work: request a new one.
        pdata = {
            'type': 'm.login.password',
            'user': user,
            'password': password,
        }
        r = self.s.post(f'https://{self.host}/_matrix/client/v3/login', json=pdata)
        if r.status_code != 200:
            print(f"Could not login to Matrix: {r.text}.")
            sys.exit(1)
        self.access_token = r.json()['access_token']
        log("matrix: just logged in")

    def logout(self):
        """For completeness, as we usually do not call this: logout of Matrix.

        The reason we by default do not call this is that this invalidates the
        access token and always obtaining a new one can run into rate limits.

        Terminates the program with exit code 1 if no access token is known.
        """
        if self.access_token is None:
            print("No token known: cannot logout.")
            sys.exit(1)
        r = self.s.post(f'https://{self.host}/_matrix/client/v3/logout',
                        headers=self._auth_header(self.access_token))
        if r.status_code != 200:
            print("Could not logout.")
            # Do not fail here, as we effectively achieved what we wanted.
        log("matrix: logged out")

# The one Matrix session of this script; created lazily by login_matrix().
matrix = None


def login_matrix():
    """Login to matrix if not already done.

    Creates the module-level ``matrix`` object if it does not exist yet or
    has no access token, passing in the token stored in ``status`` (if any),
    and writes the token that is in use afterwards back into ``status``.

    Returns:
        The module-level ``matrix`` object.
    """
    # The docstring has to be the first statement of the function: placed
    # after the ``global`` statement it would be a plain no-op expression.
    global matrix
    if matrix is None or matrix.access_token is None:
        matrix = Matrix(matrix_host, matrix_user, matrix_pass,
                        status['matrix_access_tokens'][matrix_access_token_id])
        status['matrix_access_tokens'][matrix_access_token_id] = matrix.access_token
    return matrix

# Log in right away; the code below reads matrix.access_token.
matrix = login_matrix()

def add_data(dates, values, date, value):
    """Record ``value`` at time ``date`` in the parallel lists ``dates``/``values``.

    Both lists are modified in place. To avoid storing long runs of identical
    values, a run is kept as at most two entries: its first and its most
    recent sighting. Hence, if the last two recorded values both equal
    ``value``, only the time of the last entry is moved forward to ``date``;
    in every other case a new (date, value) pair is appended.

    The second entry of such a pair is deliberately kept (instead of relying
    on the next, different entry to mark the end of a run), so that plotting
    does not need any knowledge about the interval this script is run at.

    Prints 'internal error' and exits with code 1 if the two lists differ in
    length.
    """
    if len(values) != len(dates):
        print('internal error')
        sys.exit(1)

    run_continues = len(dates) >= 2 and values[-1] == value == values[-2]
    if run_continues:
        # nothing changed: only move the last-seen time forward
        dates[-1] = date
        return

    # first entries, or a value that differs: add a new data point
    dates.append(date)
    values.append(value)

# Get the list of joined rooms and then the member list of each of them.
s = requests.Session()
# Send the access token as a header instead of as a query parameter, so that
# it does not become part of the request URLs (and thereby of logs).
s.headers['Authorization'] = f'Bearer {matrix.access_token}'
r = s.get(f'https://{matrix_host}/_matrix/client/v3/joined_rooms')
if r.status_code != 200:
    print("Could not get list of joined rooms")
    sys.exit(1)

room_info = {}
unique_users = set()
# one common time stamp (second resolution) for all data points of this run
isodate = datetime.now().replace(microsecond=0).isoformat()
for room_id in r.json()['joined_rooms']:
    r = s.get(f'https://{matrix_host}/_matrix/client/v3/rooms/{room_id}/joined_members')
    if r.status_code != 200:
        # the f prefix was missing, so the room id was never shown
        print(f'Could not get list of users of room {room_id}')
        continue
    users = set(r.json()['joined'])
    room_info[room_id] = {'users': users}
    if room_id not in status['rooms']:
        r = s.get(f'https://{matrix_host}/_matrix/client/v3/rooms/{room_id}/state/m.room.name')
        # rooms are allowed to have no name, but all we want to monitor do
        if r.status_code != 200:
            continue
        room_name = r.json()['name']
        status['rooms'][room_id] = {'name': room_name, 'counts': [[], []]}
    add_data(status['rooms'][room_id]['counts'][0],
             status['rooms'][room_id]['counts'][1],
             isodate, len(users) - 1)  # subtract 1 to exclude this user (supposedly a bot)
    unique_users |= users

# Pseudo room holding the number of distinct users over all counted rooms.
if 'total' not in status['rooms']:
    status['rooms']['total'] = {'name': 'Total', 'counts': [[], []]}
# Again exclude this user, but never record a negative count if no member
# list could be collected at all.
add_data(status['rooms']['total']['counts'][0],
         status['rooms']['total']['counts'][1],
         isodate, max(len(unique_users) - 1, 0))

if config['matrix_always_logout']:
    # login_matrix() hands back the existing session and only logs in again
    # if there is none or it has no token, so no extra check is needed here
    # (the previous check was inverted and tested for an *existing* token).
    matrix = login_matrix()
    matrix.logout()
    # The token is invalid after the logout: do not save it.
    status['matrix_access_tokens'].pop(matrix_access_token_id, None)

# Save the current state. Since this also contains the access token, make sure
# to create the file with safe access permissions.
# NOTE: the mode passed to os.open() only takes effect when the file is newly
# created; the permissions of an already existing file are left as they are.
try:
    file_desc = os.open(path=statusfile_name,
                        flags=os.O_WRONLY | os.O_CREAT | os.O_TRUNC, mode=0o600)
    # The context manager closes the file (and with it the descriptor) even
    # if serializing the state fails.
    with open(file_desc, 'w') as statusfile:
        json.dump(status, statusfile)
except (OSError, TypeError, ValueError) as e:
    # Best effort: report the problem, but still write the counter file below.
    print(e)

# Save the counters. This is the same file format as the state, but it only
# contains the counters and especially no authorization information.
# The with statement already closes the file; no explicit close() is needed.
with open(counterfile_name, 'w') as counterfile:
    json.dump({'rooms': status['rooms']}, counterfile, indent=0, separators=(',', ':'))

45 changes: 39 additions & 6 deletions plot_counter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
#!/usr/bin/env python3
""" Plot user counts for different matrix channels
Help is available with the "--help" option.
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

__author__ = "Frank Löffler"
__contact__ = "frank.loeffler@uni-jena.de"
__copyright__ = "Copyright 2024, Frank Löffler; 2024 Friedrich-Schiller-Universität Jena"
__date__ = "2024-03-05"
__email__ = "frank.loeffler@uni-jena.de"
__license__ = "AGPLv3"
__maintainer__ = "frank.loeffler@uni-jena.de"
__status__ = "Development"
__version__ = "0.0.1"

import sys, os
from pprint import pprint
import argparse
Expand All @@ -10,10 +35,11 @@

parser = argparse.ArgumentParser(description="")
parser.add_argument('file',
help='input json file to use')
help='input json file to use. The output of the matrixcounter.py script '
'is the intended input.')
parser.add_argument('-o', '--output',
help='output file to write the figure to; extension defines format to the'
'extend matplotlib supports')
'extend matplotlib supports; default: counter_matrix.pdf')
args = vars(parser.parse_args())

try:
Expand All @@ -34,26 +60,33 @@
# Start the running extrema at opposite ends, so that the first room plotted
# replaces both of them.
xmin = datetime.fromisoformat('2999-01-01T00:00:00')
xmax = datetime.fromisoformat('1999-01-01T00:00:00')

# Name prefixes of rooms that are left out of the plot.
# TODO: already do not include those numbers in the collected data
_excluded_prefixes = ('deRSE-test', 'deRSE-alt', 'de-RSE-alt')


def _current_count(item):
    """Return the most recent user count of a (room id, room data) pair."""
    return item[1]['counts'][1][-1]


# Plot one line per room, in ascending order of the current (last) user count.
for room, roomdata in sorted(data['rooms'].items(), key=_current_count):
    if roomdata['name'].startswith(_excluded_prefixes):
        continue
    # convert the stored data to the types needed for plotting
    counts = roomdata['counts'][1]
    times = [datetime.fromisoformat(s) for s in roomdata['counts'][0]]
    # stairs() requires len(edges) == len(data) + 1: repeat the last time
    times.append(times[-1])
    # keep track of the global extrema, used as axis limits later
    xmin = min(xmin, times[0])
    xmax = max(xmax, times[-1])

    # the actual plot line
    ax.stairs(counts, edges=times, lw=2, label=roomdata['name'])

ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)

# limit to the observed time range and ensure ymin to be 0
ax.set_xlim(xmin=xmin, xmax=xmax)
ax.set_ylim(ymin=0)

# some plot cosmetics
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
for label in ax.get_xticklabels(which='major'):
label.set(rotation=20, horizontalalignment='right')
Expand Down

0 comments on commit 2af0cbb

Please sign in to comment.