From 738fa8452be7630526d2f4adefbb0a2f99998ac6 Mon Sep 17 00:00:00 2001 From: Jonas Linde Date: Thu, 23 Feb 2023 11:14:42 +0100 Subject: [PATCH] Add support for user based default access --- docs/manual/access-control.rst | 20 ++++++++++++++++++++ pywb/warcserver/access_checker.py | 19 ++++++++++++++++++- sample_archive/access/user-default.aclj | 2 ++ tests/config_test_access.yaml | 9 ++++++++- tests/test_acl.py | 8 ++++++++ 5 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 sample_archive/access/user-default.aclj diff --git a/docs/manual/access-control.rst b/docs/manual/access-control.rst index 037272496..2fcbcb46a 100644 --- a/docs/manual/access-control.rst +++ b/docs/manual/access-control.rst @@ -151,6 +151,26 @@ For example, this header may be set based on IP range, or based on password auth Further examples of how to set this header will be provided in the deployments section. +One may also specify default access for different users by adding sub-keys to the ``default_access`` setting:: + + collections: + test: + ... + default_access: + default: block + admin: allow + +Note that the ``default`` entry is applied to every user that has no entry of its own (including an empty user name), as well as to a user actually named ``default``. +If the ``default`` entry is missing, it will be assumed to be ``allow``:: + + collections: + test: + ... + default_access: + guest: block + +This works whether the ``default_access`` is specified at the top level or for a specific collection. + **Note: Do not use the user-based rules without configuring proper authentication on an Apache or Nginx frontend to set or remove this header, otherwise the 'X-Pywb-ACL-User' can easily be faked.** See the :ref:`config-acl-header` section in Usage for examples on how to configure this header. 
diff --git a/pywb/warcserver/access_checker.py b/pywb/warcserver/access_checker.py index 46cd7acd9..e3ca7a06d 100644 --- a/pywb/warcserver/access_checker.py +++ b/pywb/warcserver/access_checker.py @@ -107,6 +107,10 @@ def __init__(self, access_source, default_access='allow', embargo=None): self.default_rule['access'] = default_access self.default_rule['default'] = 'true' + if isinstance(self.default_rule['access'], dict): + if 'default' not in self.default_rule['access']: + self.default_rule['access']['default'] = 'allow' + self.embargo = self.parse_embargo(embargo) def parse_embargo(self, embargo): @@ -273,7 +277,17 @@ def find_access_rule(self, url, ts=None, urlkey=None, collection=None, acl_user= if acl_key < tld: break - return last_obj if last_obj else self.default_rule + + if last_obj: + return last_obj + + if isinstance(self.default_rule['access'], dict): + default_rule = dict(self.default_rule) + user = acl_user if acl_user in default_rule['access'] else 'default' + default_rule['access'] = default_rule['access'][user] + return default_rule + + return self.default_rule def __call__(self, res, acl_user): """Wraps the cdx iter in the supplied tuple returning a @@ -334,6 +348,9 @@ def wrap_iter(self, cdx_iter, acl_user): if not access: access = self.default_rule['access'] + if isinstance(access, dict): + access = self.default_rule['access']['default'] + if access == 'allow_ignore_embargo': access = 'allow' diff --git a/sample_archive/access/user-default.aclj b/sample_archive/access/user-default.aclj new file mode 100644 index 000000000..7a5b3c6fc --- /dev/null +++ b/sample_archive/access/user-default.aclj @@ -0,0 +1,2 @@ +com,example)/ - {"access": "block", "user": "staff"} +com,example)/ - {"access": "allow", "user": "staff2"} diff --git a/tests/config_test_access.yaml b/tests/config_test_access.yaml index 8fb352f7c..bf19331ac 100644 --- a/tests/config_test_access.yaml +++ b/tests/config_test_access.yaml @@ -6,7 +6,8 @@ collections: archive_paths: 
./sample_archive/warcs/ acl_paths: ./sample_archive/access/pywb.aclj - default_access: block + default_access: + default: block pywb-acl-list: index_paths: ./sample_archive/cdx/ @@ -62,6 +63,12 @@ collections: acl_paths: - ./sample_archive/access/pywb.aclj + pywb-acl-user-default: + index_paths: ./sample_archive/cdx/ + archive_paths: ./sample_archive/warcs/ + acl_paths: ./sample_archive/access/user-default.aclj + default_access: + staff2: block diff --git a/tests/test_acl.py b/tests/test_acl.py index ea7655aa8..d7c929234 100644 --- a/tests/test_acl.py +++ b/tests/test_acl.py @@ -96,5 +96,13 @@ def test_allowed_different_coll_acl_dir(self): assert '"http://httpbin.org/anything/resource.json"' in resp.text + def test_user_default(self): + headers = {"X-Pywb-ACL-User": "staff"} + self.testapp.get('/pywb-acl-user-default/mp_/http://www.iana.org/', headers=headers, status=200) + self.testapp.get('/pywb-acl-user-default/mp_/http://www.example.com/', headers=headers, status=451) + + headers = {"X-Pywb-ACL-User": "staff2"} + self.testapp.get('/pywb-acl-user-default/mp_/http://www.iana.org/', headers=headers, status=451) + self.testapp.get('/pywb-acl-user-default/mp_/http://www.example.com/', headers=headers, status=200)