From 867a02f4dbdb8f55991ace4311824131bf9268c1 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 15 Jan 2026 13:48:48 +0100 Subject: [PATCH 1/4] update saml dev settings --- backend/ianalyzer/settings_saml.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/ianalyzer/settings_saml.py b/backend/ianalyzer/settings_saml.py index 1155594a1..991cd72ab 100644 --- a/backend/ianalyzer/settings_saml.py +++ b/backend/ianalyzer/settings_saml.py @@ -22,10 +22,10 @@ SAML_LOGOUT_REQUEST_PREFERRED_BINDING = saml2.BINDING_HTTP_POST SAML_ATTRIBUTE_MAPPING = { - "uushortid": ("username", ), + "uuShortID": ("username", ), "mail": ("email", ), "givenName": ("first_name", ), - "uuprefixedsn": ("last_name", ), + "uuPrefixSn": ("last_name", ), "saml": ("save_saml_login", ), } @@ -49,6 +49,7 @@ 'sp' : { 'name': 'Textcavator', 'name_id_format': saml2.saml.NAMEID_FORMAT_TRANSIENT, + 'name_id_policy_format': saml2.saml.NAMEID_FORMAT_TRANSIENT, # For Okta add signed logout requests. Enable this: # "logout_requests_signed": True, From e5b9f8b9e8d1cdcc42e364fcf274cfdbb95ddbb3 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 15 Jan 2026 14:31:51 +0100 Subject: [PATCH 2/4] update saml docs --- documentation/First-time-setup.md | 12 ++++++++++-- documentation/SAML.md | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/documentation/First-time-setup.md b/documentation/First-time-setup.md index 0aa4d2be9..23319fe45 100644 --- a/documentation/First-time-setup.md +++ b/documentation/First-time-setup.md @@ -15,8 +15,6 @@ The documentation includes a [recipe for installing the prerequisites on Debian ## First-time setup -For the SAML integration, the following libraries are required: xmlsec, python3-dev, libssl-dev and libsasl2-dev. This has been tested on Unix systems, but installation may be more difficult on Windows. 
As the dependencies on `xmlsec` are only called in the `settings_saml` module, however, they should not affect running the application when not explicitly using this settings module. - To get an instance running, do all of the following inside an activated `virtualenv`: 1. Create the file `backend/ianalyzer/settings_local.py`.`ianalyzer/settings_local.py` is included in .gitignore and thus not cloned to your machine. It can be used to customise your environment. You can leave the file empty for now. @@ -88,3 +86,13 @@ To add a database-only corpus, you will need a JSON definition of the corpus, an 3. (optional) If you want to use celery, start your local redis server by running `redis-server` in a separate terminal. 4. (optional) If you want to use celery, activate your python environment. Run `yarn celery worker`. Celery is used for long downloads and the word cloud and ngrams visualisations. 5. Start the frontend by running `yarn start-front`. + +## Next steps + +Now that you have a working Textcavator environment, here are some common next steps: + +Configure your environment -> [Django project settings](./Django-project-settings.md) / [Frontend environment settings](./Frontend-environment-settings.md) + +Create a new Python corpus -> [Writing a corpus definition in Python](./Writing-a-corpus-definition-in-Python.md) + +Add SAML integration in your environment -> [SAML](./SAML.md) diff --git a/documentation/SAML.md b/documentation/SAML.md index 9df315044..cecaf1a3a 100644 --- a/documentation/SAML.md +++ b/documentation/SAML.md @@ -1,5 +1,9 @@ # SAML +Textcavator users can register an account directly, or sign in via Utrecht University (or a different connected identity provider). UU login is based on SAML. + +## How it works + In order to login with Solis ID, Textcavator has SAML integration with ITS. For this, it uses the [djangosaml2 library](https://djangosaml2.readthedocs.io/). 
More information on working with SAML, setting up a local environment to test the SAML integration, etc. can be found [here](https://github.com/UUDigitalHumanitieslab/dh-info/blob/master/SAML.md) The urls exposed by DjangoSaml2 are included as part of our `users` application, e.g., `/users/saml2/login`. DjangoSaml2 takes care of consuming the response from the Identity Provider and logging in the user. The `SAML_ATTRIBUTE_MAPPING` variable contains a dictionary of the data coming in from the identity provider, e.g., `uushortid`, and translating that to the corresponding column in the user table, e.g., `username`. Moreover, the setting `SAML_CREATE_UNKNOWN_USER = True` makes sure that we create a user in our database if it's not present yet. @@ -11,3 +15,23 @@ The only tweaks added on top of the DjangoSaml2 package are: ### Authorisation The setting [SAML_GROUP_NAME](/documentation/Django-project-settings.md#saml_group_name) can be used to control permissions for SAML users. + +## Developing with SAML + +SAML integration depends on the [`xmlsec` Python library](https://xmlsec.readthedocs.io/en/stable/), which requires additional libraries. The Docker environment also includes these. If you are not using Docker, follow the installation instructions in the `xmlsec` documentation. + +To use SAML login in a development environment, you can use the CDH [Development Identity Provider](https://centrefordigitalhumanities.github.io/Federated-Authentication-Docs/developmentidp/index.html). + +To use the development IdP, import the [SAML development settings](../backend/ianalyzer/settings_saml.py) in your `settings_local.py`: + +```python +from settings_saml import * +``` + +Then start up Textcavator. + +Now follow the README instructions in the Development IdP to run the application and register Textcavator as a service provider. You can find the metadata for Textcavator at `http://localhost:8000/users/saml2/metadata/`. 
+ +Notes: +- If you run the Development IdP in a Docker container, it may not be able to access your application at `localhost:8000`. In that case, just copy-paste the metadata XML. +- If the IdP gives an error because a `validUntil` attribute is missing, enter the metadata XML manually and add the following attribute to the root node: `validUntil="2100-01-01"`. From 525d40df9fbbf70f8fb1d8eb3c35792053f10134 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 15 Jan 2026 17:51:47 +0100 Subject: [PATCH 3/4] correct usage for settings_saml --- documentation/Django-project-settings.md | 2 ++ documentation/SAML.md | 8 +++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/documentation/Django-project-settings.md b/documentation/Django-project-settings.md index 15d64525f..72b9b0218 100644 --- a/documentation/Django-project-settings.md +++ b/documentation/Django-project-settings.md @@ -14,6 +14,8 @@ We keep different settings files to handle different environments. `settings_test.py` is used during unit tests. It imports everything configured in `settings.py`, but can add or override some settings. Note that you can also adjust settings for individual tests. +`settings_saml.py` can be used during development, to enable signing in using the CDH Development Identity Provider. It imports `settings.py` and adds extra configurations. See [SAML](./SAML.md). + ### Using a different settings module Django supports using a different settings module ([more about settings in Django](https://docs.djangoproject.com/en/5.0/topics/settings/)). 
diff --git a/documentation/SAML.md b/documentation/SAML.md index cecaf1a3a..4ca1911f8 100644 --- a/documentation/SAML.md +++ b/documentation/SAML.md @@ -22,14 +22,12 @@ SAML intergration depends on the [`xmlsec` Python library](https://xmlsec.readth To use SAML login in a development environment, you can use the CDH [Development Identity Provider](https://centrefordigitalhumanities.github.io/Federated-Authentication-Docs/developmentidp/index.html). -To use the development IdP, import the [SAML development settings](../backend/ianalyzer/settings_saml.py) in your `settings_local.py`: +To use the development IdP, run the backend with the [SAML development settings](../backend/ianalyzer/settings_saml.py) as its settings module. Start up Textcavator with: -```python -from settings_saml import * +```sh +yarn start-back --settings ianalyzer.settings_saml ``` -Then start up Textcavator. - Now follow the README instructions in the Development IdP to run the application and register Textcavator as a service provider. You can find the metadata for Textcavator at `http://localhost:8000/users/saml2/metadata/`. Notes: From c7d66ca0195ed4a1b55ac7bcf79bfb34bddcf0d8 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Fri, 30 Jan 2026 12:53:45 +0100 Subject: [PATCH 4/4] add review comments --- backend/ianalyzer/settings_saml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/ianalyzer/settings_saml.py b/backend/ianalyzer/settings_saml.py index 991cd72ab..29fae13a2 100644 --- a/backend/ianalyzer/settings_saml.py +++ b/backend/ianalyzer/settings_saml.py @@ -25,7 +25,7 @@ "uuShortID": ("username", ), "mail": ("email", ), "givenName": ("first_name", ), - "uuPrefixSn": ("last_name", ), + "uuPrefixedSn": ("last_name", ), "saml": ("save_saml_login", ), } @@ -43,6 +43,8 @@ # otherwise...without OID will be rejected 'allow_unknown_attributes': True, + 'valid_for': 365 * 24 * 10, # 10 years, for development + # this block states what services we provide 'service': { # we are just a lonely SP