diff --git a/Dockerfile b/Dockerfile old mode 100755 new mode 100644 index 150ae78..6368ac0 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,53 @@ -FROM codeforafrica/ckan:2.8.3 +FROM ckan/ckan-dev:2.9 -EXPOSE 5000/tcp +# Install any extensions needed by your CKAN instance +# - Make sure to add the plugins to CKAN__PLUGINS in the .env file +# - Also make sure all provide all extra configuration options, either by: +# * Adding them to the .env file (check the ckanext-envvars syntax for env vars), or +# * Adding extra configuration scripts to /docker-entrypoint.d folder) to update +# the CKAN config file (ckan.ini) with the `ckan config-tool` command +# +# See README > Extending the base images for more details +# +# For instance: +### OpenAfrica ### +RUN pip3 install -e git+https://github.com/CodeForAfrica/ckanext-openafrica.git@ft/ui-changes#egg=ckanext-openafrica -WORKDIR / +### Datarequests +RUN pip3 install -e git+https://github.com/qld-gov-au/ckanext-datarequests.git#egg=ckanext-datarequests -ADD requirements.txt /requirements.txt -RUN pip install -q -r /requirements.txt && \ - pip install -q -r /src/ckanext-s3filestore/requirements.txt && \ - pip install -q -r /src/ckanext-harvest/pip-requirements.txt && \ - pip install -q -r /src/ckanext-dcat/requirements.txt +### Harvester ### +RUN pip3 install -e git+https://github.com/ckan/ckanext-harvest.git@master#egg=ckanext-harvest && \ + pip3 install -r ${APP_DIR}/src/ckanext-harvest/pip-requirements.txt -# RUN ln -s ./src/ckan/ckan/config/who.ini /who.ini -ADD ckan.ini /ckan.ini +## s3filestore +RUN pip3 install -e git+https://github.com/qld-gov-au/ckanext-s3filestore.git#egg=ckanext-s3filestore && \ + pip3 install -r ${APP_DIR}/src/ckanext-s3filestore/requirements.txt -ADD Procfile /Procfile +## ckan GoogleAnalytics +RUN pip3 install -e git+https://github.com/ckan/ckanext-googleanalytics.git#egg=ckanext-googleanalytics && \ + pip3 install -r ${APP_DIR}/src/ckanext-googleanalytics/requirements.txt -CMD ["gunicorn", 
"--workers", "3", "--worker-class", "gevent", "--paste", "ckan.ini", "-t", "600"] +## ckanext-showcase +RUN pip3 install -e git+https://github.com/ckan/ckanext-showcase.git#egg=ckanext-showcase && \ + pip3 install -r ${APP_DIR}/src/ckanext-showcase/requirements.txt + +# Clone the extension(s) your are writing for your own project in the `src` folder +# to get them mounted in this image at runtime +# COPY ckanext-openafrica/* {APP_DIR}/src/ckanext-openafrica/ +# RUN cd {APP_DIR}/src/ckanext-openafrica && python3 setup.py develop + +# Copy custom initialization scripts +COPY contrib/ckan/docker-entrypoint.d/* /docker-entrypoint.d/ + +# Apply any patches needed to CKAN core or any of the built extensions (not the +# runtime mounted ones) +# COPY patches ${APP_DIR}/patches + +# RUN for d in $APP_DIR/patches/*; do \ +# if [ -d $d ]; then \ +# for f in `ls $d/*.patch | sort -g`; do \ +# cd $SRC_DIR/`basename "$d"` && echo "$0: Applying patch $f to $SRC_DIR/`basename $d`"; patch -p1 < "$f" ; \ +# done ; \ +# fi ; \ +# done \ No newline at end of file diff --git a/Makefile b/Makefile index 88c6b8d..87ca0fd 100644 --- a/Makefile +++ b/Makefile @@ -24,14 +24,14 @@ issues-init: ckan: - docker build --no-cache --build-arg CKAN_VERSION=2.8.11 -t codeforafrica/ckan:latest -t codeforafrica/ckan:2.8.11 contrib/ckan + docker build --no-cache --build-arg CKAN_VERSION=2.9.0 -t codeforafrica/ckan:latest -t codeforafrica/ckan:2.9.0 contrib/ckan ckan-publish: docker push codeforafrica/ckan:latest - docker push codeforafrica/ckan:2.8.11 + docker push codeforafrica/ckan:2.9.0 solr: - docker build --no-cache --build-arg CKAN_VERSION=2.8.11 -t codeforafrica/ckan-solr:latest -t codeforafrica/ckan-solr:2.8.11 contrib/solr + docker build --no-cache --build-arg CKAN_VERSION=2.9.0 -t codeforafrica/ckan-solr:latest -t codeforafrica/ckan-solr:2.9.0 contrib/solr solr-publish: docker push codeforafrica/ckan-solr:latest diff --git a/Procfile b/Procfile index fc1331a..036ee7d 100644 --- a/Procfile +++ 
b/Procfile @@ -1,3 +1,5 @@ -web: gunicorn --workers 5 --worker-class gevent --paste ckan.ini -t 10800 -ckan_gather_consumer: paster --plugin=ckanext-harvest harvester gather_consumer --config=ckan.ini -ckan_fetch_consumer: paster --plugin=ckanext-harvest harvester fetch_consumer --config=ckan.ini +web: gunicorn --workers 5 --worker-class gevent --paste ckan.ini -t 10800 + +ckan_gather_consumer: ckan --config=ckan.ini harvester gather-consumer + +ckan_fetch_consumer: ckan --config=ckan.ini harvester fetch-consumer diff --git a/ckan.ini b/ckan.ini old mode 100755 new mode 100644 index 1d3d646..40c4363 --- a/ckan.ini +++ b/ckan.ini @@ -13,31 +13,33 @@ [DEFAULT] -# WARNING: *THIS SETTING MUST BE SET TO FALSE ON A PRODUCTION ENVIRONMENT* -debug = false - -[server:main] -use = egg:Paste#http -host = 0.0.0.0 -port = 5000 +# WARNING: *THIS SETTING MUST BE SET TO FALSE ON A PUBLIC ENVIRONMENT* +# With debug mode enabled, a visitor to your site could execute malicious commands. +debug = true [app:main] use = egg:ckan -full_stack = true + +## Development settings +ckan.devserver.host = localhost +ckan.devserver.port = 5000 + + +## Session settings cache_dir = /tmp/%(ckan.site_id)s/ beaker.session.key = ckan # This is the secret token that the beaker library uses to hash the cookie sent -# to the client. `paster make-config` generates a unique value for this each +# to the client. `ckan generate config` generates a unique value for this each # time it generates a config file. -beaker.session.secret = +beaker.session.secret=6QnZms_5F2tZO8oW3TYYO5ZzrJCYM6TYQxg9aP17QDo -# `paster make-config` generates a unique value for this each time it generates +# `ckan generate config` generates a unique value for this each time it generates # a config file.
-app_instance_uuid = +app_instance_uuid = 5e8919ec-96f2-46f9-867b-b3d0b699bb04 # repoze.who config -who.config_file = /src/ckan/ckan/config/who.ini +who.config_file = %(here)s/who.ini who.log_level = warning who.log_file = %(cache_dir)s/who_log.ini # Session timeout (user logged out after period of inactivity, in seconds). @@ -54,11 +56,16 @@ sqlalchemy.url = postgresql://ckan_default:pass@localhost/ckan_default ckan.datastore.default_fts_lang = english ckan.datastore.default_fts_index_method = gist + ## Site Settings ckan.site_url = #ckan.use_pylons_response_cleanup_middleware = true +# Default timeout for Requests +#ckan.requests.timeout = 10 + + ## Authorization Settings ckan.auth.anon_create_dataset = false @@ -66,12 +73,24 @@ ckan.auth.create_unowned_dataset = false ckan.auth.create_dataset_if_not_in_organization = false ckan.auth.user_create_groups = false ckan.auth.user_create_organizations = false -ckan.auth.user_delete_groups = false -ckan.auth.user_delete_organizations = false +ckan.auth.user_delete_groups = true +ckan.auth.user_delete_organizations = true ckan.auth.create_user_via_api = false -ckan.auth.create_user_via_web = true +ckan.auth.create_user_via_web = false ckan.auth.roles_that_cascade_to_sub_groups = admin +ckan.auth.public_user_details = true +ckan.auth.public_activity_stream_detail = true +ckan.auth.allow_dataset_collaborators = false +ckan.auth.create_default_api_keys = false + +## API Token Settings +api_token.nbytes = 60 +api_token.jwt.encode.secret=string:M_BXUHlhDEJZnlkC29My8pgjXJS6SDsy127KTkG1Y1Q +api_token.jwt.decode.secret=string:M_BXUHlhDEJZnlkC29My8pgjXJS6SDsy127KTkG1Y1Q +api_token.jwt.algorithm = HS256 +## API Token: expire_api_token plugin +expire_api_token.default_lifetime = 3600 ## Search Settings @@ -89,7 +108,7 @@ ckan.site_id = default # If cors.origin_allow_all is true, all origins are allowed. # If false, the cors.origin_whitelist is used. 
-ckan.cors.origin_allow_all = true +# ckan.cors.origin_allow_all = true # cors.origin_whitelist is a space separated list of allowed domains. # ckan.cors.origin_whitelist = http://example1.com http://example2.com @@ -100,12 +119,11 @@ ckan.cors.origin_allow_all = true # Add ``datapusher`` to enable DataPusher # Add ``resource_proxy`` to enable resorce proxying and get around the # same origin policy - -ckan.plugins = stats text_view image_view recline_view recline_grid_view gdoc_view resource_proxy harvest ckan_harvester s3filestore openafrica datarequests showcase datapusher datastore officedocs_view pdf_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface structured_data issues sentry envvars +ckan.plugins = envvars image_view text_view recline_view datastore datapusher openafrica s3filestore datarequests # Define which views should be created by default # (plugins must be loaded in ckan.plugins) -ckan.views.default_views = image_view text_view recline_view recline_grid_view pdf_view gdoc_view +ckan.views.default_views = image_view text_view recline_view # Customize which text formats the text_view plugin will show #ckan.preview.json_formats = json @@ -116,6 +134,7 @@ ckan.views.default_views = image_view text_view recline_view recline_grid_view p #ckan.preview.image_formats = png jpeg jpg gif ## Front-End Settings + ckan.site_title = openAFRICA ckan.site_logo = /base/images/ckan-logo.png ckan.site_description = "Africa's largest independent source for open data" @@ -128,7 +147,6 @@ ckan.display_timezone = server # package_hide_extras = for_search_index_only #package_edit_return_url = http://another.frontend/dataset/ #package_new_return_url = http://another.frontend/dataset/ -#ckan.recaptcha.version = 1 #ckan.recaptcha.publickey = #ckan.recaptcha.privatekey = #licenses_group_url = http://licenses.opendefinition.org/licenses/groups/ckan.json @@ -154,6 +172,11 @@ ckan.feeds.author_link = #ckan.max_resource_size = 10 #ckan.max_image_size = 2 +## 
Webassets Settings +#ckan.webassets.use_x_sendfile = false +#ckan.webassets.path = /var/lib/ckan/webassets + + ## Datapusher settings # Make sure you have set up the DataStore @@ -167,6 +190,8 @@ ckan.datapusher.assume_task_stale_after = 3600 #ckan.resource_proxy.max_file_size = 1048576 # Size of chunks to read/write. #ckan.resource_proxy.chunk_size = 4096 +# Default timeout for fetching proxied items +#ckan.resource_proxy.timeout = 10 ## Activity Streams Settings @@ -177,8 +202,8 @@ ckan.datapusher.assume_task_stale_after = 3600 ckan.hide_activity_from_users = %(ckan.site_id)s # Bootstrap -ckan.base_public_folder = public-bs2 -ckan.base_templates_folder = templates-bs2 +ckan.base_public_folder = public +ckan.base_templates_folder = templates ## Email settings @@ -189,24 +214,14 @@ ckan.base_templates_folder = templates-bs2 #smtp.user = username@example.com #smtp.password = your_password #smtp.mail_from = +#smtp.reply_to = - -## Harvester settings -ckan.harvest.mq.type = redis -ckan.harvest.log_scope = 0 -ckan.harvest.log_level = info - -# Datastore settings -ckan.datastore.write_url = postgresql://ckan_default:pass@localhost/datastore_default -ckan.datastore.read_url = postgresql://datastore_default:pass@localhost/datastore_default - -# Sentry settings -sentry.configure_logging = True -sentry.log_level = WARN +## Background Job Settings +ckan.jobs.timeout = 180 ## Logging configuration [loggers] -keys = root, ckan, ckanext +keys = root, ckan, ckanext, werkzeug [handlers] keys = console @@ -218,6 +233,12 @@ keys = generic level = WARNING handlers = console +[logger_werkzeug] +level = WARNING +handlers = console +qualname = werkzeug +propagate = 0 + [logger_ckan] level = INFO handlers = console diff --git a/contrib/ckan/Dockerfile b/contrib/ckan/Dockerfile index 3f64506..0b5b5be 100644 --- a/contrib/ckan/Dockerfile +++ b/contrib/ckan/Dockerfile @@ -1,14 +1,53 @@ -FROM python:2.7 +FROM ckan/ckan-dev:2.9 -ENV DEBIAN_FRONTEND noninteractive +# Install any extensions 
needed by your CKAN instance +# - Make sure to add the plugins to CKAN__PLUGINS in the .env file +# - Also make sure all provide all extra configuration options, either by: +# * Adding them to the .env file (check the ckanext-envvars syntax for env vars), or +# * Adding extra configuration scripts to /docker-entrypoint.d folder) to update +# the CKAN config file (ckan.ini) with the `ckan config-tool` command +# +# See README > Extending the base images for more details +# +# For instance: +# +### OpenAfrica ### +RUN pip3 install -e git+https://github.com/CodeForAfrica/ckanext-openafrica.git@ft/upgrade-ckan-2.9#egg=ckanext-openafrica -ARG CKAN_VERSION=2.8.11 +### XLoader ### +#RUN pip3 install -e 'git+https://github.com/ckan/ckanext-xloader.git@master#egg=ckanext-xloader' && \ +# pip3 install -r ${APP_DIR}/src/ckanext-xloader/requirements.txt && \ +# pip3 install -U requests[security] -RUN pip install -U -q pip setuptools +### Harvester ### +#RUN pip3 install -e 'git+https://github.com/ckan/ckanext-harvest.git@master#egg=ckanext-harvest' && \ +# pip3 install -r ${APP_DIR}/src/ckanext-harvest/pip-requirements.txt +# will also require gather_consumer and fetch_consumer processes running (please see https://github.com/ckan/ckanext-harvest) -RUN pip install -q -e "git+https://github.com/ckan/ckan.git@ckan-${CKAN_VERSION}#egg=ckan" +### Scheming ### +#RUN pip3 install -e 'git+https://github.com/ckan/ckanext-scheming.git@master#egg=ckanext-scheming' -ADD meta.py /src/ckan/ckan/model/meta.py +### Pages ### +#RUN pip3 install -e git+https://github.com/ckan/ckanext-pages.git#egg=ckanext-pages -RUN pip install -q -r /src/ckan/requirements.txt -RUN pip install -q vdm==0.15 sqlalchemy==1.2.19 +### DCAT ### +#RUN pip3 install -e git+https://github.com/ckan/ckanext-dcat.git@v0.0.6#egg=ckanext-dcat && \ +# pip3 install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/v0.0.6/requirements.txt + +# Clone the extension(s) your are writing for your own project in the `src` folder 
+# to get them mounted in this image at runtime + +# Copy custom initialization scripts +COPY docker-entrypoint.d/* /docker-entrypoint.d/ + +# Apply any patches needed to CKAN core or any of the built extensions (not the +# runtime mounted ones) +# COPY patches ${APP_DIR}/patches + +# RUN for d in $APP_DIR/patches/*; do \ +# if [ -d $d ]; then \ +# for f in `ls $d/*.patch | sort -g`; do \ +# cd $SRC_DIR/`basename "$d"` && echo "$0: Applying patch $f to $SRC_DIR/`basename $d`"; patch -p1 < "$f" ; \ +# done ; \ +# fi ; \ +# done \ No newline at end of file diff --git a/contrib/ckan/docker-entrypoint.d/customize_ckan.sh b/contrib/ckan/docker-entrypoint.d/customize_ckan.sh new file mode 100644 index 0000000..d1bb4dc --- /dev/null +++ b/contrib/ckan/docker-entrypoint.d/customize_ckan.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +#Setup CKAN Datarequests (Init DB) +if [[ $CKAN__PLUGINS == *"datarequests"* ]]; then + ckan -c /srv/app/ckan.ini datarequests init-db +else + echo "Datarequests extension not available" +fi + +if [[ $CKAN__PLUGINS == *"openafrica"* ]]; then + ckan config-tool ${CKAN_INI} -s app:main -e ckan.site_title="openAFRICA" + ckan config-tool ${CKAN_INI} -s app:main -e "ckan.site_description=Africa's largest independent source for open data" +else + echo "Not appliyng OpenAfrica customizations" +fi \ No newline at end of file diff --git a/contrib/ckan/meta.py b/contrib/ckan/meta.py deleted file mode 100644 index 0b89c75..0000000 --- a/contrib/ckan/meta.py +++ /dev/null @@ -1,147 +0,0 @@ -# encoding: utf-8 - -import datetime - -from paste.deploy.converters import asbool -from ckan.common import config -"""SQLAlchemy Metadata and Session object""" -from sqlalchemy import MetaData, and_ -import sqlalchemy.orm as orm -from sqlalchemy.orm.session import SessionExtension - -import extension -import ckan.lib.activity_streams_session_extension as activity - -__all__ = ['Session', 'engine_is_sqlite', 'engine_is_pg'] - - -class CkanCacheExtension(SessionExtension): - '''
This extension checks what tables have been affected by - database access and allows us to act on them. Currently this is - used by the page cache to flush the cache when data in the database - is altered. ''' - - def __init__(self, *args, **kw): - super(CkanCacheExtension, self).__init__(*args, **kw) - - def after_commit(self, session): - if hasattr(session, '_object_cache'): - oc = session._object_cache - oc_list = oc['new'] - oc_list.update(oc['changed']) - oc_list.update(oc['deleted']) - objs = set() - for item in oc_list: - objs.add(item.__class__.__name__) - - -class CkanSessionExtension(SessionExtension): - - def before_flush(self, session, flush_context, instances): - if not hasattr(session, '_object_cache'): - session._object_cache= {'new': set(), - 'deleted': set(), - 'changed': set()} - - changed = [obj for obj in session.dirty if - session.is_modified(obj, include_collections=False, passive=True)] - - session._object_cache['new'].update(session.new) - session._object_cache['deleted'].update(session.deleted) - session._object_cache['changed'].update(changed) - - - def before_commit(self, session): - session.flush() - try: - obj_cache = session._object_cache - revision = session.revision - except AttributeError: - return - if getattr(session, 'revisioning_disabled', False): - return - new = obj_cache['new'] - changed = obj_cache['changed'] - deleted = obj_cache['deleted'] - for obj in new | changed | deleted: - if not hasattr(obj, '__revision_class__'): - continue - revision_cls = obj.__revision_class__ - revision_table = orm.class_mapper(revision_cls).mapped_table - ## when a normal active transaction happens - - ### this is an sql statement as we do not want it in object cache - session.execute( - revision_table.update().where( - and_(revision_table.c.id == obj.id, - revision_table.c.current == True) - ).values(current=False) - ) - - q = session.query(revision_cls) - q = q.filter_by(expired_timestamp=datetime.datetime(9999, 12, 31), id=obj.id) - results 
= q.all() - for rev_obj in results: - values = {} - if rev_obj.revision_id == revision.id: - values['revision_timestamp'] = revision.timestamp - else: - values['expired_timestamp'] = revision.timestamp - session.execute( - revision_table.update().where( - and_(revision_table.c.id == rev_obj.id, - revision_table.c.revision_id == rev_obj.revision_id) - ).values(**values) - ) - - def after_commit(self, session): - if hasattr(session, '_object_cache'): - del session._object_cache - - def after_rollback(self, session): - if hasattr(session, '_object_cache'): - del session._object_cache - -# __all__ = ['Session', 'engine', 'metadata', 'mapper'] - -# SQLAlchemy database engine. Updated by model.init_model() -engine = None - -Session = orm.scoped_session(orm.sessionmaker( - autoflush=False, - autocommit=False, - expire_on_commit=False, - extension=[CkanCacheExtension(), - CkanSessionExtension(), - extension.PluginSessionExtension(), - activity.DatasetActivitySessionExtension()], -)) - -create_local_session = orm.sessionmaker( - autoflush=False, - autocommit=False, - expire_on_commit=False, - extension=[CkanCacheExtension(), - CkanSessionExtension(), - extension.PluginSessionExtension(), - activity.DatasetActivitySessionExtension()], -) - -#mapper = Session.mapper -mapper = orm.mapper - -# Global metadata. If you have multiple databases with overlapping table -# names, you'll need a metadata for each database -metadata = MetaData() - - -def engine_is_sqlite(sa_engine=None): - # Returns true iff the engine is connected to a sqlite database. - return (sa_engine or engine).url.drivername == 'sqlite' - - -def engine_is_pg(sa_engine=None): - # Returns true iff the engine is connected to a postgresql database. 
- # According to http://docs.sqlalchemy.org/en/latest/core/engines.html#postgresql - # all Postgres driver names start with `postgres` - return (sa_engine or engine).url.drivername.startswith('postgres') diff --git a/contrib/solr/Dockerfile b/contrib/solr/Dockerfile index f73cfd9..3b59ac8 100644 --- a/contrib/solr/Dockerfile +++ b/contrib/solr/Dockerfile @@ -3,7 +3,7 @@ MAINTAINER Open Knowledge ## Taken from https://github.com/ckan/ckan/blob/ckan-2.7.0/contrib/docker/solr/Dockerfile # Updated the ARG values -ARG CKAN_VERSION=2.8.11 +ARG CKAN_VERSION=2.9.0 # Enviroment ENV SOLR_CORE ckan diff --git a/docker-compose.yml b/docker-compose.yml index ef05cb1..e084595 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,8 +14,7 @@ services: env_file: - .env volumes: - - ./ckan.ini:/ckan.ini - - ckan-filestore:/var/lib/ckan/default + - ./ckanext-openafrica/:/srv/app/src/ckanext-openafrica/ ckan_gather_consumer: build: . @@ -25,16 +24,16 @@ services: - redis env_file: - .env - command: paster --plugin=ckanext-harvest harvester gather_consumer --config=ckan.ini + command: ckan --config=ckan.ini harvester gather-consumer volumes: - - ./ckan.ini:/ckan.ini + # - ./ckan.ini:/ckan.ini - ckan-filestore:/var/lib/ckan/default - datapusher: - build: contrib/ckan-datapusher - depends_on: - - web + container_name: datapusher + image: clementmouchet/datapusher + ports: + - "8800:8800" db: image: postgres:13.7 @@ -51,7 +50,7 @@ services: - ./contrib/postgres:/docker-entrypoint-initdb.d solr: - image: codeforafrica/ckan-solr:2.8.11 + image: ckan/ckan-solr:2.9 ports: - "8983:8983" volumes: diff --git a/requirements.txt b/requirements.txt index 0d049fa..e49415e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,15 @@ -gunicorn==19.7.1 -gevent==1.2.2 +gunicorn==19.7.0 +gevent==20.9.0 +cryptography +pyOpenSSL + # CKAN Extensions --e git+https://github.com/CodeForAfrica/ckanext-openafrica.git#egg=ckanext-openafrica +-e
git+https://github.com/CodeForAfrica/ckanext-openafrica.git@ft/ui-changes#egg=ckanext-openafrica +# -e git+https://github.com/CodeForAfrica/ckanext-openafrica.git#egg=ckanext-openafrica + +# -e git+https://github.com/CodeForAfrica/ckanext-openafrica.git#egg=ckanext-openafrica -e git+https://github.com/okfn/ckanext-envvars.git@0.0.1#egg=ckanext-envvars -e git+https://github.com/okfn/ckanext-s3filestore.git@v0.1.1#egg=ckanext-s3filestore