Skip to content

Splitting resource and dataset stats #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: Tests
on: [push, pull_request]
on: [push]
jobs:
test:
runs-on: ubuntu-latest
Expand Down
16 changes: 16 additions & 0 deletions ckanext/api_tracking/actions/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from ckanext.api_tracking.queries.api import (
get_all_token_usage,
get_most_accessed_dataset_with_token,
get_most_accessed_resource_with_token,
get_most_accessed_token,
)

Expand All @@ -21,6 +22,21 @@ def most_accessed_dataset_with_token(context, data_dict):
return data


@toolkit.side_effect_free
def most_accessed_resource_with_token(context, data_dict):
    """Return the most accessed resources (accessed with a token).

    Runs the 'most_accessed_resource_with_token' access check first, then
    delegates to get_most_accessed_resource_with_token.

    Params in data_dict:
        limit: int, default 10
    """
    toolkit.check_access('most_accessed_resource_with_token', context, data_dict)
    # Fall back to 10 when the caller did not send a limit
    requested_limit = data_dict.get('limit', 10)
    return get_most_accessed_resource_with_token(limit=requested_limit)


@toolkit.side_effect_free
def most_accessed_token(context, data_dict):
""" Get most accessed token
Expand Down
4 changes: 4 additions & 0 deletions ckanext/api_tracking/auth/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ def most_accessed_dataset_with_token_csv(context, data_dict):
return {'success': False}


def most_accessed_resource_with_token_csv(context, data_dict):
    """Auth function for the most-accessed-resource CSV.

    Always reports failure, whatever `context` and `data_dict` contain.
    NOTE(review): presumably this leaves access to sysadmins only - confirm
    against how CKAN applies auth functions.
    """
    denied = {'success': False}
    return denied


def most_accessed_token_csv(context, data_dict):
    """Auth function for the most-accessed-token CSV.

    Always reports failure, whatever `context` and `data_dict` contain.
    NOTE(review): presumably this leaves access to sysadmins only - confirm
    against how CKAN applies auth functions.
    """
    denied = {'success': False}
    return denied

Expand Down
4 changes: 4 additions & 0 deletions ckanext/api_tracking/auth/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ def most_accessed_dataset_with_token(context, data_dict={}):
return {'success': False}


def most_accessed_resource_with_token(context, data_dict=None):
    """Auth function for the most_accessed_resource_with_token action.

    Always reports failure, whatever `context` and `data_dict` contain.
    NOTE(review): presumably this leaves access to sysadmins only - confirm
    against how CKAN applies auth functions.

    `data_dict` is optional and defaults to None rather than a shared mutable
    dict, so no state can leak between calls.
    """
    return {'success': False}


def most_accessed_token(context, data_dict):
    """Auth function for the most_accessed_token action.

    Always reports failure, whatever `context` and `data_dict` contain.
    NOTE(review): presumably this leaves access to sysadmins only - confirm
    against how CKAN applies auth functions.
    """
    denied = {'success': False}
    return denied

Expand Down
13 changes: 13 additions & 0 deletions ckanext/api_tracking/blueprints/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
all_token_usage_data,
most_accessed_token_data,
most_accessed_dataset_with_token_data,
most_accessed_resource_with_token_data,
)


Expand Down Expand Up @@ -52,6 +53,18 @@ def most_accessed_dataset_with_token_csv():
)


@tracking_csv_blueprint.route('/most-accessed-resource-with-token.csv', methods=["GET"])
def most_accessed_resource_with_token_csv():
    """ Get most accessed (using an API token) resources as a CSV response """

    # NOTE(review): _csv_response is defined outside this view; the arguments
    # look like (auth function name, data function, data function kwargs,
    # download file name) - confirm against its definition.
    return _csv_response(
        'most_accessed_resource_with_token_csv',
        most_accessed_resource_with_token_data,
        {'limit': 10},
        # Spelling fixed ("resoure" -> "resource") so it matches the route
        'most-accessed-resource-with-token.csv',
    )


@tracking_csv_blueprint.route('/most-accessed-token.csv', methods=["GET"])
def most_accessed_token_csv():
""" Get most accessed tokens """
Expand Down
1 change: 1 addition & 0 deletions ckanext/api_tracking/blueprints/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def api_token_usage_aggregated():
usage = get_api_token_usage_aggregated(limit=50)
extra_vars = {
'by_dataset': usage['by_dataset'],
'by_resource': usage['by_resource'],
'by_token_name': usage['by_token_name'],
'active': 'api-aggregated',
'links': usage['links'],
Expand Down
6 changes: 6 additions & 0 deletions ckanext/api_tracking/dashboard/stats_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
all_token_usage_data,
most_accessed_token_data,
most_accessed_dataset_with_token_data,
most_accessed_resource_with_token_data,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -35,17 +36,22 @@ def get_latest_api_token_usage(limit=50):
def get_api_token_usage_aggregated(limit=50):
""" Aggregate the API token usage """
log.debug('Getting aggregated api token usage')
download_by_resource_csv = toolkit.url_for('tracking_csv.most_accessed_resource_with_token_csv')
download_by_dataset_csv = toolkit.url_for('tracking_csv.most_accessed_dataset_with_token_csv')
download_by_token_csv = toolkit.url_for('tracking_csv.most_accessed_token_csv')
json_by_resource = toolkit.url_for('api.action', ver=3, logic_function='most_accessed_resource_with_token')
json_by_dataset = toolkit.url_for('api.action', ver=3, logic_function='most_accessed_dataset_with_token')
json_by_token = toolkit.url_for('api.action', ver=3, logic_function='most_accessed_token')
ret = {
'links': {
'download_by_resource_csv': download_by_resource_csv,
'download_by_dataset_csv': download_by_dataset_csv,
'download_by_token_csv': download_by_token_csv,
'json_by_resource': json_by_resource,
'json_by_dataset': json_by_dataset,
'json_by_token': json_by_token,
},
'by_resource': most_accessed_resource_with_token_data(limit=limit),
'by_dataset': most_accessed_dataset_with_token_data(limit=limit),
'by_token_name': most_accessed_token_data(limit=limit),
}
Expand Down
3 changes: 3 additions & 0 deletions ckanext/api_tracking/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def get_auth_functions(self):
"all_token_usage_csv": auth_csv.all_token_usage_csv,
"most_accessed_dataset_with_token": auth_queries.most_accessed_dataset_with_token,
"most_accessed_dataset_with_token_csv": auth_csv.most_accessed_dataset_with_token_csv,
"most_accessed_resource_with_token": auth_queries.most_accessed_resource_with_token,
"most_accessed_resource_with_token_csv": auth_csv.most_accessed_resource_with_token_csv,
"most_accessed_token": auth_queries.most_accessed_token,
"most_accessed_token_csv": auth_csv.most_accessed_token_csv,
}
Expand All @@ -60,6 +62,7 @@ def get_actions(self):
return {
"all_token_usage": action_queries.all_token_usage,
"most_accessed_dataset_with_token": action_queries.most_accessed_dataset_with_token,
"most_accessed_resource_with_token": action_queries.most_accessed_resource_with_token,
"most_accessed_token": action_queries.most_accessed_token,
}

Expand Down
20 changes: 20 additions & 0 deletions ckanext/api_tracking/queries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,26 @@
from ckanext.api_tracking.models import TrackingUsage


def get_most_accessed_resource_with_token(limit=10):
    """
    Get most accessed resources with token

    Counts TrackingUsage rows per object_id, keeping only rows whose
    object_type is 'resource' and whose object_id and token_name are set.
    Returns the query result ordered by that count ('total'), highest first,
    capped at `limit` rows.

    """
    access_count = func.count(TrackingUsage.object_id).label('total')
    resource_with_token = (
        TrackingUsage.object_id.isnot(None),
        TrackingUsage.token_name.isnot(None),
        TrackingUsage.object_type == 'resource',
    )

    ranked = model.Session.query(TrackingUsage.object_id, access_count)
    ranked = ranked.filter(*resource_with_token)
    ranked = ranked.group_by(TrackingUsage.object_id)
    ranked = ranked.order_by(desc('total'))
    ranked = ranked.limit(limit)

    return ranked.all()


def get_most_accessed_dataset_with_token(limit=10):
"""
Get most accessed datasets with token
Expand Down
1 change: 1 addition & 0 deletions ckanext/api_tracking/queries/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

from ckanext.api_tracking.queries.data.all import all_token_usage_data
from ckanext.api_tracking.queries.data.dataset import most_accessed_dataset_with_token_data
from ckanext.api_tracking.queries.data.resource import most_accessed_resource_with_token_data
from ckanext.api_tracking.queries.data.token import most_accessed_token_data
66 changes: 66 additions & 0 deletions ckanext/api_tracking/queries/data/resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Post-processed data after the DB queries and before the CSV generation
"""

import logging
from ckan import model
from ckan.plugins import toolkit

from ckanext.api_tracking.queries.api import get_most_accessed_resource_with_token


log = logging.getLogger(__name__)


def most_accessed_resource_with_token_data(limit=10):
    """Build one row per most-accessed resource, with package and org details.

    Takes the rows from get_most_accessed_resource_with_token and looks up the
    resource, its package and the package's owner organization for each one.

    Params:
        limit: passed through to get_most_accessed_resource_with_token.

    Returns:
        A list of dicts with the keys resource_id, resource_title,
        resource_url, package_id, package_title, package_url,
        organization_title, organization_url, organization_id and total.
        Every value except resource_id and total is None when the matching
        object cannot be found (e.g. the resource no longer exists).
    """
    data = get_most_accessed_resource_with_token(limit=limit)

    rows = []
    # Per-call caches so a package/organization shared by several resources
    # is only fetched once.
    known_packages = {}
    known_orgs = {}
    for row in data:
        object_id = row['object_id']
        # Reset every derived field for each row so nothing leaks from the
        # previous iteration when a lookup fails.
        obj_title = None
        object_url = None
        package_id = None
        package_title = None
        package_url = None
        org_title = None
        org_url = None
        org_id = None

        package = None
        obj = model.Resource.get(object_id)
        if obj:
            obj_title = obj.name if obj.name else f'Resource ID {obj.id}'
            package_id = obj.package_id
            if package_id not in known_packages:
                known_packages[package_id] = model.Package.get(package_id)
            package = known_packages[package_id]

        if package:
            package_name = package.name
            object_url = toolkit.url_for(
                'dataset_resource.read',
                id=package_name,
                resource_id=object_id,
                qualified=True,
            )
            package_title = package.title or package.name
            package_url = toolkit.url_for('dataset.read', id=package_name, qualified=True)

            owner_org = package.owner_org
            if owner_org not in known_orgs:
                # A package without an owner has no organization to look up
                known_orgs[owner_org] = model.Group.get(owner_org) if owner_org else None
            org = known_orgs[owner_org]
            if org:
                org_id = org.id
                org_title = org.title
                org_url = toolkit.url_for('organization.read', id=org.name, qualified=True)

        rows.append({
            'resource_id': object_id,
            'resource_title': obj_title,
            'resource_url': object_url,
            'package_id': package_id,
            'package_title': package_title,
            'package_url': package_url,
            'organization_title': org_title,
            'organization_url': org_url,
            'organization_id': org_id,
            'total': row['total'],
        })

    return rows
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,49 @@

<section id="stats-latest-api" class="module-content tab-content active">
  {# API token usage per resource; rows come from the `by_resource` variable.
     NOTE(review): this id is also used by the "by dataset" section that follows
     in this template - confirm whether anything depends on the id before making
     them unique. #}
  <h2>{{ _('API token usage by data file') }}</h2>
  <p>
    <a class="btn btn-primary" href="{{ links.download_by_resource_csv }}">{{ _('Download as CSV') }}</a>
    <a class="btn btn-primary" href="{{ links.json_by_resource }}" target="_blank">{{ _('View API') }}</a>
  </p>
  <table class="table table-chunky table-bordered table-striped">
    <thead>
      <tr>
        <th>{{ _("Resource") }}</th>
        <th>{{ _("Total") }}</th>
        <th>{{ _("Dataset") }}</th>
        <th>{{ _("Organization") }}</th>
      </tr>
    </thead>
    <tbody>
      {% for row in by_resource %}
      <tr>
        <th>
          {% if row.resource_title %}
          <a href="{{ row.resource_url }}">{{ row.resource_title }}</a>
          {% else %}
          {# No title means the resource lookup failed; show a translatable placeholder #}
          <span title="{{ _('Resource ID {id} (probably deleted)').format(id=row.resource_id) }}"><small>{{ _('resource probably deleted') }}</small></span>
          {% endif %}
        </th>
        <td>{{ row.total }}</td>
        <td>
          {% if row.package_url %}
          <a href="{{ row.package_url }}">{{ row.package_title }}</a>
          {% endif %}
        </td>
        <td>
          {% if row.organization_url %}
          <a href="{{ row.organization_url }}">{{ row.organization_title }}</a>
          {% endif %}
        </td>
      </tr>
      {% endfor %}
    </tbody>
  </table>

</section>

<section id="stats-latest-api" class="module-content tab-content active">
<h2>{{ _('API token usage by dataset') }}</h2>
<p>
<a class="btn btn-primary" href="{{ links.download_by_dataset_csv }}">{{ _('Download as CSV') }}</a>
<a class="btn btn-primary" href="{{ links.json_by_dataset }}" target="_blank">{{ _('View API') }}</a>
Expand Down
6 changes: 6 additions & 0 deletions ckanext/api_tracking/tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ class TrackingUsageAPIDataset(TrackingUsageF):
object_type = 'dataset'


class TrackingUsageAPIResource(TrackingUsageF):
    """Test factory preset for an API 'show' tracking record on a resource."""
    # Only these three fields are overridden; everything else comes from
    # TrackingUsageF.
    tracking_type = 'api'
    tracking_sub_type = 'show'
    object_type = 'resource'


class TrackingUsageAPIResourceDownload(TrackingUsageF):
tracking_type = 'api'
tracking_sub_type = 'download'
Expand Down
Loading
Loading