Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

WIP: Add import_redirects command to allow csv redirects import #204

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions django-verdant/rca/management/commands/import_redirects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from csv import DictReader

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils.six.moves.urllib.parse import urlparse

from wagtail.wagtailcore.models import Site, Page
from wagtail.wagtailredirects.models import Redirect


def get_page_from_path(path):
""" Takes a full url. Roughly reproduces wagtail.wagtailcore.views.serve.
"""
parsed_path = urlparse(path)
site = Site.objects.get(hostname=parsed_path.netloc)

path_components = [component for component in parsed_path.path.split('/')
if component]
page = site.root_page
while path_components:
child_slug = path_components[0]
path_components = path_components[1:]
page = page.get_children().get(slug=child_slug)
return page


class Command(BaseCommand):
help = "Creates Wagtail redirects from a csv with a 'from' and 'to' "
"column, where entries are URLs with domains."

def add_arguments(self, parser):
parser.add_argument('file_path', help="Path to a csv file")
parser.add_argument('--dry-run', action='store_true')
parser.add_argument('--from-header', default='from',
help="Header for old-path column if not 'from'")
parser.add_argument('--to-header', default='to',
help="Header for new-path column if not 'to'")

def handle(self, *args, **options):
file_path = options['file_path']
dry_run = options['dry_run']
from_header = options['from_header']
to_header = options['to_header']

updated_count = 0
created_count = 0
error_count = 0

with open(file_path, 'r') as f:
reader = DictReader(f)

for row in reader:
old_path = row[from_header].strip()
new_path = row[to_header].strip()

if not old_path:
continue

if not new_path:
continue

# urlparse requires at least a '//' to avoid identifying the
# domain as a path component
if '//' not in old_path:
old_path = '//' + old_path

netloc = urlparse(old_path).netloc
if not netloc:
print("Line {} - No domain provided: {}".format(reader.line_num, old_path))
continue

try:
old_site = Site.objects.get(hostname=netloc)
except Site.DoesNotExist:
print("Line {} - Site does not exist: {}".format(reader.line_num, netloc))
error_count += 1
continue

normalised_path = Redirect.normalise_path(old_path)

if len(normalised_path) > 255:
print(
"Line {} - 'From' path is too long ({} characters, maximum is 255)".format(
reader.line_num, len(normalised_path))
)
error_count += 1
continue

# We don't use .get_or_create because we want to support the
# --dry-run flag

try:
redirect = Redirect.objects.get(site=old_site, old_path=normalised_path)
updated_count += 1
except Redirect.DoesNotExist:
redirect = Redirect(site=old_site, old_path=normalised_path)
created_count += 1

try:
target_page = get_page_from_path(new_path) #optimally, get Page for redirect
redirect.redirect_page = target_page
except Page.DoesNotExist:
print("Line {} - Page does not exist: {}. Linking to URL.".format(reader.line_num, new_path))
target_url = new_path #else link to URL directly
redirect.redirect_link = target_url
if not dry_run:
redirect.save()

print("\n")
print("Created: {}".format(created_count))
print("Updated: {}".format(updated_count))
print("Errored (so no action taken): {}".format(error_count))
print("\nDone!")