From 762d683399a1d45896100413a1c93ab4b55e3da1 Mon Sep 17 00:00:00 2001
From: "jacobtm@torchbox.com" <jacobtoppm>
Date: Wed, 11 Sep 2019 08:26:17 +0000
Subject: [PATCH 1/4] Add import_redirects command to allow csv redirects
 import. Adapted from Ambition with additions: - links directly to URL if page
 object cannot be found - wrapped in transaction.atomic()

---
 .../management/commands/import_redirects.py   | 118 ++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 django-verdant/rca/management/commands/import_redirects.py
diff --git a/django-verdant/rca/management/commands/import_redirects.py b/django-verdant/rca/management/commands/import_redirects.py
new file mode 100644
index 000000000..e209e3f05
--- /dev/null
+++ b/django-verdant/rca/management/commands/import_redirects.py
@@ -0,0 +1,118 @@
+from csv import DictReader
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from django.utils.six.moves.urllib.parse import urlparse
+
+from wagtail.wagtailcore.models import Site, Page
+from wagtail.wagtailredirects.models import Redirect
+
+
+def get_page_from_path(path):
+    """ Takes a full url. Roughly reproduces wagtail.wagtailcore.views.serve.
+    """
+    parsed_path = urlparse(path)
+    try:
+        site = Site.objects.get(hostname=parsed_path.netloc)
+    except Site.DoesNotExist:
+        import pdb; pdb.set_trace()
+
+    path_components = [component for component in parsed_path.path.split('/')
+                       if component]
+    page = site.root_page
+    while path_components:
+        child_slug = path_components[0]
+        path_components = path_components[1:]
+        page = page.get_children().get(slug=child_slug)
+    return page
+
+
+class Command(BaseCommand):
+    help = ("Creates Wagtail redirects from a csv with a 'from' and 'to' "
+            "column, where entries are URLs with domains.")
+
+    def add_arguments(self, parser):
+        parser.add_argument('file_path', help="Path to a csv file")
+        parser.add_argument('--dry-run', action='store_true')
+        parser.add_argument('--from-header', default='from',
+                            help="Header for old-path column if not 'from'")
+        parser.add_argument('--to-header', default='to',
+                            help="Header for new-path column if not 'to'")
+
+    def handle(self, *args, **options):
+        file_path = options['file_path']
+        dry_run = options['dry_run']
+        from_header = options['from_header']
+        to_header = options['to_header']
+
+        updated_count = 0
+        created_count = 0
+        error_count = 0
+        local_netloc = True
+
+        with open(file_path, 'r') as f:
+            reader = DictReader(f)
+
+            for row in reader:
+                old_path = row[from_header]
+                new_path = row[to_header]
+
+                if old_path and new_path:
+
+                    # urlparse requires at least a '//' to avoid identifying the
+                    # domain as a path component
+                    if '//' not in old_path:
+                        old_path = '//' + old_path
+
+                    netloc = urlparse(old_path).netloc
+                    if not netloc:
+                        print("Line {} - No domain provided: {}".format(reader.line_num, old_path))
+                        continue
+
+                    try:
+                        old_site = Site.objects.get(hostname=netloc)
+                    except Site.DoesNotExist:
+                        print("Line {} - Site does not exist: {}".format(reader.line_num, netloc))
+                        error_count += 1
+                        continue
+
+                    normalised_path = Redirect.normalise_path(old_path)
+
+                    if len(normalised_path) > 255:
+                        print(
+                            "Line {} - 'From' path is too long ({} characters, maximum is 255)".format(
+                                reader.line_num, len(normalised_path))
+                        )
+                        error_count += 1
+                        continue
+
+                    # We don't use .get_or_create because we want to support the
+                    # --dry-run flag
+                    with transaction.atomic():
+                        try:
+                            redirect = Redirect.objects.get(site=old_site,
+                                                            old_path=normalised_path)
+                            updated_count += 1
+                        except Redirect.DoesNotExist:
+                            redirect = Redirect(site=old_site,
+                                                old_path=normalised_path)
+                            created_count += 1
+
+                        try:
+                            target_page = get_page_from_path(new_path) #optimally, get Page for redirect
+                            if not dry_run:
+                                redirect.redirect_page = target_page
+                                redirect.save()
+                        except Page.DoesNotExist:
+                            print("Line {} - Page does not exist: {}. Linking to URL.".format(reader.line_num, new_path))
+                            target_url = new_path #else link to URL directly
+                            if not dry_run:
+                                redirect.redirect_link = target_url
+                                redirect.save()
+                            continue
+
+        print("\n")
+        print("Created: {}".format(created_count))
+        print("Updated: {}".format(updated_count))
+        print("Errored (so no action taken): {}".format(error_count))
+        print("\nDone!")

From 1d607de7c762b7e0828d4bc6402bbbad75a6a9fe Mon Sep 17 00:00:00 2001
From: "jacobtm@torchbox.com" <jacobtoppm>
Date: Wed, 11 Sep 2019 14:54:32 +0000
Subject: [PATCH 2/4] Removed unused variable local_netloc

---
 django-verdant/rca/management/commands/import_redirects.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/django-verdant/rca/management/commands/import_redirects.py b/django-verdant/rca/management/commands/import_redirects.py
index e209e3f05..4d6d8e4db 100644
--- a/django-verdant/rca/management/commands/import_redirects.py
+++ b/django-verdant/rca/management/commands/import_redirects.py
@@ -48,7 +48,6 @@ def handle(self, *args, **options):
         updated_count = 0
         created_count = 0
         error_count = 0
-        local_netloc = True
 
         with open(file_path, 'r') as f:
             reader = DictReader(f)

From 111d0da38bbb78a9ca4ef01ff288181a647245e5 Mon Sep 17 00:00:00 2001
From: "jacobtm@torchbox.com" <jacobtoppm>
Date: Thu, 12 Sep 2019 15:53:30 +0000
Subject: [PATCH 3/4] Remove unnecessary try/except blocks, debugger reference,
 transaction.atomic()

---
 .../management/commands/import_redirects.py   | 113 +++++++++---------
 1 file changed, 57 insertions(+), 56 deletions(-)

diff --git a/django-verdant/rca/management/commands/import_redirects.py b/django-verdant/rca/management/commands/import_redirects.py
index 4d6d8e4db..8359d2530 100644
--- a/django-verdant/rca/management/commands/import_redirects.py
+++ b/django-verdant/rca/management/commands/import_redirects.py
@@ -12,10 +12,7 @@ def get_page_from_path(path):
     """ Takes a full url. Roughly reproduces wagtail.wagtailcore.views.serve.
     """
     parsed_path = urlparse(path)
-    try:
-        site = Site.objects.get(hostname=parsed_path.netloc)
-    except Site.DoesNotExist:
-        import pdb; pdb.set_trace()
+    site = Site.objects.get(hostname=parsed_path.netloc)
 
     path_components = [component for component in parsed_path.path.split('/')
                        if component]
@@ -53,62 +50,66 @@ def handle(self, *args, **options):
             reader = DictReader(f)
 
             for row in reader:
-                old_path = row[from_header]
-                new_path = row[to_header]
-
-                if old_path and new_path:
-
-                    # urlparse requires at least a '//' to avoid identifying the
-                    # domain as a path component
-                    if '//' not in old_path:
-                        old_path = '//' + old_path
-
-                    netloc = urlparse(old_path).netloc
-                    if not netloc:
-                        print("Line {} - No domain provided: {}".format(reader.line_num, old_path))
-                        continue
-
-                    try:
-                        old_site = Site.objects.get(hostname=netloc)
-                    except Site.DoesNotExist:
-                        print("Line {} - Site does not exist: {}".format(reader.line_num, netloc))
-                        error_count += 1
-                        continue
-
-                    normalised_path = Redirect.normalise_path(old_path)
-
-                    if len(normalised_path) > 255:
-                        print(
-                            "Line {} - 'From' path is too long ({} characters, maximum is 255)".format(
-                                reader.line_num, len(normalised_path))
+                old_path = row[from_header].strip()
+                new_path = row[to_header].strip()
+
+                if not old_path:
+                    continue
+
+                if not new_path:
+                    continue
+
+                # urlparse requires at least a '//' to avoid identifying the
+                # domain as a path component
+                if '//' not in old_path:
+                    old_path = '//' + old_path
+
+                netloc = urlparse(old_path).netloc
+                if not netloc:
+                    print("Line {} - No domain provided: {}".format(reader.line_num, old_path))
+                    continue
+
+                try:
+                    old_site = Site.objects.get(hostname=netloc)
+                except Site.DoesNotExist:
+                    print("Line {} - Site does not exist: {}".format(reader.line_num, netloc))
+                    error_count += 1
+                    continue
+
+                normalised_path = Redirect.normalise_path(old_path)
+
+                if len(normalised_path) > 255:
+                    print(
+                        "Line {} - 'From' path is too long ({} characters, maximum is 255)".format(
+                            reader.line_num, len(normalised_path))
                         )
-                        error_count += 1
-                        continue
+                    error_count += 1
+                    continue
 
                     # We don't use .get_or_create because we want to support the
                     # --dry-run flag
-                    with transaction.atomic():
-                        try:
-                            redirect = Redirect.objects.get(site=old_site,
-                                                            old_path=normalised_path)
-                            updated_count += 1
-                        except Redirect.DoesNotExist:
-                            redirect = Redirect(site=old_site,
-                                                old_path=normalised_path)
-                            created_count += 1
-
-                        try:
-                            target_page = get_page_from_path(new_path) #optimally, get Page for redirect
-                            if not dry_run:
-                                redirect.redirect_page = target_page
-                                redirect.save()
-                        except Page.DoesNotExist:
-                            print("Line {} - Page does not exist: {}. Linking to URL.".format(reader.line_num, new_path))
-                            target_url = new_path #else link to URL directly
-                            if not dry_run:
-                                redirect.redirect_link = target_url
-                                redirect.save()
-                            continue
+
+                try:
+                    redirect = Redirect.objects.get(site=old_site,
+                                                    old_path=normalised_path)
+                    updated_count += 1
+                except Redirect.DoesNotExist:
+                    redirect = Redirect(site=old_site,
+                                        old_path=normalised_path)
+                    created_count += 1
+
+                try:
+                    target_page = get_page_from_path(new_path) #optimally, get Page for redirect
+                    if not dry_run:
+                        redirect.redirect_page = target_page
+                        redirect.save()
+                except Page.DoesNotExist:
+                    print("Line {} - Page does not exist: {}. Linking to URL.".format(reader.line_num, new_path))
+                    target_url = new_path #else link to URL directly
+                    if not dry_run:
+                        redirect.redirect_link = target_url
+                        redirect.save()
+                        continue
 
         print("\n")
         print("Created: {}".format(created_count))

From 2aef907aded003f4ecc326e84528ae50c8edf5a4 Mon Sep 17 00:00:00 2001
From: "jacobtm@torchbox.com" <jacobtoppm>
Date: Thu, 12 Sep 2019 16:06:20 +0000
Subject: [PATCH 4/4] Remove unnecessary nesting

---
 .../rca/management/commands/import_redirects.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/django-verdant/rca/management/commands/import_redirects.py b/django-verdant/rca/management/commands/import_redirects.py
index 8359d2530..b713352ad 100644
--- a/django-verdant/rca/management/commands/import_redirects.py
+++ b/django-verdant/rca/management/commands/import_redirects.py
@@ -90,26 +90,21 @@ def handle(self, *args, **options):
                     # --dry-run flag
 
                 try:
-                    redirect = Redirect.objects.get(site=old_site,
-                                                    old_path=normalised_path)
+                    redirect = Redirect.objects.get(site=old_site, old_path=normalised_path)
                     updated_count += 1
                 except Redirect.DoesNotExist:
-                    redirect = Redirect(site=old_site,
-                                        old_path=normalised_path)
+                    redirect = Redirect(site=old_site, old_path=normalised_path)
                     created_count += 1
 
                 try:
                     target_page = get_page_from_path(new_path) #optimally, get Page for redirect
-                    if not dry_run:
-                        redirect.redirect_page = target_page
-                        redirect.save()
+                    redirect.redirect_page = target_page
                 except Page.DoesNotExist:
                     print("Line {} - Page does not exist: {}. Linking to URL.".format(reader.line_num, new_path))
                     target_url = new_path #else link to URL directly
-                    if not dry_run:
-                        redirect.redirect_link = target_url
-                        redirect.save()
-                        continue
+                    redirect.redirect_link = target_url
+                if not dry_run:
+                    redirect.save()
 
         print("\n")
         print("Created: {}".format(created_count))