From 8b37e9d760b495153bb71e9153818e7638234467 Mon Sep 17 00:00:00 2001 From: Parker Higgins Date: Mon, 8 Jan 2024 22:32:21 -0500 Subject: [PATCH] mckinsey: less selector specificity but probably still enough (#162) McKinsey's class names have not been totally stable, but the link order seems to be. I think it's enough to grab the first link whose href starts with this path. --- xword_dl/downloader/mckinseydownloader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xword_dl/downloader/mckinseydownloader.py b/xword_dl/downloader/mckinseydownloader.py index 770559f..88c8b7b 100644 --- a/xword_dl/downloader/mckinseydownloader.py +++ b/xword_dl/downloader/mckinseydownloader.py @@ -43,8 +43,7 @@ def find_latest(self): index_res = requests.get(index_url) index_soup = BeautifulSoup(index_res.text, "html.parser") - latest_fragment = next(a for a in index_soup.select('a.mdc-c-link-heading[href^="/featured-insights/the-mckinsey-crossword/"]') - if a.find('div'))['href'] + latest_fragment = next(a for a in index_soup.select('a[href^="/featured-insights/the-mckinsey-crossword/"]') if a.find('div'))['href'] latest_absolute = urllib.parse.urljoin('https://www.mckinsey.com', latest_fragment)