From af810d6d40e8082b7d1a0763df9eb69b1f5778b9 Mon Sep 17 00:00:00 2001 From: Mohamad Ali Nasser <38099584+mohamad-ali-nasser@users.noreply.github.com> Date: Fri, 23 Aug 2019 17:31:23 +0300 Subject: [PATCH] Google image downloader Straightforward code. --- part5 - get_images.py | 46 ++++++++----------------------------------- 1 file changed, 8 insertions(+), 38 deletions(-) diff --git a/part5 - get_images.py b/part5 - get_images.py index fadb2a2..a8a0c24 100644 --- a/part5 - get_images.py +++ b/part5 - get_images.py @@ -1,40 +1,10 @@ -import os -import urllib.request as ulib -from bs4 import BeautifulSoup as Soup -import json +""" +Download 300 images or whatever limit you set straight from google +""" -url_a = 'https://www.google.com/search?ei=1m7NWePfFYaGmQG51q7IBg&hl=en&q={}' -url_b = '\&tbm=isch&ved=0ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ&start={}' -url_c = '\&yv=2&vet=10ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ.1m7NWePfFYaGmQG51q7IBg' -url_d = '\.i&ijn=1&asearch=ichunk&async=_id:rg_s,_pms:s' -url_base = ''.join((url_a, url_b, url_c, url_d)) +from google_images_download import google_images_download -headers = {'User-Agent': 'Chrome/41.0.2228.0 Safari/537.36'} - - -def get_links(search_name): - search_name = search_name.replace(' ', '+') - url = url_base.format(search_name, 0) - request = ulib.Request(url, None, headers) - json_string = ulib.urlopen(request).read() - page = json.loads(json_string) - new_soup = Soup(page[1][1], 'lxml') - images = new_soup.find_all('img') - links = [image['src'] for image in images] - return links - - -def save_images(links, search_name): - directory = search_name.replace(' ', '_') - if not os.path.isdir(directory): - os.mkdir(directory) - - for i, link in enumerate(links): - savepath = os.path.join(directory, '{:06}.png'.format(i)) - ulib.urlretrieve(link, savepath) - - -if __name__ == '__main__': - search_name = 'fidget kid spinner toys' - links = get_links(search_name) - save_images(links, search_name) +response = 
google_images_download.googleimagesdownload() +arguments = {"keywords": "fidget spinners", "limit": 300, + "print_urls": True} +absolute_image_paths = response.download(arguments)