# ex_12_online_retrievehtml.py

# To run this, you can install BeautifulSoup
# https://pypi.python.org/pypi/beautifulsoup4

# Or download the file
# http://www.py4e.com/code3/bs4.zip
# and unzip it in the same directory as this file

import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl

# Ignore SSL certificate errors
# NOTE: this disables both hostname checking and certificate validation, so
# requests made with ctx accept any certificate the server presents.
ctx = ssl.create_default_context()
# Hostname checking must be switched off first: the ssl module rejects
# verify_mode = CERT_NONE while check_hostname is still enabled.
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

def select_link(hrefs, position, base_url):
    """Pick the URL to follow next from the links found on one page.

    hrefs is the list of href values of the page's anchor tags, in document
    order; an entry is None when the anchor has no href attribute.
    position is the 1-based index of the link to follow.
    base_url is the address of the page the links came from.

    Returns:
      * base_url unchanged when position is below 1 or the page has no links;
      * the link at position, or the last link when the page has fewer than
        position links, resolved against base_url so relative links work;
      * None when the chosen anchor has no href attribute.
    """
    if position < 1 or not hrefs:
        return base_url
    # Clamp to the last link when the page is shorter than requested.
    chosen = hrefs[min(position, len(hrefs)) - 1]
    if chosen is None:
        return None
    return urllib.parse.urljoin(base_url, chosen)


def main():
    """Prompt for a start URL, a hop count and a link position, then follow
    the link at that position from page to page, printing each URL found."""
    url = input('Enter -  ')
    count = int(input('Enter count: '))
    position = int(input('Enter position: '))

    for _ in range(count):
        # The with-block closes the connection as soon as the page is read.
        with urllib.request.urlopen(url, context=ctx) as response:
            html = response.read()
            # Use the post-redirect address as the base for relative links.
            page_url = response.geturl()
        soup = BeautifulSoup(html, 'html.parser')
        # Retrieve the href of every anchor tag (None when it is missing)
        hrefs = [tag.get('href', None) for tag in soup('a')]
        next_url = select_link(hrefs, position, page_url)
        if next_url is None:
            # Stop with a clear message instead of trying to open None.
            raise SystemExit('Link at position %d on %s has no href'
                             % (position, page_url))
        url = next_url
        print('Retrieving:', url)


if __name__ == '__main__':
    main()