"""Download faculty profile photos from the HKU Business School directory.

Walks listing pages 1 to 10, finds the first ``<img>`` inside every
``div.people-item`` element and saves the image bytes to
``profile_photos/photo_page<page>_<index>.jpg``.
"""
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# The last query parameter must be spelled "&region=hku". A previous copy of
# this URL had "&reg" decoded as an HTML entity (the registered-trademark
# sign), which silently dropped the region filter from the request.
base_url = (
    "https://www.hkubs.hku.hk/people/faculty"
    "?pg={}&staff_type=faculty&subject_area=all&track=professoriate&region=hku"
)

# Seconds to wait for the server before giving up on a single request;
# without a timeout a stalled connection would hang the script forever.
request_timeout = 30

os.makedirs('profile_photos', exist_ok=True)

for page in range(1, 11):
    url = base_url.format(page)
    print(f"Fetching page: {url}")

    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        # A network error on one page should not abort the remaining pages.
        print(f'Failed to retrieve the page. Error: {exc}')
        continue

    if response.status_code != 200:
        print(f'Failed to retrieve the page. Status code: {response.status_code}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    photo_divs = soup.find_all('div', class_='people-item')

    for idx, div in enumerate(photo_divs):
        img_tag = div.find('img')
        src = img_tag.get('src') if img_tag else None
        if not src:
            # No image (or an empty src) for this entry: nothing to download.
            continue

        # Resolve relative or protocol-relative paths against the page URL;
        # absolute URLs pass through urljoin unchanged.
        photo_url = urljoin(url, src)

        try:
            img_response = requests.get(photo_url, timeout=request_timeout)
        except requests.RequestException:
            print(f'Failed to download photo from {photo_url}')
            continue

        if img_response.status_code != 200:
            print(f'Failed to download photo from {photo_url}')
            continue

        # idx is zero-based; file names are numbered from 1 within each page.
        photo_filename = f'profile_photos/photo_page{page}_{idx + 1}.jpg'
        with open(photo_filename, 'wb') as f:
            f.write(img_response.content)
        print(f'Downloaded: {photo_filename}')