from bs4 import BeautifulSoup
import requests
# Scrape movie titles from the Douban Top 250 listing and write them, one per
# line, to ``douban250.txt`` in the current working directory.

# Browser-like User-Agent header sent with every request.
head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0"
}

# The listing is requested in pages via the ``start`` query parameter:
# start=0, 25, 50, ..., 225.
TOTAL_MOVIES = 250
PAGE_SIZE = 25

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would hang the script indefinitely.
REQUEST_TIMEOUT = 10

# The ``with`` statement closes the file on exit (including on error), so no
# explicit ``f.close()`` is needed.
with open('douban250.txt', 'w', encoding='utf-8') as f:
    for start_num in range(0, TOTAL_MOVIES, PAGE_SIZE):
        # Original author's note: this site is used here as a practice target
        # for learning web scraping.
        response = requests.get(
            f'https://movie.douban.com/top250?start={start_num}',
            headers=head,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on an HTTP error status instead of silently parsing an
        # error page and writing an incomplete list.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        for title in soup.find_all("span", attrs={"class": "title"}):
            title_string = title.string
            # ``.string`` is None when the span does not wrap exactly one text
            # node; skip those rather than crash on the ``in`` test below.
            if title_string is None:
                continue
            # Spans whose text contains "/" are skipped (presumably the
            # alternate-title spans on each entry -- confirm against the page).
            if "/" in title_string:
                continue
            f.write(title_string + '\n')
# Contributor: 陈文杰 (Chen Wenjie)