豆瓣数据|南沪龙委

from bs4 import BeautifulSoup
import requests

# Browser-like User-Agent header sent with every request below.
head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0"
}

# Download the Douban Top 250 listing page by page and write the matching
# titles to douban250.txt, one per line (UTF-8). The file is closed by the
# `with` block, so no explicit close() is needed.
with open('douban250.txt', 'w', encoding='utf-8') as f:
    # The listing is requested through the `start` query parameter, which
    # advances from 0 to 225 in steps of 25.
    for start_num in range(0, 250, 25):
        # Original author's note: this site is meant for scraping practice.
        # NOTE(review): that claim cannot be verified from this file.
        response = requests.get(
            f'https://movie.douban.com/top250?start={start_num}',
            headers=head,
            timeout=10,  # fail instead of hanging forever on a stalled connection
        )
        # Stop on an HTTP error rather than silently parsing an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        for title in soup.find_all("span", attrs={"class": "title"}):
            title_string = title.string
            # `.string` is None when the span is empty or holds nested tags;
            # skip those instead of raising TypeError on the "/" check.
            if title_string is None:
                continue
            # Only titles without "/" are written; presumably the spans that
            # contain "/" are alternate titles -- confirm against the markup.
            if "/" not in title_string:
                f.write(title_string + '\n')

贡献者：陈文杰

凌烟阁办公室模型

欢迎来到凌烟阁网站

无我民仆