本帖最后由 MrCao 于 2024-8-28 08:59 编辑
贴个Java代码
[Java] 纯文本查看 复制代码
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads the Douban movie Top 250 listing page by page and prints, for every
 * movie found, its title, release year, rating and number of ratings to
 * standard output.
 */
public class DoubanTop250 {

    /** Number of listing pages to request. */
    private static final int PAGE_COUNT = 10;

    /** Number of movies per listing page; also the step of the {@code start} query parameter. */
    private static final int PAGE_SIZE = 25;

    /** Connect and read timeout, so a stalled server cannot hang the program forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36";

    /**
     * Matches one movie entry. Groups: 1 = first title span, 2 = four-digit year,
     * 3 = rating, 4 = number of ratings.
     *
     * <p>The year is anchored on the first run of four digits after {@code <br>}
     * (skipping any non-digit, non-tag characters) instead of on a trailing space,
     * so indentation or line breaks between {@code <br>} and the year do not
     * produce an empty capture.
     *
     * <p>The trailing literal is written with Unicode escapes (U+4EBA U+8BC4 U+4EF7)
     * so the pattern does not depend on the encoding used to compile this file.
     * Compiled once because it is identical for every page.
     */
    private static final Pattern MOVIE_PATTERN = Pattern.compile(
            "<div class=\"item\">.*?<span class=\"title\">(.*?)</span>"
                    + ".*?<br>[^0-9<]*(\\d{4}).*?<span class=\"rating_num\" property=\"v:average\">"
                    + "(.*?)</span>.*?<span>(.*?)\u4eba\u8bc4\u4ef7</span>",
            Pattern.DOTALL);

    /**
     * Entry point: fetches each listing page in order and prints the movies it contains.
     * Any failure is reported on standard error and stops the remaining pages.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            for (int pageIndex = 0; pageIndex < PAGE_COUNT; pageIndex++) {
                int start = pageIndex * PAGE_SIZE;
                String url = "https://movie.douban.com/top250?start=" + start + "&filter=";
                System.out.print(formatMovies(fetchPage(url)));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Performs a GET request and returns the response body decoded as UTF-8.
     *
     * @param url absolute URL to fetch
     * @return the response body, with lines joined by {@code '\n'}
     * @throws java.io.IOException if the request fails or the status is not 200
     */
    private static String fetchPage(String url) throws java.io.IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        try {
            // Request headers and limits must be set before the connection is used.
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);

            int status = connection.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                throw new java.io.IOException("Unexpected HTTP status " + status + " for " + url);
            }

            StringBuilder body = new StringBuilder();
            // try-with-resources closes the reader even when reading fails midway.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    // Keep a separator so tokens on adjacent lines are not glued together.
                    body.append(line).append('\n');
                }
            }
            return body.toString();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Extracts every movie entry from a listing page and renders it as the text
     * block that {@link #main} prints. Package-private (rather than private) so it
     * can be exercised without network access.
     *
     * @param html listing page markup
     * @return one five-line block per movie (name, year, score, review count,
     *         separator), each line terminated by the platform line separator;
     *         empty if no entry matches
     */
    static String formatMovies(String html) {
        String eol = System.lineSeparator();
        StringBuilder report = new StringBuilder();
        Matcher matcher = MOVIE_PATTERN.matcher(html);
        while (matcher.find()) {
            String name = matcher.group(1).trim();
            String year = matcher.group(2);
            String score = matcher.group(3).trim();
            String num = matcher.group(4).trim();
            report.append("Name: ").append(name).append(eol)
                    .append("Year: ").append(year).append(eol)
                    .append("Score: ").append(score).append(eol)
                    .append("Number of Reviews: ").append(num).append(eol)
                    .append("----------------------------").append(eol);
        }
        return report.toString();
    }
}
|