欢迎来到
银狐的个人博客

爬取Leetcode的每日一题(Java/Python)

抓取Leetcode的每日一题信息 思路一(发送GraphQL Query获取数据)

参考文章:https://www.cnblogs.com/ZhaoxiCheung/p/9333476.html

接口分析

主要的数据存在于graphql/接口中:

https://leetcode-cn.com/graphql/

首页热门题目接口

是否AC状态查看接口

每日一题接口

构造 GraphQL Query来获取信息

在Headers下的Request Payload中我们可以看到一个query字段,这是我们要构造的 GraphQL Query 的一个重要信息。

利用Postman来分析接口

我们并不一开始就用代码来获取题目信息,而是先利用 Postman 来看看如何获取题目信息。右键 Network 下的 graphql 文件—>Copy—>Copy as cURL(bash)

接着我们打开Postman,点击左上角File里的import,然后找到Raw text栏

将copy下来的cURL粘贴到Raw text中,点击continue,就可以在Postman中查看

在这之前遇到了一个小问题,把copy all as cURL看成了copy as cURL,导致在Postman中解析错误。

curl解析的结果如下:

从解析的结果看,和我们在Headers中看到的query字段类似,不过有一些细节需要更改。

当然,如果不想直接粘贴复制的 cURL,那么我们可以自己在 Postman 中写 Header 和 Body。需要注意的是,这里的 Content-Type 是 application/graphql,Body 中的 GraphQL 语句参照 Request Payload 中的 query 字段来构造。

利用Java的Jsoup和okhttp库来发送http请求和解析Json数据

package com.example.leetcode_card.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import okhttp3.*;
import org.jsoup.Connection;
import org.jsoup.Jsoup;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;

/**
 * Small helper for querying the LeetCode (CN) GraphQL endpoint.
 *
 * <p>Each call first performs a plain GET on a problem page to pick up cookies, then POSTs a
 * GraphQL document to {@code /graphql} with those cookies attached.
 */
public class GraphqlUtil {
    private static final String BASE_URL = "https://leetcode-cn.com";
    private static final String QUESTION_URL = "https://leetcode-cn.com/problems/two-sum/description/";
    private static final String GRAPHQL_URL = "https://leetcode-cn.com/graphql";

    private static final MediaType GRAPHQL_MEDIA_TYPE =
            MediaType.parse("application/graphql; charset=utf-8");

    /** GraphQL document for a single question; {@code %s} is the (escaped) title slug. */
    private static final String QUESTION_QUERY =
            "query{   question(titleSlug:\"%s\") {  questionId   translatedTitle    translatedContent    difficulty   }   }";

    /** GraphQL document that returns today's daily-question record. */
    private static final String TODAY_QUERY =
            "query questionOfToday { todayRecord { question { questionFrontendId questionTitleSlug __typename } lastSubmission { id __typename } date userStatus __typename }}";

    /** One client shared by all calls; redirects are not followed, as in the per-call clients it replaces. */
    private static final OkHttpClient CLIENT = new OkHttpClient.Builder()
            .followRedirects(false)
            .followSslRedirects(false)
            .build();

    public GraphqlUtil() {
    }

    /**
     * Fetches id, translated title, translated content and difficulty of one question.
     *
     * @param title the question's title slug (for example the value returned by {@link #getTitle()})
     * @return the raw response body with {@code \\uXXXX} escapes decoded, or {@code null} when the
     *         body is empty
     * @throws IOException if the cookie bootstrap or the GraphQL request fails
     * @throws NullPointerException if {@code title} is {@code null}
     */
    public static String getContent(String title) throws IOException {
        Objects.requireNonNull(title, "title");
        // Escape backslashes and quotes so the slug cannot break out of the GraphQL string literal.
        String safeTitle = title.replace("\\", "\\\\").replace("\"", "\\\"");
        String postBody = String.format(QUESTION_QUERY, safeTitle);
        // The JSON response carries Chinese text as \\uXXXX escapes; decode them for the caller.
        return unicodetoString(postGraphql(postBody));
    }

    /**
     * Fetches the title slug (English) of today's daily question; it is the input for
     * {@link #getContent(String)}.
     *
     * @return the {@code questionTitleSlug} of the first {@code todayRecord} entry
     * @throws IOException if the request fails or the response does not have the expected shape
     */
    public static String getTitle() throws IOException {
        // Parse the raw body: the JSON parser decodes \\uXXXX itself, and decoding beforehand
        // could turn an escaped quote into a real one and corrupt the document.
        JSONObject root = JSONObject.parseObject(postGraphql(TODAY_QUERY));
        JSONObject data = root == null ? null : root.getJSONObject("data");
        JSONArray records = data == null ? null : data.getJSONArray("todayRecord");
        if (records == null || records.isEmpty()) {
            throw new IOException("GraphQL response contains no data.todayRecord entries");
        }
        JSONObject question = records.getJSONObject(0).getJSONObject("question");
        if (question == null) {
            throw new IOException("GraphQL response contains no todayRecord[0].question");
        }
        return question.getString("questionTitleSlug");
    }

    /**
     * Sends one GraphQL document to the endpoint and returns the raw response body.
     *
     * @param postBody the GraphQL document to POST
     * @return the response body as a string
     * @throws IOException if a request fails or the bootstrap page does not set the token cookie
     */
    private static String postGraphql(String postBody) throws IOException {
        Connection.Response page = Jsoup.connect(QUESTION_URL)
                .method(Connection.Method.GET)
                .execute();
        // NOTE(review): the value sent as the CSRF token is read from the "aliyungf_tc" cookie,
        // not from a cookie named "csrftoken" - confirm this is what the server expects.
        String csrftoken = page.cookie("aliyungf_tc");
        String cfduid = page.cookie("__cfduid");
        if (csrftoken == null) {
            // A bare `assert` is skipped unless the JVM runs with -ea; fail explicitly instead of
            // handing a null header value to OkHttp.
            throw new IOException("No 'aliyungf_tc' cookie returned by " + QUESTION_URL);
        }

        StringBuilder cookie = new StringBuilder();
        if (cfduid != null) {
            cookie.append("__cfduid=").append(cfduid).append(";");
        }
        cookie.append("csrftoken=").append(csrftoken);

        Request request = new Request.Builder()
                .addHeader("Content-Type", "application/graphql")
                .addHeader("Referer", QUESTION_URL)
                .addHeader("Cookie", cookie.toString())
                .addHeader("x-csrftoken", csrftoken)
                .url(GRAPHQL_URL)
                .post(RequestBody.create(GRAPHQL_MEDIA_TYPE, postBody))
                .build();

        // Closing the Response releases the underlying connection.
        try (Response response = CLIENT.newCall(request).execute()) {
            ResponseBody body = response.body();
            if (body == null) {
                throw new IOException("Empty response from " + GRAPHQL_URL);
            }
            return body.string();
        }
    }

    /**
     * Replaces every {@code \\uXXXX} escape (backslash, 'u', four hex digits) with the character it
     * denotes. A {@code \\u} that is not followed by four hex digits is copied through unchanged.
     *
     * @param unicode the text to decode
     * @return the decoded text, or {@code null} when the input is {@code null} or empty
     */
    public static String unicodetoString(String unicode) {
        if (unicode == null || unicode.isEmpty()) {
            return null;
        }
        StringBuilder sb = new StringBuilder(unicode.length());
        int pos = 0;
        int i;
        while ((i = unicode.indexOf("\\u", pos)) != -1) {
            sb.append(unicode, pos, i);
            if (i + 6 <= unicode.length() && isHexQuad(unicode, i + 2)) {
                sb.append((char) Integer.parseInt(unicode.substring(i + 2, i + 6), 16));
                pos = i + 6;
            } else {
                // Always advance past the marker; otherwise a truncated escape near the end of
                // the input would be found again on every iteration.
                sb.append("\\u");
                pos = i + 2;
            }
        }
        sb.append(unicode, pos, unicode.length());
        return sb.toString();
    }

    /** Returns whether the four characters of {@code s} starting at {@code start} are hex digits. */
    private static boolean isHexQuad(String s, int start) {
        for (int k = start; k < start + 4; k++) {
            if (Character.digit(s.charAt(k), 16) == -1) {
                return false;
            }
        }
        return true;
    }
}

引入的maven库:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>LeetcodeSpider</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- HTML/HTTP client used for the initial page request that yields the cookies -->
        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>
        <!-- HTTP client used to POST the GraphQL query -->
        <!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>4.9.2</version>
        </dependency>
        <!-- JSON parser: the Java code imports com.alibaba.fastjson.JSONObject -->
        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/top.jfunc.common/converter -->
        <dependency>
            <groupId>top.jfunc.common</groupId>
            <artifactId>converter</artifactId>
            <version>1.8.0</version>
        </dependency>
    </dependencies>
</project>

思路二(利用python爬虫爬取GraphQL接口)

参考文章:https://blog.csdn.net/malloc_can/article/details/113004579

# coding=utf-8
"""Fetch today's LeetCode (CN) daily question and render it as an HTML fragment.

The script posts two GraphQL queries to leetcode-cn.com (first for today's
title slug, then for the full question data), optionally fetches a
"good morning" quote from a user-configured API, and assembles everything
into the ``htmlText`` string.
"""
from datetime import datetime
import requests
import json
import smtplib
from email.mime.text import MIMEText

base_url = 'https://leetcode-cn.com'

# Seconds to wait for each HTTP call; requests waits indefinitely without it.
REQUEST_TIMEOUT = 10

# URL of the morning-quote API (TianAPI; apply for a free key yourself).
# Left empty on purpose: the quote is skipped until this is filled in.
QUOTE_API_URL = ""

# Fetch the title slug (English) of today's daily question.
response = requests.post(base_url + "/graphql", json={
    "operationName": "questionOfToday",
    "variables": {},
    "query": "query questionOfToday { todayRecord {   question {     questionFrontendId     questionTitleSlug     __typename   }   lastSubmission {     id     __typename   }   date   userStatus   __typename }}"
}, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
leetcodeTitle = json.loads(response.text).get('data').get('todayRecord')[0].get("question").get('questionTitleSlug')

# Fetch all information about today's daily question.
url = base_url + "/problems/" + leetcodeTitle
question_query = (
    "query questionData($titleSlug: String!) {"
    "  question(titleSlug: $titleSlug) {"
    "    questionId    questionFrontendId    boundTopicId    title    titleSlug"
    "    content    translatedTitle    translatedContent    isPaidOnly    difficulty"
    "    likes    dislikes    isLiked    similarQuestions"
    "    contributors {      username      profileUrl      avatarUrl      __typename    }"
    "    langToValidPlayground"
    "    topicTags {      name      slug      translatedName      __typename    }"
    "    companyTagStats"
    "    codeSnippets {      lang      langSlug      code      __typename    }"
    "    stats    hints"
    "    solution {      id      canSeeDetail      __typename    }"
    "    status    sampleTestCase    metaData    judgerAvailable    judgeType"
    "    mysqlSchemas    enableRunCode    envInfo"
    "    book {      id      bookName      pressName      source      shortDescription"
    "      fullDescription      bookImgUrl      pressImgUrl      productUrl      __typename    }"
    "    isSubscribed    isDailyQuestion    dailyRecordStatus    editorType"
    "    ugcQuestionId    style    __typename"
    "  }"
    "}"
)
response = requests.post(base_url + "/graphql",
                         json={"operationName": "questionData",
                               "variables": {"titleSlug": leetcodeTitle},
                               "query": question_query},
                         timeout=REQUEST_TIMEOUT)
response.raise_for_status()

# Parse the JSON response and pick out the question object.
jsonText = json.loads(response.text).get('data').get("question")
# Question number (str() keeps the concatenation below safe if the field is missing)
no = str(jsonText.get('questionFrontendId'))
# Question title (Chinese)
leetcodeTitle = jsonText.get('translatedTitle') or ''
# Difficulty level
level = jsonText.get('difficulty') or ''
# Question body (HTML); may be absent, so fall back to an empty string
context = jsonText.get('translatedContent') or ''
# print(leetcodeTitle)
# print(context)
# print(level)
# print(no)

# Morning quote. Bound to its own name so the `json` module is not shadowed.
ana = ''
if QUOTE_API_URL:
    response = requests.get(QUOTE_API_URL, timeout=REQUEST_TIMEOUT)
    quote_json = json.loads(response.text)
    # Extract the quote text
    ana = quote_json.get('newslist')[0].get('content')

# Emoticon image link
face_url = 'http://wx3.sinaimg.cn/large/007hyfXLly1g0uj7x5jpaj301o02a0sw.jpg'
# Date the script started running (could be moved to a config file)
begin_time = datetime(2020, 12, 23)
# Number of days the script has been running
info = "<span style='color:cornflowerblue'>本脚本已运行{0}天</span>".format(
    (datetime.today() - begin_time).days)

# Render everything as HTML.
html_head = ''' <head>
        <meta charset=UTF-8>
        <link rel="stylesheet">
        <style>
            code {
                color: blue;
                font-size: larger;
            }
        </style>
        </link>
    </head>
    <body>
    <div> </B><BR></B><FONT
            style="FONT-SIZE: 12pt; FILTER: shadow(color=#af2dco); WIDTH: 100%; COLOR: #730404; LINE-HEIGHT: 100%; FONT-FAMILY: 华文行楷"
            size=6><span style="COLOR: cornflowerblue">早安语录:</span>'''

htmlText = (
    html_head + ana
    # The src and href attribute values are quoted on both sides.
    + '</FONT><img width="40px"  src="' + face_url + '"><div>\n'
    + '    <h3>Leetcode-每日一题</h3>\n'
    + '    <h4>' + no + '.' + leetcodeTitle + '.' + level + '</h4>'
    + context
    + '本题连接:<a href="' + url + '">' + url + '</a></div>'
    + info
)
赞(0) 打赏
版权声明:本文采用知识共享 署名4.0国际许可协议 [BY-NC-SA] 进行授权
文章名称:《爬取Leetcode的每日一题(Java/Python)》
文章链接:https://www.yinhu3.com/2302.html
本站资源仅供个人学习交流,请于下载后24小时内删除,不允许用于商业用途,否则法律问题自行承担。
如果文章侵犯到你的权益,请查看本站免责声明:《免责声明》

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

愿意请我喝杯矿泉水吗

支付宝扫一扫打赏