爬虫与反爬虫的斗争:可知阅读平台——第二弹

数次尝试直接读取那个找不到的rsa私钥,均以失败告终(js功力不够);又想hook相关函数,可惜还是不会写js...

最终还是放弃了直接读取变量找rsa private key和hook的想法,目光这时锁定在了jsencrypt.js上,为什么呢?既然它都是加密解密,就一定会调用这个js,为何不从这个js直接入手,使用override插入我们的修改版js呢?
说干就干!(无奈js真的不会,只能复制粘贴了)
这是Tampermonkey脚本,负责获取地址和加密后的解密key。

// ==UserScript==
// @name         keledge
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  try to take over the world!
// @author       You
// @match        *://m.keledge.com/*
// @grant        none
// ==/UserScript==

// Only "@grant none" is declared: the script calls no GM_* API, and it must
// write to the page's own `window` so that the patched jsencrypt.js can read
// `window.dnurl`. The @match pattern carries an explicit scheme wildcard.

// Not referenced anywhere else in this script as shown.
let allText = {};

/**
 * Wraps XMLHttpRequest.prototype.open so that every request whose URL contains
 * "authorize" gets a "load" listener. The listener parses the response body as
 * JSON and stores `Data.Url` in `window.dnurl`.
 *
 * The original script also carried a disabled variant that hooked URLs
 * containing "Detail" and stored `Data.Title` in `window.title`.
 *
 * @param {Function} open - the original XMLHttpRequest.prototype.open.
 */
(function (open) {
    XMLHttpRequest.prototype.open = function (method, url) {
        // String(): open() also accepts URL objects, which have no includes().
        if (String(url).includes("authorize")) {
            console.log("Authorize");
            this.addEventListener("load", function () {
                // A non-text responseType, a non-JSON body or a missing `Data`
                // field must not raise inside the page's own request handling.
                try {
                    const responseOBJ = JSON.parse(this.responseText);
                    window.dnurl = responseOBJ.Data.Url;
                } catch (err) {
                    console.error("keledge: could not read the authorize response", err);
                }
            }, false);
        }
        // Forward every argument untouched and hand back the original result.
        return open.apply(this, arguments);
    };
})(XMLHttpRequest.prototype.open);

这是jsencrypt.js的插入片段

    /**
     * Patched RSAKey#decrypt: decrypts as before and, when the page URL marks
     * the resource as an Epub, additionally saves a small JSON file holding
     * the title, the base64-encoded plaintext key and `window.dnurl`.
     *
     * The page URL is read positionally: the value of the 2nd query parameter
     * is used as the download filename and must end in "-Epub"; the value of
     * the 3rd query parameter is the title.
     *
     * An earlier variant (disabled in the original) fetched `window.dnurl`
     * directly and saved the blob as "<id>-<title>_<base64 key>.epub".
     *
     * @param {string} ctext - hex-encoded ciphertext.
     * @returns {?string} the unpadded plaintext, or null when doPrivate yields null.
     */
    RSAKey.prototype.decrypt = function (ctext) {
        var c = parseBigInt(ctext, 16);
        var m = this.doPrivate(c);
        if (m == null) {
            return null;
        }
        var s = pkcs1unpad2(m, (this.n.bitLength() + 7) >> 3);

        // The capture below is a side channel; any failure in it (short URL,
        // btoa rejecting non-Latin-1 text, ...) must not break decryption.
        try {
            var query = window.location.href.split("?")[1];
            var params = query ? query.split("&") : [];
            var resourceId = params[1] ? params[1].split("=")[1] : undefined;
            var rawTitle = params[2] ? params[2].split("=")[1] : undefined;

            // Skip when there is no plaintext to export.
            if (s != null && resourceId !== undefined &&
                    resourceId.split("-").reverse()[0] === "Epub") {
                var b = {
                    "title": rawTitle === undefined ? "" : decodeURI(rawTitle),
                    "key": window.btoa(s),
                    "url": window.dnurl
                };
                var text = JSON.stringify(b);
                var pom = document.createElement('a');
                pom.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text));
                pom.setAttribute('download', resourceId);

                // Trigger the download with a synthetic click when available.
                if (document.createEvent) {
                    var event = document.createEvent('MouseEvents');
                    event.initEvent('click', true, true);
                    pom.dispatchEvent(event);
                }
                else {
                    pom.click();
                }
            }
        } catch (err) {
            console.error("keledge: failed to export the decrypted key", err);
        }

        // Callers of decrypt still need the plaintext.
        return s;
    };

大概执行流程就是网页会先抓取authorize,被tampermonkey捕捉并得到url,接着会下载第一页资源,并调用jsencrypt.js解密,此时被我们修改的jsencrypt直接捕获解密明文key,再调用浏览器下载就万事大吉啦!
这时大部分的epub都能下载啦,但是有部分epub下载失败,pdf资源抓取相对麻烦,就懒得写啦~

标签: none

添加新评论