爬虫
定义爬取目标(URL)
在浏览器网络部分能让我们看见真正需要的url

# Define the URL of the paginated data API; i is the page number being requested
url = f"https://www.mashangpa.com/api/problem-detail/1/data/?page={i}"
但大多数情况我们需要携带请求头
# Declare the request headers ---- sent together with the cookies below
# NOTE(review): the "sessionid" cookie is a hard-coded value; presumably it was copied from a
# logged-in browser session and will expire — substitute your own before running.
headers = {"accept": "*/*","accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6","priority": "u=1, i","referer": "https://www.mashangpa.com/problem-detail/1/","sec-ch-ua": "\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"","sec-ch-ua-mobile": "?0","sec-ch-ua-platform": "\"Windows\"","sec-fetch-dest": "empty","sec-fetch-mode": "cors","sec-fetch-site": "same-origin","user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0"
}
cookies = {"sessionid": "766fuslyt5ngd3nr6sfyluiili3hmfxn","Hm_lvt_0d2227abf9548feda3b9cb6fddee26c0": "1768642776,1768713959","HMACCOUNT": "48E38AF4922BDFCB","Hm_lpvt_0d2227abf9548feda3b9cb6fddee26c0": "1768714024"
}
这一步可以让工具帮我们自动生成:
https://spidertools.cn/#/curl2Request

复制bash请求放入工具里面即可拿到请求头格式
接下来就是发送请求了
# Send the GET request with the headers and cookies declared above
response = requests.get(url, headers=headers, cookies=cookies)
# Add the sum of this page's "current_array" numbers to the running total
sum_data += sum(response.json()["current_array"])
码上爬第一题exp
import requests

# Declare the request headers ---- sent together with the cookies below
headers = {
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "priority": "u=1, i",
    "referer": "https://www.mashangpa.com/problem-detail/1/",
    "sec-ch-ua": "\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0",
}
# NOTE(review): "sessionid" is a hard-coded value; substitute the cookie of your own session.
cookies = {
    "sessionid": "766fuslyt5ngd3nr6sfyluiili3hmfxn",
    "Hm_lvt_0d2227abf9548feda3b9cb6fddee26c0": "1768642776,1768713959",
    "HMACCOUNT": "48E38AF4922BDFCB",
    "Hm_lpvt_0d2227abf9548feda3b9cb6fddee26c0": "1768714024",
}

# Running total of every number returned by every page
sum_data = 0
for i in range(1, 21):
    # Define the URL for page i (pages 1 through 20)
    url = f"https://www.mashangpa.com/api/problem-detail/1/data/?page={i}"
    # Send the GET request; the timeout keeps a stalled connection from hanging the script
    response = requests.get(url, headers=headers, cookies=cookies, timeout=10)
    # Fail loudly on an HTTP error status instead of a confusing JSON/KeyError further down
    response.raise_for_status()
    sum_data += sum(response.json()["current_array"])

# Print the final result
print(sum_data)
第二题一样的
浏览器调试被禁用(无限debugger) lv3

打开开发者模式发现被禁用,观察该文件明白是无限debugger
// Function quoted from the target page: its whole body is a single `debugger` statement,
// so every call pauses execution while the developer tools are open.
(function anonymous(
) {
debugger
})
现在我们需要解决他

法一 一律不暂停
右键 debugger 所在的行号(这里是第 3 行),选择“一律不暂停”(Never pause here)

然后刷新发现没有成功,依旧空白,尝试法2
法二 本地替换js文件

我们跟进调用堆栈,看见了该debugger代码
而下面这块代码会在我们触发反爬检测后,把页面跳转到空白页:
window.location.href = "about:blank";
!function () {if (window.outerWidth - window.innerWidth > 210 || window.outerHeight - window.innerHeight > 210) {document.getElementsByTagName("body")[0].innerHTML = '检测到非法调试, 请关闭调试终端后刷新本页面重试!<br/>Welcome for People, Not Welcome for Machine!<br/>';window.location.href = "about:blank";}
那我们直接把反爬代码删掉就行了,我们要修改本地js文件
edge浏览器的替换方式

谷歌浏览器直接右键js代码,会有替换键

然后替换自己想要的一个文件夹,我这里创建了一个新文件夹叫js文件替换,然后右键要替换的文件,选择修改

这时候就进入了本地修改,有紫色标识,现在进行注释,并保存

刷新界面,成功越过

之后就是爬取文件和数字了
md5加密逆向分析 lv4

提示是sign加密了,我们查找sign跟进发现了这个文件

function loadPage(pageNumber) {const timestamp = new Date().getTime()window.token = window.md5("tuling" + timestamp + pageNumber)const params = {page: pageNumber,sign: window.token,_ts: timestamp,};
对sign打上断点,点击第二页查看我们找对没


md5值对上了,证明这段加密我们没找错
window.token = window.md5("tuling" + timestamp + pageNumber)
第一个值 "tuling" 是固定的盐值,第二个值是当前时间戳,第三个值是页码
const timestamp = new Date().getTime()
至此我们的加密逻辑出来了,我们自己组装一下
/**
 * Rebuild the site's page signature locally and print it.
 *
 * The signature is md5("tuling" + timestamp + pageNumber), mirroring the
 * expression found in the site's own loadPage. Requires `window.md5` to be
 * defined before this function is called.
 *
 * @param {number} pageNumber - page index to sign
 */
function loadPage(pageNumber) {
  const timestamp = new Date().getTime();
  const sign = window.md5("tuling" + timestamp + pageNumber);
  console.log(sign);
}
loadPage(1);
现在有个问题,window里面没有md5,所以我们还需要去找他写的加密逻辑
// Minified hash routine copied from the target site; it publishes its entry point as `window.md5`.
// `window = this` aliases whatever `this` is at the top level so the `window.md5` assignment has a target.
// NOTE(review): presumably relies on sloppy-mode `this` being the global object — confirm for your runtime.
// window.md5(n, r, t): when r is falsy the unkeyed path l(n) is used, otherwise the keyed path d(r, n);
// when t is falsy the result is passed through h(), which encodes each character as two hex digits.
// The constants appear to be the standard MD5 ones — TODO confirm against a reference implementation.
window = this, function (n) {function r(n, r) {var t = (65535 & n) + (65535 & r);return (n >> 16) + (r >> 16) + (t >> 16) << 16 | 65535 & t}function t(n, t, o, u, e, c) {return r(function (n, r) {return n << e | n >>> 32 - e}(r(r(t, n), r(u, c))), o)}function o(n, r, o, u, e, c, f) {return t(r & o | ~r & u, n, r, e, c, f)}function u(n, r, o, u, e, c, f) {return t(r & u | o & ~u, n, r, e, c, f)}function e(n, r, o, u, e, c, f) {return t(r ^ o ^ u, n, r, e, c, f)}function c(n, r, o, u, e, c, f) {return t(o ^ (r | ~u), n, r, e, c, f)}function f(n, t) {var f, i, a, h, g;n[t >> 5] |= 128 << t % 32, n[14 + (t + 64 >>> 9 << 4)] = t;var l = 1732584193, d = -271733879, v = -1732584194, C = 271733878;for (f = 0; f < n.length; f += 16) d = c(d = c(d = c(d = c(d = e(d = e(d = e(d = e(d = u(d = u(d = u(d = u(d = o(d = o(d = o(d = o(a = d, v = o(h = v, C = o(g = C, l = o(i = l, d, v, C, n[f], 7, -680876936), d, v, n[f + 1], 12, -389564586), l, d, n[f + 2], 17, 606105819), C, l, n[f + 3], 22, -1044525330), v = o(v, C = o(C, l = o(l, d, v, C, n[f + 4], 7, -176418897), d, v, n[f + 5], 12, 1200080426), l, d, n[f + 6], 17, -1473231341), C, l, n[f + 7], 22, -45705983), v = o(v, C = o(C, l = o(l, d, v, C, n[f + 8], 7, 1770035416), d, v, n[f + 9], 12, -1958414417), l, d, n[f + 10], 17, -42063), C, l, n[f + 11], 22, -1990404162), v = o(v, C = o(C, l = o(l, d, v, C, n[f + 12], 7, 1804603682), d, v, n[f + 13], 12, -40341101), l, d, n[f + 14], 17, -1502002290), C, l, n[f + 15], 22, 1236535329), v = u(v, C = u(C, l = u(l, d, v, C, n[f + 1], 5, -165796510), d, v, n[f + 6], 9, -1069501632), l, d, n[f + 11], 14, 643717713), C, l, n[f], 20, -373897302), v = u(v, C = u(C, l = u(l, d, v, C, n[f + 5], 5, -701558691), d, v, n[f + 10], 9, 38016083), l, d, n[f + 15], 14, -660478335), C, l, n[f + 4], 20, -405537848), v = u(v, C = u(C, l = u(l, d, v, C, n[f + 9], 5, 568446438), d, v, n[f + 14], 9, -1019803690), l, d, n[f + 3], 14, -187363961), C, l, n[f + 8], 20, 1163531501), v = u(v, C = u(C, l = u(l, 
d, v, C, n[f + 13], 5, -1444681467), d, v, n[f + 2], 9, -51403784), l, d, n[f + 7], 14, 1735328473), C, l, n[f + 12], 20, -1926607734), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 5], 4, -378558), d, v, n[f + 8], 11, -2022574463), l, d, n[f + 11], 16, 1839030562), C, l, n[f + 14], 23, -35309556), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 1], 4, -1530992060), d, v, n[f + 4], 11, 1272893353), l, d, n[f + 7], 16, -155497632), C, l, n[f + 10], 23, -1094730640), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 13], 4, 681279174), d, v, n[f], 11, -358537222), l, d, n[f + 3], 16, -722521979), C, l, n[f + 6], 23, 76029189), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 9], 4, -640364487), d, v, n[f + 12], 11, -421815835), l, d, n[f + 15], 16, 530742520), C, l, n[f + 2], 23, -995338651), v = c(v, C = c(C, l = c(l, d, v, C, n[f], 6, -198630844), d, v, n[f + 7], 10, 1126891415), l, d, n[f + 14], 15, -1416354905), C, l, n[f + 5], 21, -57434055), v = c(v, C = c(C, l = c(l, d, v, C, n[f + 12], 6, 1700485571), d, v, n[f + 3], 10, -1894986606), l, d, n[f + 10], 15, -1051523), C, l, n[f + 1], 21, -2054922799), v = c(v, C = c(C, l = c(l, d, v, C, n[f + 8], 6, 1873313359), d, v, n[f + 15], 10, -30611744), l, d, n[f + 6], 15, -1560198380), C, l, n[f + 13], 21, 1309151649), v = c(v, C = c(C, l = c(l, d, v, C, n[f + 4], 6, -145523070), d, v, n[f + 11], 10, -1120210379), l, d, n[f + 2], 15, 718787259), C, l, n[f + 9], 21, -343485551), l = r(l, i), d = r(d, a), v = r(v, h), C = r(C, g);return [l, d, v, C]}function i(n) {var r, t = "", o = 32 * n.length;for (r = 0; r < o; r += 8) t += String.fromCharCode(n[r >> 5] >>> r % 32 & 255);return t}function a(n) {var r, t = [];for (t[(n.length >> 2) - 1] = void 0, r = 0; r < t.length; r += 1) t[r] = 0;var o = 8 * n.length;for (r = 0; r < o; r += 8) t[r >> 5] |= (255 & n.charCodeAt(r / 8)) << r % 32;return t}function h(n) {var r, t, o = "0123456789abcdef", u = "";for (t = 0; t < n.length; t += 1) r = n.charCodeAt(t), u += o.charAt(r >>> 4 & 15) + o.charAt(15 & 
r);return u}function g(n) {return unescape(encodeURIComponent(n))}function l(n) {return function (n) {return i(f(a(n), 8 * n.length))}(g(n))}function d(n, r) {return function (n, r) {var t, o, u = a(n), e = [], c = [];for (e[15] = c[15] = void 0, 16 < u.length && (u = f(u, 8 * n.length)), t = 0; t < 16; t += 1) e[t] = 909522486 ^ u[t], c[t] = 1549556828 ^ u[t];return o = f(e.concat(a(r)), 512 + 8 * r.length), i(f(c.concat(o), 640))}(g(n), g(r))}window.md5 = function (n, r, t) {return r ? t ? d(r, n) : function (n, r) {return h(d(n, r))}(r, n) : t ? l(n) : function (n) {return h(l(n))}(n)}
}();
我要验牌:运行验证一下,牌没有问题

至此加密逆向完成
然后让 python 调用 js,拿到加密得到的 sign 和时间戳,带入请求参数访问接口,再照常爬取就行了
# Read the demo.js file
with open("demo.js", "r", encoding="utf-8") as f:
    js_code = f.read()
# Compile/load the JavaScript source string into a JS execution-context object and bind it to ctx
ctx = execjs.compile(js_code)
# ... (remaining setup such as headers, cookies and url is elided at this point in the notes)
for i in range(1, 21):
    # ctx.call invokes, from Python, a JavaScript function defined in the compiled js_code
    json_data = ctx.call("loadPage", i)
    params = {
        "page": i,
        "sign": json_data["sign"],
        "_ts": json_data["ts"],
    }
贴源码
demo.js
// demo.js — the site's minified MD5 routine plus a loadPage() helper that reproduces
// the request signature. Loaded from Python and called as loadPage(pageNumber).

// Alias the global object as `window` so the browser-oriented code below can attach `window.md5`.
// `var` is deliberate: a bare `window = this` is an implicit global that throws in strict mode /
// ES modules (where top-level `this` is undefined), while `let`/`const` would clash with the
// existing `window` binding if this file were ever loaded in a browser.
var window = typeof globalThis !== "undefined" ? globalThis : this;

// Minified MD5 implementation copied from the target site (function bodies left untouched).
// window.md5(n) returns the lowercase hex digest of the string n; a truthy second argument
// selects the keyed path d(r, n) and a truthy third argument skips the hex encoding h().
(function (n) {
function r(n, r) {var t = (65535 & n) + (65535 & r);return (n >> 16) + (r >> 16) + (t >> 16) << 16 | 65535 & t}
function t(n, t, o, u, e, c) {return r(function (n, r) {return n << e | n >>> 32 - e}(r(r(t, n), r(u, c))), o)}
function o(n, r, o, u, e, c, f) {return t(r & o | ~r & u, n, r, e, c, f)}
function u(n, r, o, u, e, c, f) {return t(r & u | o & ~u, n, r, e, c, f)}
function e(n, r, o, u, e, c, f) {return t(r ^ o ^ u, n, r, e, c, f)}
function c(n, r, o, u, e, c, f) {return t(o ^ (r | ~u), n, r, e, c, f)}
function f(n, t) {var f, i, a, h, g;n[t >> 5] |= 128 << t % 32, n[14 + (t + 64 >>> 9 << 4)] = t;var l = 1732584193, d = -271733879, v = -1732584194, C = 271733878;for (f = 0; f < n.length; f += 16) d = c(d = c(d = c(d = c(d = e(d = e(d = e(d = e(d = u(d = u(d = u(d = u(d = o(d = o(d = o(d = o(a = d, v = o(h = v, C = o(g = C, l = o(i = l, d, v, C, n[f], 7, -680876936), d, v, n[f + 1], 12, -389564586), l, d, n[f + 2], 17, 606105819), C, l, n[f + 3], 22, -1044525330), v = o(v, C = o(C, l = o(l, d, v, C, n[f + 4], 7, -176418897), d, v, n[f + 5], 12, 1200080426), l, d, n[f + 6], 17, -1473231341), C, l, n[f + 7], 22, -45705983), v = o(v, C = o(C, l = o(l, d, v, C, n[f + 8], 7, 1770035416), d, v, n[f + 9], 12, -1958414417), l, d, n[f + 10], 17, -42063), C, l, n[f + 11], 22, -1990404162), v = o(v, C = o(C, l = o(l, d, v, C, n[f + 12], 7, 1804603682), d, v, n[f + 13], 12, -40341101), l, d, n[f + 14], 17, -1502002290), C, l, n[f + 15], 22, 1236535329), v = u(v, C = u(C, l = u(l, d, v, C, n[f + 1], 5, -165796510), d, v, n[f + 6], 9, -1069501632), l, d, n[f + 11], 14, 643717713), C, l, n[f], 20, -373897302), v = u(v, C = u(C, l = u(l, d, v, C, n[f + 5], 5, -701558691), d, v, n[f + 10], 9, 38016083), l, d, n[f + 15], 14, -660478335), C, l, n[f + 4], 20, -405537848), v = u(v, C = u(C, l = u(l, d, v, C, n[f + 9], 5, 568446438), d, v, n[f + 14], 9, -1019803690), l, d, n[f + 3], 14, -187363961), C, l, n[f + 8], 20, 1163531501), v = u(v, C = u(C, l = u(l,
d, v, C, n[f + 13], 5, -1444681467), d, v, n[f + 2], 9, -51403784), l, d, n[f + 7], 14, 1735328473), C, l, n[f + 12], 20, -1926607734), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 5], 4, -378558), d, v, n[f + 8], 11, -2022574463), l, d, n[f + 11], 16, 1839030562), C, l, n[f + 14], 23, -35309556), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 1], 4, -1530992060), d, v, n[f + 4], 11, 1272893353), l, d, n[f + 7], 16, -155497632), C, l, n[f + 10], 23, -1094730640), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 13], 4, 681279174), d, v, n[f], 11, -358537222), l, d, n[f + 3], 16, -722521979), C, l, n[f + 6], 23, 76029189), v = e(v, C = e(C, l = e(l, d, v, C, n[f + 9], 4, -640364487), d, v, n[f + 12], 11, -421815835), l, d, n[f + 15], 16, 530742520), C, l, n[f + 2], 23, -995338651), v = c(v, C = c(C, l = c(l, d, v, C, n[f], 6, -198630844), d, v, n[f + 7], 10, 1126891415), l, d, n[f + 14], 15, -1416354905), C, l, n[f + 5], 21, -57434055), v = c(v, C = c(C, l = c(l, d, v, C, n[f + 12], 6, 1700485571), d, v, n[f + 3], 10, -1894986606), l, d, n[f + 10], 15, -1051523), C, l, n[f + 1], 21, -2054922799), v = c(v, C = c(C, l = c(l, d, v, C, n[f + 8], 6, 1873313359), d, v, n[f + 15], 10, -30611744), l, d, n[f + 6], 15, -1560198380), C, l, n[f + 13], 21, 1309151649), v = c(v, C = c(C, l = c(l, d, v, C, n[f + 4], 6, -145523070), d, v, n[f + 11], 10, -1120210379), l, d, n[f + 2], 15, 718787259), C, l, n[f + 9], 21, -343485551), l = r(l, i), d = r(d, a), v = r(v, h), C = r(C, g);return [l, d, v, C]}
function i(n) {var r, t = "", o = 32 * n.length;for (r = 0; r < o; r += 8) t += String.fromCharCode(n[r >> 5] >>> r % 32 & 255);return t}
function a(n) {var r, t = [];for (t[(n.length >> 2) - 1] = void 0, r = 0; r < t.length; r += 1) t[r] = 0;var o = 8 * n.length;for (r = 0; r < o; r += 8) t[r >> 5] |= (255 & n.charCodeAt(r / 8)) << r % 32;return t}
function h(n) {var r, t, o = "0123456789abcdef", u = "";for (t = 0; t < n.length; t += 1) r = n.charCodeAt(t), u += o.charAt(r >>> 4 & 15) + o.charAt(15 &
r);return u}
function g(n) {return unescape(encodeURIComponent(n))}
function l(n) {return function (n) {return i(f(a(n), 8 * n.length))}(g(n))}
function d(n, r) {return function (n, r) {var t, o, u = a(n), e = [], c = [];for (e[15] = c[15] = void 0, 16 < u.length && (u = f(u, 8 * n.length)), t = 0; t < 16; t += 1) e[t] = 909522486 ^ u[t], c[t] = 1549556828 ^ u[t];return o = f(e.concat(a(r)), 512 + 8 * r.length), i(f(c.concat(o), 640))}(g(n), g(r))}
window.md5 = function (n, r, t) {return r ? t ? d(r, n) : function (n, r) {return h(d(n, r))}(r, n) : t ? l(n) : function (n) {return h(l(n))}(n)}
})();

/**
 * Build the request signature for one page.
 *
 * The signature is md5("tuling" + timestamp + pageNumber), where the timestamp is the
 * current time in milliseconds. The same timestamp is returned so the caller can send
 * the exact value that was signed.
 *
 * @param {number} pageNumber - page index being requested
 * @returns {{sign: string, ts: number}} hex MD5 signature and the timestamp it was computed from
 */
function loadPage(pageNumber) {
  const timestamp = new Date().getTime();
  const sign = window.md5("tuling" + timestamp + pageNumber);
  return { sign: sign, ts: timestamp };
}
lv4.py
import requests
import execjs

# Read the demo.js file
with open("demo.js", "r", encoding="utf-8") as f:
    js_code = f.read()
# Compile/load the JavaScript source string into a JS execution-context object and bind it to ctx
ctx = execjs.compile(js_code)

headers = {
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.mashangpa.com/problem-detail/4/",
    "sec-ch-ua": "\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0",
}
# NOTE(review): "sessionid" is a hard-coded value; substitute the cookie of your own session.
cookies = {
    "sessionid": "766fuslyt5ngd3nr6sfyluiili3hmfxn",
    "Hm_lvt_0d2227abf9548feda3b9cb6fddee26c0": "1768642776,1768713959",
    "HMACCOUNT": "48E38AF4922BDFCB",
    "Hm_lpvt_0d2227abf9548feda3b9cb6fddee26c0": "1768720771",
}
url = "https://www.mashangpa.com/api/problem-detail/4/data/"

# Running total of every number returned by every page
sum_data = 0
# Loop over the page numbers, computing a fresh signature for each request
for i in range(1, 21):
    # Call loadPage(i) in the compiled JS; it returns {"sign": ..., "ts": ...}
    json_data = ctx.call("loadPage", i)
    params = {
        "page": i,
        "sign": json_data["sign"],
        "_ts": json_data["ts"],
    }
    # The timeout keeps a stalled connection from hanging the script
    response = requests.get(url, headers=headers, cookies=cookies, params=params, timeout=10)
    # Fail loudly on an HTTP error status instead of a confusing JSON/KeyError further down
    response.raise_for_status()
    sum_data += sum(response.json()["current_array"])
print(sum_data)