3

拷贝漫画获取章节API JavaScript加密逆向分析

 3 years ago
source link: https://blog.skyju.cc/post/copymanga-chapter-reverse-engineering/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

拷贝漫画获取章节API JavaScript加密逆向分析

技术

拷贝漫画获取章节API JavaScript加密逆向分析

拷贝漫画(copymanga)网页端获取漫画章节使用JavaScript API异步请求数据,相关请求代码经过混淆、返回数据经过加密。通过逆向分析,发现数据使用AES加密。于是编写脚本在本地解密数据。

Sep 06, 2022   By  居正

阅读时长: 7 分钟

近期对拷贝漫画(copymanga)进行数据分析,发现网页端获取漫画章节使用JavaScript API异步请求数据,相关请求代码经过混淆、返回数据经过加密。因此进行逆向分析。

示例网址:https://copymanga.site/comic/nizaonanlema

f76bece839c29c61.png

首先,查看网页源代码发现漫画简介等元数据在HTML代码中,而如上图所示的章节列表则没有。抓包分析,发现漫画章节应该是这个URL返回的:

https://copymanga.site/comicdetail/nizaonanlema/chapters

a31904fd0b663fbf.png

直接访问,很明显results是数据,经过加密。

抓包发现另一处js请求:

https://hi77-overseas.mangafuna.xyz/static/websitefree/js20190704/comic_detail_pass20210918.js

返回的内容是一大堆混淆过的js代码:

90097ce192308ec0.png

该代码整体是一个eval函数,具备直接可执行性。在浏览器控制台执行效果如下:

17bb76783c9eeea3.png

执行后,界面出现章节详细列表,可确定这段代码就是需要解密的代码。

反混淆JS

将代码粘贴入jsnice在线工具:http://jsnice.org/

  • jsnice 是反混淆利器之一,可以将混淆后的代码以更加友好的方式展示,从而提升代码的可读性;
  • jsnice 在元素关系的建立上大部分来自于 AST 语法树,同时采用了概率图模型进行推理联想,通过样本学习推测出未混淆 JS 脚本的概率图。

得到的代码结构如下(关键部分已写注释):

'use strict';
/** @type {!Array} */
var _0x4a5d = ["datetime_created", "exports", "Utf8", "undefined", "Base64", "innerText", "Module", "success", "Hex", "readyState", "symbol", "table-default-title", "innerHTML", "querySelector", "hasOwnProperty", "Pkcs7", "enc", "onclick", "type", "headers", "function", '<a href="/comic/', "splice", '"><li>', "removeChild", "last_chapter", "POST", "decrypt", "mode", "results", "iterator", '" target="_blank" title="', "</li></a>", "tab-content", "tab-pane fade"/* remaining entries omitted... */];
// Declares _0x4a5d, the table of keywords that the rest of the script references indirectly
// Rotates the keyword table in place: every pass moves the first element to the end.
(function(data, i) {
    /**
     * Performs (isLE - 1) rotations, because the counter is decremented before each test.
     * @param {number} isLE
     * @return {undefined}
     */
    var write = function(isLE) {
        for (; --isLE;) {
            data["push"](data["shift"]());
        }
    };
    write(++i); // i arrives as 440, so 441 is passed and 440 rotations run
})(_0x4a5d, 440);// Post-processes (re-orders) the indirectly referenced keyword table
/**
 * Returns the entry of the keyword table _0x4a5d at the given index.
 * @param {string} i  index; callers in this script pass hex strings such as "0x33"
 * @param {?} parameter1  not used by this body
 * @return {?}
 */
var _0x2f1f = function(i, parameter1) {
    /** @type {number} */
    i = i - 0; // subtracting 0 coerces the hex string to a number
    var oembedView = _0x4a5d[i];
    return oembedView;
};// Allows keywords to be referenced indirectly through hexadecimal string indices
// 省略...
// Immediately invoked with all_probs. The trailing comments give the keyword each
// lookup resolves to in the rotated table captured from the browser console.
var itemData = function(prob_list) {
    // Keywords are referenced indirectly through _0x2f1f
    var value = ems[_0x2f1f("0x33")][_0x2f1f("0x2b")]["parse"](prob_list); // enc.Hex.parse
    var minyMin = ems[_0x2f1f("0x33")][_0x2f1f("0x27")]["stringify"](value); // enc.Base64.stringify
    return ems[_0x2f1f("0x11")][_0x2f1f("0x3e")](minyMin, artistTrack, { // AES.decrypt
        "iv" : iv,
        "mode" : ems[_0x2f1f("0x3f")]["CBC"], // mode.CBC
        "padding" : ems["pad"][_0x2f1f("0x32")] // pad.Pkcs7
    })[_0x2f1f("0x2")](ems["enc"][_0x2f1f("0x25")])[_0x2f1f("0x2")](); // toString(enc.Utf8).toString()
}(all_probs);

找到如上函数,传入的参数中包括iv、mode、padding等键名,推测是使用了AES加密处理数据。

但代码中很多关键词都使用_0x2f1f函数间接调用表示,这其实是一种针对名称的混淆。我们需要把这个混淆还原。

上面将_0x4a5d数组进行了二次处理,之后写回原变量。而原变量使用var声明,泄露在了全局命名空间里。因此可以直接在浏览器控制台获取到经过二次处理后的_0x4a5d数组:

alert(JSON.stringify(_0x4a5d))

544b94d0d8a7e4ae.png

编写Node.js代码如下:

const fs = require("fs")
// Keyword table as captured from the browser console, i.e. after the script's own rotation step has run
let _0x4a5d = ["setAttribute","計算時間","toString","groups","nav nav-tabs","constructor","className","defineProperty","/comic/","table-default","path_word","setRequestHeader","<span>更新內容:</span><a href=\"/comic/","parse","status","substring","\" target=\"_blank\" >","AES","string","querySelectorAll","__esModule","加載失敗,點擊重新加載","appendChild","data","application/x-www-form-urlencoded;charset=UTF-8","/comicdetail/","/chapter/","name","send","chapters","</a></li>","div","response","createElement","bind","datetime_created","exports","Utf8","undefined","Base64","innerText","Module","success","Hex","readyState","symbol","table-default-title","innerHTML","querySelector","hasOwnProperty","Pkcs7","enc","onclick","type","headers","function","<a href=\"/comic/","splice","\"><li>","removeChild","last_chapter","POST","decrypt","mode","results","iterator","\" target=\"_blank\" title=\"","</li></a>","tab-content","tab-pane fade","call","build","push","slice","comic_path_word","create","substr","default","page-all comic-detail-page",".wargin","length","error","prototype","apply","location","tablist","tab-pane fade show active","\" role=\"tab\">","<li class=\"nav-item\"><a class=\"nav-link disabled\"  data-toggle=\"tab\" href=\"#","table-default-box","timeEnd","open","table-default-right","onreadystatechange","GET"]

/**
 * Resolve an obfuscated keyword reference against the `_0x4a5d` table.
 *
 * @param {string} i - Table index as written in the obfuscated source, e.g. "0x33".
 * @param {?} parameter1 - Ignored; kept so the signature mirrors the obfuscated helper.
 * @returns {?} The table entry at that index (`undefined` when nothing is stored there).
 */
let _0x2f1f = function (i, parameter1) {
    // Unary plus does the numeric coercion, so the hex string "0x33" becomes 51
    const position = +i
    return _0x4a5d[position]
}
// Load the obfuscated script that was saved from the site.
let content = fs.readFileSync('copymanga.js', 'utf-8')
// Write the keywords back into the code: every `_0x2f1f("0x..")` call becomes the
// string literal it resolves to. One global pass with a replacer function is used because:
//   * JSON.stringify escapes quotes and backslashes, so keywords such as
//     `<a href="/comic/` still produce a valid JavaScript string literal;
//   * a replacer function's return value is inserted verbatim, whereas a string
//     replacement would have `$` sequences interpreted as replacement patterns;
//   * the file is scanned once instead of being re-scanned after every replacement.
content = content.replace(/_0x2f1f\("(.*?)"\)/g, (call, index) => {
    const keyword = _0x2f1f(index)
    // Leave the call untouched when the index is not in the table, rather than
    // emitting the misleading literal "undefined".
    return keyword === undefined ? call : JSON.stringify(keyword)
})
fs.writeFileSync('copymanga.replaced.js', content)

得到关键词反混淆后的代码,这样就可以分析AES加密的逻辑了。

AES加密分析

将关键代码分析如下:

 var ems = $(6);// ems is presumably the CryptoJS object, a widely used JavaScript encryption/decryption library
 var cacheB = headB;// per code analysis, headB is the `results` field of the chapters JSON response; the enclosing function is passed lagOffset["results"]
 var v = cacheB["substring"](0, 16);// the first 16 characters are the IV
 var all_probs = cacheB["substring"](16, cacheB["length"]);// the remainder is the encrypted payload, encoded as a hex string
 var artistTrack = ems["enc"]["Utf8"]["parse"](dio);// the key comes from a variable named dio
 var iv = ems["enc"]["Utf8"]["parse"](v);// parse the 16-character UTF-8 string into bytes to use as the IV
 // Immediately invoked with all_probs: hex -> Base64 -> AES decrypt -> UTF-8 string
 var itemData = function(prob_list) {
     var value = ems["enc"]["Hex"]["parse"](prob_list);
     var minyMin = ems["enc"]["Base64"]["stringify"](value);
     return ems["AES"]["decrypt"](minyMin, artistTrack, {
         "iv" : iv,
         "mode" : ems["mode"]["CBC"],// CBC mode
         "padding" : ems["pad"]["Pkcs7"]// Pkcs7 padding
     })["toString"](ems["enc"]["Utf8"])["toString"]();
 }(all_probs);

dio这个变量在js文件中搜索不到,推测可能是全局命名空间中的变量。在浏览器控制台输入dio查看,果不其然,是一个固定的key:

使用加解密测试工具CyberChef ,验证猜想是否正确:

可以成功解密。

下面是Go语言的实现:

// mangaDetailError reports a malformed encrypted payload. It is a local type so
// that the function needs no import beyond the ones it already uses.
type mangaDetailError string

// Error implements the error interface.
func (e mangaDetailError) Error() string { return string(e) }

// decryptMangaDetail decrypts the encrypted "results" string of the chapters
// response and decodes the plaintext JSON into a MangaDetail.
//
// result consists of a 16-byte IV followed by the hex-encoded AES-CBC
// ciphertext, padded with PKCS#7. Malformed input (too short, invalid hex,
// ciphertext that is not a whole number of blocks, bad padding, invalid JSON)
// is returned as an error instead of causing a panic.
func decryptMangaDetail(result string) (*MangaDetail, error) {
	var mangaDetail MangaDetail
	// Slicing result[0:16] panics on shorter input, so reject it up front.
	if len(result) < aes.BlockSize {
		return nil, mangaDetailError("decryptMangaDetail: result is shorter than the 16-byte IV")
	}
	iv := result[:aes.BlockSize]
	contentHex := result[aes.BlockSize:]
	contentBytes, err := hex.DecodeString(contentHex)
	if err != nil {
		return nil, err
	}
	// CryptBlocks panics unless the input is a whole number of blocks.
	if len(contentBytes) == 0 || len(contentBytes)%aes.BlockSize != 0 {
		return nil, mangaDetailError("decryptMangaDetail: ciphertext length is not a positive multiple of the AES block size")
	}
	block, err := aes.NewCipher([]byte("xxxmanga.woo.key"))
	if err != nil {
		return nil, err
	}
	stream := cipher.NewCBCDecrypter(block, []byte(iv))
	// Before unpadding, the plaintext is exactly as long as the ciphertext.
	dst := make([]byte, len(contentBytes))
	stream.CryptBlocks(dst, contentBytes)
	dst, err = pkcs7pad.Unpad(dst)
	if err != nil {
		return nil, err
	}
	err = json.Unmarshal(dst, &mangaDetail)
	if err != nil {
		return nil, err
	}
	return &mangaDetail, nil
}

其中类型定义如下:

// MangaDetail is the type decryptMangaDetail unmarshals the decrypted JSON
// into. Each field maps to the JSON key named in its struct tag.
type MangaDetail struct {
	// Build maps the top-level "build" object.
	Build struct {
		PathWord string `json:"path_word"`
		Type     []struct {
			Id   int    `json:"id"`
			Name string `json:"name"`
		} `json:"type"`
	} `json:"build"`
	// Groups maps the top-level "groups" object. Only the "default" key is
	// declared; any other keys are ignored by json.Unmarshal.
	Groups struct {
		Default struct {
			PathWord string `json:"path_word"`
			Count    int    `json:"count"`
			Name     string `json:"name"`
			// Chapters maps the "chapters" array of the group.
			Chapters []struct {
				Type int    `json:"type"`
				Name string `json:"name"`
				Id   string `json:"id"`
			} `json:"chapters"`
			// LastChapter maps the "last_chapter" object of the group.
			LastChapter struct {
				Index           int         `json:"index"`
				Uuid            string      `json:"uuid"`
				Count           int         `json:"count"`
				Ordered         int         `json:"ordered"`
				Size            int         `json:"size"`
				Name            string      `json:"name"`
				ComicId         string      `json:"comic_id"`
				ComicPathWord   string      `json:"comic_path_word"`
				GroupId         interface{} `json:"group_id"` // untyped here; presumably nullable in the response — TODO confirm
				GroupPathWord   string      `json:"group_path_word"`
				Type            int         `json:"type"`
				ImgType         int         `json:"img_type"`
				News            string      `json:"news"`
				DatetimeCreated string      `json:"datetime_created"`
				Prev            string      `json:"prev"`
				Next            interface{} `json:"next"` // untyped here; presumably nullable in the response — TODO confirm
			} `json:"last_chapter"`
		} `json:"default"`
	} `json:"groups"`
}

Go的AES加密实现比较偏向底层,不像js那样传几个参数进去就完了。注意AES加密是先做Padding再加密,AES解密是先解密完再Unpadding。需要注意的是,Unpadding之前的解密结果与密文长度相同,去除padding之后才是真正的明文长度。

Licensed under CC BY-NC-SA 4.0

</article


Recommend

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK