网页逆向-对某混淆加密的代码解混淆

https://www.superbed.cn/

查看网页源码,发现是js加密,v7的混淆

字符串解密

现在要做的是把字符串解密

把整段代码拷贝到文件中

在还原之前,需要知道里面用了哪些混淆的手段,混淆还原是代码帮助去实现思路,而不是代码把一切都做了

通过源代码中的一些片段可以猜测,这里的_0x1c34函数就是字符串解密函数

在源代码中找到该函数,然后保存到test.js中,代码的第一行var _0x408fc0 = _0x408f();调用了_0x408f函数,这个函数实际上是一个字符串数组

这个数组不能直接拿来用,而且是乱序的,还原数组顺序的代码在代码的开头,明显这里有unshift等字符

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Obfuscator bootstrap (marker string 'jsjiami.com.v7'). The immediately
// invoked routine below rotates the array returned by _0x408f() in place until
// a checksum computed from decoded entries equals the target 0x6d3f1.
// NOTE(review): _0x1c34 and _0x408f are defined elsewhere in the original
// script; presumably _0x1c34 only decodes correctly after this has run.
var _0xodV = 'jsjiami.com.v7';
var _0x242699 = _0x1c34;
(function(_0x3a1be9, _0x3aed09, _0x3a5fad, _0xea80fc, _0x33c703, _0x113dc2, _0xbcf3b4) {
// 0xbf00 >> 8 === 0xbf: threshold compared with the countdown counter below.
return _0x3a1be9 = _0x3a1be9 >> 0x8,
_0x113dc2 = 'hs',
_0xbcf3b4 = 'hs',
function(_0x5e21d7, _0x3b431f, _0x3ed0b5, _0x3aae96, _0x41b0b9) {
var _0x457660 = _0x1c34;
// 'tfi' + 'hs' and 'hs' + 'up' are passed through the string-reversing
// callback (third argument), yielding the method names 'shift' and 'push'.
_0x3aae96 = 'tfi',
_0x113dc2 = _0x3aae96 + _0x113dc2,
_0x41b0b9 = 'up',
_0xbcf3b4 += _0x41b0b9,
_0x113dc2 = _0x3ed0b5(_0x113dc2),
_0xbcf3b4 = _0x3ed0b5(_0xbcf3b4),
_0x3ed0b5 = 0x0;
// The array being rotated: the result of calling the first argument (_0x408f).
var _0x194433 = _0x5e21d7();
while (!![] && --_0xea80fc + _0x3b431f) {
try {
// Checksum assembled from parseInt() of several decoded strings.
_0x3aae96 = parseInt(_0x457660(0x191, 'jSfG')) / 0x1 + parseInt(_0x457660(0x7dd, 'pm#g')) / 0x2 * (parseInt(_0x457660(0x7d9, 'ntPS')) / 0x3) + parseInt(_0x457660(0x196, 'JRwz')) / 0x4 + parseInt(_0x457660(0x763, 'mCY8')) / 0x5 * (-parseInt(_0x457660(0x36e, 'jSfG')) / 0x6) + parseInt(_0x457660(0x66b, 'pm#g')) / 0x7 * (parseInt(_0x457660(0x2c0, '5Pg*')) / 0x8) + parseInt(_0x457660(0x503, '6oXn')) / 0x9 + -parseInt(_0x457660(0x362, '21Kl')) / 0xa;
} catch (_0x5843bf) {
_0x3aae96 = _0x3ed0b5;
} finally {
// Every iteration removes the first element of the array.
_0x41b0b9 = _0x194433[_0x113dc2]();
// While the counter is still >= 0xbf (the first iterations, since it starts
// at 0xc1) the removed elements are stashed instead of being re-appended.
if (_0x3a1be9 <= _0xea80fc)
_0x3ed0b5 ? _0x33c703 ? _0x3aae96 = _0x41b0b9 : _0x33c703 = _0x41b0b9 : _0x3ed0b5 = _0x41b0b9;
else {
// Afterwards: only if the first stashed value equals the second one with
// the listed filler characters stripped does the rotation proceed.
if (_0x3ed0b5 == _0x33c703['replace'](/[AHTfkyeDFIYgUBNOWtp=]/g, '')) {
// Checksum matches the target: put the element back in front and stop.
if (_0x3aae96 === _0x3b431f) {
_0x194433['un' + _0x113dc2](_0x41b0b9);
break;
}
// Otherwise append it to the end, i.e. rotate by one and try again.
_0x194433[_0xbcf3b4](_0x41b0b9);
}
}
}
}
}(_0x3a5fad, _0x3aed09, function(_0x885bf6, _0x4aaec1, _0x4af83a, _0x42c9fa, _0x58033a, _0x525257, _0x2a9b24) {
// String reversal via split('') / reverse / join(''); the argument passed to
// reverse is ignored by Array.prototype.reverse.
return _0x4aaec1 = '\x73\x70\x6c\x69\x74',
_0x885bf6 = arguments[0x0],
_0x885bf6 = _0x885bf6[_0x4aaec1](''),
_0x4af83a = '\x72\x65\x76\x65\x72\x73\x65',
_0x885bf6 = _0x885bf6[_0x4af83a]('\x76'),
_0x42c9fa = '\x6a\x6f\x69\x6e',
(0x19c6cd,
_0x885bf6[_0x42c9fa](''));
});
}(0xbf00, 0x6d3f1, _0x408f, 0xc1),
_0x408f) && (_0xodV = 0x26da);

最简单的处理方式就是打断点,然后把还原之后的数组copy到代码中替换_0x408f函数

后面就可以正常解密字符串了

现在要做的就是找到源代码中所有调用了解密函数_0x1c34(以及它的别名)的地方,把函数调用的结果替换到原来的位置

可以发现代码中函数的赋值语句是var _0x4dcb36 = _0x1c34,该类语句的AST是VariableDeclaration节点,里面有一个declarations数组

比如这里的赋值语句

1
2
3
4
5
// Sample from the obfuscated source: a single `var` statement with two
// declarators. The first aliases the decryptor _0x1c34; the second is a lookup
// object whose values are calls made through that alias, plus a plain string.
var _0x4dcb36 = _0x1c34, _0x5931aa = {
'lrrge': _0x4dcb36(0x868, '%mrq'),
'GiGHq': _0x4dcb36(0x2f8, 'Q6ts'),
'MLWYc': 'error'
}

通过逗号分隔的每一项都是一个声明(上面的片段中有两个),在VariableDeclaration节点下有declarations数组,数组中的每一项是一个VariableDeclarator节点,该节点的id存放的是=左边的标识符,init存放的是=右边的表达式(可以是标识符,也可以是对象字面量等)

现在需要遍历VariableDeclarator节点,当节点的init是目标函数_0x1c34的时候,取出节点的id属性,然后根据id的名字取到该标识符的绑定,遍历绑定找到所有引用到该标识符的地方,取出调用时的参数,手动调用解密函数,再把调用的结果填回去

丢给AI写就完了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

const jscode = fs.readFileSync("./aaa.js", {
encoding: "utf-8"
});
let ast = parser.parse(jscode);
let bigArr = [...]
let args = []
function _0x1c34(_0x1524a4, _0x1aee01) {
...
}
// 遍历 AST
traverse(ast, {
VariableDeclarator(path) {
// 取出init属性
let init = path.node.init;
// 判断 init 是 Identifier 并且名字是 _0x1c34
if (init && init.type === 'Identifier' && init.name === '_0x1c34') {
// 取出 id 的属性值
let id = path.node.id;
console.log('匹配到的变量名:', id.name);
// 取出 id_name
let funcName = id.name;
// 获取绑定
let binding = path.scope.getBinding(funcName);
if (!binding) return;
// 遍历引用
binding.referencePaths.forEach(refPath => {
const parent = refPath.parent;
// 是函数调用:_0x4dcb36(...)
if (parent.type === 'CallExpression' && parent.callee === refPath.node) {
console.log(`调用位置 -> ${funcName}(`);
// 清空 args,每次只保留当前调用参数
args = [];
parent.arguments.forEach((arg, i) => {
if (arg.type === 'NumericLiteral') {
args.push(arg.value);
//console.log(` 参数${i}: 数字 -> ${arg.value}`);
} else if (arg.type === 'StringLiteral') {
args.push(arg.value); // 推入 args
//console.log(` 参数${i}: 字符串 -> "${arg.value}"`);
} else {
args.push(null); // 占位防错
//console.log(` 参数${i}: 其它类型 -> ${arg.type}`);
}
});
console.log(args)
// console.log(`)`);
const result = _0x1c34(args[0], args[1]);
console.log(`解密结果 -> ${result}`)
// 替换为字符串字面量
refPath.parentPath.replaceWith(types.stringLiteral(result));
}
})
}
}
});
let code = generator(ast).code;
fs.writeFile('./demoNew.js', code, (err)=>{});

但是这里,不是所有的函数被引用的地方都是CallExpression,还有其他情况,可以打印出来

1
2
3
4
5
6
if (parent.type === 'CallExpression' && parent.callee === refPath.node) {
//...
}
else {
console.log(refPath.parentPath + '')
}

输出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
_0x350a03 = _0x242699
_0x5f2cac = _0x379b39
_0x10780b = _0x46a457
_0x2fc4a9 = _0x4dcb36
_0x9f416d = _0x4dcb36
_0x59a36f = _0x149afc
_0x156e5c = _0x149afc
_0x42e822 = _0x149afc
_0x54f06d = _0x149afc
_0x146e87 = _0x149afc
_0x2f567 = _0x149afc
_0x396f76 = _0x149afc
_0x1bb606 = _0x332ed8
_0x354745 = _0x332ed8
_0x225487 = _0x332ed8
_0x4a387a = _0x332ed8
_0x4ccddb = _0x332ed8
_0x32e01c = _0x332ed8
_0x2d3aae = _0x395074
_0x5e7074 = _0x395074

这表示存在函数的递归赋值,也就是说,原先的解密函数是_0x1c34,把_0x1c34赋值给了_0x395074,然后又把_0x395074赋值给了_0x2d3aae,最终调用_0x2d3aae函数

解决方法是遍历所有CallExpression节点的最原始的绑定,如果最原始的绑定是_0x1c34,就去获取参数并解密,递归寻找祖先函数的代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/**
 * Follow a chain of plain alias declarations (`var c = b; var b = a;`) back to
 * the name at the root of the chain.
 *
 * Accepted inputs:
 *  - an Identifier path: resolution starts from that identifier's own name;
 *  - a VariableDeclarator path whose `init` is an Identifier (`var a = b`):
 *    resolution starts from the name on the right-hand side (`b`).
 *
 * @param {object} path Babel NodePath (Identifier or VariableDeclarator).
 * @returns {string|null} The last name reached, or null when `path` is
 *   neither of the accepted forms.
 */
function resolveCalleeName(path) {
  let name;
  if (path.isIdentifier()) {
    name = path.node.name;
  } else if (path.isVariableDeclarator()) {
    const start = path.node.init;
    if (!start || start.type !== 'Identifier') return null;
    name = start.name;
  } else {
    return null;
  }

  // Guards against cyclic alias chains such as `var a = b; var b = a;`.
  const visited = new Set();
  let scope = path.scope;

  while (name) {
    const binding = scope.getBinding(name);
    // Stop at anything that is not a `var x = ...` declaration (for example
    // the decryptor's own function declaration, or a global).
    if (!binding || visited.has(binding) || !binding.path.isVariableDeclarator()) break;
    visited.add(binding);

    const init = binding.path.node.init;
    if (!init || init.type !== 'Identifier') break;

    name = init.name;
    // The right-hand side must be resolved where the alias was declared, not
    // where the lookup started; otherwise a same-named variable that shadows
    // it near the call site would be followed instead.
    scope = binding.path.scope;
  }

  return name;
}

最终的递归解密代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read and parse the obfuscated input file.
const jscode = fs.readFileSync("./demo.js", {
encoding: "utf-8"
});
let ast = parser.parse(jscode);
// Placeholder: `[...]` stands for array contents that are not shown here.
// NOTE(review): presumably the already-rotated string array — confirm.
let bigArr = [...]
// Module-level scratch array.
let args = []
// Decryption function; the body is elided in this listing and has to be
// copied from the obfuscated file.
function _0x1c34(_0x1524a4, _0x1aee01) {
//...
}
/**
 * Follow a chain of plain alias declarations (`var c = b; var b = a;`) back to
 * the name at the root of the chain.
 *
 * Accepted inputs:
 *  - an Identifier path: resolution starts from that identifier's own name;
 *  - a VariableDeclarator path whose `init` is an Identifier (`var a = b`):
 *    resolution starts from the name on the right-hand side (`b`).
 *
 * @param {object} path Babel NodePath (Identifier or VariableDeclarator).
 * @returns {string|null} The last name reached, or null when `path` is
 *   neither of the accepted forms.
 */
function resolveCalleeName(path) {
  let name;
  if (path.isIdentifier()) {
    name = path.node.name;
  } else if (path.isVariableDeclarator()) {
    const start = path.node.init;
    if (!start || start.type !== 'Identifier') return null;
    name = start.name;
  } else {
    return null;
  }

  // Guards against cyclic alias chains such as `var a = b; var b = a;`.
  const visited = new Set();
  let scope = path.scope;

  while (name) {
    const binding = scope.getBinding(name);
    // Stop at anything that is not a `var x = ...` declaration (for example
    // the decryptor's own function declaration, or a global).
    if (!binding || visited.has(binding) || !binding.path.isVariableDeclarator()) break;
    visited.add(binding);

    const init = binding.path.node.init;
    if (!init || init.type !== 'Identifier') break;

    name = init.name;
    // The right-hand side must be resolved where the alias was declared, not
    // where the lookup started; otherwise a same-named variable that shadows
    // it near the call site would be followed instead.
    scope = binding.path.scope;
  }

  return name;
}

// Walk the AST: for every variable whose alias chain ends at the decryptor
// `_0x1c34`, evaluate each direct call made through that variable and replace
// the call with the decoded string literal.
traverse(ast, {
  VariableDeclarator(path) {
    const { id, init } = path.node;

    // Skip declarations without an initializer or with a destructuring target.
    if (!init || !id || id.type !== 'Identifier') return;
    const funcName = id.name;
    console.log(`找到的函数名称:${funcName}`);

    const binding = path.scope.getBinding(funcName);
    if (!binding) return;

    // Resolve through chains such as `var b = a; var a = _0x1c34;`.
    const realFuncName = resolveCalleeName(path);
    console.log(`该函数的祖先函数名称:${realFuncName}`);
    if (realFuncName !== '_0x1c34') return;

    binding.referencePaths.forEach((refPath) => {
      const callPath = refPath.parentPath;
      const callNode = callPath.node;
      // The alias must be the callee, not merely an argument or an operand.
      if (callNode.type !== 'CallExpression' || callNode.callee !== refPath.node) return;
      console.log(`${funcName}函数调用了`);

      // Only calls whose arguments are all literals can be evaluated here;
      // passing a placeholder for anything else would decode the wrong string.
      const allLiteral = callNode.arguments.every(
        (arg) => arg.type === 'NumericLiteral' || arg.type === 'StringLiteral'
      );
      if (!allLiteral) {
        console.log(`跳过:参数不是字面量 -> ${callPath}`);
        return;
      }

      const [index, key] = callNode.arguments.map((arg) => arg.value);
      const result = _0x1c34(index, key);
      console.log(`解密结果 -> ${result}`);
      // stringLiteral() rejects non-strings, so skip calls that did not decode.
      if (typeof result !== 'string') return;
      callPath.replaceWith(types.stringLiteral(result));
    });
  },
});

const code = generator(ast).code;
fs.writeFile('./demoNewNew.js', code, (err) => {
  // Surface write failures instead of silently producing no output file.
  if (err) {
    console.error('写入 ./demoNewNew.js 失败:', err);
    process.exitCode = 1;
  }
});

去除字符串花指令

以解密之后的这个函数为例

1
2
3
4
5
6
7
8
9
10
11
12
13
// Sample function as it looks after string decryption. Constants and a thin
// wrapper function are parked in the local lookup object _0x422da4 and read
// back through computed member accesses.
function changeUserState(_0x168850) {
// Alias of the decryptor; no longer referenced inside this function.
var _0x59f106 = _0x1c34,
_0x422da4 = {
'FNUmX': ".logout",
'XOXOd': "layui-hide",
'voMHG': '.login',
// Wrapper that simply calls its first argument with its second.
'OtQuj': function (_0x152ee4, _0x4f25e3) {
return _0x152ee4(_0x4f25e3);
},
'eXLpj': ".username"
};
// One comma-separated expression statement; every selector and class name is
// fetched from the lookup object above.
$(_0x422da4["FNUmX"])["removeClass"](_0x422da4["XOXOd"]), $(_0x422da4['voMHG'])["addClass"](_0x422da4["XOXOd"]), _0x422da4['OtQuj']($, _0x422da4["eXLpj"])["text"](_0x168850['username'] || _0x168850["nickName"]), _0x168850["admin"] && $('.user-admin')["removeClass"]('layui-hide');
}

访问_0x422da4["FNUmX"]实际上就是".logout",类似这样替换

思路是遍历节点,找到类似_0x422da4这种标识符:它所在的VariableDeclarator的init必须是ObjectExpression。拿到标识符的名字后找到它的绑定,再找到所有引用的地方;如果引用的父节点是MemberExpression,就取出其property的value(也就是属性名),然后根据这个属性名到对象里找对应的属性值。如果属性值是字符串,就用这个字符串替换整个MemberExpression节点;如果属性值是其他类型(函数等),就跳过。

但是注意,如果对象的声明没有 var / let,那么节点信息就变了,此时应该是 AssignmentExpression,并且它的 right 要是 ObjectExpression

也就是说,变量声明式的对象绑定

1
2
3
4
5
var _0x422da4 = {
'FNUmX': '.logout',
'XOXOd': 'layui-hide',
// ...
};

在AST中表现为

1
2
3
VariableDeclarator
id.name: _0x422da4
init.type: ObjectExpression

赋值表达式式的对象绑定

1
2
3
4
5
_0x422da4 = {
'FNUmX': '.logout',
'XOXOd': 'layui-hide',
// ...
};

在AST中表现为

1
2
3
4
ExpressionStatement
expression.type: AssignmentExpression
left.name: _0x422da4
right.type: ObjectExpression

对于逗号分隔的表达式

1
2
3
4
5
6
_0x59f106 = _0x1c34,
_0x422da4 = {
'FNUmX': ".logout",
'XOXOd': "layui-hide",
// ...
};

在AST中对应的是

1
2
3
ExpressionStatement
└── expression: SequenceExpression
└── expressions: [ AssignmentExpression, AssignmentExpression ]

核心处理函数如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/**
 * Inline string constants that were hidden behind a lookup object, turning
 * `obj["key"]` into the literal stored under `key`.
 *
 * Only string-keyed, string-valued properties are considered. A key that is
 * written to anywhere through the same binding (`obj["key"] = ...`,
 * `obj["key"]++`, `delete obj["key"]`) is left alone, because its value is no
 * longer known statically, and write targets themselves are never replaced.
 *
 * @param {object} path       Babel NodePath used to look up the binding.
 * @param {string} objName    Name of the lookup-object variable.
 * @param {object[]} properties `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // A Map (not a plain object) so keys such as "constructor" or "toString"
  // cannot accidentally hit inherited properties.
  const stringProps = new Map();
  for (const prop of properties) {
    // `?.` because SpreadElement has no key and ObjectMethod has no value.
    if (prop.key?.type === 'StringLiteral' && prop.value?.type === 'StringLiteral') {
      stringProps.set(prop.key.value, prop.value.value);
    }
  }

  // True when the member expression is being written to rather than read.
  const isWriteTarget = (memberPath) => {
    const owner = memberPath.parentPath?.node;
    if (!owner) return false;
    switch (owner.type) {
      case 'AssignmentExpression':
        return owner.left === memberPath.node;
      case 'UpdateExpression':
        return owner.argument === memberPath.node;
      case 'UnaryExpression':
        return owner.operator === 'delete' && owner.argument === memberPath.node;
      default:
        return false;
    }
  };

  // Pass 1: collect the reads and drop every key that is modified somewhere.
  const reads = [];
  for (const refPath of binding.referencePaths) {
    const memberPath = refPath.parentPath;
    const memberNode = memberPath.node;
    if (
      memberNode.type !== 'MemberExpression' ||
      memberNode.object !== refPath.node ||
      memberNode.property.type !== 'StringLiteral'
    ) {
      continue;
    }
    if (isWriteTarget(memberPath)) {
      stringProps.delete(memberNode.property.value);
    } else {
      reads.push(memberPath);
    }
  }

  // Pass 2: replace the remaining reads with their literal values.
  for (const memberPath of reads) {
    const value = stringProps.get(memberPath.node.property.value);
    if (value !== undefined) {
      memberPath.replaceWith(types.stringLiteral(value));
    }
  }
}

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the output of the string-decryption step and parse it into an AST.
const jscode = fs.readFileSync("./demoNewNew.js", {
encoding: "utf-8"
});
let ast = parser.parse(jscode);

/**
 * Inline string constants that were hidden behind a lookup object, turning
 * `obj["key"]` into the literal stored under `key`.
 *
 * Only string-keyed, string-valued properties are considered. A key that is
 * written to anywhere through the same binding (`obj["key"] = ...`,
 * `obj["key"]++`, `delete obj["key"]`) is left alone, because its value is no
 * longer known statically, and write targets themselves are never replaced.
 *
 * @param {object} path       Babel NodePath used to look up the binding.
 * @param {string} objName    Name of the lookup-object variable.
 * @param {object[]} properties `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // A Map (not a plain object) so keys such as "constructor" or "toString"
  // cannot accidentally hit inherited properties.
  const stringProps = new Map();
  for (const prop of properties) {
    // `?.` because SpreadElement has no key and ObjectMethod has no value.
    if (prop.key?.type === 'StringLiteral' && prop.value?.type === 'StringLiteral') {
      stringProps.set(prop.key.value, prop.value.value);
    }
  }

  // True when the member expression is being written to rather than read.
  const isWriteTarget = (memberPath) => {
    const owner = memberPath.parentPath?.node;
    if (!owner) return false;
    switch (owner.type) {
      case 'AssignmentExpression':
        return owner.left === memberPath.node;
      case 'UpdateExpression':
        return owner.argument === memberPath.node;
      case 'UnaryExpression':
        return owner.operator === 'delete' && owner.argument === memberPath.node;
      default:
        return false;
    }
  };

  // Pass 1: collect the reads and drop every key that is modified somewhere.
  const reads = [];
  for (const refPath of binding.referencePaths) {
    const memberPath = refPath.parentPath;
    const memberNode = memberPath.node;
    if (
      memberNode.type !== 'MemberExpression' ||
      memberNode.object !== refPath.node ||
      memberNode.property.type !== 'StringLiteral'
    ) {
      continue;
    }
    if (isWriteTarget(memberPath)) {
      stringProps.delete(memberNode.property.value);
    } else {
      reads.push(memberPath);
    }
  }

  // Pass 2: replace the remaining reads with their literal values.
  for (const memberPath of reads) {
    const value = stringProps.get(memberPath.node.property.value);
    if (value !== undefined) {
      console.log(`找到指定表达式,标识符名称为${objName}`)
      memberPath.replaceWith(types.stringLiteral(value));
    }
  }
}

// Find every lookup object — declared (`var o = {...}`) or assigned
// (`o = {...}`, possibly inside a comma sequence) — and hand it to
// handleObfuscatedObject, then write the regenerated source.
traverse(ast, {
  VariableDeclarator(path) {
    const { id, init } = path.node;
    // Destructuring targets have no single name to look up.
    if (init?.type === 'ObjectExpression' && id.type === 'Identifier') {
      handleObfuscatedObject(path, id.name, init.properties);
    }
  },
  ExpressionStatement(path) {
    const expr = path.node.expression;
    // A comma sequence contributes each of its sub-expressions; anything else
    // is examined on its own.
    const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];

    for (const item of candidates) {
      if (
        item.type === 'AssignmentExpression' &&
        item.operator === '=' &&
        // `a.b = {...}` or a pattern on the left has no `.name`.
        item.left.type === 'Identifier' &&
        item.right.type === 'ObjectExpression'
      ) {
        handleObfuscatedObject(path, item.left.name, item.right.properties);
      }
    }
  },
});

const code = generator(ast).code;
fs.writeFile('./demoNewNew2.js', code, (err) => {
  // Surface write failures instead of silently producing no output file.
  if (err) {
    console.error('写入 ./demoNewNew2.js 失败:', err);
    process.exitCode = 1;
  }
});

去除函数花指令

还是以上面的函数为例

1
2
3
4
5
6
7
8
9
10
11
12
13
// The same sample after string constants have been inlined: only the call
// through the wrapper function 'OtQuj' still goes via the lookup object.
function changeUserState(_0x168850) {
// Alias of the decryptor; not referenced inside this function.
var _0x59f106 = _0x1c34,
_0x422da4 = {
'FNUmX': ".logout",
'XOXOd': "layui-hide",
'voMHG': '.login',
// Wrapper that simply calls its first argument with its second.
'OtQuj': function (_0x152ee4, _0x4f25e3) {
return _0x152ee4(_0x4f25e3);
},
'eXLpj': ".username"
};
// _0x422da4['OtQuj']($, ".username") is equivalent to $(".username").
$(".logout")["removeClass"]("layui-hide"), $(".login")["addClass"]("layui-hide"), _0x422da4['OtQuj']($, ".username")["text"](_0x168850['username'] || _0x168850["nickName"]), _0x168850["admin"] && $('.user-admin')["removeClass"]('layui-hide');
}

_0x422da4['OtQuj']($, ".username") 其实就是$(".username")

思路就是先找出对象标识符,取出属性,然后丢到封装好的函数里面去。先找到对象标识符,然后把节点信息丢到封装的函数里,在函数中处理,函数逻辑如下,先去找到引用,然后判断引用的父节点是不是MemberExpression并且父节点的父节点是不是CallExpression,如果是,取出MemberExpression的参数,这个参数就是对象标识符的属性,然后拿着这个属性去对象标识符里去找属性值,如果属性值是函数,并且只有一个return语句,并且return语句的返回就是参数1(参数2)的形式(也就是返回的是CallExpression),那么就拿着引用那个地方的两个参数,构造一个新的CallExpression,替换原来的CallExpression

这里还需要判断一下函数返回的是a(b)还是b(a),关系到怎么构造CallExpression

核心代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/**
 * Remove "call wrapper" indirection: when a lookup object holds
 * `key: function (a, b) { return a(b); }` (or `return b(a);`), rewrite every
 * `obj["key"](x, y)` into the direct call `x(y)` (or `y(x)`).
 *
 * @param {object} path       Babel NodePath used to look up the binding.
 * @param {string} objName    Name of the lookup-object variable.
 * @param {object[]} properties `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // key -> true for `(fn, arg) => fn(arg)`, false for `(arg, fn) => fn(arg)`.
  // A Map is used so that keys like "toString" or "constructor" cannot match
  // inherited properties of a plain object.
  const wrappers = new Map();
  for (const prop of properties) {
    const fn = prop.value;
    // `?.` because SpreadElement has no key and ObjectMethod has no value.
    if (prop.key?.type !== 'StringLiteral' || fn?.type !== 'FunctionExpression') continue;
    if (fn.params.length !== 2 || fn.body.body.length !== 1) continue;
    if (!fn.params.every((param) => param.type === 'Identifier')) continue;

    const [onlyStatement] = fn.body.body;
    if (onlyStatement.type !== 'ReturnStatement') continue;
    const returned = onlyStatement.argument;
    if (
      returned?.type !== 'CallExpression' ||
      returned.callee.type !== 'Identifier' ||
      returned.arguments.length !== 1 ||
      returned.arguments[0].type !== 'Identifier'
    ) {
      continue;
    }

    const [firstParam, secondParam] = fn.params.map((param) => param.name);
    const calleeName = returned.callee.name;
    const argName = returned.arguments[0].name;
    if (calleeName === firstParam && argName === secondParam) {
      wrappers.set(prop.key.value, true); // a(b)
    } else if (calleeName === secondParam && argName === firstParam) {
      wrappers.set(prop.key.value, false); // b(a)
    }
    // Anything else (e.g. a captured variable) is not a pure wrapper.
  }

  // Visit references last-to-first so that a wrapper call nested inside the
  // arguments of another one is rewritten before the call that contains it.
  binding.referencePaths.slice().reverse().forEach((refPath) => {
    const memberPath = refPath.parentPath;
    const memberNode = memberPath.node;
    const callPath = memberPath.parentPath;

    // Require the shape obj['key'](...): the member expression must be the
    // callee of the surrounding call.
    if (
      memberNode.type !== 'MemberExpression' ||
      memberNode.object.name !== objName ||
      memberNode.property.type !== 'StringLiteral' ||
      callPath?.node.type !== 'CallExpression' ||
      callPath.node.callee !== memberNode
    ) {
      return;
    }

    const isNormalOrder = wrappers.get(memberNode.property.value);
    if (isNormalOrder === undefined) return;

    const callArgs = callPath.node.arguments;
    // Exactly two plain arguments; a spread cannot be mapped onto (a, b).
    if (callArgs.length !== 2 || callArgs.some((arg) => arg.type === 'SpreadElement')) return;

    const directCall = isNormalOrder
      ? types.callExpression(callArgs[0], [callArgs[1]])
      : types.callExpression(callArgs[1], [callArgs[0]]);

    callPath.replaceWith(directCall);
  });
}

去除二元运算花指令

寻找对象标识符的代码和上面字符串花指令是一样的,只是函数处理部分的区别,判断如果属性值是函数,并且返回值是BinaryExpression,取出里面的operator,注意也要判断一下调用顺序,然后找到objname的引用,判断引用的父节点是MemberExpression,爷爷节点是CallExpression,取出参数,构造一个BinaryExpression,替换CallExpression

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/**
 * Remove "binary operator wrapper" indirection: when a lookup object holds
 * `key: function (a, b) { return a + b; }` (any binary operator, either
 * operand order), rewrite `obj["key"](x, y)` into `x + y`.
 *
 * NOTE: references are visited in source order. When one wrapper call is
 * nested inside the arguments of another, the outer call is replaced first
 * and the path of the inner one is then stale, so replacing it can throw
 * inside Babel ("Container is falsy"). Iterate in reverse to avoid that.
 *
 * @param {object} path       Babel NodePath used to look up the binding.
 * @param {string} objName    Name of the lookup-object variable.
 * @param {object[]} properties `properties` array of the ObjectExpression.
 */
function handleObfuscatedObjectFunctionCall(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // key -> { operator, isNormal }. A Map avoids false hits on inherited
  // properties ("constructor", "toString", ...) of a plain object.
  const wrappers = new Map();

  for (const prop of properties) {
    const fn = prop.value;
    if (prop.type !== 'ObjectProperty' || fn?.type !== 'FunctionExpression') continue;
    if (typeof prop.key.value !== 'string') continue;
    if (fn.params.length !== 2 || fn.body.body.length !== 1) continue;

    const [onlyStatement] = fn.body.body;
    if (onlyStatement.type !== 'ReturnStatement') continue;

    const returned = onlyStatement.argument;
    if (
      returned?.type !== 'BinaryExpression' ||
      returned.left.type !== 'Identifier' ||
      returned.right.type !== 'Identifier'
    ) {
      continue;
    }

    // Both operands must be the two parameters; `a + captured` is not a
    // pure wrapper and must not be inlined.
    const [firstParam, secondParam] = fn.params.map((param) => param.name);
    let isNormal;
    if (returned.left.name === firstParam && returned.right.name === secondParam) {
      isNormal = true;
    } else if (returned.left.name === secondParam && returned.right.name === firstParam) {
      isNormal = false;
    } else {
      continue;
    }

    wrappers.set(prop.key.value, { operator: returned.operator, isNormal });
  }

  binding.referencePaths.forEach((refPath) => {
    const memberExpr = refPath.parentPath;
    const callExpr = memberExpr?.parentPath;
    if (!memberExpr || !callExpr) return;

    // Require obj['key'](...): the member expression must be the callee, not
    // merely one of the arguments of some other call.
    if (
      memberExpr.node.type !== 'MemberExpression' ||
      memberExpr.node.object.name !== objName ||
      memberExpr.node.property.type !== 'StringLiteral' ||
      callExpr.node.type !== 'CallExpression' ||
      callExpr.node.callee !== memberExpr.node
    ) {
      return;
    }

    const funcInfo = wrappers.get(memberExpr.node.property.value);
    const args = callExpr.node.arguments;
    if (!funcInfo || args.length !== 2) return;
    if (args.some((arg) => arg.type === 'SpreadElement')) return;

    const binaryExpr = funcInfo.isNormal
      ? types.binaryExpression(funcInfo.operator, args[0], args[1])
      : types.binaryExpression(funcInfo.operator, args[1], args[0]);

    callExpr.replaceWith(binaryExpr);
  });
}

直接运行报错ReferenceError: Container is falsy,报错的地方在callExpr.replaceWith(binaryExpr);此时需要进行调试,可以打印一下中间值

一般的打印方法就是打印节点和节点的code

1
2
3
console.log(memberExpr+'',generator(callExpr.node).code);
console.log(memberExpr+'',generator(binaryExpr).code);
callExpr.replaceWith(binaryExpr);

输出报错部分信息如下

1
2
3
4
_0x5c8922["wVUIT"] _0x5c8922["wVUIT"](_0x5c8922["zfnda"](0x1, Math['random']()), 0x10000)
_0x5c8922["wVUIT"] _0x5c8922["zfnda"](0x1, Math['random']()) * 0x10000
_0x5c8922["zfnda"] _0x5c8922["zfnda"](0x1, Math['random']())
_0x5c8922["zfnda"] 0x1 + Math['random']()

这是什么意思呢?

其实问题出在“从外往里”替换节点时,可能会破坏 AST 结构,导致 Babel 内部找不到“容器”来放你新的节点,从而抛出 ReferenceError: Container is falsy 这个错

举个例子,比如如下代码

1
_0x5c8922["wVUIT"](_0x5c8922["zfnda"](0x1, Math['random']()), 0x10000);

对应AST是这样的

1
2
3
4
5
6
7
CallExpression (wVUIT)
├── callee: MemberExpression (_0x5c8922["wVUIT"])
└── arguments:
├── CallExpression (zfnda)
│ ├── callee: MemberExpression (_0x5c8922["zfnda"])
│ └── arguments: ...
└── NumericLiteral 0x10000

如果先替换外层的 CallExpression,假设把整个 _0x5c8922["wVUIT"] 替换成一个 BinaryExpression,也就替换成了

1
_0x5c8922["zfnda"](0x1, Math['random']()) * 0x10000;

此时,内部的那个 _0x5c8922["zfnda"] 已经被当作纯数值用了,不是一个 CallExpression 节点了,而下一轮想再替换 _0x5c8922["zfnda"] 的时候,Babel 的 path 就找不到它对应的“容器”了(也就是 AST 父节点的 child list),这时 path.replaceWith(...) 就会抛错。

怎么理解“内部的那个 _0x5c8922["zfnda"] 已经被当作纯数值用了”这句话?假设执行了这一句

1
callExpr.replaceWith(types.binaryExpression('*', innerExpr, outerArg));

假设 callExpr 是 wVUIT(...) 对应的 Path,把它整个替换成了

1
_0x5c8922["zfnda"](0x1, Math['random']()) * 0x10000

从 Babel 的角度看,这个 Path 在 replaceWith 之后就“死了”,Babel 不再追踪这个 Path

但问题是此时还保留着 _0x5c8922["zfnda"](...) 的 Path,因为这些 Path 是在 binding.referencePaths.forEach 开始之前就已经收集好的,也就是说 referencePaths 里下一个 Path 还是旧的 zfnda(...) 节点的 Path,它指向的是之前 AST 中的节点结构,而这个结构已经在 replaceWith(...) 之后被重建了,压根就找不到。

也就是说,解决的方法就是,要从内往外处理,而不是从外往里处理。

处理方式是这样,binding.referencePaths.forEach(refPath => {不是从前往后遍历referencePaths数组吗?把数组反转过来,也就是从后往前遍历,只需要把遍历顺序反转一下或者把数组反转一下,可以通过先 .slice() 一下复制一份再 .reverse(),从而达到从后往前遍历的目的,也就是binding.referencePaths.slice().reverse().forEach

最终代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the output of the previous pass and parse it into an AST.
const jscode = fs.readFileSync("./demoNewNew3.js", {
encoding: "utf-8"
});
let ast = parser.parse(jscode);

/**
 * Remove "binary operator wrapper" indirection: when a lookup object holds
 * `key: function (a, b) { return a + b; }` (any binary operator, either
 * operand order), rewrite `obj["key"](x, y)` into `x + y`.
 *
 * References are processed last-to-first so that a wrapper call nested in the
 * arguments of another is rewritten before the call that contains it.
 *
 * @param {object} path       Babel NodePath used to look up the binding.
 * @param {string} objName    Name of the lookup-object variable.
 * @param {object[]} properties `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // key -> { operator, isNormal }. A Map avoids false hits on inherited
  // properties ("constructor", "toString", ...) of a plain object.
  const wrappers = new Map();

  for (const prop of properties) {
    const fn = prop.value;
    if (prop.type !== 'ObjectProperty' || fn?.type !== 'FunctionExpression') continue;
    if (typeof prop.key.value !== 'string') continue;
    if (fn.params.length !== 2 || fn.body.body.length !== 1) continue;

    const [onlyStatement] = fn.body.body;
    if (onlyStatement.type !== 'ReturnStatement') continue;

    const returned = onlyStatement.argument;
    if (
      returned?.type !== 'BinaryExpression' ||
      returned.left.type !== 'Identifier' ||
      returned.right.type !== 'Identifier'
    ) {
      continue;
    }

    // Both operands must be the two parameters; `a + captured` is not a
    // pure wrapper and must not be inlined.
    const [firstParam, secondParam] = fn.params.map((param) => param.name);
    let isNormal;
    if (returned.left.name === firstParam && returned.right.name === secondParam) {
      isNormal = true;
    } else if (returned.left.name === secondParam && returned.right.name === firstParam) {
      isNormal = false;
    } else {
      continue;
    }

    wrappers.set(prop.key.value, { operator: returned.operator, isNormal });
  }

  binding.referencePaths.slice().reverse().forEach((refPath) => {
    const memberExpr = refPath.parentPath;
    const callExpr = memberExpr?.parentPath;
    if (!memberExpr || !callExpr) return;

    // Require obj['key'](...): the member expression must be the callee, not
    // merely one of the arguments of some other call.
    if (
      memberExpr.node.type !== 'MemberExpression' ||
      memberExpr.node.object.name !== objName ||
      memberExpr.node.property.type !== 'StringLiteral' ||
      callExpr.node.type !== 'CallExpression' ||
      callExpr.node.callee !== memberExpr.node
    ) {
      return;
    }

    const funcInfo = wrappers.get(memberExpr.node.property.value);
    const args = callExpr.node.arguments;
    if (!funcInfo || args.length !== 2) return;
    if (args.some((arg) => arg.type === 'SpreadElement')) return;

    const binaryExpr = funcInfo.isNormal
      ? types.binaryExpression(funcInfo.operator, args[0], args[1])
      : types.binaryExpression(funcInfo.operator, args[1], args[0]);

    // Before/after trace of each rewrite.
    console.log(memberExpr + '', generator(callExpr.node).code);
    console.log(memberExpr + '', generator(binaryExpr).code);
    callExpr.replaceWith(binaryExpr);
  });
}
// Find every lookup object — declared (`var o = {...}`) or assigned
// (`o = {...}`, possibly inside a comma sequence) — and hand it to
// handleObfuscatedObject, then write the regenerated source.
traverse(ast, {
  VariableDeclarator(path) {
    const { id, init } = path.node;
    // Destructuring targets have no single name to look up.
    if (init?.type === 'ObjectExpression' && id.type === 'Identifier') {
      handleObfuscatedObject(path, id.name, init.properties);
    }
  },
  ExpressionStatement(path) {
    const expr = path.node.expression;
    // A comma sequence contributes each of its sub-expressions; anything else
    // is examined on its own.
    const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];

    for (const item of candidates) {
      if (
        item.type === 'AssignmentExpression' &&
        item.operator === '=' &&
        // `a.b = {...}` or a pattern on the left has no `.name`.
        item.left.type === 'Identifier' &&
        item.right.type === 'ObjectExpression'
      ) {
        handleObfuscatedObject(path, item.left.name, item.right.properties);
      }
    }
  },
});

const code = generator(ast).code;
fs.writeFile('./demoNewNew4.js', code, (err) => {
  // Surface write failures instead of silently producing no output file.
  if (err) {
    console.error('写入 ./demoNewNew4.js 失败:', err);
    process.exitCode = 1;
  }
});

不过还得考虑一下这种情况

1
2
3
4
'BKHUw': function (_a, _b) { 
var _junk = _0x42e822;
return _a === _b;
}

属性值是函数,但是函数体不是只有1句return,中间加了一句 var _junk = _0x42e822; 这样的垃圾语句,所以还需要修改一下判断条件:只判断函数体中最后一条语句是否是一个合法的 return BinaryExpression,并且检查 return 中的 left 和 right 是否都是函数参数里的变量

修改之后的代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/**
 * Remove "binary operator wrapper" indirection, tolerating junk statements in
 * front of the return: when a lookup object holds
 * `key: function (a, b) { <junk>; return a + b; }` (any binary operator,
 * either operand order), rewrite `obj["key"](x, y)` into `x + y`.
 *
 * Only the LAST statement of the wrapper body is inspected; statements before
 * it are assumed to be side-effect-free filler such as `var _junk = alias;`.
 *
 * References are processed last-to-first so that a wrapper call nested in the
 * arguments of another is rewritten before the call that contains it.
 *
 * @param {object} path       Babel NodePath used to look up the binding.
 * @param {string} objName    Name of the lookup-object variable.
 * @param {object[]} properties `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // key -> { operator, isNormal }. A Map avoids false hits on inherited
  // properties ("constructor", "toString", ...) of a plain object.
  const wrappers = new Map();

  for (const prop of properties) {
    const fn = prop.value;
    if (
      prop.type !== 'ObjectProperty' ||
      fn?.type !== 'FunctionExpression' ||
      fn.params.length !== 2 ||
      typeof prop.key.value !== 'string'
    ) {
      continue;
    }

    const bodyStatements = fn.body.body;
    // An empty body has no last statement; `?.` keeps that from throwing.
    const lastStmt = bodyStatements[bodyStatements.length - 1];
    if (lastStmt?.type !== 'ReturnStatement' || lastStmt.argument?.type !== 'BinaryExpression') {
      continue;
    }

    const { left, right, operator } = lastStmt.argument;
    if (left.type !== 'Identifier' || right.type !== 'Identifier') continue;

    // Both operands must be the two parameters, in either order.
    const [paramA, paramB] = fn.params.map((p) => p.name);
    let isNormal;
    if (left.name === paramA && right.name === paramB) {
      isNormal = true;
    } else if (left.name === paramB && right.name === paramA) {
      isNormal = false;
    } else {
      continue;
    }

    wrappers.set(prop.key.value, { operator, isNormal });
  }

  binding.referencePaths.slice().reverse().forEach((refPath) => {
    const memberExpr = refPath.parentPath;
    const callExpr = memberExpr?.parentPath;
    if (!memberExpr || !callExpr) return;

    // Require obj['key'](...): the member expression must be the callee, not
    // merely one of the arguments of some other call.
    if (
      memberExpr.node.type !== 'MemberExpression' ||
      memberExpr.node.object.name !== objName ||
      memberExpr.node.property.type !== 'StringLiteral' ||
      callExpr.node.type !== 'CallExpression' ||
      callExpr.node.callee !== memberExpr.node
    ) {
      return;
    }

    const funcInfo = wrappers.get(memberExpr.node.property.value);
    const args = callExpr.node.arguments;
    if (!funcInfo || args.length !== 2) return;
    if (args.some((arg) => arg.type === 'SpreadElement')) return;

    const binaryExpr = funcInfo.isNormal
      ? types.binaryExpression(funcInfo.operator, args[0], args[1])
      : types.binaryExpression(funcInfo.operator, args[1], args[0]);

    // Before/after trace of each rewrite.
    console.log(memberExpr + '', generator(callExpr.node).code);
    console.log(memberExpr + '', generator(binaryExpr).code);
    callExpr.replaceWith(binaryExpr);
  });
}

去除虚假指令

虚假指令,就是明显不会去执行的那种指令,比如

1
if ('dXFzu' !== "PIrIS") return layer["msg"]("\u91CD\u590D\u6B21\u6570\u53EA\u80FD\u57281\u523020\u4E4B\u95F4"), ![];else _0x390fdb['push']("[img]" + _0x13e9ed + "[/img]");

这里的if条件判断肯定为真,也就是说else那里不会执行到

对于这种语句,直接取真正执行的那条语句,替换掉整个语句

if语句的语法树如下

1
2
3
4
5
6
7
8
9
10
IfStatement
├── test: BinaryExpression (!==)
│ ├── left: StringLiteral ('dXFzu')
│ └── right: StringLiteral ('PIrIS')
├── consequent: BlockStatement
│ └── ReturnStatement
│ ├── ExpressionStatement: CallExpression (layer["msg"](...))
│ └── UnaryExpression (![])
└── alternate: ExpressionStatement
└── CallExpression (_0x390fdb["push"](...))

思路就是遍历所有的IfStatement节点,取出里面的test节点,如果test节点的left和right都是字符串字面量,就取出test节点中的operator进行计算:结果为真,则取出consequent分支里的语句,否则取出alternate分支里的语句,用取出的语句替换掉整个IfStatement节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the input file and parse it into an AST.
const jscode = fs.readFileSync("./aaa.js", {
encoding: "utf-8"
});
let ast = parser.parse(jscode);
/**
 * Fold an `if` whose condition compares two string literals
 * (`if ('dXFzu' !== "PIrIS") A; else B;`) down to the branch that actually
 * runs. The comparison is evaluated directly on the literal values, so no
 * source text is ever executed.
 *
 * Statements whose condition is not a string/string comparison, or whose
 * operator is not one of === !== == != < <= > >=, are left untouched.
 *
 * @param {object} path Babel NodePath of an IfStatement.
 */
function handleObfuscatedIfStatement(path) {
  const { test, consequent, alternate } = path.node;

  if (
    test.type !== 'BinaryExpression' ||
    test.left.type !== 'StringLiteral' ||
    test.right.type !== 'StringLiteral'
  ) {
    return;
  }

  const lhs = test.left.value;
  const rhs = test.right.value;
  let result;
  switch (test.operator) {
    // For two strings the loose and strict operators behave identically.
    case '===':
    case '==':
      result = lhs === rhs;
      break;
    case '!==':
    case '!=':
      result = lhs !== rhs;
      break;
    case '<':
      result = lhs < rhs;
      break;
    case '<=':
      result = lhs <= rhs;
      break;
    case '>':
      result = lhs > rhs;
      break;
    case '>=':
      result = lhs >= rhs;
      break;
    default:
      return;
  }
  console.log(`${JSON.stringify(lhs)} ${test.operator} ${JSON.stringify(rhs)}`, result);

  const replacement = result ? consequent : alternate;
  if (!replacement) {
    // e.g. `if ('a' === 'b') foo();` with no else: nothing is left to run.
    path.remove();
    return;
  }
  if (replacement.type !== 'BlockStatement') {
    path.replaceWith(replacement);
    return;
  }

  // Splicing the block's statements into the parent is only safe when the
  // `if` sits in a statement list and the block declares nothing block-scoped
  // (let/const/class/function) that could clash with the surrounding scope.
  const declaresBlockScoped = replacement.body.some(
    (stmt) =>
      (stmt.type === 'VariableDeclaration' && stmt.kind !== 'var') ||
      stmt.type === 'ClassDeclaration' ||
      stmt.type === 'FunctionDeclaration'
  );
  if (path.inList && !declaresBlockScoped) {
    // replaceWith can only insert one node; a block may hold several.
    path.replaceWithMultiple(replacement.body);
  } else {
    path.replaceWith(replacement);
  }
}

// Fold every constant string-comparison `if` in the AST, then write the
// regenerated source.
traverse(ast, {
  IfStatement(path) {
    handleObfuscatedIfStatement(path);
  },
});

const code = generator(ast).code;
fs.writeFile('./aaaNew.js', code, (err) => {
  // Surface write failures instead of silently producing no output file.
  if (err) {
    console.error('写入 ./aaaNew.js 失败:', err);
    process.exitCode = 1;
  }
});

这里加的这条判断逻辑是为了处理if或者else的代码块中有多条语句的情况

1
2
3
4
else if (replacement.type === 'BlockStatement') {
// 如果代码块中有多条语句,不能直接replaceWith,replaceWith只能替换1条
path.replaceWithMultiple(replacement.body);
}

比如这种代码

1
2
3
4
5
if('a' === 'b')
{
1+1;
2+2
}

里面的if代码块也就是consequent是一个BlockStatement,BlockStatement的body是一个语句数组(这里是两个ExpressionStatement),也就是存储了多条语句

此外,还有一种虚假指令,就是三元(?:)表达式。这种表达式在AST中是一个ConditionalExpression节点,它的test子节点是一个BinaryExpression;如果BinaryExpression节点的left和right都是字符串字面量,就进行计算:结果为真则取出consequent分支的表达式,否则取出alternate分支的表达式,用取出的表达式替换掉整个ConditionalExpression节点

1
if (_0x34db88['err']) layer["msg"](_0x34db88["msg"]);else "dXuwI" !== "dXuwI" ? _0x164735['rmYNL'](_0xd9f6eb) : (layer["msg"]("\u5220\u9664\u6210\u529F"), _0x50e443["remove"]());

代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/**
 * Fold a ternary whose condition compares two string literals
 * (`"dXuwI" !== "dXuwI" ? A : B`) down to the branch that is actually taken.
 *
 * The comparison is evaluated directly on the literal values. Building a
 * source string from them and running it through eval breaks — or executes
 * attacker-chosen code — as soon as a value contains a quote, a backslash or
 * a newline, and the input here is untrusted obfuscated script.
 *
 * Expressions whose condition is not a string/string comparison, or whose
 * operator is not one of === !== == != < <= > >=, are left untouched.
 *
 * @param {object} path Babel NodePath of a ConditionalExpression.
 */
function handleObfuscatedConditionalExpression(path) {
  const { test, consequent, alternate } = path.node;

  if (
    test.type !== 'BinaryExpression' ||
    test.left.type !== 'StringLiteral' ||
    test.right.type !== 'StringLiteral'
  ) {
    return;
  }

  const lhs = test.left.value;
  const rhs = test.right.value;
  let result;
  switch (test.operator) {
    // For two strings the loose and strict operators behave identically.
    case '===':
    case '==':
      result = lhs === rhs;
      break;
    case '!==':
    case '!=':
      result = lhs !== rhs;
      break;
    case '<':
      result = lhs < rhs;
      break;
    case '<=':
      result = lhs <= rhs;
      break;
    case '>':
      result = lhs > rhs;
      break;
    case '>=':
      result = lhs >= rhs;
      break;
    default:
      return;
  }

  // Keep only the branch that is taken.
  path.replaceWith(result ? consequent : alternate);
}

去除未引用代码

就是找到没有被调用的标识符,也就是遍历标识符的绑定,然后如果引用为0,就删除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Sample function after the earlier passes: the statements that do real work
// no longer touch the lookup objects, so the aliases and objects declared
// below are leftovers.
function logout() {
// Alias of the decryptor; still read once, by the nested alias _0x10780b.
var _0x46a457 = _0x1c34,
// Lookup object that is not referenced anywhere in this function.
_0x450081 = {
'IvBNU': function (_0x53be2d, _0x5a0425) {
return _0x53be2d(_0x5a0425);
},
'fyixX': "span",
'NwDqh': function (_0x2da198, _0x472408) {
return _0x2da198 - _0x472408;
},
'OVGne': function (_0x1e906e, _0x2aae97) {
return _0x1e906e !== _0x2aae97;
},
'PLaKy': "pqBCQ",
'JRihc': "endpoints"
};
localStorage['removeItem']("user"), removeCookie('token'), removeCookie("endpoints"), $["get"]("/logout", function () {
// Second-level alias; only read by the junk declaration inside 'hFLNZ'.
var _0x10780b = _0x46a457,
// Lookup object that is not referenced anywhere in this callback.
_0x19e1f1 = {
'kdxpu': function (_0x3c46fc, _0x7175de) {
return _0x3c46fc(_0x7175de);
},
'SEAUA': function (_0x3ae857, _0x3bb569) {
// Junk declaration: the variable is never used.
var _0x1a7a44 = _0x1c34;
return _0x3ae857(_0x3bb569);
},
'LnNMW': "span",
'hFLNZ': function (_0x334318, _0x859ce6) {
// Junk declaration: the variable is never used.
var _0x1243d6 = _0x10780b;
return _0x334318 - _0x859ce6;
},
'kyPvQ': function (_0xfad184, _0x3a964c) {
return _0xfad184(_0x3a964c);
}
};
location["href"] = '/';
});
}

比如这段代码,思路是遍历Identifier节点,然后判断Identifier节点的父节点是否为 var _0x46a457 = _0x1c34 这种声明,也就是目标Identifier节点得是变量声明VariableDeclarator中的id;然后判断VariableDeclarator的init是否为对象表达式ObjectExpression或者是Identifier。如果是,并且这个标识符没有被引用,就删除Identifier节点的父节点。比如 var _0x46a457 = _0x1c34 中,_0x46a457 是遍历到的Identifier节点,如果它没有被引用,就删除整个VariableDeclarator变量声明

但是要注意,为什么这里不直接判断Identifier的引用为0,然后直接删除呢?因为有的变量声明的值携带了setInterval等函数时,哪怕变量没有被引用,也会触发setInterval内设置的函数

1
2
3
var test = setInterval(function (){
console.log("xxxxxxxxxxxxxxxxxx")
},1000)

代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the output of the previous pass and parse it into an AST.
const jscode = fs.readFileSync("./demoNewNew7.js", {
encoding: "utf-8"
});
let ast = parser.parse(jscode);

/**
 * Remove a dead variable declaration.
 *
 * The declarator is removed only when all of the following hold:
 *  - `path` is the identifier being declared (the declarator's `id`);
 *  - the variable is never read and never reassigned — removing the
 *    declaration of a variable that is still written to would turn those
 *    writes into implicit globals (or ReferenceErrors in strict mode);
 *  - the initializer is an object literal or a bare identifier. Other
 *    initializers such as `setInterval(...)` are kept because evaluating
 *    them has effects even when the result is unused.
 *
 * @param {object} path Babel NodePath of an Identifier.
 */
function handleTrashIntruction(path) {
  const declarator = path.parentPath;
  if (!declarator || !declarator.isVariableDeclarator()) return;
  // Ignore identifiers that merely appear on the right-hand side.
  if (declarator.node.id !== path.node) return;

  const binding = path.scope.getBinding(path.node.name);
  if (!binding || binding.referencePaths.length !== 0 || !binding.constant) return;

  const init = declarator.node.init;
  if (types.isObjectExpression(init) || types.isIdentifier(init)) {
    console.log(declarator + '');
    declarator.remove();
  }
}

// Removing one declaration can leave another one unused (for example an alias
// that was only read by the declaration just removed), so the pass is repeated
// until the generated code stops changing. Each pass re-parses the previous
// output so that scope and binding information is rebuilt from scratch.
const MAX_PASSES = 50; // safety cap; the loop normally ends as soon as a pass changes nothing
let code = generator(ast).code;
for (let pass = 0; pass < MAX_PASSES; pass++) {
  ast = parser.parse(code);
  traverse(ast, {
    Identifier(path) {
      handleTrashIntruction(path);
    }
  });
  const next = generator(ast).code;
  if (next === code) break;
  code = next;
}

fs.writeFile('./demoNewNew8.js', code, (err) => {
  // Surface write failures instead of silently dropping them.
  if (err) throw err;
});

但是这样经过处理之后的代码量还是很大,于是我考虑是不是之前去除函数花指令的时候,没有把情况考虑完全。

去除函数花指令——优化

之前去除函数花指令部分还不够完善,比如对象的属性值是函数,但是参数有很多个

1
2
3
4
5
6
7
8
9
10
'RisKf': function (_0x23cfb3, _0x53e1a7, _0x57712f) {                
return _0x23cfb3(_0x53e1a7, _0x57712f);
},
'gkGGU': function (_0x50ca5b, _0x404328, _0x147c72, _0x16bae0) {
return _0x50ca5b(_0x404328, _0x147c72, _0x16bae0);
}
'aUMKF': function (_0x77f479, _0x215b9e) {
var _0x5402f5 = _0x1c34;
return _0x77f479(_0x215b9e);
}

也就是说,处理的逻辑是,只要 return 的是一个 CallExpression,并且它是调用函数参数列表中的第一个参数作为函数名(callee),其余所有参数作为 arguments,就可以认为是要还原为直接调用形式。

并且函数体代码不能仅仅局限于只有1条return,还需要扩大范围,比如上面的第三种情况,虽然 body.body.length > 1,但实际上只要 最后一句是合法的 return 调用,就可以继续处理。正确的处理流程应该如下

1
2
3
4
函数体中最后一条语句是 ReturnStatement
return的内容是 CallExpression
callee是Identifier,且是函数第一个参数
arguments是后面所有参数

最终的代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the input script as UTF-8 text and parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew5.js", "utf-8");
let ast = parser.parse(jscode);
/**
 * Inlines calls that go through "call forwarding" wrappers stored on an object.
 *
 * A property qualifies when its value is a function expression whose last
 * statement is `return p0(p1, ..., pn)`, i.e. it calls its first parameter
 * with all remaining parameters, in order. Wrappers with a single parameter
 * (`function (f) { return f(); }`) qualify as well.
 * NOTE(review): statements in front of that return are not inspected; this
 * assumes they are side-effect-free junk such as `var x = _0x1c34;`.
 *
 * Every call `obj['key'](f, a, b)` whose argument count equals the wrapper's
 * parameter count is replaced by `f(a, b)`.
 *
 * @param {object} path - Babel NodePath whose scope is used to resolve `objName`.
 * @param {string} objName - Name of the variable holding the wrapper object.
 * @param {Array<object>} properties - Property nodes of the object expression.
 * @returns {void}
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // A Map rather than a plain object: a key such as "toString" must not
  // resolve to an inherited member and be mistaken for a recorded wrapper.
  const wrapperArity = new Map();

  for (const prop of properties) {
    // Spread elements have no key and object methods have no value.
    if (prop.key?.type !== 'StringLiteral' || prop.value?.type !== 'FunctionExpression') continue;

    const params = prop.value.params;
    if (params.length < 1) continue;

    const statements = prop.value.body.body;
    const returnStmt = statements[statements.length - 1]; // undefined for an empty body
    if (returnStmt?.type !== 'ReturnStatement' || returnStmt.argument?.type !== 'CallExpression') continue;

    const { callee, arguments: forwarded } = returnStmt.argument;
    // The callee must be the first parameter ...
    if (callee.type !== 'Identifier' || callee.name !== params[0].name) continue;
    // ... and the arguments must be exactly the remaining parameters, in order.
    if (forwarded.length !== params.length - 1) continue;
    const forwardsInOrder = forwarded.every(
      (arg, i) => arg.type === 'Identifier' && arg.name === params[i + 1].name
    );
    if (!forwardsInOrder) continue;

    wrapperArity.set(prop.key.value, params.length);
  }

  binding.referencePaths.forEach(refPath => {
    const memberPath = refPath.parentPath;
    const memberNode = memberPath.node;
    if (
      memberNode.type !== 'MemberExpression' ||
      memberNode.object.name !== objName ||
      memberNode.property.type !== 'StringLiteral'
    ) return;

    // The member expression must be the thing being called, not an argument.
    const callPath = memberPath.parentPath;
    if (callPath.node.type !== 'CallExpression' || callPath.node.callee !== memberNode) return;

    const arity = wrapperArity.get(memberNode.property.value);
    if (arity === undefined) return;

    // Inline only when the call passes exactly what the wrapper declares;
    // the wrapper would drop extra arguments, and a spread hides the real count.
    const args = callPath.node.arguments;
    if (args.length !== arity) return;
    if (args.some(arg => arg.type === 'SpreadElement')) return;

    const [callee, ...fnArgs] = args;
    callPath.replaceWith(types.callExpression(callee, fnArgs));
  });
}



// Feed every object literal that is bound to a name into handleObfuscatedObject:
// `var obj = {...}` as well as `obj = {...}`, alone or inside a comma sequence.
traverse(ast, {
  VariableDeclarator(path) {
    if (path.node.init?.type === 'ObjectExpression') {
      handleObfuscatedObject(path, path.node.id.name, path.node.init.properties);
    }
  },
  ExpressionStatement(path) {
    const expr = path.node.expression;
    // Treat a single expression as a one-element sequence so that both shapes
    // share the same handling.
    const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];
    for (const item of candidates) {
      if (item.type === 'AssignmentExpression' && item.right.type === 'ObjectExpression') {
        handleObfuscatedObject(path, item.left.name, item.right.properties);
      }
    }
  }
});

let code = generator(ast).code;
fs.writeFile('./demoNewNew6.js', code, (err) => {
  // Surface write failures instead of silently dropping them.
  if (err) throw err;
});

去除函数花指令——第二次优化

类似于这种函数的属性

1
2
3
'eHAHE': function (_0x23a4d9) {                
return _0x23a4d9();
},

也就是只接收一个参数,并把这个参数直接当作 `return` 语句里 `CallExpression` 的 `callee` 来调用(不带任何实参),然后把调用结果返回。

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the input script as UTF-8 text and parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew6.js", "utf-8");
let ast = parser.parse(jscode);
/**
 * Inlines calls that go through zero-argument wrappers stored on an object.
 *
 * A property qualifies when its value is a one-parameter function expression
 * whose body contains `return p();` at its top level, where `p` is that
 * parameter. Every call `obj['key'](f)` is then replaced by `f()`.
 *
 * @param {object} path - Babel NodePath whose scope is used to resolve `objName`.
 * @param {string} objName - Name of the variable holding the wrapper object.
 * @param {Array<object>} properties - Property nodes of the object expression.
 * @returns {void}
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  const wrapperKeys = new Set();

  for (const prop of properties) {
    // Spread elements have no key and object methods have no value.
    if (prop.key?.type !== 'StringLiteral' || prop.value?.type !== 'FunctionExpression') continue;

    const { params, body } = prop.value;
    if (params.length !== 1) continue;

    const paramName = params[0].name;
    const returnsBareCall = body.body.some(stmt =>
      stmt.type === 'ReturnStatement' &&
      stmt.argument?.type === 'CallExpression' &&
      stmt.argument.callee.type === 'Identifier' &&
      stmt.argument.callee.name === paramName &&
      stmt.argument.arguments.length === 0
    );
    if (returnsBareCall) wrapperKeys.add(prop.key.value);
  }

  binding.referencePaths.forEach(refPath => {
    const memberExpr = refPath.parentPath;
    const memberNode = memberExpr.node;
    if (
      memberNode.type !== 'MemberExpression' ||
      memberNode.object.name !== objName ||
      memberNode.property.type !== 'StringLiteral'
    ) return;

    // The wrapper must be the callee. In `foo(obj['key'])` it is only an
    // argument, and rewriting that call would change what the program does.
    const callExpr = memberExpr.parentPath;
    if (callExpr.node.type !== 'CallExpression' || callExpr.node.callee !== memberNode) return;

    if (!wrapperKeys.has(memberNode.property.value)) return;

    const args = callExpr.node.arguments;
    if (args.length !== 1 || args[0].type === 'SpreadElement') return;

    callExpr.replaceWith(types.callExpression(args[0], []));
  });
}

// Feed every object literal that is bound to a name into handleObfuscatedObject:
// `var obj = {...}` as well as `obj = {...}`, alone or inside a comma sequence.
traverse(ast, {
  VariableDeclarator(path) {
    if (path.node.init?.type === 'ObjectExpression') {
      handleObfuscatedObject(path, path.node.id.name, path.node.init.properties);
    }
  },
  ExpressionStatement(path) {
    const expr = path.node.expression;
    // Treat a single expression as a one-element sequence so that both shapes
    // share the same handling.
    const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];
    for (const item of candidates) {
      if (item.type === 'AssignmentExpression' && item.right.type === 'ObjectExpression') {
        handleObfuscatedObject(path, item.left.name, item.right.properties);
      }
    }
  }
});

let code = generator(ast).code;
fs.writeFile('./demoNewNew7.js', code, (err) => {
  // Surface write failures instead of silently dropping them.
  if (err) throw err;
});

此时还原出的代码,从2700多行减少到800行,然后剩下的一些代码可以手动删一下,比如大数组、大数组还原、字符串解密函数等代码。

去除switch混淆

这里只针对这一种情况,因为也是代码中的情况

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Sample of switch-based control-flow flattening. The order string "3|0|2|1|4"
// decides which case runs on each iteration: create the canvas (3), copy the
// width (0), copy the height (2), draw the image (1), return the canvas (4).
function imageToCanvas(_0x16d162) {
// Execution order of the cases, and the index of the next entry to run.
var _0x2030a7 = "3|0|2|1|4"["split"]('|'),
_0x222463 = 0x0;
while (!![]) {
switch (_0x2030a7[_0x222463++]) {
case '0':
_0x4e7708['width'] = _0x16d162["width"];
continue;
case '1':
_0x4e7708["getContext"]('2d')["drawImage"](_0x16d162, 0x0, 0x0);
continue;
case '2':
_0x4e7708["height"] = _0x16d162['height'];
continue;
case '3':
var _0x4e7708 = document["createElement"]("canvas");
continue;
case '4':
return _0x4e7708;
}
// Reached only when no case ended with continue or return; leaves the loop.
break;
}
}

这里给两个代码

第一个如下,这个代码只能处理单层switch混淆,目前无法处理switch嵌套的混淆,如果有多层switch嵌套混淆,建议使用第二个,该代码的思路如下

1
2
3
4
提取 switchKeyArray = "3|0|2|1|4".split('|')
映射出 caseMap = { '0': Node, '1': Node, ... }
依次取出对应 case 节点,拼成新语句列表
替换原始 while-switch 为这些语句
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/**
 * Finds the execution order of a flattened switch among sibling statements.
 *
 * Looks for `var <arrayName> = "3|0|2|1|4".split("|")` (or `["split"]`) and
 * returns the resulting key list. When several declarations match, the last
 * one wins.
 *
 * @param {Array<object>} statements - Statements of the block containing the loop.
 * @param {string} arrayName - Name of the order array used by the switch.
 * @returns {Array<string>|null} Case keys in execution order, or null if not found.
 */
function findSwitchOrder(statements, arrayName) {
  let order = null;
  for (const stmt of statements) {
    if (stmt.type !== 'VariableDeclaration') continue;
    // One declaration may hold several declarators, e.g. `var a = 1, b = 2;`.
    for (const decl of stmt.declarations) {
      const init = decl.init;
      if (decl.id.name !== arrayName || init?.type !== 'CallExpression') continue;
      if (init.callee.type !== 'MemberExpression') continue;

      const { object, property, computed } = init.callee;
      // Accept both `"..."["split"]` and `"...".split`.
      let method;
      if (property.type === 'StringLiteral') {
        method = property.value;
      } else if (!computed && property.type === 'Identifier') {
        method = property.name;
      }
      // The receiver must be a string literal, otherwise the order is unknown.
      if (method !== 'split' || object.type !== 'StringLiteral') continue;

      // Use the separator actually passed to split(); fall back to "|".
      const separatorNode = init.arguments[0];
      const separator = separatorNode?.type === 'StringLiteral' ? separatorNode.value : '|';
      order = object.value.split(separator);
      break;
    }
  }
  return order;
}

/**
 * Replaces a flattened `while (true) { switch (order[i++]) { ... } break; }`
 * loop by the case bodies laid out in their execution order.
 *
 * Nothing is changed unless the first statement of the loop body is a switch
 * on `<identifier>[<update expression>]` and a matching order declaration is
 * found next to the loop. The order declaration itself is left in place.
 *
 * @param {object} path - Babel NodePath of the WhileStatement.
 * @returns {void}
 */
function handleObfuscatedSwitch(path) {
  const { node } = path;
  if (node.type !== 'WhileStatement') return;

  // The loop body must be a block whose first statement is the switch.
  const switchNode = node.body.body?.[0];
  if (switchNode?.type !== 'SwitchStatement') return;

  // The discriminant must look like `_0x2030a7[_0x222463++]`.
  const { discriminant } = switchNode;
  if (
    discriminant.type !== 'MemberExpression' ||
    discriminant.object.type !== 'Identifier' ||
    discriminant.property.type !== 'UpdateExpression'
  ) return;
  const arrayName = discriminant.object.name;

  // Statements of the block that contains the while loop.
  const siblings = path.parentPath.node.body;
  if (!Array.isArray(siblings)) return;

  const orderArray = findSwitchOrder(siblings, arrayName);
  if (!orderArray) return;

  // Case key -> case statements without the trailing `continue`.
  const caseMap = new Map();
  for (const caseNode of switchNode.cases) {
    if (caseNode.test && caseNode.consequent) {
      caseMap.set(
        caseNode.test.value,
        caseNode.consequent.filter(n => n.type !== 'ContinueStatement')
      );
    }
  }

  const orderedStatements = [];
  for (const key of orderArray) {
    const statements = caseMap.get(key);
    // A key without a case falls through to the `break` after the switch and
    // ends the loop, so nothing after it would ever run.
    if (!statements) break;
    orderedStatements.push(...statements);
  }

  // Leave the loop untouched when there is nothing to put in its place.
  if (orderedStatements.length === 0) return;

  path.replaceWithMultiple(orderedStatements);
}

// Unroll one flattened switch per traversal and start over after each change,
// so that nested dispatchers are always handled on a freshly traversed tree.
// Loops that are not flattened switches are skipped rather than ending the pass.
let unrolled;
do {
  unrolled = false;
  traverse(ast, {
    WhileStatement(path) {
      const before = path.node;
      handleObfuscatedSwitch(path);
      // After a replacement the path no longer holds the original while node.
      if (path.node !== before) {
        unrolled = true;
        path.stop();
      }
    }
  });
} while (unrolled);

第二个代码思路如下:

1
2
3
4
5
找MemberExpression,也就是"3|0|2|1|4"["split"]('|')
MemberExpression的父节点就是变量声明节点var _0x2030a7 = "3|0|2|1|4"["split"]('|')
变量声明节点的兄弟节点,在这里是声明节点的后一句,就是while循环的地方,通过.getSibling(varPath.key + 1)
然后取出索引和代码,构造数组
在后面就和前面的代码一样了,只不过没有替换,而是删掉旧节点

code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

const jscode = fs.readFileSync("./aaa.js", {
  encoding: "utf-8"
});
let ast = parser.parse(jscode);

/**
 * Unrolls the first flattened switch found in the tree.
 *
 * The expected pattern is a declaration `var order = "3|0|2|1|4"["split"]('|'), i = 0;`
 * immediately followed by `while (...) { switch (order[i++]) { ... } break; }`.
 * The loop is replaced, in place, by the case bodies in execution order and
 * the declaration is removed.
 * NOTE(review): the whole declaration is removed, which assumes it holds only
 * the order array and its index counter, as in the sample.
 *
 * @param {object} tree - Babel AST to modify.
 * @returns {boolean} true when a switch was unrolled, false when none was found.
 */
function unflattenNextSwitch(tree) {
  let changed = false;
  traverse(tree, {
    MemberExpression(path) {
      const { object, property } = path.node;
      if (!types.isStringLiteral(object) || !types.isStringLiteral(property, { value: 'split' })) return;

      // `"..."["split"]` must be the callee of the call that initialises a declarator.
      const callPath = path.parentPath;
      if (!callPath.isCallExpression() || callPath.node.callee !== path.node) return;
      const declaratorPath = callPath.parentPath;
      if (!declaratorPath.isVariableDeclarator() || !types.isIdentifier(declaratorPath.node.id)) return;
      const varPath = declaratorPath.parentPath;
      if (!varPath.isVariableDeclaration() || typeof varPath.key !== 'number') return;

      // The statement right after the declaration must be the dispatcher loop.
      const whilePath = varPath.getSibling(varPath.key + 1);
      if (!whilePath.isWhileStatement() || !types.isBlockStatement(whilePath.node.body)) return;
      const switchNode = whilePath.node.body.body[0];
      if (!types.isSwitchStatement(switchNode)) return;

      // The switch must dispatch on the array declared above.
      const discriminant = switchNode.discriminant;
      if (
        !types.isMemberExpression(discriminant) ||
        !types.isIdentifier(discriminant.object, { name: declaratorPath.node.id.name })
      ) return;

      // Case key -> all statements of the case except the trailing `continue`.
      const caseBodies = new Map();
      for (const caseNode of switchNode.cases) {
        if (!caseNode.test) continue;
        caseBodies.set(
          caseNode.test.value,
          caseNode.consequent.filter(stmt => !types.isContinueStatement(stmt))
        );
      }

      const ordered = [];
      for (const key of object.value.split("|")) {
        const statements = caseBodies.get(key);
        // A key without a case ends the original loop, so stop here as well.
        if (!statements) break;
        ordered.push(...statements);
      }
      if (ordered.length === 0) return;

      // Put the statements where the loop was, so code after the loop stays after them.
      whilePath.replaceWithMultiple(ordered);
      varPath.remove();

      // Handle one switch per traversal; the caller starts a new traversal afterwards.
      changed = true;
      path.stop();
    }
  });
  return changed;
}

// Repeat until no flattened switch is left; each successful pass removes one loop.
while (unflattenNextSwitch(ast)) {
  // intentionally empty
}

let code = generator(ast).code;
fs.writeFile('./aaa3.js', code, (err) => {
  // Surface write failures instead of silently dropping them.
  if (err) throw err;
});

总结

其实有的地方还不够完善,只是针对大部分混淆的情况,还有一些表达式和函数的情况没有完全考虑进去,毕竟AST混淆的话针对每个混淆都不尽相同,遇到的时候再做改动。