网页逆向-对某混淆加密的代码解混淆

John Who

2025-04-21

逆向案例

https://www.superbed.cn/

查看网页源码，发现是js加密，v7的混淆

字符串解密

现在要做的是把字符串解密

把整段代码拷贝到文件中

在还原之前，需要知道里面用了哪些混淆的手段，混淆还原是代码帮助去实现思路，而不是代码把一切都做了

通过源代码中的一些片段可以猜测，这里的_0x1c34函数就是字符串解密函数

在源代码中找到该函数，然后保存到test.js中，代码的第一行var _0x408fc0 = _0x408f();调用了_0x408f函数，这个函数实际上是一个字符串数组

这个数组不能直接拿来用，而且是乱序的，还原数组顺序的代码在代码的开头，明显这里有unshift等字符

// Array-rotation bootstrap emitted by the obfuscator (tagged 'jsjiami.com.v7').
// The outer IIFE is called with (0xbf00, 0x6d3f1, _0x408f, 0xc1). It shifts the first
// argument right by 8 bits, seeds two string fragments and then runs the inner
// function, which keeps moving elements of the array returned by _0x408f() until a
// checksum computed from decrypted entries equals the second argument (0x6d3f1).
var _0xodV = 'jsjiami.com.v7';
var _0x242699 = _0x1c34;
(function(_0x3a1be9, _0x3aed09, _0x3a5fad, _0xea80fc, _0x33c703, _0x113dc2, _0xbcf3b4) {
    return _0x3a1be9 = _0x3a1be9 >> 0x8,
    _0x113dc2 = 'hs',
    _0xbcf3b4 = 'hs',
    function(_0x5e21d7, _0x3b431f, _0x3ed0b5, _0x3aae96, _0x41b0b9) {
        var _0x457660 = _0x1c34;
        // 'tfi' + 'hs' and 'hs' + 'up' are reversed by the helper passed as the third
        // argument, yielding the method names 'shift' and 'push'.
        _0x3aae96 = 'tfi',
        _0x113dc2 = _0x3aae96 + _0x113dc2,
        _0x41b0b9 = 'up',
        _0xbcf3b4 += _0x41b0b9,
        _0x113dc2 = _0x3ed0b5(_0x113dc2),
        _0xbcf3b4 = _0x3ed0b5(_0xbcf3b4),
        _0x3ed0b5 = 0x0;
        // _0x5e21d7 is the outer third argument (_0x408f); calling it returns the array.
        var _0x194433 = _0x5e21d7();
        while (!![] && --_0xea80fc + _0x3b431f) {
            try {
                // Checksum built from parseInt() of ten decrypted strings.
                _0x3aae96 = parseInt(_0x457660(0x191, 'jSfG')) / 0x1 + parseInt(_0x457660(0x7dd, 'pm#g')) / 0x2 * (parseInt(_0x457660(0x7d9, 'ntPS')) / 0x3) + parseInt(_0x457660(0x196, 'JRwz')) / 0x4 + parseInt(_0x457660(0x763, 'mCY8')) / 0x5 * (-parseInt(_0x457660(0x36e, 'jSfG')) / 0x6) + parseInt(_0x457660(0x66b, 'pm#g')) / 0x7 * (parseInt(_0x457660(0x2c0, '5Pg*')) / 0x8) + parseInt(_0x457660(0x503, '6oXn')) / 0x9 + -parseInt(_0x457660(0x362, '21Kl')) / 0xa;
            } catch (_0x5843bf) {
                // If decryption/parsing throws, fall back to the stashed value.
                _0x3aae96 = _0x3ed0b5;
            } finally {
                // Always remove the first array element ('shift').
                _0x41b0b9 = _0x194433[_0x113dc2]();
                // While the counter is still >= the shifted first argument, the removed
                // elements are stashed in the spare variables instead of being re-added.
                if (_0x3a1be9 <= _0xea80fc)
                    _0x3ed0b5 ? _0x33c703 ? _0x3aae96 = _0x41b0b9 : _0x33c703 = _0x41b0b9 : _0x3ed0b5 = _0x41b0b9;
                else {
                    if (_0x3ed0b5 == _0x33c703['replace'](/[AHTfkyeDFIYgUBNOWtp=]/g, '')) {
                        if (_0x3aae96 === _0x3b431f) {
                            // Checksum matches: put the element back in front ('unshift') and stop.
                            _0x194433['un' + _0x113dc2](_0x41b0b9);
                            break;
                        }
                        // Otherwise append it at the end ('push'), i.e. rotate by one.
                        _0x194433[_0xbcf3b4](_0x41b0b9);
                    }
                }
            }
        }
    }(_0x3a5fad, _0x3aed09, function(_0x885bf6, _0x4aaec1, _0x4af83a, _0x42c9fa, _0x58033a, _0x525257, _0x2a9b24) {
        // String helper: split into characters, reverse, join. The '\x76' passed to
        // reverse() has no effect because Array.prototype.reverse takes no arguments.
        return _0x4aaec1 = '\x73\x70\x6c\x69\x74',
        _0x885bf6 = arguments[0x0],
        _0x885bf6 = _0x885bf6[_0x4aaec1](''),
        _0x4af83a = '\x72\x65\x76\x65\x72\x73\x65',
        _0x885bf6 = _0x885bf6[_0x4af83a]('\x76'),
        _0x42c9fa = '\x6a\x6f\x69\x6e',
        (0x19c6cd,
        _0x885bf6[_0x42c9fa](''));
    });
}(0xbf00, 0x6d3f1, _0x408f, 0xc1),
_0x408f) && (_0xodV = 0x26da);

最简单的处理方式就是打断点，然后把还原之后的数组copy到代码中替换_0x408f函数

后面就可以正常解密字符串了

现在要做的就是找到源代码中所有调用了解密函数_0x1c34（包括它的别名）的地方，把函数调用的结果替换到原来的位置

可以发现代码中函数的赋值语句是var _0x4dcb36 = _0x1c34，该类语句的AST是VariableDeclaration节点，里面有一个declarations数组，数组中的每个元素都是一个VariableDeclarator节点

比如这里的赋值语句

// One `var` statement with two declarators: an alias of the decrypt function
// (_0x4dcb36 = _0x1c34) and an object whose values are produced through that alias.
var _0x4dcb36 = _0x1c34, _0x5931aa = {
    'lrrge': _0x4dcb36(0x868, '%mrq'),
    'GiGHq': _0x4dcb36(0x2f8, 'Q6ts'),
    'MLWYc': 'error'
}

通过逗号分隔，是两个赋值语句（_0x4dcb36和_0x5931aa），在VariableDeclaration节点下有declarations数组，该数组有两个VariableDeclarator节点，该节点的id存放的是赋值语句=左边的标识符，init存放的是赋值语句=右边的表达式（这里分别是一个标识符和一个对象字面量）

现在需要遍历VariableDeclarator节点，当节点的init是目标函数_0x1c34的时候，取出节点的id属性，然后根据id属性，去遍历标识符的绑定，找到所有引用到该id标识符的地方，取出里面的参数，手动调用解密函数，再把调用的结果填回去

丢给AI写就完了

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the obfuscated source and parse it into a Babel AST.
const jscode = fs.readFileSync("./aaa.js", {
    encoding: "utf-8"
});
let ast = parser.parse(jscode);
// Placeholder: presumably the already-restored string array is pasted here.
let bigArr = [...]
// Scratch array intended for the arguments of a single decrypt call.
let args = []
// Placeholder: the body of the decrypt function is elided in this listing.
function _0x1c34(_0x1524a4, _0x1aee01) {
    ...
}
// Walk the AST: for every `var alias = _0x1c34` declarator, find the calls made
// through `alias`, evaluate them with the local decrypt function and replace each
// call with the resulting string literal.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        // Only direct aliases of the decrypt function bound to a plain identifier.
        if (!init || init.type !== 'Identifier' || init.name !== '_0x1c34') return;
        if (id.type !== 'Identifier') return;

        const funcName = id.name;
        console.log('匹配到的变量名:', funcName);

        const binding = path.scope.getBinding(funcName);
        if (!binding) return;

        binding.referencePaths.forEach(refPath => {
            const parent = refPath.parent;
            // Only handle `alias(...)`; other kinds of reference are left untouched.
            if (parent.type !== 'CallExpression' || parent.callee !== refPath.node) return;
            console.log(`调用位置 -> ${funcName}(`);

            // The call can only be evaluated statically when every argument is a literal.
            // Passing a placeholder for anything else would decrypt the wrong string.
            const allLiteral = parent.arguments.every(arg =>
                arg.type === 'NumericLiteral' || arg.type === 'StringLiteral');
            if (!allLiteral) return;

            // Per-call argument list (no state shared between calls).
            const callArgs = parent.arguments.map(arg => arg.value);
            console.log(callArgs)

            const result = _0x1c34(callArgs[0], callArgs[1]);
            console.log(`解密结果 -> ${result}`)
            // types.stringLiteral throws on non-strings; keep the original call in that case.
            if (typeof result !== 'string') return;

            refPath.parentPath.replaceWith(types.stringLiteral(result));
        })
    }
});
const code = generator(ast).code;
fs.writeFile('./demoNew.js', code, (err) => {
    // Report a failed write instead of silently producing no output file.
    if (err) console.error('Failed to write ./demoNew.js:', err);
});

但是这里，不是所有的函数被引用的地方都是CallExpression，还有其他情况，可以打印出来

if (parent.type === 'CallExpression' && parent.callee === refPath.node) {
    // ... (call handling elided in this excerpt)
}
else {
    // Any other kind of reference: print its parent node as source text for inspection.
    console.log(refPath.parentPath + '')
}

输出

_0x350a03 = _0x242699
_0x5f2cac = _0x379b39
_0x10780b = _0x46a457
_0x2fc4a9 = _0x4dcb36
_0x9f416d = _0x4dcb36
_0x59a36f = _0x149afc
_0x156e5c = _0x149afc
_0x42e822 = _0x149afc
_0x54f06d = _0x149afc
_0x146e87 = _0x149afc
_0x2f567 = _0x149afc
_0x396f76 = _0x149afc
_0x1bb606 = _0x332ed8
_0x354745 = _0x332ed8
_0x225487 = _0x332ed8
_0x4a387a = _0x332ed8
_0x4ccddb = _0x332ed8
_0x32e01c = _0x332ed8
_0x2d3aae = _0x395074
_0x5e7074 = _0x395074

这表示存在函数的递归赋值，也就是说，原先的解密函数是_0x1c34，把_0x1c34赋值给了_0x395074，然后又把_0x395074赋值给了_0x2d3aae，最终调用_0x2d3aae函数

解决方法是遍历所有CallExpression节点的最原始的绑定，如果最原始的绑定是_0x1c34，就去获取参数并解密，递归寻找祖先函数的代码如下

/**
 * Follow a chain of plain alias declarations (`var c = b; var b = a;`) and return
 * the name at the end of the chain.
 *
 * Accepted inputs:
 *   - an Identifier path: resolution starts from the identifier's own name;
 *   - a VariableDeclarator path: resolution starts from the name of its `init`
 *     (not its `id`), and only when `init` is an Identifier.
 *
 * @param {object} path Babel NodePath (Identifier or VariableDeclarator).
 * @returns {string|null} The last name reached, or null when the path is of another
 *   kind or the declarator's init is not an Identifier.
 */
function resolveCalleeName(path) {
    let name;

    if (path.isIdentifier()) {
        name = path.node.name;
    } else if (path.isVariableDeclarator()) {
        const start = path.node.init;
        if (!start || start.type !== 'Identifier') return null;
        name = start.name; // e.g. `b` for `const a = b`
    } else {
        return null;
    }

    // `seen` guards against cyclic alias chains (a = b; b = a).
    const seen = new Set();
    let scope = path.scope;

    while (name && !seen.has(name)) {
        seen.add(name);
        const binding = scope.getBinding(name);
        if (!binding || !binding.path.isVariableDeclarator()) break;

        const init = binding.path.node.init;
        if (!init || init.type !== 'Identifier') break;

        name = init.name;
        // Resolve the next hop where this alias was declared; looking it up from the
        // starting scope could hit an unrelated local variable that shadows the name.
        scope = binding.path.scope || scope;
    }

    return name;
}

最终的递归解密代码

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the obfuscated source and parse it into a Babel AST.
const jscode = fs.readFileSync("./demo.js", {
    encoding: "utf-8"
});
let ast = parser.parse(jscode);
// Placeholder: presumably the already-restored string array is pasted here.
let bigArr = [...]
// Scratch array intended for the arguments of a single decrypt call.
let args = []
// Decrypt function (body elided in this listing).
function _0x1c34(_0x1524a4, _0x1aee01) {
    //...
}
/**
 * Follow a chain of plain alias declarations (`var c = b; var b = a;`) and return
 * the name at the end of the chain.
 *
 * Accepted inputs:
 *   - an Identifier path: resolution starts from the identifier's own name;
 *   - a VariableDeclarator path: resolution starts from the name of its `init`
 *     (not its `id`), and only when `init` is an Identifier.
 *
 * @param {object} path Babel NodePath (Identifier or VariableDeclarator).
 * @returns {string|null} The last name reached, or null when the path is of another
 *   kind or the declarator's init is not an Identifier.
 */
function resolveCalleeName(path) {
    let name;

    if (path.isIdentifier()) {
        name = path.node.name;
    } else if (path.isVariableDeclarator()) {
        const start = path.node.init;
        if (!start || start.type !== 'Identifier') return null;
        name = start.name; // e.g. `b` for `const a = b`
    } else {
        return null;
    }

    // `seen` guards against cyclic alias chains (a = b; b = a).
    const seen = new Set();
    let scope = path.scope;

    while (name && !seen.has(name)) {
        seen.add(name);
        const binding = scope.getBinding(name);
        if (!binding || !binding.path.isVariableDeclarator()) break;

        const init = binding.path.node.init;
        if (!init || init.type !== 'Identifier') break;

        name = init.name;
        // Resolve the next hop where this alias was declared; looking it up from the
        // starting scope could hit an unrelated local variable that shadows the name.
        scope = binding.path.scope || scope;
    }

    return name;
}

// Walk the AST: for every declarator whose alias chain ends at the decrypt function
// `_0x1c34`, evaluate the calls made through that alias and replace each call with
// the decrypted string literal.
traverse(ast, {
    VariableDeclarator(path) {
        const { init, id } = path.node;

        // Skip declarators without an initializer or with a destructuring pattern.
        if (!init || !id || id.type !== 'Identifier') return;
        const funcName = id.name;
        console.log(`找到的函数名称:${funcName}`);

        const binding = path.scope.getBinding(funcName);
        if (!binding) return;

        // Resolve the alias chain (e.g. c = b, b = a, a = _0x1c34).
        const realFuncName = resolveCalleeName(path);
        console.log(`该函数的祖先函数名称:${realFuncName}`);
        if (realFuncName !== '_0x1c34') return;

        binding.referencePaths.forEach(refPath => {
            const parent = refPath.parent;
            // Only handle `alias(...)`; plain re-assignments are covered by their own declarator.
            if (parent.type !== 'CallExpression' || parent.callee !== refPath.node) return;
            console.log(`${funcName}函数调用了`);

            // The call can only be evaluated statically when every argument is a literal.
            // Passing a placeholder for anything else would decrypt the wrong string.
            const allLiteral = parent.arguments.every(arg =>
                arg.type === 'NumericLiteral' || arg.type === 'StringLiteral');
            if (!allLiteral) return;

            // Per-call argument list (no state shared between calls).
            const callArgs = parent.arguments.map(arg => arg.value);

            const result = _0x1c34(callArgs[0], callArgs[1]);
            console.log(`解密结果 -> ${result}`)
            // types.stringLiteral throws on non-strings; keep the original call in that case.
            if (typeof result !== 'string') return;

            refPath.parentPath.replaceWith(types.stringLiteral(result));
        })
    }
})
const code = generator(ast).code;
fs.writeFile('./demoNewNew.js', code, (err) => {
    // Report a failed write instead of silently producing no output file.
    if (err) console.error('Failed to write ./demoNewNew.js:', err);
});

去除字符串花指令

以解密之后的这个函数为例

// Sample taken after string decryption: every string constant and the wrapped call
// are still routed through the lookup object _0x422da4.
function changeUserState(_0x168850) {
  // _0x59f106 aliases the decrypt function but is not used anywhere in this function.
  var _0x59f106 = _0x1c34,
    _0x422da4 = {
      'FNUmX': ".logout",
      'XOXOd': "layui-hide",
      'voMHG': '.login',
      // Wrapper that simply calls its first argument with its second.
      'OtQuj': function (_0x152ee4, _0x4f25e3) {
        return _0x152ee4(_0x4f25e3);
      },
      'eXLpj': ".username"
    };
  $(_0x422da4["FNUmX"])["removeClass"](_0x422da4["XOXOd"]), $(_0x422da4['voMHG'])["addClass"](_0x422da4["XOXOd"]), _0x422da4['OtQuj']($, _0x422da4["eXLpj"])["text"](_0x168850['username'] || _0x168850["nickName"]), _0x168850["admin"] && $('.user-admin')["removeClass"]('layui-hide');
}

访问_0x422da4["FNUmX"]实际上就是".logout"，类似这样替换

思路是遍历节点，找到类似_0x422da4这种标识符，我感觉应该是VariableDeclarator的init要是ObjectExpression，获取名字，找到绑定，找到引用的地方，如果引用的父节点是MemberExpression，就获取其property的value属性，然后根据value去找到对象对应的属性值，如果属性值是字符串，就获取字符串，去替换节点，如果属性值是其他（函数等），就跳过。

但是注意，如果对象的声明没有var和let，那么节点信息就变了，此时应该是AssignmentExpression的right要是ObjectExpression

也就是说，变量声明式的对象绑定

// Declaration form: the object literal is the initializer of a `var` declarator.
var _0x422da4 = {
  'FNUmX': '.logout',
  'XOXOd': 'layui-hide',
  // ... (remaining properties omitted)
};

在AST中表现为

1
2
3

VariableDeclarator
  id.name: _0x422da4
  init.type: ObjectExpression

赋值表达式式的对象绑定

// Assignment form: no `var`/`let`, the object literal is the right-hand side of `=`.
_0x422da4 = {
  'FNUmX': '.logout',
  'XOXOd': 'layui-hide',
  // ... (remaining properties omitted)
};

在AST中表现为

ExpressionStatement
  expression.type: AssignmentExpression
    left.name: _0x422da4
    right.type: ObjectExpression

对于逗号分隔的表达式

// Two assignments joined by the comma operator inside one expression statement.
_0x59f106 = _0x1c34,
_0x422da4 = {
  'FNUmX': ".logout",
  'XOXOd': "layui-hide",
  // ... (remaining properties omitted)
};

在AST中对应的是

1
2
3

ExpressionStatement
└── expression: SequenceExpression
    └── expressions: [ AssignmentExpression, AssignmentExpression ]

核心处理函数如下

/**
 * Inline string constants that are read through an obfuscation lookup object,
 * e.g. rewrite `obj["FNUmX"]` to `".logout"`.
 *
 * @param {object} path Babel NodePath used to look up the binding of `objName`.
 * @param {string} objName Name of the identifier the object literal is bound to.
 * @param {Array<object>} properties The `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
  const binding = path.scope.getBinding(objName);
  if (!binding) return;

  // key -> string value. Only `'key': 'string'` entries are collected; methods,
  // spreads and non-string values are skipped (they have no `key`/`value` literal).
  const propMap = new Map();
  for (const prop of properties) {
    if (prop.key?.type === 'StringLiteral' && prop.value?.type === 'StringLiteral') {
      propMap.set(prop.key.value, prop.value.value);
    }
  }

  binding.referencePaths.forEach(refPath => {
    const parentPath = refPath.parentPath;
    const parentNode = parentPath.node;
    // Only `objName['literalKey']` member expressions are candidates.
    if (parentNode.type !== 'MemberExpression' ||
        parentNode.object.name !== objName ||
        parentNode.property.type !== 'StringLiteral') {
      return;
    }

    // Never replace a member expression that is being written to: turning
    // `obj["k"] = v` into `"lit" = v` would produce invalid code.
    const outer = parentPath.parentPath?.node;
    const isWriteTarget = Boolean(outer) && (
      (outer.type === 'AssignmentExpression' && outer.left === parentNode) ||
      (outer.type === 'UpdateExpression' && outer.argument === parentNode) ||
      (outer.type === 'UnaryExpression' && outer.operator === 'delete' && outer.argument === parentNode)
    );
    if (isWriteTarget) return;

    const value = propMap.get(parentNode.property.value);
    if (typeof value === 'string') {
      parentPath.replaceWith(types.stringLiteral(value));
    }
  });
}

代码如下：

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the output of the string-decryption pass and parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew.js", {
    encoding: "utf-8"
});
let ast = parser.parse(jscode);

/**
 * Inline string constants that are read through an obfuscation lookup object,
 * e.g. rewrite `obj["FNUmX"]` to `".logout"`.
 *
 * @param {object} path Babel NodePath used to look up the binding of `objName`.
 * @param {string} objName Name of the identifier the object literal is bound to.
 * @param {Array<object>} properties The `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    // key -> string value. Only `'key': 'string'` entries are collected; methods,
    // spreads and non-string values are skipped (they have no `key`/`value` literal).
    const propMap = new Map();
    for (const prop of properties) {
        if (prop.key?.type === 'StringLiteral' && prop.value?.type === 'StringLiteral') {
            propMap.set(prop.key.value, prop.value.value);
        }
    }

    binding.referencePaths.forEach(refPath => {
        const parentPath = refPath.parentPath;
        const parentNode = parentPath.node;
        // Only `objName['literalKey']` member expressions are candidates.
        if (parentNode.type !== 'MemberExpression' ||
            parentNode.object.name !== objName ||
            parentNode.property.type !== 'StringLiteral') {
            return;
        }

        // Never replace a member expression that is being written to: turning
        // `obj["k"] = v` into `"lit" = v` would produce invalid code.
        const outer = parentPath.parentPath?.node;
        const isWriteTarget = Boolean(outer) && (
            (outer.type === 'AssignmentExpression' && outer.left === parentNode) ||
            (outer.type === 'UpdateExpression' && outer.argument === parentNode) ||
            (outer.type === 'UnaryExpression' && outer.operator === 'delete' && outer.argument === parentNode)
        );
        if (isWriteTarget) return;

        const value = propMap.get(parentNode.property.value);
        if (typeof value === 'string') {
            console.log(`找到指定表达式,标识符名称为${objName}`)
            parentPath.replaceWith(types.stringLiteral(value));
        }
    });
}

// Feed every object literal that is bound to a plain identifier - through a
// declaration, a single assignment, or an assignment inside a comma sequence -
// into handleObfuscatedObject, then write the regenerated code to disk.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        // Destructuring patterns have no single name to look up.
        if (init?.type === 'ObjectExpression' && id.type === 'Identifier') {
            handleObfuscatedObject(path, id.name, init.properties);
        }
    },
    ExpressionStatement(path) {
        const expr = path.node.expression;
        // Treat a lone expression as a one-element sequence so both shapes share one code path.
        const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];

        for (const item of candidates) {
            if (item.type === 'AssignmentExpression' &&
                item.left.type === 'Identifier' &&
                item.right.type === 'ObjectExpression') {
                handleObfuscatedObject(path, item.left.name, item.right.properties);
            }
        }
    }
});
const code = generator(ast).code;
fs.writeFile('./demoNewNew2.js', code, (err) => {
    // Report a failed write instead of silently producing no output file.
    if (err) console.error('Failed to write ./demoNewNew2.js:', err);
});

去除函数花指令

还是以上面的函数为例

// Sample taken after the string-inlining pass: the string lookups are gone, only the
// wrapped call through _0x422da4['OtQuj'] remains.
function changeUserState(_0x168850) {
  var _0x59f106 = _0x1c34,
    _0x422da4 = {
      'FNUmX': ".logout",
      'XOXOd': "layui-hide",
      'voMHG': '.login',
      // Wrapper that simply calls its first argument with its second.
      'OtQuj': function (_0x152ee4, _0x4f25e3) {
        return _0x152ee4(_0x4f25e3);
      },
      'eXLpj': ".username"
    };
  $(".logout")["removeClass"]("layui-hide"), $(".login")["addClass"]("layui-hide"), _0x422da4['OtQuj']($, ".username")["text"](_0x168850['username'] || _0x168850["nickName"]), _0x168850["admin"] && $('.user-admin')["removeClass"]('layui-hide');
}

_0x422da4['OtQuj']($, ".username") 其实就是$(".username")

思路就是先找出对象标识符，取出属性，然后丢到封装好的函数里面去。先找到对象标识符，然后把节点信息丢到封装的函数里，在函数中处理，函数逻辑如下，先去找到引用，然后判断引用的父节点是不是MemberExpression并且父节点的父节点是不是CallExpression，如果是，取出MemberExpression的参数，这个参数就是对象标识符的属性，然后拿着这个属性去对象标识符里去找属性值，如果属性值是函数，并且只有一个return语句，并且return语句的返回就是参数1(参数2)的形式(也就是返回的是CallExpression)，那么就拿着引用那个地方的两个参数，构造一个新的CallExpression，替换原来的CallExpression

这里还需要判断一下函数返回的是a(b)还是b(a)，关系到怎么构造CallExpression

核心代码如下

/**
 * Unwrap call-forwarding helpers stored on an obfuscation lookup object, e.g.
 * rewrite `obj['OtQuj']($, ".username")` to `$(".username")` when `OtQuj` is
 * `function (a, b) { return a(b); }` (or the reversed form `return b(a);`).
 *
 * @param {object} path Babel NodePath used to look up the binding of `objName`.
 * @param {string} objName Name of the identifier the object literal is bound to.
 * @param {Array<object>} properties The `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    // property key -> { isNormalOrder }: true for `a(b)`, false for `b(a)`.
    const propMap = new Map();
    for (const prop of properties) {
        const fn = prop.value;
        // Methods and spreads have no `value`/`key` literal; skip them instead of throwing.
        if (prop.key?.type !== 'StringLiteral' || fn?.type !== 'FunctionExpression') continue;
        // Exactly two parameters and a body made of a single statement.
        if (fn.params.length !== 2 || fn.body.body.length !== 1) continue;

        const returnStmt = fn.body.body[0];
        if (returnStmt.type !== 'ReturnStatement' ||
            returnStmt.argument?.type !== 'CallExpression') continue;

        const returned = returnStmt.argument;
        if (returned.callee.type !== 'Identifier' ||
            returned.arguments.length !== 1 ||
            returned.arguments[0].type !== 'Identifier') continue;

        const [param1, param2] = fn.params.map(p => p.name);
        const calleeName = returned.callee.name;
        const argName = returned.arguments[0].name;

        if (calleeName === param1 && argName === param2) {
            propMap.set(prop.key.value, { isNormalOrder: true });  // a(b)
        } else if (calleeName === param2 && argName === param1) {
            propMap.set(prop.key.value, { isNormalOrder: false }); // b(a)
        }
        // Anything else does not purely forward its parameters and is left alone.
    }

    // Visit references last-to-first so that nested wrapper calls are rewritten
    // inside-out; replacing an outer call first leaves the inner paths pointing
    // at a detached part of the tree.
    [...binding.referencePaths].reverse().forEach(refPath => {
        const parentPath = refPath.parentPath;
        const parentNode = parentPath.node;
        const callPath = parentPath.parentPath;

        // Shape required: obj['key'](x, y) with the member expression as callee.
        if (parentNode.type !== 'MemberExpression' ||
            parentNode.object.name !== objName ||
            parentNode.property.type !== 'StringLiteral' ||
            callPath?.node.type !== 'CallExpression' ||
            callPath.node.callee !== parentNode) {
            return;
        }

        const callInfo = propMap.get(parentNode.property.value);
        if (!callInfo) return;

        const args = callPath.node.arguments;
        if (args.length !== 2) return; // the wrapper takes exactly two arguments

        const newCall = callInfo.isNormalOrder
            ? types.callExpression(args[0], [args[1]])  // a(b)
            : types.callExpression(args[1], [args[0]]); // b(a)

        callPath.replaceWith(newCall);
    });
}

去除二元运算花指令

寻找对象标识符的代码和上面字符串花指令是一样的，只是函数处理部分的区别，判断如果属性值是函数，并且返回值是BinaryExpression，取出里面的operator，注意也要判断一下调用顺序，然后找到objname的引用，判断引用的父节点是MemberExpression，爷爷节点是CallExpression，取出参数，构造一个BinaryExpression，替换CallExpression

/**
 * Unwrap binary-operator helpers stored on an obfuscation lookup object, e.g.
 * rewrite `obj['NwDqh'](x, y)` to `x - y` when `NwDqh` is
 * `function (a, b) { return a - b; }` (or the reversed form `return b - a;`).
 *
 * NOTE: references are visited in source order, so for nested wrapper calls the
 * outer call is replaced before the inner one; Babel can then reject the inner
 * replacement ("Container is falsy"). Iterate in reverse order to avoid that.
 *
 * @param {object} path Babel NodePath used to look up the binding of `objName`.
 * @param {string} objName Name of the identifier the object literal is bound to.
 * @param {Array<object>} properties The `properties` array of the ObjectExpression.
 */
function handleObfuscatedObjectFunctionCall(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    // property key -> { operator, isNormal }: isNormal is true for `a op b`.
    const propMap = new Map();

    for (const prop of properties) {
        if (prop.type !== 'ObjectProperty' || prop.key.type !== 'StringLiteral') continue;

        const fn = prop.value;
        if (fn.type !== 'FunctionExpression' ||
            fn.params.length !== 2 ||
            fn.body.body.length !== 1 ||
            fn.body.body[0].type !== 'ReturnStatement') continue;

        const returnNode = fn.body.body[0].argument;
        if (returnNode?.type !== 'BinaryExpression' ||
            returnNode.left.type !== 'Identifier' ||
            returnNode.right.type !== 'Identifier') continue;

        // Both operands must be exactly the two parameters; an operand that refers
        // to some outer variable cannot be expressed through the call arguments.
        const [first, second] = fn.params.map(p => p.name);
        const leftName = returnNode.left.name;
        const rightName = returnNode.right.name;
        let isNormal;
        if (leftName === first && rightName === second) {
            isNormal = true;
        } else if (leftName === second && rightName === first) {
            isNormal = false;
        } else {
            continue;
        }

        propMap.set(prop.key.value, { operator: returnNode.operator, isNormal });
    }

    binding.referencePaths.forEach(refPath => {
        const memberExpr = refPath.parentPath;
        const callExpr = memberExpr?.parentPath;
        if (!memberExpr || !callExpr) return;

        // Shape required: obj['key'](x, y). The member expression must be the callee,
        // otherwise an unrelated call that merely takes obj['key'] as an argument
        // would be rewritten.
        if (memberExpr.node.type !== 'MemberExpression' ||
            memberExpr.node.object.name !== objName ||
            memberExpr.node.property.type !== 'StringLiteral' ||
            callExpr.node.type !== 'CallExpression' ||
            callExpr.node.callee !== memberExpr.node) {
            return;
        }

        const funcInfo = propMap.get(memberExpr.node.property.value);
        const args = callExpr.node.arguments;
        if (!funcInfo || args.length !== 2) return;

        const binaryExpr = funcInfo.isNormal
            ? types.binaryExpression(funcInfo.operator, args[0], args[1])
            : types.binaryExpression(funcInfo.operator, args[1], args[0]);

        callExpr.replaceWith(binaryExpr);
    });
}

直接运行报错ReferenceError: Container is falsy，报错的地方在callExpr.replaceWith(binaryExpr);此时需要进行调试，可以打印一下中间值

一般的打印方法就是打印节点和节点的code

1
2
3

// Debug aid: print the member expression next to the generated code of the call
// before the rewrite and of the replacement expression, then do the replacement.
console.log(memberExpr+'',generator(callExpr.node).code);
console.log(memberExpr+'',generator(binaryExpr).code);
callExpr.replaceWith(binaryExpr);

输出报错部分信息如下

_0x5c8922["wVUIT"] _0x5c8922["wVUIT"](_0x5c8922["zfnda"](0x1, Math['random']()), 0x10000)
_0x5c8922["wVUIT"] _0x5c8922["zfnda"](0x1, Math['random']()) * 0x10000
_0x5c8922["zfnda"] _0x5c8922["zfnda"](0x1, Math['random']())
_0x5c8922["zfnda"] 0x1 + Math['random']()

这是什么意思呢？

其实问题出在“从外往里”替换节点时，可能会破坏 AST 结构，导致 Babel 内部找不到“容器”来放你新的节点，从而抛出 ReferenceError: Container is falsy 这个错

举个例子，比如如下代码

1	_0x5c8922["wVUIT"](_0x5c8922["zfnda"](0x1, Math['random']()), 0x10000);

对应AST是这样的

CallExpression (wVUIT)
 ├── callee: MemberExpression (_0x5c8922["wVUIT"])
 └── arguments:
     ├── CallExpression (zfnda)
     │   ├── callee: MemberExpression (_0x5c8922["zfnda"])
     │   └── arguments: ...
     └── NumericLiteral 0x10000

如果先替换外层的 CallExpression，假设把整个 _0x5c8922["wVUIT"] 替换成一个 BinaryExpression，也就替换成了

1	_0x5c8922["zfnda"](0x1, Math['random']()) * 0x10000;

此时，内部的那个 _0x5c8922["zfnda"] 已经被当作纯数值用了，不是一个 CallExpression 节点了，而下一轮想再替换 _0x5c8922["zfnda"] 的时候，Babel 的 path 就找不到它对应的“容器”了（也就是 AST 父节点的 child list），这时 path.replaceWith(...) 就会抛错。

怎么理解“内部的那个 _0x5c8922["zfnda"] 已经被当作纯数值用了”这句话？假设执行了这一句

1	callExpr.replaceWith(types.binaryExpression('*', innerExpr, outerArg));

假设 callExpr 是 wVUIT(...) 的 Path，把它整个替换成了

1	_0x5c8922["zfnda"](0x1, Math['random']()) * 0x10000

从 Babel 的角度，这个 Path 被 replaceWith 之后，它就“死了”，Babel 不再追踪这个Path。

但问题是此时还保留着 _0x5c8922["zfnda"](...) 的 Path，因为Path是在binding.referencePaths.forEach这里被赋值的，也就是说referencePaths 里下一个 Path 还是旧的 zfnda(...) 节点的Path，它指向的是之前 AST中的节点结构，而这个结构已经在replaceWith(...) 之后被重建了，压根就找不到。

也就是说，解决的方法就是，要从内往外处理，而不是从外往里处理。

处理方式是这样，binding.referencePaths.forEach(refPath => {不是从前往后遍历referencePaths数组吗？把数组反转过来，也就是从后往前遍历，只需要把遍历顺序反转一下或者把数组反转一下，可以通过先 .slice() 一下复制一份再 .reverse()，从而达到从后往前遍历的目的，也就是binding.referencePaths.slice().reverse().forEach

最终代码

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the input file for this pass and parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew3.js", {
    encoding: "utf-8"
});
let ast = parser.parse(jscode);

/**
 * Unwrap binary-operator helpers stored on an obfuscation lookup object, e.g.
 * rewrite `obj['NwDqh'](x, y)` to `x - y` when `NwDqh` is
 * `function (a, b) { return a - b; }` (or the reversed form `return b - a;`).
 * References are processed last-to-first so nested calls are rewritten inside-out.
 *
 * @param {object} path Babel NodePath used to look up the binding of `objName`.
 * @param {string} objName Name of the identifier the object literal is bound to.
 * @param {Array<object>} properties The `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    // property key -> { operator, isNormal }: isNormal is true for `a op b`.
    const propMap = new Map();

    for (const prop of properties) {
        if (prop.type !== 'ObjectProperty' || prop.key.type !== 'StringLiteral') continue;

        const fn = prop.value;
        if (fn.type !== 'FunctionExpression' ||
            fn.params.length !== 2 ||
            fn.body.body.length !== 1 ||
            fn.body.body[0].type !== 'ReturnStatement') continue;

        const returnNode = fn.body.body[0].argument;
        if (returnNode?.type !== 'BinaryExpression' ||
            returnNode.left.type !== 'Identifier' ||
            returnNode.right.type !== 'Identifier') continue;

        // Both operands must be exactly the two parameters; an operand that refers
        // to some outer variable cannot be expressed through the call arguments.
        const [first, second] = fn.params.map(p => p.name);
        const leftName = returnNode.left.name;
        const rightName = returnNode.right.name;
        let isNormal;
        if (leftName === first && rightName === second) {
            isNormal = true;
        } else if (leftName === second && rightName === first) {
            isNormal = false;
        } else {
            continue;
        }

        propMap.set(prop.key.value, { operator: returnNode.operator, isNormal });
    }

    // Copy before reversing so the binding's own array is not mutated.
    binding.referencePaths.slice().reverse().forEach(refPath => {
        const memberExpr = refPath.parentPath;
        const callExpr = memberExpr?.parentPath;
        if (!memberExpr || !callExpr) return;

        // Shape required: obj['key'](x, y). The member expression must be the callee,
        // otherwise an unrelated call that merely takes obj['key'] as an argument
        // would be rewritten.
        if (memberExpr.node.type !== 'MemberExpression' ||
            memberExpr.node.object.name !== objName ||
            memberExpr.node.property.type !== 'StringLiteral' ||
            callExpr.node.type !== 'CallExpression' ||
            callExpr.node.callee !== memberExpr.node) {
            return;
        }

        const funcInfo = propMap.get(memberExpr.node.property.value);
        const args = callExpr.node.arguments;
        if (!funcInfo || args.length !== 2) return;

        const binaryExpr = funcInfo.isNormal
            ? types.binaryExpression(funcInfo.operator, args[0], args[1])
            : types.binaryExpression(funcInfo.operator, args[1], args[0]);
        console.log(memberExpr+'',generator(callExpr.node).code);
        console.log(memberExpr+'',generator(binaryExpr).code);
        callExpr.replaceWith(binaryExpr);
    });
}
// Feed every object literal that is bound to a plain identifier - through a
// declaration, a single assignment, or an assignment inside a comma sequence -
// into handleObfuscatedObject, then write the regenerated code to disk.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        // Destructuring patterns have no single name to look up.
        if (init?.type === 'ObjectExpression' && id.type === 'Identifier') {
            handleObfuscatedObject(path, id.name, init.properties);
        }
    },
    ExpressionStatement(path) {
        const expr = path.node.expression;
        // Treat a lone expression as a one-element sequence so both shapes share one code path.
        const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];

        for (const item of candidates) {
            if (item.type === 'AssignmentExpression' &&
                item.left.type === 'Identifier' &&
                item.right.type === 'ObjectExpression') {
                handleObfuscatedObject(path, item.left.name, item.right.properties);
            }
        }
    }
});
const code = generator(ast).code;
fs.writeFile('./demoNewNew4.js', code, (err) => {
    // Report a failed write instead of silently producing no output file.
    if (err) console.error('Failed to write ./demoNewNew4.js:', err);
});

不过还得考虑一下这种情况

// Wrapper whose body has an extra statement in front of the return: `_junk` is
// assigned but not used by the returned comparison.
'BKHUw': function (_a, _b) { 
  var _junk = _0x42e822;
  return _a === _b;
}

属性值是函数，但是函数体不是1句return，中间加了一句var _junk = _0x42e822;垃圾语句，还需要修改一下判断条件，只判断函数体中最后一条语句是否是一个合法的 return BinaryExpression，并且检查 return 中的 left 和 right 是否是函数参数里的变量

修改之后的代码如下

/**
 * Unwrap binary-operator helpers stored on an obfuscation lookup object, e.g.
 * rewrite `obj['BKHUw'](x, y)` to `x === y`. Only the LAST statement of the helper
 * body is inspected, so helpers padded with junk statements in front of the
 * `return` are recognised as well. References are processed last-to-first so
 * nested calls are rewritten inside-out.
 *
 * @param {object} path Babel NodePath used to look up the binding of `objName`.
 * @param {string} objName Name of the identifier the object literal is bound to.
 * @param {Array<object>} properties The `properties` array of the ObjectExpression.
 */
function handleObfuscatedObject(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    // property key -> { operator, isNormal }: isNormal is true for `a op b`.
    const propMap = new Map();

    for (const prop of properties) {
        if (prop.type !== 'ObjectProperty' || prop.key.type !== 'StringLiteral') continue;

        const fn = prop.value;
        if (fn.type !== 'FunctionExpression' || fn.params.length !== 2) continue;

        // An empty body has no last statement; `?.` keeps that case from throwing.
        const bodyStatements = fn.body.body;
        const lastStmt = bodyStatements[bodyStatements.length - 1];
        if (lastStmt?.type !== 'ReturnStatement' ||
            lastStmt.argument?.type !== 'BinaryExpression') continue;

        const returnNode = lastStmt.argument;
        const { left, right } = returnNode;
        if (left.type !== 'Identifier' || right.type !== 'Identifier') continue;

        // Both operands must be exactly the two parameters, in either order.
        const [paramA, paramB] = fn.params.map(p => p.name);
        let isNormal;
        if (left.name === paramA && right.name === paramB) {
            isNormal = true;
        } else if (left.name === paramB && right.name === paramA) {
            isNormal = false;
        } else {
            continue;
        }

        propMap.set(prop.key.value, { operator: returnNode.operator, isNormal });
    }

    // Copy before reversing so the binding's own array is not mutated.
    binding.referencePaths.slice().reverse().forEach(refPath => {
        const memberExpr = refPath.parentPath;
        const callExpr = memberExpr?.parentPath;
        if (!memberExpr || !callExpr) return;

        // Shape required: obj['key'](x, y). The member expression must be the callee,
        // otherwise an unrelated call that merely takes obj['key'] as an argument
        // would be rewritten.
        if (memberExpr.node.type !== 'MemberExpression' ||
            memberExpr.node.object.name !== objName ||
            memberExpr.node.property.type !== 'StringLiteral' ||
            callExpr.node.type !== 'CallExpression' ||
            callExpr.node.callee !== memberExpr.node) {
            return;
        }

        const funcInfo = propMap.get(memberExpr.node.property.value);
        const args = callExpr.node.arguments;
        if (!funcInfo || args.length !== 2) return;

        const binaryExpr = funcInfo.isNormal
            ? types.binaryExpression(funcInfo.operator, args[0], args[1])
            : types.binaryExpression(funcInfo.operator, args[1], args[0]);

        console.log(memberExpr + '', generator(callExpr.node).code);
        console.log(memberExpr + '', generator(binaryExpr).code);
        callExpr.replaceWith(binaryExpr);
    });
}

去除虚假指令

虚假指令，就是明显不会去执行的那种指令，比如

1	if ('dXFzu' !== "PIrIS") return layer["msg"]("\u91CD\u590D\u6B21\u6570\u53EA\u80FD\u57281\u523020\u4E4B\u95F4"), ![];else _0x390fdb['push']("[img]" + _0x13e9ed + "[/img]");

这里的if条件判断肯定为真，也就是说else那里不会执行到

对于这种语句，直接取真正执行的那条语句，替换掉整个语句

if语句的语法树如下

IfStatement
 ├── test: BinaryExpression (!==)
 │   ├── left: StringLiteral ('dXFzu')
 │   └── right: StringLiteral ('PIrIS')
 ├── consequent: ReturnStatement
 │   └── argument: SequenceExpression
 │       ├── CallExpression (layer["msg"](...))
 │       └── UnaryExpression (![])
 └── alternate: ExpressionStatement
     └── CallExpression (_0x390fdb["push"](...))

思路就是遍历所有的IfStatement节点，取出里面的test节点，如果test节点的left和right都是字符串的话，然后取出test节点中的operator进行计算，如果为真，则取出consequent节点的return语句，否则取出alternate里面的语句，把取出的语句替换掉整个IfStatement节点

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Read the input file for this pass and parse it into a Babel AST.
const jscode = fs.readFileSync("./aaa.js", {
    encoding: "utf-8"
});
let ast = parser.parse(jscode);
/**
 * Fold an `if` whose test compares two string literals: the branch that can never
 * run is dropped and the statement is replaced by the surviving branch.
 *
 * The comparison is computed directly from the literal values instead of running
 * generated source through eval(). Tests using an operator other than the equality
 * and relational ones are left untouched.
 *
 * @param {object} path Babel NodePath of an IfStatement.
 */
function handleObfuscatedIfStatement(path) {
    const test = path.node.test;

    // Only `'literal' <op> 'literal'` tests can be decided statically here.
    if (
        test.type !== "BinaryExpression" ||
        test.left.type !== "StringLiteral" ||
        test.right.type !== "StringLiteral"
    ) {
        return;
    }

    const lhs = test.left.value;
    const rhs = test.right.value;
    let result;
    switch (test.operator) {
        // Both operands are strings, so loose and strict equality agree.
        case '===':
        case '==':
            result = lhs === rhs;
            break;
        case '!==':
        case '!=':
            result = lhs !== rhs;
            break;
        case '<':
            result = lhs < rhs;
            break;
        case '<=':
            result = lhs <= rhs;
            break;
        case '>':
            result = lhs > rhs;
            break;
        case '>=':
            result = lhs >= rhs;
            break;
        default:
            return; // not a comparison: leave the statement as it is
    }
    console.log(generator(test).code,result);

    const replacement = result ? path.node.consequent : path.node.alternate;
    if (!replacement) {
        // The surviving branch does not exist (false test without `else`).
        path.remove();
    } else if (replacement.type === 'BlockStatement') {
        // A block may hold several statements; replaceWith takes a single node.
        path.replaceWithMultiple(replacement.body);
    } else {
        path.replaceWith(replacement);
    }
}

// Fold every statically decidable `if`, then write the regenerated code to disk.
traverse(ast, {
    IfStatement(path) {
        handleObfuscatedIfStatement(path);
    }
});
const code = generator(ast).code;
fs.writeFile('./aaaNew.js', code, (err) => {
    // Report a failed write instead of silently producing no output file.
    if (err) console.error('Failed to write ./aaaNew.js:', err);
});

这里加的这条判断逻辑是为了处理if或者else的代码块中有多条语句的情况

else if (replacement.type === 'BlockStatement') {
            // 如果代码块中有多条语句，不能直接replaceWith，replaceWith只能替换1条
            path.replaceWithMultiple(replacement.body);
        }

比如这种代码

if('a' === 'b')
{
  1+1;
  2+2;
}

里面的if代码块也就是consequent是一个BlockStatement，BlockStatement的body是一个ExpressionStatement的数组，也就是存储了多条语句

此外，还有一种虚假指令，就是?表达式（三元表达式），这种表达式的AST节点是ConditionalExpression，它的test子节点是一个BinaryExpression。如果BinaryExpression节点的left和right都是字符串，就进行计算：结果为真则取consequent表达式，否则取alternate表达式，然后用取出的表达式替换掉整个ConditionalExpression节点

1	if (_0x34db88['err']) layer["msg"](_0x34db88["msg"]);else "dXuwI" !== "dXuwI" ? _0x164735['rmYNL'](_0xd9f6eb) : (layer["msg"]("\u5220\u9664\u6210\u529F"), _0x50e443["remove"]());

代码如下

/**
 * Collapse a ternary whose test compares two string literals.
 *
 * The comparison is computed directly from the literal values. Building source
 * text from the raw values and eval()-ing it fails as soon as a literal contains
 * a quote, a backslash or a line break.
 *
 * Tests that use a non-comparison operator are left untouched.
 *
 * @param {object} path - Babel NodePath of a ConditionalExpression.
 * @returns {void}
 */
function handleObfuscatedConditionalExpression(path) {
    const { node } = path;
    const { test } = node;

    // Only handle ternaries whose test is a string-vs-string BinaryExpression.
    if (
        test.type !== 'BinaryExpression' ||
        test.left.type !== 'StringLiteral' ||
        test.right.type !== 'StringLiteral'
    ) {
        return;
    }

    // Comparison operators that are meaningful for two strings.
    const comparators = new Map([
        ['===', (a, b) => a === b],
        ['!==', (a, b) => a !== b],
        ['==', (a, b) => a === b],
        ['!=', (a, b) => a !== b],
        ['<', (a, b) => a < b],
        ['<=', (a, b) => a <= b],
        ['>', (a, b) => a > b],
        ['>=', (a, b) => a >= b],
    ]);
    const compare = comparators.get(test.operator);
    if (!compare) return;

    // Keep the branch that is actually evaluated and drop the other one.
    const replacement = compare(test.left.value, test.right.value)
        ? node.consequent
        : node.alternate;
    path.replaceWith(replacement);
}

去除未引用代码

就是找到没有被调用的标识符，也就是遍历标识符的绑定，然后如果引用为0，就删除

// Obfuscated sample used as input for the dead-declaration cleanup.
function logout() {
  // _0x46a457 is read once below (as the initializer of _0x10780b);
  // _0x450081 is declared here and never read anywhere in this function.
  var _0x46a457 = _0x1c34,
    _0x450081 = {
      'IvBNU': function (_0x53be2d, _0x5a0425) {
        return _0x53be2d(_0x5a0425);
      },
      'fyixX': "span",
      'NwDqh': function (_0x2da198, _0x472408) {
        return _0x2da198 - _0x472408;
      },
      'OVGne': function (_0x1e906e, _0x2aae97) {
        return _0x1e906e !== _0x2aae97;
      },
      'PLaKy': "pqBCQ",
      'JRihc': "endpoints"
    };
  localStorage['removeItem']("user"), removeCookie('token'), removeCookie("endpoints"), $["get"]("/logout", function () {
    // _0x10780b is read once (initializer of _0x1243d6); _0x19e1f1 is never read.
    var _0x10780b = _0x46a457,
      _0x19e1f1 = {
        'kdxpu': function (_0x3c46fc, _0x7175de) {
          return _0x3c46fc(_0x7175de);
        },
        'SEAUA': function (_0x3ae857, _0x3bb569) {
          // _0x1a7a44 is never read.
          var _0x1a7a44 = _0x1c34;
          return _0x3ae857(_0x3bb569);
        },
        'LnNMW': "span",
        'hFLNZ': function (_0x334318, _0x859ce6) {
          // _0x1243d6 is never read.
          var _0x1243d6 = _0x10780b;
          return _0x334318 - _0x859ce6;
        },
        'kyPvQ': function (_0xfad184, _0x3a964c) {
          return _0xfad184(_0x3a964c);
        }
      };
    // The only statement in this callback that does anything besides declaring variables.
    location["href"] = '/';
  });
}

比如这段代码，思路是遍历Identifier节点，然后判断Identifier节点的父节点是否为var _0x46a457 = _0x1c34这种表达式，也就是目标Identifier节点得是变量声明语句VariableDeclarator中的Identifier，然后判断VariableDeclarator的init是否为对象表达式ObjectExpression或者是Identifier，如果是，就删除Identifier节点的父节点，比如var _0x46a457 = _0x1c34，_0x46a457是遍历到的Identifier节点，如果这个节点没有被引用，就删除整个VariableDeclarator变量声明

但是要注意，为什么这里不直接判断Identifier的引用为0，然后直接删除呢？因为有的变量声明的值携带了setInterval等函数时，哪怕变量没有被引用，也会触发setInterval内设置的函数

1
2
3

// `test` may never be read, yet the initializer is a call: evaluating it
// registers the timer callback, so the declaration cannot simply be deleted.
var test = setInterval(function (){
	console.log("xxxxxxxxxxxxxxxxxx")
},1000)

代码如下

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Load the output of the previous stage as UTF-8 text, then parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew7.js", "utf-8");
let ast = parser.parse(jscode);

/**
 * Remove a variable declarator whose variable is never used.
 *
 * A declarator is removed only when all of the following hold:
 *  - the visited identifier is the declared name (the declarator's `id`),
 *  - its binding has no references and is never reassigned,
 *  - its initializer is an object literal or a plain identifier.
 *
 * Any other initializer (for example a call) is kept, because evaluating it
 * may be the whole point of the declaration.
 *
 * @param {object} path - Babel NodePath of an Identifier.
 * @returns {void}
 */
function handleTrashIntruction(path){
    const declarator = path.parentPath;
    // An identifier in the `init` slot is a use of some other variable, not a declaration.
    if (!declarator.isVariableDeclarator() || path.key !== 'id') return;

    const binding = path.scope.getBinding(path.node.name);
    if (!binding || binding.referencePaths.length !== 0) return;
    // A later `name = ...` still needs the declaration, even if nothing reads the variable.
    if (binding.constantViolations.length !== 0) return;

    const { init } = declarator.node;
    if (types.isObjectExpression(init) || types.isIdentifier(init)) {
        console.log(String(declarator));
        declarator.remove();
    }
}

// Removing one dead declaration can make the variable it copied from dead too
// (once `var b = a;` is gone, `a` may have no readers left), but a pass only sees
// the reference counts collected for the AST it started with. So regenerate,
// re-parse and repeat until a pass no longer changes the output.
const MAX_CLEANUP_PASSES = 50; // safety cap; the loop normally stops much earlier
let code = generator(ast).code;
for (let pass = 0; pass < MAX_CLEANUP_PASSES; pass++) {
    traverse(ast, {
        Identifier(path) {
            handleTrashIntruction(path);
        }
    });
    const cleaned = generator(ast).code;
    if (cleaned === code) break;
    code = cleaned;
    // A fresh AST gives the next pass up-to-date binding information.
    ast = parser.parse(code);
}
// Write synchronously so that a failed write throws instead of being silently ignored.
fs.writeFileSync('./demoNewNew8.js', code);

但是这样经过处理之后的代码量还是很大，于是我考虑是不是之前去除函数花指令的时候，没有把情况考虑完全。

去除函数花指令——优化

之前去除函数花指令部分还不够完善，比如对象的属性值是函数，但是参数有很多个

'RisKf': function (_0x23cfb3, _0x53e1a7, _0x57712f) {                
    return _0x23cfb3(_0x53e1a7, _0x57712f);            
},            
'gkGGU': function (_0x50ca5b, _0x404328, _0x147c72, _0x16bae0) {                
    return _0x50ca5b(_0x404328, _0x147c72, _0x16bae0);            
}
'aUMKF': function (_0x77f479, _0x215b9e) {
    var _0x5402f5 = _0x1c34;
    return _0x77f479(_0x215b9e);
}

也就是说，处理的逻辑是，只要 return 的是一个 CallExpression，并且它是调用函数参数列表中的第一个参数作为函数名（callee），其余所有参数作为 arguments，就可以认为是要还原为直接调用形式。

并且函数体代码不能仅仅局限于只有1条return，还需要扩大范围，比如上面的第三种情况，虽然 body.body.length > 1，但实际上只要最后一句是合法的 return 调用，就可以继续处理。正确的处理流程应该如下

函数体中最后一条语句是 ReturnStatement
return的内容是 CallExpression
callee是Identifier，且是函数第一个参数
arguments是后面所有参数

最终的代码如下

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Load the output of the previous stage as UTF-8 text, then parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew5.js", "utf-8");
let ast = parser.parse(jscode);
/**
 * Inline call-forwarding wrapper properties of an obfuscation helper object.
 *
 * A property qualifies when its value is a function whose last statement is
 * `return p0(p1, ..., pN)`, i.e. it calls its first parameter with all the
 * remaining parameters, in order. Every call site of the form
 * `obj['key'](f, a, b)` is then rewritten to `f(a, b)`.
 *
 * Call sites are skipped when they pass fewer than two arguments (wrappers
 * that take only the function itself are not handled here), when the argument
 * count differs from the wrapper's parameter count, or when an argument is a
 * spread element.
 *
 * @param {object} path - Babel NodePath used to look up the binding of objName.
 * @param {string} objName - Name of the variable holding the helper object.
 * @param {Array<object>} properties - Property nodes of the object literal.
 * @returns {void}
 */
function handleObfuscatedObject(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    // A Map, not a plain object: keys such as "toString" or "constructor" must
    // never resolve to something inherited from Object.prototype.
    const wrapperArity = new Map();

    for (const prop of properties) {
        // Optional chaining because a spread element has no key and an object method has no value.
        if (prop.key?.type !== 'StringLiteral' || prop.value?.type !== 'FunctionExpression') continue;

        const { params, body } = prop.value;
        if (params.length < 1) continue;

        // Only the last statement is inspected; an empty body has none.
        const returnStmt = body.body[body.body.length - 1];
        if (returnStmt?.type !== 'ReturnStatement' || returnStmt.argument?.type !== 'CallExpression') continue;

        // The callee must be the first parameter and the arguments must be
        // exactly the remaining parameters, in the same order.
        const callExpr = returnStmt.argument;
        if (
            callExpr.callee.type !== 'Identifier' ||
            callExpr.callee.name !== params[0].name ||
            callExpr.arguments.length !== params.length - 1
        ) {
            continue;
        }
        const forwardsInOrder = callExpr.arguments.every(
            (arg, i) => arg.type === 'Identifier' && arg.name === params[i + 1].name
        );
        if (!forwardsInOrder) continue;

        wrapperArity.set(prop.key.value, params.length);
    }

    binding.referencePaths.forEach(refPath => {
        const memberPath = refPath.parentPath;
        const memberNode = memberPath.node;

        // Only `objName['key'](...)` with the member expression in callee position.
        if (
            memberNode.type !== 'MemberExpression' ||
            memberNode.object.name !== objName ||
            memberNode.property.type !== 'StringLiteral' ||
            memberPath.parentPath.node.type !== 'CallExpression' ||
            memberPath.parentPath.node.callee !== memberNode
        ) {
            return;
        }

        const arity = wrapperArity.get(memberNode.property.value);
        if (arity === undefined) return;

        const callPath = memberPath.parentPath;
        const args = callPath.node.arguments;

        if (args.length < 2) return;
        // With a different argument count the rewritten call would not receive
        // the same arguments as the wrapper would have forwarded.
        if (args.length !== arity) return;
        // A spread element cannot be mapped onto individual parameters.
        if (args.some(arg => arg.type === 'SpreadElement')) return;

        // The first argument becomes the callee, the rest become its arguments.
        const [callee, ...fnArgs] = args;
        callPath.replaceWith(types.callExpression(callee, fnArgs));
    });
}



// Find every place where an object literal is bound to a plain variable name,
// either by declaration or by assignment, and hand it to the wrapper inliner.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        if (init?.type === 'ObjectExpression' && id.type === 'Identifier') {
            handleObfuscatedObject(path, id.name, init.properties);
        }
    },
    ExpressionStatement(path) {
        const expr = path.node.expression;

        // A comma-separated sequence is checked item by item; a lone expression
        // is treated as a sequence of one.
        const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];
        for (const item of candidates) {
            if (
                item.type === 'AssignmentExpression' &&
                item.right.type === 'ObjectExpression' &&
                // Targets such as `a.b = {...}` have no variable name to look up.
                item.left.type === 'Identifier'
            ) {
                handleObfuscatedObject(path, item.left.name, item.right.properties);
            }
        }
    }
});
let code = generator(ast).code;
// Write synchronously so that a failed write throws instead of being silently ignored.
fs.writeFileSync('./demoNewNew6.js', code);

去除函数花指令——第二次优化

类似于这种函数的属性

1
2
3

'eHAHE': function (_0x23a4d9) {                
    return _0x23a4d9();            
},

也就是函数只接收一个参数，并把这个参数当作函数直接调用：return的CallExpression的callee就是这个参数，且调用时不带任何实参

代码如下:

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

// Load the output of the previous stage as UTF-8 text, then parse it into a Babel AST.
const jscode = fs.readFileSync("./demoNewNew6.js", "utf-8");
let ast = parser.parse(jscode);
/**
 * Inline zero-argument call wrappers of an obfuscation helper object.
 *
 * A property qualifies when its value is a one-parameter function containing a
 * top-level `return p();` that calls that parameter with no arguments. Every
 * call site `obj['key'](f)` is then rewritten to `f()`.
 *
 * A use of `obj['key']` that is not itself being called (for example when it
 * is passed as an argument to another function) is left untouched.
 *
 * @param {object} path - Babel NodePath used to look up the binding of objName.
 * @param {string} objName - Name of the variable holding the helper object.
 * @param {Array<object>} properties - Property nodes of the object literal.
 * @returns {void}
 */
function handleObfuscatedObject(path, objName, properties) {
    const binding = path.scope.getBinding(objName);
    if (!binding) return;

    const propSet = new Set();

    for (const prop of properties) {
        // Optional chaining because a spread element has no key and an object method has no value.
        if (
            prop.key?.type !== 'StringLiteral' ||
            prop.value?.type !== 'FunctionExpression' ||
            prop.value.params.length !== 1
        ) {
            continue;
        }

        const paramName = prop.value.params[0].name;
        const callsItsParameter = prop.value.body.body.some(stmt =>
            stmt.type === 'ReturnStatement' &&
            stmt.argument?.type === 'CallExpression' &&
            stmt.argument.callee.type === 'Identifier' &&
            stmt.argument.callee.name === paramName &&
            stmt.argument.arguments.length === 0
        );

        if (callsItsParameter) {
            propSet.add(prop.key.value);
        }
    }

    binding.referencePaths.forEach(refPath => {
        const memberExpr = refPath.parentPath;
        const callExpr = memberExpr.parentPath;

        if (
            memberExpr.node.type !== 'MemberExpression' ||
            memberExpr.node.object.name !== objName ||
            memberExpr.node.property.type !== 'StringLiteral' ||
            callExpr.node.type !== 'CallExpression' ||
            // The member expression must be what is being called, not an
            // argument of some other call.
            callExpr.node.callee !== memberExpr.node
        ) {
            return;
        }

        if (!propSet.has(memberExpr.node.property.value)) return;

        const args = callExpr.node.arguments;
        if (args.length !== 1) return;
        // A spread element is not a callable expression by itself.
        if (args[0].type === 'SpreadElement') return;

        callExpr.replaceWith(types.callExpression(args[0], []));
    });
}

// Find every place where an object literal is bound to a plain variable name,
// either by declaration or by assignment, and hand it to the wrapper inliner.
traverse(ast, {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        if (init?.type === 'ObjectExpression' && id.type === 'Identifier') {
            handleObfuscatedObject(path, id.name, init.properties);
        }
    },
    ExpressionStatement(path) {
        const expr = path.node.expression;

        // A comma-separated sequence is checked item by item; a lone expression
        // is treated as a sequence of one.
        const candidates = expr.type === 'SequenceExpression' ? expr.expressions : [expr];
        for (const item of candidates) {
            if (
                item.type === 'AssignmentExpression' &&
                item.right.type === 'ObjectExpression' &&
                // Targets such as `a.b = {...}` have no variable name to look up.
                item.left.type === 'Identifier'
            ) {
                handleObfuscatedObject(path, item.left.name, item.right.properties);
            }
        }
    }
});
let code = generator(ast).code;
// Write synchronously so that a failed write throws instead of being silently ignored.
fs.writeFileSync('./demoNewNew7.js', code);

此时还原出的代码，从2700多行减少到800行，然后剩下的一些代码可以手动删一下，比如大数组、大数组还原、字符串解密函数等代码。

去除switch混淆

这里只针对这一种情况，因为也是代码中的情况

// Obfuscated sample: the statements are scattered over switch cases and
// executed in the order given by the "3|0|2|1|4" string.
function imageToCanvas(_0x16d162) {
    // _0x2030a7 holds the case keys in execution order; _0x222463 is the position in it.
    var _0x2030a7 = "3|0|2|1|4"["split"]('|'),
        _0x222463 = 0x0;
    while (!![]) {
        // Pick the next key, then advance the position.
        switch (_0x2030a7[_0x222463++]) {
            case '0': // runs 2nd
                _0x4e7708['width'] = _0x16d162["width"];
                continue;
            case '1': // runs 4th
                _0x4e7708["getContext"]('2d')["drawImage"](_0x16d162, 0x0, 0x0);
                continue;
            case '2': // runs 3rd
                _0x4e7708["height"] = _0x16d162['height'];
                continue;
            case '3': // runs 1st
                var _0x4e7708 = document["createElement"]("canvas");
                continue;
            case '4': // runs last
                return _0x4e7708;
        }
        // Reached only when the key matches no case: leave the loop.
        break;
    }
}

这里给两个代码

第一个如下，这个代码只能处理单层switch混淆，目前无法处理switch嵌套的混淆，如果有多层switch嵌套混淆，建议使用第二个，该代码的思路如下

提取 switchKeyArray = "3|0|2|1|4".split('|')
映射出 caseMap = { '0': Node, '1': Node, ... }
依次取出对应 case 节点，拼成新语句列表
替换原始 while-switch 为这些语句

/**
 * Undo one level of while/switch control-flow flattening.
 *
 * Expected shape, with the order array declared in the same statement list as
 * the loop:
 *
 *     var order = "3|0|2|1|4".split("|"), i = 0;
 *     while (true) { switch (order[i++]) { case '0': ...; continue; ... } break; }
 *
 * The loop is replaced by the case bodies in the order given by the string,
 * with their top-level `continue` statements dropped. The order-array
 * declaration itself is left in place.
 *
 * Nothing is changed when the shape does not match or when a key in the order
 * string has no matching case; emitting a partial sequence would silently
 * lose statements.
 *
 * @param {object} path - Babel NodePath of a WhileStatement.
 * @returns {void}
 */
function handleObfuscatedSwitch(path) {
    const { node } = path;

    // 1. Must be a while loop whose block starts with a switch statement.
    if (node.type !== 'WhileStatement') return;
    const body = node.body.body;
    if (!Array.isArray(body) || body[0]?.type !== 'SwitchStatement') return;

    const switchNode = body[0];
    const discriminant = switchNode.discriminant;

    // 2. The discriminant must look like `order[i++]`.
    if (
        discriminant.type !== 'MemberExpression' ||
        discriminant.object.type !== 'Identifier' ||
        discriminant.property.type !== 'UpdateExpression'
    ) {
        return;
    }
    const arrayName = discriminant.object.name;

    // 3. Look for the declaration of the order array among the sibling statements.
    const parentBody = path.parentPath.node.body;
    if (!Array.isArray(parentBody)) return;

    let orderArray = null;
    for (const stmt of parentBody) {
        if (stmt.type !== 'VariableDeclaration') continue;
        // One declaration may hold several declarators, e.g. `var a = 1, b = 2;`.
        for (const decl of stmt.declarations) {
            const init = decl.init;
            if (
                decl.id.name !== arrayName ||
                init?.type !== 'CallExpression' ||
                init.callee.type !== 'MemberExpression'
            ) {
                continue;
            }
            const { object, property, computed } = init.callee;
            // Accept both `"..."["split"](sep)` and `"...".split(sep)`.
            const methodName = computed
                ? (property.type === 'StringLiteral' ? property.value : undefined)
                : property.name;
            const separator = init.arguments[0];
            if (
                methodName !== 'split' ||
                object.type !== 'StringLiteral' ||
                separator?.type !== 'StringLiteral'
            ) {
                continue;
            }
            // e.g. "3|0|2|1|4" split on "|" gives ['3', '0', '2', '1', '4'].
            orderArray = object.value.split(separator.value);
            break;
        }
    }
    if (!orderArray) return;

    // 4. Map each case key to its statements, without the `continue` that only
    //    served the dispatcher loop. Keys are stringified because the order
    //    array always holds strings.
    const caseMap = new Map();
    for (const caseNode of switchNode.cases) {
        if (caseNode.test && caseNode.consequent) {
            caseMap.set(
                String(caseNode.test.value),
                caseNode.consequent.filter(n => n.type !== 'ContinueStatement')
            );
        }
    }
    console.log(caseMap);

    // 5. Every key must have a case, otherwise leave the loop alone.
    if (!orderArray.every(key => caseMap.has(key))) return;
    const orderedStatements = orderArray.flatMap(key => caseMap.get(key));

    // 6. Replace the whole loop with the statements in execution order.
    if (orderedStatements.length === 0) {
        path.remove();
    } else {
        path.replaceWithMultiple(orderedStatements);
    }
}
    
traverse(ast, {
    WhileStatement(path) {
        handleObfuscatedSwitch(path);
        // Handle at most one flattened switch per traversal, because rewriting
        // nested ones in the same pass may go wrong. Stop only when this loop was
        // actually rewritten; an ordinary while loop must not end the search.
        // NOTE(review): relies on the path no longer being a WhileStatement after
        // replaceWithMultiple()/remove() - confirm against the Babel version in use.
        if (path.removed || !path.isWhileStatement()) {
            path.stop();
        }
    }
});

第二个代码思路如下：

找MemberExpression，也就是"3|0|2|1|4"["split"]('|')
从MemberExpression向上查找祖先节点（依次经过CallExpression、VariableDeclarator），找到变量声明节点VariableDeclaration，也就是var _0x2030a7 = "3|0|2|1|4"["split"]('|')
变量声明节点的兄弟节点，在这里是声明节点的后一句，就是while循环的地方，通过.getSibling(varPath.key + 1)
然后取出索引和代码，构造数组
在后面就和前面的代码一样了，只不过没有替换，而是删掉旧节点

code

const fs = require('fs');
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
const generator = require("@babel/generator").default;

const jscode = fs.readFileSync("./aaa.js", {
    encoding: "utf-8"
});
let ast = parser.parse(jscode);

// Each pass rewrites at most one dispatcher, so nested dispatchers are handled
// by later passes. The cap only guards against looping forever.
const MAX_PASSES = 20;

/**
 * Try to unflatten the while/switch dispatcher driven by a `"3|0|2|1|4"["split"]`
 * member expression.
 *
 * The statement right after the enclosing variable declaration must be a while
 * loop whose block starts with a switch. On success the loop is replaced, in
 * place, by the case bodies in the order given by the string (top-level
 * `continue` statements dropped), and the variable declaration is removed.
 *
 * NOTE: the whole declaration is removed, on the assumption that it only holds
 * the order array and its counter.
 *
 * @param {object} path - Babel NodePath of a MemberExpression.
 * @returns {boolean} true when the AST was rewritten.
 */
function unflattenSwitchAt(path) {
    const { object, property } = path.node;
    if (!types.isStringLiteral(object) || !types.isStringLiteral(property, { value: 'split' })) {
        return false;
    }

    // The declaration must sit in a statement list so that it has a next sibling.
    const varPath = path.findParent((p) => p.isVariableDeclaration());
    if (!varPath || typeof varPath.key !== 'number') return false;

    const whilePath = varPath.getSibling(varPath.key + 1);
    if (!whilePath.isWhileStatement()) return false;

    const loopBody = whilePath.node.body;
    if (!types.isBlockStatement(loopBody)) return false;
    const switchNode = loopBody.body[0];
    if (!types.isSwitchStatement(switchNode)) return false;

    // Case key -> all statements of that case, minus the dispatcher's `continue`.
    const stepsByKey = new Map();
    for (const switchCase of switchNode.cases) {
        if (!switchCase.test) continue; // `default:` has no key
        stepsByKey.set(
            String(switchCase.test.value),
            switchCase.consequent.filter((stmt) => !types.isContinueStatement(stmt))
        );
    }

    // Every key must map to a case, otherwise statements would be lost.
    const order = object.value.split("|");
    if (!order.every((key) => stepsByKey.has(key))) return false;
    const ordered = order.flatMap((key) => stepsByKey.get(key));

    // Replace the loop where it stands so that statements following it keep
    // their position, then drop the declaration of the order array.
    if (ordered.length > 0) {
        whilePath.replaceWithMultiple(ordered);
    } else {
        whilePath.remove();
    }
    varPath.remove();
    return true;
}

for (let pass = 0; pass < MAX_PASSES; pass++) {
    let changed = false;
    traverse(ast, {
        MemberExpression(path) {
            if (unflattenSwitchAt(path)) {
                changed = true;
                // One rewrite per traversal, then start over on the updated tree.
                path.stop();
            }
        }
    });
    if (!changed) break;
}
let code = generator(ast).code;
// Write synchronously so that a failed write throws instead of being silently ignored.
fs.writeFileSync('./aaa3.js', code);

总结

其实有的地方还不够完善，只是针对大部分混淆的情况，还有一些表达式和函数的情况没有完全考虑进去，毕竟AST混淆的话针对每个混淆都不尽相同，遇到的时候再做改动。