OLLVM虚假控制流源码分析

文章介绍了LLVM中用于函数混淆的BogusControlFlow技术,包括runOnFunction函数的逻辑,检查输入参数,以及bogus函数如何按照给定概率对基本块进行混淆。在混淆过程中,createAlteredBasicBlock函数用于克隆基本块并处理PHINode和操作数映射。混淆方法涉及向基本块添加无用指令、修改条件判断等,增加代码分析难度。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

runOnFunction函数

if (ObfTimes <= 0) {
        errs()<<"BogusControlFlow application number -bcf_loop=x must be x > 0";
		return false;
      }
if ( !((ObfProbRate > 0) && (ObfProbRate <= 100)) ) {
        errs()<<"BogusControlFlow application basic blocks percentage -bcf_prob=x must be 0 < x <= 100";
		return false;
      }
// Fallback values used when the options below are not given on the command
// line: 30 (percent) for the per-block probability, 1 for the loop count.
const int defaultObfRate = 30, defaultObfTime = 1;

// -bcf_prob=<n>: probability, in percent, that each basic block is obfuscated
// by the -bcf pass. Optional; initialised to defaultObfRate.
static cl::opt<int>
ObfProbRate("bcf_prob", cl::desc("Choose the probability [%] each basic blocks will be obfuscated by the -bcf pass"), cl::value_desc("probability rate"), cl::init(defaultObfRate), cl::Optional);

// -bcf_loop=<n>: how many times the -bcf pass loops on a function.
// Optional; initialised to defaultObfTime.
static cl::opt<int>
ObfTimes("bcf_loop", cl::desc("Choose how many time the -bcf pass loop on a function"), cl::value_desc("number of times"), cl::init(defaultObfTime), cl::Optional);

ObfTimes和ObfProbRate是这个pass的两个命令行参数:ObfTimes(-bcf_loop)表示对函数进行混淆的次数,默认值为1;ObfProbRate(-bcf_prob)表示每个基本块被混淆的概率(百分比),默认值为30。两者都可以在运行opt时通过参数传入。

    // check for compatible
      for (BasicBlock &bb : F.getBasicBlockList()) {
        if (isa<InvokeInst>(bb.getTerminator())) {
          return false;
        }
      }

遍历这个函数的所有基本块,如果某个基本块的终结指令(terminator)是invoke(带异常处理的函数调用,不同于普通的call),runOnFunction就直接返回false,也就是整个函数都不做混淆,而不只是跳过这一个基本块。

if(toObfuscate(flag,&F,"bcf")) {
        bogus(F);
        doF(*F.getParent());
        return true;
      }

通过toObfuscate判断这个函数是否需要做bcf(虚假控制流)混淆,需要的话就调用bogus(F)进行混淆,再对函数所在的模块调用doF,最后返回true表示函数已被修改。

bogus函数

  if(ObfProbRate < 0 || ObfProbRate > 100){
        DEBUG_WITH_TYPE("opt", errs() << "bcf: Incorrect value,"
            << " probability rate set to default value: "
            << defaultObfRate <<" \n");
        ObfProbRate = defaultObfRate;
      }
 if(ObfTimes <= 0){
        DEBUG_WITH_TYPE("opt", errs() << "bcf: Incorrect value,"
            << " must be greater than 1. Set to default: "
            << defaultObfTime <<" \n");
        ObfTimes = defaultObfTime;
      }

首先进行判断这个次数和概率,是否符合条件,不符合的话会进行设置默认值。
紧接着就是一个大型的do while循环里面包含着的代码:

 		std::list<BasicBlock *> basicBlocks;
          for (Function::iterator i=F.begin();i!=F.end();++i) {
            basicBlocks.push_back(&*i);
          }

把函数中所有的基本块放进basicBlocks这个list里面。
然后对每个基本块取一个随机数(get_range(100)),如果它不大于ObfProbRate,就对该基本块进行混淆:

 if((int)llvm::cryptoutils->get_range(100) <= ObfProbRate){
              DEBUG_WITH_TYPE("opt", errs() << "bcf: Block "
                  << NumBasicBlocks <<" selected. \n");
              hasBeenModified = true;
              ++NumModifiedBasicBlocks;
              NumAddedBasicBlocks += 3;
              FinalNumBasicBlocks += 3;
              // Add bogus flow to the given Basic Block (see description)
              BasicBlock *basicBlock = basicBlocks.front();
              addBogusFlow(basicBlock, F);
            }else{
              DEBUG_WITH_TYPE("opt", errs() << "bcf: Block "
                  << NumBasicBlocks <<" not selected.\n");
            }

也就是说,每个基本块大约有ObfProbRate%的概率被选中,被选中的基本块会交给addBogusFlow函数处理。
bogus函数的作用,就是对指定函数的每个基本块以ObfProbRate%的概率调用addBogusFlow进行混淆。

混淆前main函数的IR如下:

define dso_local i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32* %retval, align 4
  br label %return

if.else:                                          ; preds = %entry
  store i32 10, i32* %retval, align 4
  br label %return

return:                                           ; preds = %if.else, %if.then
  %3 = load i32, i32* %retval, align 4
  ret i32 %3
}

addBogusFlow函数1

 Instruction *i1 = &*basicBlock->begin();
      if(basicBlock->getFirstNonPHIOrDbgOrLifetime())
        i1 = basicBlock->getFirstNonPHIOrDbgOrLifetime();
      Twine *var;
      var = new Twine("originalBB");
      BasicBlock *originalBB = basicBlock->splitBasicBlock(i1, *var);

执行完后,basicBlock里只剩下一条指令br label %originalBB,原来的指令都被切分到了originalBB中,originalBB目前的代码块如下:

originalBB:
 %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else

而之前的entry就是目前basicblock:

entry:
br label %originalBB

紧接着:

 Twine * var3 = new Twine("alteredBB");
      BasicBlock *alteredBB = createAlteredBasicBlock(originalBB, *var3, &F);

createAlteredBasicBlock会把这个originalBB进行克隆

createAlteredBasicBlock函数

    virtual BasicBlock* createAlteredBasicBlock(BasicBlock * basicBlock,
        const Twine &  Name = "gen", Function * F = 0){
      // Useful to remap the informations concerning instructions.
      ValueToValueMapTy VMap;
      BasicBlock * alteredBB = llvm::CloneBasicBlock (basicBlock, VMap, Name, F);
  
  
        // Remap attached metadata.
        SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
        i->getAllMetadata(MDs);
        // important for compiling with DWARF, using option -g.
        i->setDebugLoc(ji->getDebugLoc());
        ji++;
      } // The instructions' informations are now all correct

这部分代码主要解决两个问题,因为CloneBasicBlock函数进行的克隆并不是完全的克隆。第一,它不会对操作数进行替换,比如:

orig:
  %a = ...
  %b = fadd %a, ...
 
clone:
  %a.clone = ...
  %b.clone = fadd %a, ... ; Note that this references the old %a and
not %a.clone!

在clone出来的基本块中,fadd指令的操作数不是%a.clone,而是原基本块中的%a,所以之后要通过VMap对所有操作数进行映射,使其指向克隆出来的值:

      BasicBlock::iterator ji = basicBlock->begin();
      for (BasicBlock::iterator i = alteredBB->begin(), e = alteredBB->end() ; i != e; ++i){
        // Loop over the operands of the instruction
        for(User::op_iterator opi = i->op_begin (), ope = i->op_end(); opi != ope; ++opi){
          // get the value for the operand
          Value *v = MapValue(*opi, VMap,  RF_NoModuleLevelChanges, 0);
          if (v != 0){
            *opi = v;
          }
        }

第二,它不会对PHI Node进行任何处理,PHI Node的前驱块仍是原始基本块的前驱块,但是新克隆出来的基本块没有任何前驱块,所以要对PHI Node的前驱块进行remap:

// Remap phi nodes' incoming blocks.
        if (PHINode *pn = dyn_cast<PHINode>(i)) {
          for (unsigned j = 0, e = pn->getNumIncomingValues(); j != e; ++j) {
            Value *v = MapValue(pn->getIncomingBlock(j), VMap, RF_None, 0);
            if (v != 0){
              pn->setIncomingBlock(j, cast<BasicBlock>(v));
            }
          }
        }

解释一下PHI Node:LLVM IR采用SSA(Static Single Assignment,静态单赋值)形式表示,即每个变量都只能被赋值一次,这样做主要是为了方便代码优化。如下图,%temp的值被赋值成1后就永远都是1了:
在这里插入图片描述
PHI Node是一条可以在一定程度上绕开SSA限制的指令,它可以根据控制流来自哪个前驱基本块来取不同的值(有点像三元运算符)。如下图,如果PHI Node的前驱基本块是entry,则将current_i赋值为2;如果是for_body,则赋值为%i_plus_one:
在这里插入图片描述

 // Walk every instruction of the cloned block (alteredBB) and perturb it so the
 // clone differs from the original: junk arithmetic is created in front of
 // binary operators, and compare instructions are meant to be mutated.
 // NOTE(review): the ICmp/FCmp branches below are nested inside the
 // isBinaryOp() check; LLVM does not classify ICmp/FCmp as binary operators,
 // so those branches look unreachable as written — confirm.
 for (BasicBlock::iterator i = alteredBB->begin(), e = alteredBB->end() ; i != e; ++i){
        // in the case we find binary operator, we modify slightly this part by randomly
        // insert some instructions
        if(i->isBinaryOp()){ // binary instructions
          unsigned opcode = i->getOpcode();
          // op / op1 only receive the newly created junk instructions; nothing
          // in this loop reads them back afterwards.
          BinaryOperator *op, *op1 = NULL;
          // Name given to some of the junk instructions.
          // NOTE(review): this heap-allocated Twine is never freed in this block.
          Twine *var = new Twine("_");
          // treat differently float or int
          // Binary int
          if(opcode == Instruction::Add || opcode == Instruction::Sub ||
              opcode == Instruction::Mul || opcode == Instruction::UDiv ||
              opcode == Instruction::SDiv || opcode == Instruction::URem ||
              opcode == Instruction::SRem || opcode == Instruction::Shl ||
              opcode == Instruction::LShr || opcode == Instruction::AShr ||
              opcode == Instruction::And || opcode == Instruction::Or ||
              opcode == Instruction::Xor){
            // The loop counter starts at a random value and runs up to 10, so the
            // number of rounds is random; each round picks one junk pattern.
            // presumably get_range(n) yields a value in [0, n) — verify.
            for(int random = (int)llvm::cryptoutils->get_range(10); random < 10; ++random){
              switch(llvm::cryptoutils->get_range(4)){ // to improve
                case 0: //do nothing
                  break;
                // case 1: (-operand0) + operand1, created with &*i as insertion point
                case 1: op = BinaryOperator::CreateNeg(i->getOperand(0),*var,&*i);
                        op1 = BinaryOperator::Create(Instruction::Add,op,
                            i->getOperand(1),"gen",&*i);
                        break;
                // case 2: (operand0 - operand1) * operand1
                case 2: op1 = BinaryOperator::Create(Instruction::Sub,
                            i->getOperand(0),
                            i->getOperand(1),*var,&*i);
                        op = BinaryOperator::Create(Instruction::Mul,op1,
                            i->getOperand(1),"gen",&*i);
                        break;
                // case 3: operand0 << operand1
                case 3: op = BinaryOperator::Create(Instruction::Shl,
                            i->getOperand(0),
                            i->getOperand(1),*var,&*i);
                        break;
              }
            }
          }
          // Binary float
          if(opcode == Instruction::FAdd || opcode == Instruction::FSub ||
              opcode == Instruction::FMul || opcode == Instruction::FDiv ||
              opcode == Instruction::FRem){
            // Same scheme as the integer case, with two floating-point patterns.
            for(int random = (int)llvm::cryptoutils->get_range(10); random < 10; ++random){
              switch(llvm::cryptoutils->get_range(3)){ // can be improved
                case 0: //do nothing
                  break;
                // case 1: (-operand0) + operand1
                case 1: op = BinaryOperator::CreateFNeg(i->getOperand(0),*var,&*i);
                        op1 = BinaryOperator::Create(Instruction::FAdd,op,
                            i->getOperand(1),"gen",&*i);
                        break;
                // case 2: (operand0 - operand1) * operand1
                case 2: op = BinaryOperator::Create(Instruction::FSub,
                            i->getOperand(0),
                            i->getOperand(1),*var,&*i);
                        op1 = BinaryOperator::Create(Instruction::FMul,op,
                            i->getOperand(1),"gen",&*i);
                        break;
              }
            }
          }
          if(opcode == Instruction::ICmp){ // Condition (with int)
            // NOTE(review): `&i` is the address of the iterator object, not of the
            // instruction it refers to; the rest of this loop uses `&*i`. This
            // cast looks wrong — confirm before relying on this branch.
            ICmpInst *currentI = (ICmpInst*)(&i);
            switch(llvm::cryptoutils->get_range(3)){ // must be improved
              case 0: //do nothing
                break;
              case 1: currentI->swapOperands();
                      break;
              case 2: // randomly change the predicate
                      switch(llvm::cryptoutils->get_range(10)){
                        case 0: currentI->setPredicate(ICmpInst::ICMP_EQ);
                                break; // equal
                        case 1: currentI->setPredicate(ICmpInst::ICMP_NE);
                                break; // not equal
                        case 2: currentI->setPredicate(ICmpInst::ICMP_UGT);
                                break; // unsigned greater than
                        case 3: currentI->setPredicate(ICmpInst::ICMP_UGE);
                                break; // unsigned greater or equal
                        case 4: currentI->setPredicate(ICmpInst::ICMP_ULT);
                                break; // unsigned less than
                        case 5: currentI->setPredicate(ICmpInst::ICMP_ULE);
                                break; // unsigned less or equal
                        case 6: currentI->setPredicate(ICmpInst::ICMP_SGT);
                                break; // signed greater than
                        case 7: currentI->setPredicate(ICmpInst::ICMP_SGE);
                                break; // signed greater or equal
                        case 8: currentI->setPredicate(ICmpInst::ICMP_SLT);
                                break; // signed less than
                        case 9: currentI->setPredicate(ICmpInst::ICMP_SLE);
                                break; // signed less or equal
                      }
                      break;
            }

          }
          if(opcode == Instruction::FCmp){ // Conditions (with float)
            // NOTE(review): same iterator-address cast as the ICmp branch above — confirm.
            FCmpInst *currentI = (FCmpInst*)(&i);
            switch(llvm::cryptoutils->get_range(3)){ // must be improved
              case 0: //do nothing
                break;
              case 1: currentI->swapOperands();
                      break;
              case 2: // randomly change the predicate
                      switch(llvm::cryptoutils->get_range(10)){
                        case 0: currentI->setPredicate(FCmpInst::FCMP_OEQ);
                                break; // ordered and equal
                        case 1: currentI->setPredicate(FCmpInst::FCMP_ONE);
                                break; // ordered and operands are unequal
                        case 2: currentI->setPredicate(FCmpInst::FCMP_UGT);
                                break; // unordered or greater than
                        case 3: currentI->setPredicate(FCmpInst::FCMP_UGE);
                                break; // unordered, or greater than, or equal
                        case 4: currentI->setPredicate(FCmpInst::FCMP_ULT);
                                break; // unordered or less than
                        case 5: currentI->setPredicate(FCmpInst::FCMP_ULE);
                                break; // unordered, or less than, or equal
                        case 6: currentI->setPredicate(FCmpInst::FCMP_OGT);
                                break; // ordered and greater than
                        case 7: currentI->setPredicate(FCmpInst::FCMP_OGE);
                                break; // ordered and greater than or equal
                        case 8: currentI->setPredicate(FCmpInst::FCMP_OLT);
                                break; // ordered and less than
                        case 9: currentI->setPredicate(FCmpInst::FCMP_OLE);
                                break; // ordered and less than or equal
                      }
                      break;
            }
          }
        }
      }

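大概思路就是往克隆出来的基本块里面添加一些没用的运算指令(结果不会被使用),或者修改cmp的条件。这里的BinaryOp指的是add、sub、mul、shl、and、fadd这类二元运算指令;需要注意的是,icmp/fcmp在LLVM中并不属于BinaryOp,所以上面代码里写在isBinaryOp()分支内部的ICmp/FCmp处理实际上不会被执行到,这也和下面克隆出来的基本块中icmp没有任何变化相一致。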

原基本块:

originalBB:
 %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else

copy的基本块:

originalBBalteredBB:                              ; preds = %originalBB, %entry
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %70 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %70, i64 1
  %71 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %71) #3
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %72 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %72, 0
  br i1 %cmpalteredBB,label %if.then,label %if.else

克隆之后,基本块里各个变量的名字都加上了alteredBB后缀。

addBogusFlow函数2

 	  alteredBB->getTerminator()->eraseFromParent();
      basicBlock->getTerminator()->eraseFromParent();

这里是把basicBlock(也就是entry)末尾的跳转和克隆出来的alteredBB末尾的跳转都删掉:

entry:

originalBBalteredBB:                              ; preds = %originalBB, %entry
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %70 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %70, i64 1
  %71 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %71) #3
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %72 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %72, 0
  	  Value * LHS = ConstantFP::get(Type::getFloatTy(F.getContext()), 1.0);
      Value * RHS = ConstantFP::get(Type::getFloatTy(F.getContext()), 1.0);
      Twine * var4 = new Twine("condition");
      FCmpInst * condition = new FCmpInst(*basicBlock, FCmpInst::FCMP_TRUE , LHS, RHS, *var4);

在entry(basicBlock)的末尾创建两个浮点常量1.0,并生成比较这两个常量的fcmp指令%condition;随后的BranchInst::Create会以它为条件生成跳转指令,结果如下:

entry:
	%condition=fcmp true float 1.00000e+00,1.00000e+00
	br i1 %condition, label %originalBB, label %originalBBalteredBB

fcmp的谓词是true(FCMP_TRUE),比较结果恒为真,所以这个条件跳转永远只会跳到前者originalBB。

 BranchInst::Create(originalBB, alteredBB, (Value *)condition, basicBlock);

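上面这一句是在basicBlock(entry)的末尾生成条件跳转:条件为真时跳到originalBB,否则跳到alteredBB。此外,克隆出来的originalBBalteredBB末尾也会生成一条无条件跳转,跳回originalBB(OLLVM源码中对应的是紧随其后的BranchInst::Create(originalBB, alteredBB),本文的代码片段没有贴出这一句):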

originalBBalteredBB:                              ; preds = %originalBB, %entry
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %70 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %70, i64 1
  %71 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %71) #3
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %72 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %72, 0
   br label %originalBB
      BasicBlock::iterator i = originalBB->end();
      // Split at this point (we only want the terminator in the second part)
      Twine * var5 = new Twine("originalBBpart2");
      BasicBlock * originalBBpart2 = originalBB->splitBasicBlock(--i , *var5);

找到originalBB的最后一条指令(终结指令),在这里进行split,切分出一个新的originalBBpart2基本块:

originalBBpart2:
  br i1 %cmp, label %if.then, label %if.else

切割后originalBB的最后一条指令就变成了无条件跳转:

originalBB:
 %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br label %originalBBpart2

紧接着把originalBB最后一行的无条件跳转删掉,再创建一个以fcmp为条件的条件跳转:

     originalBB->getTerminator()->eraseFromParent();
     Twine * var6 = new Twine("condition2");
      FCmpInst * condition2 = new FCmpInst(*originalBB, CmpInst::FCMP_TRUE , LHS, RHS, *var6);
      BranchInst::Create(originalBBpart2, alteredBB, (Value *)condition2, originalBB);

如下:

originalBB:
 %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  %condition2=fcmp true float 1.00000e+00,1.00000e+00
  br i1 %condition2, label %originalBBpart2, label %originalBBalteredBB

addBogusFlow函数的后半部分,主要就是在entry和originalBB这两个基本块的末尾,各创建了一条恒为真的浮点数比较指令以及对应的条件跳转。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

寻梦&之璐

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值