OLLVM控制流平坦化源码分析+魔改-CSDN博客

本文链接：https://blog.csdn.net/CSNN2019/article/details/131600471

文章目录

前置知识

Module是指模块，Function是模块下的函数，BasicBlock是函数下的基本块，Instruction是基本块下的IR指令。

Flattening::flatten(Function *f)

 for (Function::iterator i = f->begin(); i != f->end(); ++i) {
    BasicBlock *tmp = &*i;
    origBB.push_back(tmp);

    BasicBlock *bb = &*i;
    if (isa<InvokeInst>(bb->getTerminator())) {
      return false;
    }
  }

把函数分成很多个基本块，并且push到vector类型的 origBB中。

判断里面基本块是否大于1，不大于1的话就没有意义去进行混淆：

 if (origBB.size() <= 1) {
    return false;
  }

需要把vector里面的第一个基本块即入口基本块单独拿出来进行处理：对入口基本块进行判断，如果是无条件跳转则不进行任何处理，否则需要找到最后一条指令，将整个if结构给split，split之后两个块之间会自动添加跳转指令，然后就可以把原来的split后的if结构给它扔进要处理的基本块列表。

  origBB.erase(origBB.begin());

  // Get a pointer on the first BB
  Function::iterator tmp = f->begin(); //++tmp;
  BasicBlock *insert = &*tmp;

  // If main begin with an if
  BranchInst *br = NULL;
  if (isa<BranchInst>(insert->getTerminator())) {
    br = cast<BranchInst>(insert->getTerminator());
  }

  if ((br != NULL && br->isConditional()) ||
      insert->getTerminator()->getNumSuccessors() > 1) {
    BasicBlock::iterator i = insert->end();
    --i;

    if (insert->size() > 1) {
      --i;
    }

    BasicBlock *tmpBB = insert->splitBasicBlock(i, "first");
    origBB.insert(origBB.begin(), tmpBB);
  }

如果是条件跳转的话这里是把上面自动添加那个跳转指令给删除，如果不是的话，那么也是需要把它删除，因为跳转点目标还不能确定：

// Remove jump
  insert->getTerminator()->eraseFromParent();

源代码

 %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #3
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  br label %NodeBlock8

目前代码

entry:
  %.reg2mem = alloca i32
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #3
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  store i32 %2, i32* %.reg2mem
  %switchVar = alloca i32

创建一个switchvar变量，然后去获取一个随机整数创建store指令塞给switchvar中

 switchVar =
      new AllocaInst(Type::getInt32Ty(f->getContext()), 0, "switchVar", insert);
  new StoreInst(
      ConstantInt::get(Type::getInt32Ty(f->getContext()),
                       llvm::cryptoutils->scramble32(0, scrambling_key)),
      switchVar, insert);

也就是在switchvar添了如下这一行：

  store i32 157301900, i32* %switchVar

创建switch

创建两个block，其它的基本块插入它们之间

  loopEntry = BasicBlock::Create(f->getContext(), "loopEntry", f, insert);
  loopEnd = BasicBlock::Create(f->getContext(), "loopEnd", f, insert);

如下：

loopEntry:                                      
 
loopEnd:

目标基本块里面啥内容也没有。

在loopEntry里面新建一个load指令，用来加载switchVar的值：

 load = new LoadInst(switchVar, "switchVar", loopEntry);

目前loopentry指令如下：

loopEntry:                                        ; preds = %entry, %loopEnd
  %switchVar10 = load i32, i32* %switchVar

把insert插入到loopEntry之前，这里的insert就是entry基本块，再创建两个跳转指令，从insert（即第一个基本块）跳转到loopEntry；从loopend跳转到loopEntry

  // Move first BB on top
  insert->moveBefore(loopEntry);
  BranchInst::Create(loopEntry, insert);
  // loopEnd jump to loopEntry
  BranchInst::Create(loopEntry, loopEnd);

这里结束后，entry模块就完整了，如下：

entry:
  %.reg2mem = alloca i32
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #3
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  store i32 %2, i32* %.reg2mem
  %switchVar = alloca i32
  store i32 157301900, i32* %switchVar
  br label %loopEntry

而loopend模块也有了一条指令（其实也是完整了）：

loopEnd:                                        
br label %loopEntry

紧接着创建一个基本块，然后在基本块里面创建一个跳转指令，从switchDefault跳转到loopend中

 BasicBlock *swDefault =
      BasicBlock::Create(f->getContext(), "switchDefault", f, loopEnd);
  BranchInst::Create(loopEnd, swDefault);

多了一个switchDefault基本块，指令如下：

switchDefault:                                    ; preds = %loopEntry
  br label %loopEnd

创建一个switch指令，位置是在loopentry基本块下，且创建了0个case，然后设置了条件为load，就上面的load。

 switchI = SwitchInst::Create(&*f->begin(), swDefault, 0, loopEntry);
  switchI->setCondition(load);

把entry最后一行跳转指令删除后再创建了一个跳转指令，从entry跳转到loopentry

  f->begin()->getTerminator()->eraseFromParent();
    BranchInst::Create(loopEntry, &*f->begin());

  for (std::vector<BasicBlock *>::iterator b = origBB.begin();
       b != origBB.end(); ++b) {
    BasicBlock *i = *b;
    ConstantInt *numCase = NULL;

    // Move the BB inside the switch (only visual, no code logic)
    i->moveBefore(loopEnd);

    // Add case to switch
    numCase = cast<ConstantInt>(ConstantInt::get(
        switchI->getCondition()->getType(),
        llvm::cryptoutils->scramble32(switchI->getNumCases(), scrambling_key)));
    switchI->addCase(numCase, i);
  }

目前代码：

entry:
  %.reg2mem = alloca i32
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #3
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  store i32 %2, i32* %.reg2mem
  %switchVar = alloca i32
  store i32 157301900, i32* %switchVar
  br label %loopEntry

loopEntry:     
%switchVar10 = load i32, i32* %switchVar
  switch i32 %switchVar10, label %switchDefault [
  ]

switchDefault:                                    ; preds = %loopEntry
  br label %loopEnd

loopEnd:                                        
br label %loopEntry

创建case

  for (std::vector<BasicBlock *>::iterator b = origBB.begin();
       b != origBB.end(); ++b) {
    BasicBlock *i = *b;
    ConstantInt *numCase = NULL;

    // Move the BB inside the switch (only visual, no code logic)
    i->moveBefore(loopEnd);

    // Add case to switch
    numCase = cast<ConstantInt>(ConstantInt::get(
        switchI->getCondition()->getType(),
        llvm::cryptoutils->scramble32(switchI->getNumCases(), scrambling_key)));
    switchI->addCase(numCase, i);
  }

这里的i就是指剩下的那些case分支代码基本块，i->moveBefore(loopEnd)，把某个代码基本块置于loopend之前。比如某个基本块是这样：

NodeBlock8:                                       ; preds = %entry
  %Pivot9 = icmp slt i32 %2, 2
  br i1 %Pivot9, label %LeafBlock, label %NodeBlock

然后下面的这些代码就是创建一个numcase，就是case分支里面的case值，这个值它是随机生成的，种子的话是Entry.cpp里面的那个AesSeed值，如果确定AesSeed的话，那么这里随机生成的case每次都是固定的。
switchI->addCase(numCase, i);紧接着在switch里面增加一个case值，跳转到NodeBlock8里面。
目前循环执行完一次后，loopentry基本块（basic block）如下：

loopEntry:     
%switchVar10 = load i32, i32* %switchVar
  switch i32 %switchVar10, label %switchDefault [
   i32 157301900, label %NodeBlock8
  ]

当循环执行结束后：

目前代码

entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #3
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  br label %NodeBlock8

NodeBlock8:                                       ; preds = %entry
  %Pivot9 = icmp slt i32 %2, 2
  br i1 %Pivot9, label %LeafBlock, label %NodeBlock

NodeBlock:                                        ; preds = %NodeBlock8
  %Pivot = icmp slt i32 %2, 3
  br i1 %Pivot, label %sw.bb2, label %LeafBlock6

LeafBlock6:                                       ; preds = %NodeBlock
  %SwitchLeaf7 = icmp eq i32 %2, 3
  br i1 %SwitchLeaf7, label %sw.bb4, label %NewDefault

LeafBlock:                                        ; preds = %NodeBlock8
  %SwitchLeaf = icmp eq i32 %2, 1
  br i1 %SwitchLeaf, label %sw.bb, label %NewDefault

sw.bb:                                            ; preds = %LeafBlock
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
  br label %sw.epilog

sw.bb2:                                           ; preds = %NodeBlock
  %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0))
  br label %sw.epilog

sw.bb4:                                           ; preds = %LeafBlock6
  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i64 0, i64 0))
  br label %sw.epilog

NewDefault:                                       ; preds = %LeafBlock6, %LeafBlock
  br label %sw.default

sw.default:                                       ; preds = %NewDefault
  br label %sw.epilog

sw.epilog:                                        ; preds = %sw.default, %sw.bb4, %sw.bb2, %sw.bb
  %3 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %3, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %sw.epilog
  store i32 1, i32* %retval, align 4
  br label %return

if.else:                                          ; preds = %sw.epilog
  store i32 10, i32* %retval, align 4
  br label %return

return:                                           ; preds = %if.else, %if.then
  %4 = load i32, i32* %retval, align 4
  ret i32 %4
  
loopEnd:                                          ; preds = %if.else, %if.then, %sw.epilog, %sw.default, %NewDefault, %sw.bb4, %sw.bb2, %sw.bb, %LeafBlock, %LeafBlock6, %NodeBlock, %NodeBlock8, %switchDefault
  br label %loopEntry

}

枚举更改各个case block块

return block

 // Ret BB
    if (i->getTerminator()->getNumSuccessors() == 0) {
      continue;
    }

getNumSuccessors是获取后续BB的个数，Ret BB后继BB为0个（判断分支），直接continue

非条件跳转block

 // If it's a non-conditional jump
    if (i->getTerminator()->getNumSuccessors() == 1) {
      // Get successor and delete terminator
      BasicBlock *succ = i->getTerminator()->getSuccessor(0);
      i->getTerminator()->eraseFromParent();

      // Get next case
      numCase = switchI->findCaseDest(succ);

      // If next case == default case (switchDefault)
      if (numCase == NULL) {
        numCase = cast<ConstantInt>(
            ConstantInt::get(switchI->getCondition()->getType(),
                             llvm::cryptoutils->scramble32(
                                 switchI->getNumCases() - 1, scrambling_key)));
      }

      // Update switchVar and jump to the end of loop
      new StoreInst(numCase, load->getPointerOperand(), i);
      BranchInst::Create(loopEnd, i);
      continue;
    }

如果后面只有一个后继分支的话，那么先取出后继块并把原来的跳转指令抹去，然后在switch里查找该后继块对应的case值（如果找不到，即numCase为NULL，则使用最后一个case的值），接着创建一个store指令把这个case值写入switchVar，最后创建一个跳转指令跳转到loopend。
原来：

sw.bb:                                            ; preds = %LeafBlock
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
  br label %sw.epilog

改变后：

sw.bb:                                            ; preds = %LeafBlock
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
  store i32 387774014, i32* %switchVar
  br label %loopEnd

条件跳转 block

  if (i->getTerminator()->getNumSuccessors() == 2) {
      // Get next cases
      ConstantInt *numCaseTrue =
          switchI->findCaseDest(i->getTerminator()->getSuccessor(0));
      ConstantInt *numCaseFalse =
          switchI->findCaseDest(i->getTerminator()->getSuccessor(1));

      // Check if next case == default case (switchDefault)
      if (numCaseTrue == NULL) {
        numCaseTrue = cast<ConstantInt>(
            ConstantInt::get(switchI->getCondition()->getType(),
                             llvm::cryptoutils->scramble32(
                                 switchI->getNumCases() - 1, scrambling_key)));
      }

      if (numCaseFalse == NULL) {
        numCaseFalse = cast<ConstantInt>(
            ConstantInt::get(switchI->getCondition()->getType(),
                             llvm::cryptoutils->scramble32(
                                 switchI->getNumCases() - 1, scrambling_key)));
      }

      // Create a SelectInst
      BranchInst *br = cast<BranchInst>(i->getTerminator());
      SelectInst *sel =
          SelectInst::Create(br->getCondition(), numCaseTrue, numCaseFalse, "",
                             i->getTerminator());

      // Erase terminator
      i->getTerminator()->eraseFromParent();

      // Update switchVar and jump to the end of loop
      new StoreInst(sel, load->getPointerOperand(), i);
      BranchInst::Create(loopEnd, i);
      continue;
    }

首先在switch里分别查找两个后继分支对应的case值（如果某个分支找不到，即为NULL，则使用最后一个case的值），然后取出原来的条件跳转指令，用它的条件以及这两个case值去创建一个SelectInst，再删除原来的跳转指令，创建一个store指令把select的结果写入switchVar，最后创建一个跳转指令跳转到loopend。
原来：

NodeBlock8:                                       ; preds = %entry
  %Pivot9 = icmp slt i32 %2, 2
  br i1 %Pivot9, label %LeafBlock, label %NodeBlock

改变后：

NodeBlock8:                                       ; preds = %entry
  %Pivot9 = icmp slt i32 %2, 2
  %3 = select i1 %Pivot9, i32 -1519555718, i32 241816174
  store i32 %3, i32* %switchVar
  br label %loopEnd

目前代码

define dso_local i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %.reg2mem = alloca i32
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #3
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  store i32 %2, i32* %.reg2mem
  %switchVar = alloca i32
  store i32 157301900, i32* %switchVar
  br label %loopEntry

loopEntry:                                        ; preds = %entry, %loopEnd
  %switchVar10 = load i32, i32* %switchVar
  switch i32 %switchVar10, label %switchDefault [
    i32 157301900, label %NodeBlock8
    i32 241816174, label %NodeBlock
    i32 1003739776, label %LeafBlock6
    i32 -1519555718, label %LeafBlock
    i32 -749093422, label %sw.bb
    i32 1599617141, label %sw.bb2
    i32 1815329037, label %sw.bb4
    i32 1738940479, label %NewDefault
    i32 -282945350, label %sw.default
    i32 387774014, label %sw.epilog
    i32 1681741611, label %if.then
    i32 347219667, label %if.else
    i32 -618048859, label %return
  ]

switchDefault:                                    ; preds = %loopEntry
  br label %loopEnd

NodeBlock8:                                       ; preds = %entry
  %Pivot9 = icmp slt i32 %2, 2
  %3 = select i1 %Pivot9, i32 -1519555718, i32 241816174
  store i32 %3, i32* %switchVar
  br label %loopEnd

NodeBlock:                                        ; preds = %NodeBlock8
  %Pivot = icmp slt i32 %2, 3
  %4 = select i1 %Pivot, i32 1599617141, i32 1003739776
  store i32 %4, i32* %switchVar
  br label %loopEnd

LeafBlock6:                                       ; preds = %NodeBlock
  %SwitchLeaf7 = icmp eq i32 %2, 3
  %5 = select i1 %SwitchLeaf7, i32 1815329037, i32 1738940479
  store i32 %5, i32* %switchVar
  br label %loopEnd

LeafBlock:                                        ; preds = %NodeBlock8
  %SwitchLeaf = icmp eq i32 %2, 1
  %6 = select i1 %SwitchLeaf, i32 -749093422, i32 1738940479
  store i32 %6, i32* %switchVar
  br label %loopEnd

sw.bb:                                            ; preds = %LeafBlock
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
  store i32 387774014, i32* %switchVar
  br label %loopEnd

sw.bb2:                                           ; preds = %NodeBlock
  %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0))
  ; flattened like sw.bb / sw.bb4: select the sw.epilog case, then go to the dispatcher
  store i32 387774014, i32* %switchVar
  br label %loopEnd

sw.bb4:                                           ; preds = %LeafBlock6
  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i64 0, i64 0))
  store i32 387774014, i32* %switchVar
  br label %loopEnd

NewDefault:                                       ; preds = %LeafBlock6, %LeafBlock
  ; flattened: select the sw.default case, then go to the dispatcher
  store i32 -282945350, i32* %switchVar
  br label %loopEnd

sw.default:                                       ; preds = %NewDefault
  store i32 387774014, i32* %switchVar
  br label %loopEnd

sw.epilog:                                        ; preds = %sw.default, %sw.bb4, %sw.bb2, %sw.bb
  %3 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %3, 0
  %8 = select i1 %cmp, i32 1681741611, i32 347219667
  store i32 %8, i32* %switchVar
  br label %loopEnd

if.then:                                          ; preds = %sw.epilog
  store i32 1, i32* %retval, align 4
  store i32 -618048859, i32* %switchVar
  br label %loopEnd

if.else:                                          ; preds = %sw.epilog
  store i32 10, i32* %retval, align 4
  store i32 -618048859, i32* %switchVar
  br label %loopEnd

return:                                           ; preds = %if.else, %if.then
  %4 = load i32, i32* %retval, align 4
  ret i32 %4

loopEnd:                                          ; preds = %if.else, %if.then, %sw.epilog, %sw.default, %NewDefault, %sw.bb4, %sw.bb2, %sw.bb, %LeafBlock, %LeafBlock6, %NodeBlock, %NodeBlock8, %switchDefault
  br label %loopEntry
}

附带一句，进行控制流平坦化之前刚开始会把switch语句给它全部进行改为if else目的主要是为了进行多次平坦化做准备（进行平坦化时里面是可以填次数）撒花撒花🎉

魔改平坦化

正常流程就是首先进入entry，entry会给一个case常量值，然后进入loopentry，loopentry根据这个常量值进行switch分发跳转到case常量对应的基本块，基本块执行完又会赋值一个case常量值，跳转到loopend，loopend又会跳转到loopentry进行下一次分发。（感觉特征点的话就是entry和loopend都会跳转到loopentry）：
在这里插入图片描述

正常的控制流平坦化源码已经被研究透了，大部分的方法都是基于定位loopend基本块，实际上，这种简单的IR结构编译成汇编代码也是这个样子，很容易定位到loopend基本块。在这里，我们的魔改方法就直接在loopend基本块后面加一个新的switchInst，然后跳转到基本块，相当于从开始的switch转移到后面来了。

只需要在原来的代码后面新增对loopend基本块的操作即可：抹除原来的跳转（或者就是在准备骨架的时候让loopend不建立跳转到loopEntry的指令），插入LoadInst加载switchVar，再用于SwitchInst跳转：

	LoadInst *sw_var=new LoadInst(switchVar,"",loopEnd);
    SwitchInst * sw2=SwitchInst::create(sw_var,origBB.begin(),0,loopEnd);
    for(std::vector<BasicBlock>::iterator b =origBB.begin();b!=origBB.end(),b++){
      BasicBlock *bb=*b;
      ConstantInt *tmp=sw->findCaseDest(bb);
      sw2->addCase(tmp,bb);
    }
    std::vector<PHINode *>tmpPhi;

在这里插入图片描述
魔改后确实是平坦化的逻辑，但CFG变得难以分类了，并且loopend难以用上面的那种思维去定位了，但是仍然可以通过basicblock的入度最多的来进行定位loopend，然后去找到所有的真实块，这里的话就可以去使用多个loopend，使得真实块分配跳转到不同的loopend基本块处，这样就难找到所有的loopend和真实块了。

魔改源码：

/**
 * Flatten the control flow of `f` with a switch-based dispatcher that uses
 * `loopEndNum` separate "LoopEnd" blocks instead of a single one.
 *
 * Every basic block except the entry becomes a case of a switch in
 * "LoopBegin". Each flattened block stores the case value of its successor
 * into `switchVar` and branches to one of the LoopEnd blocks; each LoopEnd
 * block reloads `switchVar` and dispatches directly to the target block with
 * its own switch.
 *
 * @param f          function to transform (modified in place).
 * @param seed       value passed to srand(); rand() picks the default
 *                   destinations of the switches.
 * @param loopEndNum number of LoopEnd blocks to create. The function returns
 *                   without changes when this is < 1, when `f` has at most
 *                   one basic block, or when `f` has fewer blocks than
 *                   `loopEndNum`.
 *
 * Blocks whose terminator is neither a single-successor terminator nor a
 * conditional branch are left with their original terminator.
 * NOTE(review): getUniqueNumber() and valueEscapes() are defined elsewhere;
 * getUniqueNumber() is presumed to return a value not yet in `rand_list`.
 */
void DoFlatten(Function *f,int seed,int loopEndNum)
{
    srand(seed);
    std::vector<BasicBlock*> origBB;
    getBlocks(f,&origBB);
    // loopEndNum < 1 would leave loopEndBlocks empty (dereferenced below) and
    // divide by zero when spreading blocks over the loop ends.
    if(loopEndNum<1 || origBB.size()<=1 || origBB.size()<static_cast<size_t>(loopEndNum))
        return;

    BasicBlock *oldEntry=&*f->begin();
    origBB.erase(origBB.begin());

    // Split the entry block when it ends in anything other than a plain
    // unconditional jump, so the branching tail becomes a flattened block.
    BranchInst *firstBr=dyn_cast<BranchInst>(oldEntry->getTerminator());
    if((firstBr!=nullptr && firstBr->isConditional()) || oldEntry->getTerminator()->getNumSuccessors()>1)
    {
        BasicBlock::iterator iter=oldEntry->end();
        --iter;
        // Keep the instruction feeding the terminator together with it.
        if(oldEntry->size()>1)
            --iter;
        BasicBlock *splited=oldEntry->splitBasicBlock(iter,Twine("FirstBB"));
        origBB.insert(origBB.begin(),splited);
    }

    // Prepare the skeleton blocks.
    BasicBlock *newEntry=oldEntry;
    BasicBlock *loopBegin=BasicBlock::Create(f->getContext(),"LoopBegin",f,newEntry);
    BasicBlock *defaultCase=BasicBlock::Create(f->getContext(),"DefaultCase",f,newEntry);
    std::vector<BasicBlock*> loopEndBlocks;
    for(int i=0;i<loopEndNum;i++)
        loopEndBlocks.push_back(BasicBlock::Create(f->getContext(),"LoopEnd",f,newEntry));
    newEntry->moveBefore(loopBegin);

    // Link the skeleton: the default case jumps to a randomly chosen real
    // block, the entry jumps to the dispatcher.
    BranchInst::Create(origBB.at(rand()%origBB.size()),defaultCase);
    newEntry->getTerminator()->eraseFromParent();
    BranchInst::Create(loopBegin,newEntry);

    // Dispatch variable, allocated in the entry block and loaded in LoopBegin.
    AllocaInst *switchVar=new AllocaInst(Type::getInt32Ty(f->getContext()),0,Twine("switchVar"),newEntry->getTerminator());
    LoadInst *value=new LoadInst(switchVar,"cmd",loopBegin);
    SwitchInst *sw=SwitchInst::Create(value,defaultCase,0,loopBegin);

    // Put every real block into the switch under its own case number.
    std::vector<unsigned int> rand_list;
    unsigned int startNum=0;
    for(std::vector<BasicBlock *>::iterator b=origBB.begin();b!=origBB.end();++b)
    {
        BasicBlock *block=*b;
        block->moveBefore(*loopEndBlocks.begin());
        unsigned int num=getUniqueNumber(&rand_list);
        rand_list.push_back(num);
        if(b==origBB.begin())
            startNum=num;
        ConstantInt *numCase=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
        sw->addCase(numCase,block);
    }

    // The entry block selects the first real block.
    ConstantInt *startVal=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),startNum));
    new StoreInst(startVal,switchVar,newEntry->getTerminator());
    errs()<<"Put Block Into Switch\n";

    // Spread the real blocks over the loop ends, `every` blocks per loop end.
    // Any remainder stays on the last loop end, so end_iter never passes it.
    size_t every=origBB.size()/static_cast<size_t>(loopEndNum);
    if(every==0)
        every=1;
    size_t counter=0;
    std::vector<BasicBlock *>::iterator end_iter=loopEndBlocks.begin();
    for(std::vector<BasicBlock *>::iterator b=origBB.begin();b!=origBB.end();++b)
    {
        BasicBlock *block=*b;
        if(counter==every && end_iter+1!=loopEndBlocks.end())
        {
            counter=0;
            ++end_iter;
        }
        BasicBlock *loopEnd=*end_iter;
        BranchInst *oldBr=dyn_cast<BranchInst>(block->getTerminator());
        if(block->getTerminator()->getNumSuccessors()==1)
        {
            errs()<<"This block has 1 successor\n";
            BasicBlock *succ=block->getTerminator()->getSuccessor(0);
            ConstantInt *caseNum=sw->findCaseDest(succ);
            // Successor is not a case of the switch: use a fresh number,
            // which will hit the switch's default destination.
            if(caseNum==nullptr)
            {
                unsigned int num=getUniqueNumber(&rand_list);
                rand_list.push_back(num);
                caseNum=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
            }
            block->getTerminator()->eraseFromParent();
            new StoreInst(caseNum,switchVar,block);
            BranchInst::Create(loopEnd,block);
        }
        else if(oldBr!=nullptr && oldBr->isConditional())
        {
            // Only a conditional BranchInst has an i1 condition usable by the
            // select; other two-successor terminators are left untouched.
            errs()<<"This block has 2 successors\n";
            BasicBlock *succTrue=oldBr->getSuccessor(0);
            BasicBlock *succFalse=oldBr->getSuccessor(1);
            ConstantInt *numTrue=sw->findCaseDest(succTrue);
            ConstantInt *numFalse=sw->findCaseDest(succFalse);
            if(numTrue==nullptr)
            {
                unsigned int num=getUniqueNumber(&rand_list);
                rand_list.push_back(num);
                numTrue=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
            }
            if(numFalse==nullptr)
            {
                unsigned int num=getUniqueNumber(&rand_list);
                rand_list.push_back(num);
                numFalse=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
            }
            // Replace "br cond, T, F" by "switchVar = cond ? caseT : caseF".
            SelectInst *select=SelectInst::Create(oldBr->getCondition(),numTrue,numFalse,Twine("choice"),block->getTerminator());
            block->getTerminator()->eraseFromParent();
            new StoreInst(select,switchVar,block);
            BranchInst::Create(loopEnd,block);
        }
        counter++;
    }

    // Each loop end gets its own dispatcher over all real blocks, reusing the
    // case numbers of the main switch; its default is a random real block.
    for(std::vector<BasicBlock*>::iterator x=loopEndBlocks.begin();x!=loopEndBlocks.end();++x)
    {
        BasicBlock *loopEnd=*x;
        LoadInst *sw_val=new LoadInst(switchVar,"",loopEnd);
        SwitchInst *sw2=SwitchInst::Create(sw_val,origBB.at(rand()%origBB.size()),0,loopEnd);
        for(std::vector<BasicBlock*>::iterator b=origBB.begin();b!=origBB.end();++b)
        {
            BasicBlock *bb=*b;
            ConstantInt *tmp=sw->findCaseDest(bb);
            sw2->addCase(tmp,bb);
        }
    }

    // Demote PHI nodes and values used across blocks to stack slots, and
    // repeat until a full pass over the function finds nothing to demote.
    std::vector<PHINode *> tmpPhi;
    std::vector<Instruction *> tmpReg;
    BasicBlock *bbEntry=&*f->begin();
    do
    {
        tmpPhi.clear();
        tmpReg.clear();
        for(Function::iterator i=f->begin();i!=f->end();++i)
        {
            for(BasicBlock::iterator j=i->begin();j!=i->end();++j)
            {
                if(isa<PHINode>(j))
                {
                    PHINode *phi=cast<PHINode>(j);
                    tmpPhi.push_back(phi);
                    continue;
                }
                // Allocas in the entry block are already stack slots.
                if(!(isa<AllocaInst>(j) && j->getParent()==bbEntry) && (valueEscapes(&*j) || j->isUsedOutsideOfBlock(&*i)))
                {
                    tmpReg.push_back(&*j);
                    continue;
                }
            }
        }
        for(unsigned int i=0;i<tmpReg.size();i++)
            DemoteRegToStack(*tmpReg.at(i),f->begin()->getTerminator());
        for(unsigned int i=0;i<tmpPhi.size();i++)
            DemotePHIToStack(tmpPhi.at(i),f->begin()->getTerminator());
    }
    while(tmpReg.size()!=0 || tmpPhi.size()!=0);
    errs()<<"Finish\n";
}

函数合并pass

如何去实现这样的功能，我们可以写一个ModulePass，模块pass来处理，首先得定位哪些函数需要被合并，然后加入到一个List之中进行处理，先创建一个函数MixFunction，它的参数将所有的函数的参数合并，然后将List中的所有函数CopyInto这个MixFunction之中，然后修正一下各个函数对参数的引用，这时的函数内部的逻辑并不正确，需要修正一下，具体的修正过程就是添加个switch指令，根据MixFunction添加的最后一个参数来确定选用哪个原函数的BasicBlock块
在这里插入图片描述
此时MixFunction已经被修正了，可以作为单独的函数了，但是源程序的所有Call指令Call的都是原来的函数，MixFunction并没有被调用，所以我们需要遍历所有指令，然后替换掉这些Call指令，让他变成Call我们的MixFunction，传入的最后一个参数标识使用的MixFunction的哪个函数部分

#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/CFG.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include<vector>
#include<algorithm>
#include<map>
#include<ctime>
#include<cstdlib>
using namespace llvm;
namespace
{
        struct CombineFunction : public ModulePass
        {
            // Pass identifier; its address is handed to the ModulePass base class below.
            static char ID;
                   // Default constructor: forwards the pass identifier to ModulePass.
                   CombineFunction() : ModulePass(ID) {}
              /// Fills `lists` with a pointer to every basic block of `function`,
              /// in function order. Any previous content of `lists` is discarded.
              /// Returns `lists` so the call can be chained.
              std::vector<BasicBlock*> *getBlocks(Function *function,std::vector<BasicBlock*> *lists)
              {
                      lists->clear();
                      for(Function::iterator blockIt=function->begin(),blockEnd=function->end();blockIt!=blockEnd;++blockIt)
                      {
                              BasicBlock *current=&*blockIt;
                              lists->push_back(current);
                      }
                      return lists;
              }
              /// Fills `lists` with a pointer to every function of `module`,
              /// in module order. Any previous content of `lists` is discarded.
              /// Returns `lists` so the call can be chained.
              std::vector<Function*> *getFunctions(Module *module,std::vector<Function*> *lists)
              {
                      lists->clear();
                      for(Module::iterator funcIt=module->begin(),funcEnd=module->end();funcIt!=funcEnd;++funcIt)
                      {
                              Function *current=&*funcIt;
                              lists->push_back(current);
                      }
                      return lists;
              }
                /// Collects the annotation strings attached to `f` (logic borrowed from OLLVM).
                ///
                /// Walks the initializer of the module's "llvm.global.annotations" variable,
                /// keeps the entries whose first field is a bitcast of `f`, and appends each
                /// matching annotation string, lower-cased and followed by one space, to the
                /// result. Returns an empty string when no annotation is found.
                std::string readAnnotate(Function *f)
                {
                        std::string annotation = "";

                        GlobalVariable *glob=f->getParent()->getGlobalVariable( "llvm.global.annotations" );
                        if ( glob == nullptr )
                                return(annotation);

                        /* The initializer is expected to be an array of annotation records. */
                        ConstantArray *records = dyn_cast<ConstantArray>( glob->getInitializer() );
                        if ( records == nullptr )
                                return(annotation);

                        const unsigned recordCount = records->getNumOperands();
                        for ( unsigned idx = 0; idx < recordCount; ++idx )
                        {
                                ConstantStruct *record = dyn_cast<ConstantStruct>( records->getOperand( idx ) );
                                if ( record == nullptr )
                                        continue;

                                /* Field 0 names the annotated entity; only bitcasts of `f` are of interest. */
                                ConstantExpr *subject = dyn_cast<ConstantExpr>( record->getOperand( 0 ) );
                                if ( subject == nullptr )
                                        continue;
                                if ( subject->getOpcode() != Instruction::BitCast || subject->getOperand( 0 ) != f )
                                        continue;

                                /* Field 1 refers to the annotation text through a GetElementPtr expression. */
                                ConstantExpr *note = cast<ConstantExpr>( record->getOperand( 1 ) );
                                if ( note->getOpcode() != Instruction::GetElementPtr )
                                        continue;

                                GlobalVariable *textVar = dyn_cast<GlobalVariable>( note->getOperand( 0 ) );
                                if ( textVar == nullptr )
                                        continue;

                                ConstantDataSequential *text = dyn_cast<ConstantDataSequential>( textVar->getInitializer() );
                                if ( text != nullptr && text->isString() )
                                        annotation += text->getAsString().lower() + " ";
                        }
                        return(annotation);
                }
                /// Creates the merged function and clones every function of `func_list` into it.
                ///
                /// The merged function's parameter list is the concatenation of the parameter
                /// lists of all input functions, followed by one extra i32 parameter that later
                /// serves as the selector. It is created in the module of the first function
                /// with that function's return type, linkage and address space.
                ///
                /// @param func_list   functions to merge
                /// @param VMap        receives the old-argument -> new-argument mapping and is
                ///                    then passed to CloneFunctionInto
                /// @param name        name given to the merged function
                /// @param argPosList  receives, per input function, the index of its first
                ///                    parameter inside the merged parameter list
                /// @return the merged function, or NULL when the list is empty, when a function
                ///         is a declaration / available_externally / variadic, or when return
                ///         type, parent module or linkage differ from the first function's.
                ///
                /// Note: the cloned bodies are not wired together here; FixFunction does that.
                Function *Combine(std::vector<Function*> *func_list,ValueToValueMapTy* VMap,Twine name,std::vector<unsigned int> *argPosList)
                {
                        if(func_list->empty())    // nothing to merge
                                return NULL;

                        errs()<<"Check Function Type\n";
                        for(Function *candidate:*func_list)
                        {
                                // Bodiless, available_externally and variadic functions cannot be handled.
                                if(candidate->isDeclaration())
                                        return NULL;
                                if(candidate->hasAvailableExternallyLinkage())
                                        return NULL;
                                if(candidate->getFunctionType()->isVarArg())
                                        return NULL;
                        }
                        errs()<<"        Done\n";

                        errs()<<"Prepare Function Type\n";
                        std::vector<Type*> mergedParams;
                        for(Function *source:*func_list)
                                for(Argument &param:source->args())
                                        mergedParams.push_back(param.getType());
                        errs()<<"        Done\n";

                        errs()<<"Check Function Return Type\n";
                        Function *first=func_list->front();
                        Module *home=first->getParent();
                        Type *commonRet=first->getFunctionType()->getReturnType();
                        // Trailing i32 parameter: identifies which original body to run.
                        mergedParams.push_back(Type::getInt32Ty(home->getContext()));
                        for(Function *source:*func_list)
                        {
                                const bool sameRet=source->getFunctionType()->getReturnType()==commonRet;
                                if(!sameRet)
                                        return NULL;
                                const bool sameModule=source->getParent()==home;
                                if(!sameModule)
                                        return NULL;
                                const bool sameLinkage=source->getLinkage()==first->getLinkage();
                                if(!sameLinkage)
                                        return NULL;
                        }
                        FunctionType *mergedTy=FunctionType::get(commonRet,mergedParams,false);
                        Function *merged=Function::Create(mergedTy,first->getLinkage(),first->getAddressSpace(),name,home);
                        errs()<<"        Done\n";

                        errs()<<"Start Working\n";
                        Function::arg_iterator slot=merged->arg_begin();
                        unsigned int nextPos=0;
                        for(Function *source:*func_list)
                        {
                                argPosList->push_back(nextPos);    // where this function's parameters start
                                for(Argument &param:source->args())
                                {
                                        (*VMap)[&param]=&*slot;    // old argument -> merged argument
                                        ++slot;
                                        ++nextPos;
                                }
                        }

                        SmallVector<ReturnInst*,8> returns;
                        ClonedCodeInfo CodeInfo;
                        for(Function *source:*func_list)
                        {
                                const bool hasDebugInfo=source->getSubprogram()!= nullptr;
                                CloneFunctionInto(merged,source,*VMap,hasDebugInfo,returns,"",&CodeInfo);
                        }
                        errs()<<"        Done\n";
                        return merged;
                }
                /// Draws values from rand() until one is found that does not occur in
                /// `rand_list`, and returns it. `rand_list` itself is not modified; the
                /// caller is expected to record the returned value if it must stay unique.
                unsigned int getUniqueNumber(std::vector<unsigned int> *rand_list)
                {
                        unsigned int candidate=rand();
                        // Redraw for as long as the candidate is already taken.
                        while(std::find(rand_list->begin(),rand_list->end(),candidate)!=rand_list->end())
                                candidate=rand();
                        return candidate;
                }
                /// Wires the cloned bodies inside the merged function together so that it
                /// becomes a valid, callable function.
                ///
                /// Steps:
                ///  1. Look up, through `VMap`, the clone of every original entry block.
                ///  2. Create an "Entry" block that stores the last (selector) argument into a
                ///     stack slot, and a "Selector" block that switches on that slot. Each
                ///     original function gets one case with a unique random value; the default
                ///     goes to "DefaultEnd", which returns a null value (or plain `ret void`).
                ///  3. Replace every unconditional branch of the cloned bodies with a
                ///     conditional branch on `switchVar == <unused value>` whose true edge goes
                ///     to a random cloned entry block and whose false edge is the original
                ///     successor. The compared value differs from every case value, so for any
                ///     selector that reached a case the comparison is false and the original
                ///     successor is taken; the extra edge only obscures the control flow.
                ///
                /// @param target     merged function produced by Combine
                /// @param orig_list  original functions, in the order they were cloned
                /// @param VMap       value map filled by CloneFunctionInto
                /// @param valueList  receives the case value chosen for each original function
                /// @return false (before anything is modified) when the inputs are unusable or
                ///         an entry block cannot be resolved; true otherwise.
                bool FixFunction(Function *target,std::vector<Function*> *orig_list,ValueToValueMapTy *VMap,std::vector<unsigned int> *valueList)
                {
                        std::vector<BasicBlock*> entryBlocks;
                        std::vector<BasicBlock*> bodyBlock;
                        // The selector is the last argument of target, so one must exist.
                        if(orig_list->empty() || target->arg_empty())
                                return false;
                        errs()<<"Get all entry blocks\n";
                        for(Function *func:*orig_list)
                        {
                                if(func->empty())    // no body, hence no entry block
                                        return false;
                                BasicBlock *origEntry=&func->front();
                                // lookup() yields null when the block was never cloned; treat that
                                // like a wrong-kind mapping instead of dereferencing it.
                                Value *mapped=VMap->lookup(origEntry);
                                BasicBlock *clonedEntry=dyn_cast_or_null<BasicBlock>(mapped);
                                if(clonedEntry==nullptr)
                                        return false;
                                entryBlocks.push_back(clonedEntry);
                        }
                        // Snapshot the cloned blocks BEFORE the helper blocks are created, so the
                        // bogus-branch pass below only touches cloned code.
                        getBlocks(target,&bodyBlock);
                        errs()<<"        Done\n";
                        errs()<<"Build switch\n";
                        BasicBlock *entry=BasicBlock::Create(target->getContext(),"Entry",target);    // new function entry
                        BasicBlock *selector=BasicBlock::Create(target->getContext(),"Selector",target);    // holds the SwitchInst
                        // Place both helper blocks in front of the first cloned entry block.
                        entry->moveBefore(entryBlocks.front());
                        selector->moveBefore(entryBlocks.front());
                        AllocaInst *var=new AllocaInst(Type::getInt32Ty(target->getContext()),0,Twine("switchVar"),entry);
                        Function::arg_iterator iter=target->arg_end();
                        Value *controlArg=--iter;    // last argument = selector
                        new StoreInst(controlArg,var,entry);
                        BranchInst::Create(selector,entry);
                        LoadInst *load=new LoadInst(var,Twine(""),selector);
                        BasicBlock *endBlock=BasicBlock::Create(target->getContext(),"DefaultEnd",target);
                        // A null constant cannot be built for void, so emit a bare `ret void` there.
                        Type *retTy=target->getFunctionType()->getReturnType();
                        if(retTy->isVoidTy())
                                ReturnInst::Create(target->getContext(),endBlock);
                        else
                                ReturnInst::Create(target->getContext(),Constant::getNullValue(retTy),endBlock);
                        SwitchInst *sw=SwitchInst::Create(load,endBlock,entryBlocks.size(),selector);    // switch terminates the Selector block
                        std::vector<unsigned int> rand_list;    // every value handed out so far
                        for(BasicBlock *caseTarget:entryBlocks)    // one case per original function
                        {
                                unsigned int val=getUniqueNumber(&rand_list);
                                rand_list.push_back(val);
                                ConstantInt *numCase=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),val));
                                valueList->push_back(val);
                                sw->addCase(numCase,caseTarget);
                        }
                        errs()<<"        Done\n";
                        errs()<<"Add useless code\n";
                        for(BasicBlock *basicBlock:bodyBlock)    // optional obfuscation: bogus conditional edges
                        {
                                BranchInst *br=dyn_cast_or_null<BranchInst>(basicBlock->getTerminator());
                                if(br==nullptr || !br->isUnconditional())
                                        continue;
                                BasicBlock *rand_target=entryBlocks.at(rand()%entryBlocks.size());
                                BasicBlock *right=br->getSuccessor(0);    // the real successor
                                br->eraseFromParent();
                                // Value distinct from all case values -> comparison is false on every
                                // path that got here through the switch.
                                unsigned int val=getUniqueNumber(&rand_list);
                                rand_list.push_back(val);
                                LoadInst *cmpValA=new LoadInst(var,Twine(""),basicBlock);
                                ConstantInt *cmpValB=ConstantInt::get(Type::getInt32Ty(target->getContext()),val);
                                ICmpInst *condition=new ICmpInst(*basicBlock,ICmpInst::ICMP_EQ,cmpValA,cmpValB);
                                BranchInst::Create(rand_target,right,condition,basicBlock);
                        }

                        errs()<<"        Done\n";
                        return true;
                }
                /// Redirects every direct call of an original function to the merged function.
                ///
                /// For each original function, all CallInsts in its module whose called
                /// function is that original are replaced by a call to `target`. The new
                /// argument list has one slot per `target` parameter: the caller's arguments
                /// are placed at the original function's offset, all other slots get a null
                /// value of the parameter type, and the last slot gets the function's case
                /// value. The old calls are erased after the whole module has been visited.
                ///
                /// Only CallInsts with a directly known callee are rewritten; other uses of
                /// the original functions are left untouched.
                ///
                /// @param target      merged function
                /// @param orig_list   original functions, in merge order
                /// @param VMap        unused here; kept for interface compatibility
                /// @param valueList   case value per original function (same order)
                /// @param argPosList  first-parameter offset per original function (same order)
                /// @return false, without modifying anything, when the lists are shorter than
                ///         `orig_list` or `target` has no parameters; true otherwise.
                bool FixCallInst(Function *target,std::vector<Function*> *orig_list,ValueToValueMapTy *VMap,std::vector<unsigned int> *valueList,std::vector<unsigned int> *argPosList)
                {
                        const size_t funcCount=orig_list->size();
                        // The three lists are indexed in lockstep; refuse mismatched input
                        // instead of reading past the end.
                        if(valueList->size()<funcCount || argPosList->size()<funcCount || target->arg_empty())
                                return false;
                        std::vector<CallInst*> remove_list;
                        for(size_t k=0;k<funcCount;++k)    // for every merged function
                        {
                                Function *ff=(*orig_list)[k];
                                const unsigned int val=(*valueList)[k];
                                const unsigned int argPos=(*argPosList)[k];
                                for(Function &func:*ff->getParent())
                                        for(BasicBlock &bb:func)
                                                for(Instruction &ii:bb)    // scan every instruction of the module
                                                {
                                                        CallInst *callInst=dyn_cast<CallInst>(&ii);
                                                        if(callInst==nullptr || callInst->getCalledFunction()!=ff)
                                                                continue;
                                                        std::vector<Value *> arg_list;
                                                        Function::arg_iterator itera=target->arg_begin();    // parameters of the merged function
                                                        User::op_iterator iterb=callInst->arg_begin();    // arguments of the old call
                                                        for(size_t i=0;i<target->arg_size()-1;i++,itera++)
                                                        {
                                                                if(i>=argPos && i<argPos+callInst->arg_size())    // slot belongs to this function
                                                                {
                                                                        arg_list.push_back(*iterb);
                                                                        iterb++;
                                                                }
                                                                else    // slot belongs to another function: pass a null value
                                                                        arg_list.push_back(Constant::getNullValue((*itera).getType()));
                                                        }
                                                        arg_list.push_back(ConstantInt::get(Type::getInt32Ty(target->getContext()),val));
                                                        // Inserted right before the old call; the old call is erased later so
                                                        // the instruction iteration stays valid.
                                                        CallInst *newCall=CallInst::Create(target,arg_list,Twine(""),callInst);
                                                        // A call must use its callee's calling convention, and should keep the
                                                        // source location of the call it replaces.
                                                        newCall->setCallingConv(target->getCallingConv());
                                                        newCall->setDebugLoc(callInst->getDebugLoc());
                                                        remove_list.push_back(callInst);
                                                        callInst->replaceAllUsesWith(newCall);    // results now come from the new call
                                                }

                        }
                        for(CallInst *dead:remove_list)
                                dead->eraseFromParent();
                        return true;
                }
                   /// Pass entry point: merges every function annotated with "combine".
                   ///
                   /// Lists all functions of the module, selects those whose annotation text
                   /// contains "combine", merges them into a new function named "MixFunction"
                   /// (Combine), wires its control flow (FixFunction), redirects the call sites
                   /// (FixCallInst), then removes "llvm.global.annotations" and the originals.
                   ///
                   /// @return true when the module may have been modified, false when nothing
                   ///         was changed (no merge was attempted or Combine refused).
                   bool runOnModule(Module &module) override
                {
                        std::vector<Function*>  func_list;
                        getFunctions(&module,&func_list);
                        std::vector<Function*>  work_list;
                        errs()<<"Function List:\n";
                        for(Function *func:func_list)
                        {
                                errs()<<"        ";
                                errs().write_escaped(func->getName()) << '\n';
                                // readAnnotate concatenates all annotations of the function, so the
                                // keyword may appear anywhere in the text, not only at position 0.
                                if(readAnnotate(func).find("combine")!=std::string::npos)
                                {
                                        errs()<<"                -Add to work list\n";
                                        work_list.push_back(func);
                                }
                        }
                        ValueToValueMapTy VMap;
                        std::vector<unsigned int> values,argPos;
                        Function *target=Combine(&work_list,&VMap,"MixFunction",&argPos);
                        if(target==NULL)
                        {
                                errs()<<"Combine Fail\n";
                                return false;
                        }

                        if(!FixFunction(target,&work_list,&VMap,&values))
                        {
                                errs()<<"FixFunction Fail\n";
                                // Do not leave a half-built function behind if nothing refers to it.
                                if(target->use_empty())
                                        target->eraseFromParent();
                                return true;
                        }
                        if(!FixCallInst(target,&work_list,&VMap,&values,&argPos))
                        {
                                errs()<<"FixCallInst Fail\n";
                                if(target->use_empty())
                                        target->eraseFromParent();
                                return true;
                        }
                        // The annotation table refers to the original functions; drop it before
                        // they are deleted.
                        if(GlobalVariable *annotations=module.getGlobalVariable("llvm.global.annotations"))
                                annotations->eraseFromParent();
                        for(Function *func:work_list)
                        {
                                // Constants left over from the annotation table are dead by now;
                                // anything still using the function afterwards is a live use
                                // (e.g. an address-taken reference) and deleting would be invalid.
                                func->removeDeadConstantUsers();
                                if(func->use_empty())
                                        func->eraseFromParent();
                                else
                                        errs()<<"                -Still referenced, original kept\n";
                        }
                      return true;    // the module was modified
            }
          };
}
 
// Definition of the pass identifier declared in CombineFunction (standard pass boilerplate).
char CombineFunction ::ID=0;
// Registers the pass under the name "combine" with the description "MyCombine".
static RegisterPass<CombineFunction > X("combine", "MyCombine");
 
// Register for clang: adds a CombineFunction instance to the pass manager at the
// EP_EarlyAsPossible extension point.
static RegisterStandardPasses Y(PassManagerBuilder::EP_EarlyAsPossible,
  [](const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) {
    PM.add(new CombineFunction ());
  });