文章目录
前置知识
Module是指模块,Function是模块下的函数,BasicBlock是函数下的基本块,Instruction是基本块下的IR指令
Flattening::flatten(Function *f)
for (Function::iterator i = f->begin(); i != f->end(); ++i) {
BasicBlock *tmp = &*i;
origBB.push_back(tmp);
BasicBlock *bb = &*i;
if (isa<InvokeInst>(bb->getTerminator())) {
return false;
}
}
把函数分成很多个基本块,并且push到vector类型的 origBB中。
判断里面基本块是否大于1,不大于1的话就没有意义去进行混淆:
if (origBB.size() <= 1) {
return false;
}
需要把vector里面的第一个基本块即入口基本块单独拿出来进行处理:对入口基本块进行判断,如果是无条件跳转则不进行任何处理,否则需要找到最后一条指令,将整个if结构给split,split之后两个块之间会自动添加跳转指令,然后就可以把原来的split后的if结构给它扔进要处理的基本块列表。
origBB.erase(origBB.begin());
// Get a pointer on the first BB
Function::iterator tmp = f->begin(); //++tmp;
BasicBlock *insert = &*tmp;
// If main begin with an if
BranchInst *br = NULL;
if (isa<BranchInst>(insert->getTerminator())) {
br = cast<BranchInst>(insert->getTerminator());
}
if ((br != NULL && br->isConditional()) ||
insert->getTerminator()->getNumSuccessors() > 1) {
BasicBlock::iterator i = insert->end();
--i;
if (insert->size() > 1) {
--i;
}
BasicBlock *tmpBB = insert->splitBasicBlock(i, "first");
origBB.insert(origBB.begin(), tmpBB);
}
如果是条件跳转的话这里是把上面自动添加那个跳转指令给删除,如果不是的话,那么也是需要把它删除,因为跳转点目标还不能确定:
// Remove jump
insert->getTerminator()->eraseFromParent();
源代码
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%a = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
%1 = load i8*, i8** %arrayidx, align 8
%call = call i32 @atoi(i8* %1) #3
store i32 %call, i32* %a, align 4
%2 = load i32, i32* %a, align 4
br label %NodeBlock8
目前代码
entry:
%.reg2mem = alloca i32
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%a = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
%1 = load i8*, i8** %arrayidx, align 8
%call = call i32 @atoi(i8* %1) #3
store i32 %call, i32* %a, align 4
%2 = load i32, i32* %a, align 4
store i32 %2, i32* %.reg2mem
%switchVar = alloca i32
创建一个switchvar变量,然后去获取一个随机整数创建store指令塞给switchvar中
switchVar =
new AllocaInst(Type::getInt32Ty(f->getContext()), 0, "switchVar", insert);
new StoreInst(
ConstantInt::get(Type::getInt32Ty(f->getContext()),
llvm::cryptoutils->scramble32(0, scrambling_key)),
switchVar, insert);
也就是在switchvar添了如下这一行:
store i32 157301900, i32* %switchVar
创建switch
创建两个block,其它的基本块插入它们之间
loopEntry = BasicBlock::Create(f->getContext(), "loopEntry", f, insert);
loopEnd = BasicBlock::Create(f->getContext(), "loopEnd", f, insert);
如下:
loopEntry:
loopEnd:
目标基本块里面啥内容也没有。
在loopEntry里面新建一个load指令,并且把switchVar作为它要读取的地址,读出来的值之后会作为switch的条件:
load = new LoadInst(switchVar, "switchVar", loopEntry);
目前loopentry指令如下:
loopEntry: ; preds = %entry, %loopEnd
%switchVar10 = load i32, i32* %switchVar
把insert插入到loopEntry之前,这里的insert就是entry基本块,再创建两个跳转指令,从insert(即第一个基本块)跳转到loopEntry;从loopend跳转到loopEntry
// Move first BB on top
insert->moveBefore(loopEntry);
BranchInst::Create(loopEntry, insert);
// loopEnd jump to loopEntry
BranchInst::Create(loopEntry, loopEnd);
这里结束后,entry模块就完整了,如下:
entry:
%.reg2mem = alloca i32
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%a = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
%1 = load i8*, i8** %arrayidx, align 8
%call = call i32 @atoi(i8* %1) #3
store i32 %call, i32* %a, align 4
%2 = load i32, i32* %a, align 4
store i32 %2, i32* %.reg2mem
%switchVar = alloca i32
store i32 157301900, i32* %switchVar
br label %loopEntry
而loopend模块也有了一条指令(其实也是完整了):
loopEnd:
br label %loopEntry
紧接着创建一个基本块,然后在基本块里面创建一个跳转指令,从switchDefault跳转到loopend中
BasicBlock *swDefault =
BasicBlock::Create(f->getContext(), "switchDefault", f, loopEnd);
BranchInst::Create(loopEnd, swDefault);
多了一个switchDefault基本块,指令如下:
switchDefault: ; preds = %loopEntry
br label %loopEnd
创建一个switch指令,位置是在loopentry基本块下,且创建了0个case,然后设置了条件为load,就上面的load。
switchI = SwitchInst::Create(&*f->begin(), swDefault, 0, loopEntry);
switchI->setCondition(load);
把entry最后一行跳转指令删除后再创建了一个跳转指令,从entry跳转到loopentry
f->begin()->getTerminator()->eraseFromParent();
BranchInst::Create(loopEntry, &*f->begin());
for (std::vector<BasicBlock *>::iterator b = origBB.begin();
b != origBB.end(); ++b) {
BasicBlock *i = *b;
ConstantInt *numCase = NULL;
// Move the BB inside the switch (only visual, no code logic)
i->moveBefore(loopEnd);
// Add case to switch
numCase = cast<ConstantInt>(ConstantInt::get(
switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(switchI->getNumCases(), scrambling_key)));
switchI->addCase(numCase, i);
}
目前代码:
entry:
%.reg2mem = alloca i32
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%a = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
%1 = load i8*, i8** %arrayidx, align 8
%call = call i32 @atoi(i8* %1) #3
store i32 %call, i32* %a, align 4
%2 = load i32, i32* %a, align 4
store i32 %2, i32* %.reg2mem
%switchVar = alloca i32
store i32 157301900, i32* %switchVar
br label %loopEntry
loopEntry:
%switchVar10 = load i32, i32* %switchVar
switch i32 %switchVar10, label %switchDefault [
]
switchDefault: ; preds = %loopEntry
br label %loopEnd
loopEnd:
br label %loopEntry
创建case
for (std::vector<BasicBlock *>::iterator b = origBB.begin();
b != origBB.end(); ++b) {
BasicBlock *i = *b;
ConstantInt *numCase = NULL;
// Move the BB inside the switch (only visual, no code logic)
i->moveBefore(loopEnd);
// Add case to switch
numCase = cast<ConstantInt>(ConstantInt::get(
switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(switchI->getNumCases(), scrambling_key)));
switchI->addCase(numCase, i);
}
这里的i就是指剩下的那些case分支代码基本块,i->moveBefore(loopEnd),把某个代码基本块置于loopend之前。比如某个基本块是这样:
NodeBlock8: ; preds = %entry
%Pivot9 = icmp slt i32 %2, 2
br i1 %Pivot9, label %LeafBlock, label %NodeBlock
然后下面的这些代码就是创建一个numcase,就是case分支里面的case值,这个值它是随机生成的,种子的话是Entry.cpp里面的那个AesSeed值,如果确定AesSeed的话,那么这里随机生成的case每次都是固定的。
switchI->addCase(numCase, i);紧接着在switch里面增加一个case值,跳转到NodeBlock8里面。
目前循环执行完一次后,loopEntry基本块如下:
loopEntry:
%switchVar10 = load i32, i32* %switchVar
switch i32 %switchVar10, label %switchDefault [
i32 157301900, label %NodeBlock8
]
当循环执行结束后:
目前代码
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%a = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
%1 = load i8*, i8** %arrayidx, align 8
%call = call i32 @atoi(i8* %1) #3
store i32 %call, i32* %a, align 4
%2 = load i32, i32* %a, align 4
br label %NodeBlock8
NodeBlock8: ; preds = %entry
%Pivot9 = icmp slt i32 %2, 2
br i1 %Pivot9, label %LeafBlock, label %NodeBlock
NodeBlock: ; preds = %NodeBlock8
%Pivot = icmp slt i32 %2, 3
br i1 %Pivot, label %sw.bb2, label %LeafBlock6
LeafBlock6: ; preds = %NodeBlock
%SwitchLeaf7 = icmp eq i32 %2, 3
br i1 %SwitchLeaf7, label %sw.bb4, label %NewDefault
LeafBlock: ; preds = %NodeBlock8
%SwitchLeaf = icmp eq i32 %2, 1
br i1 %SwitchLeaf, label %sw.bb, label %NewDefault
sw.bb: ; preds = %LeafBlock
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
br label %sw.epilog
sw.bb2: ; preds = %NodeBlock
%call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0))
br label %sw.epilog
sw.bb4: ; preds = %LeafBlock6
%call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i64 0, i64 0))
br label %sw.epilog
NewDefault: ; preds = %LeafBlock6, %LeafBlock
br label %sw.default
sw.default: ; preds = %NewDefault
br label %sw.epilog
sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb2, %sw.bb
%3 = load i32, i32* %a, align 4
%cmp = icmp eq i32 %3, 0
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %sw.epilog
store i32 1, i32* %retval, align 4
br label %return
if.else: ; preds = %sw.epilog
store i32 10, i32* %retval, align 4
br label %return
return: ; preds = %if.else, %if.then
%4 = load i32, i32* %retval, align 4
ret i32 %4
loopEnd: ; preds = %if.else, %if.then, %sw.epilog, %sw.default, %NewDefault, %sw.bb4, %sw.bb2, %sw.bb, %LeafBlock, %LeafBlock6, %NodeBlock, %NodeBlock8, %switchDefault
br label %loopEntry
}
枚举更改各个case block块
return block
// Ret BB
if (i->getTerminator()->getNumSuccessors() == 0) {
continue;
}
getNumSuccessors是获取后续BB的个数,Ret BB后继BB为0个(判断分支),直接continue
非条件跳转block
// If it's a non-conditional jump
if (i->getTerminator()->getNumSuccessors() == 1) {
// Get successor and delete terminator
BasicBlock *succ = i->getTerminator()->getSuccessor(0);
i->getTerminator()->eraseFromParent();
// Get next case
numCase = switchI->findCaseDest(succ);
// If next case == default case (switchDefault)
if (numCase == NULL) {
numCase = cast<ConstantInt>(
ConstantInt::get(switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(
switchI->getNumCases() - 1, scrambling_key)));
}
// Update switchVar and jump to the end of loop
new StoreInst(numCase, load->getPointerOperand(), i);
BranchInst::Create(loopEnd, i);
continue;
}
如果后面只有一个分支的话,那么先判断分支是否能够找到,不为null后先去根据原来条件去创建一个store指令,然后创建一个跳转指令跳转到loopend,再把原来跳转指令抹去。
原来:
sw.bb: ; preds = %LeafBlock
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
br label %sw.epilog
改变后:
sw.bb: ; preds = %LeafBlock
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
store i32 387774014, i32* %switchVar
br label %loopEnd
条件跳转 block
if (i->getTerminator()->getNumSuccessors() == 2) {
// Get next cases
ConstantInt *numCaseTrue =
switchI->findCaseDest(i->getTerminator()->getSuccessor(0));
ConstantInt *numCaseFalse =
switchI->findCaseDest(i->getTerminator()->getSuccessor(1));
// Check if next case == default case (switchDefault)
if (numCaseTrue == NULL) {
numCaseTrue = cast<ConstantInt>(
ConstantInt::get(switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(
switchI->getNumCases() - 1, scrambling_key)));
}
if (numCaseFalse == NULL) {
numCaseFalse = cast<ConstantInt>(
ConstantInt::get(switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(
switchI->getNumCases() - 1, scrambling_key)));
}
// Create a SelectInst
BranchInst *br = cast<BranchInst>(i->getTerminator());
SelectInst *sel =
SelectInst::Create(br->getCondition(), numCaseTrue, numCaseFalse, "",
i->getTerminator());
// Erase terminator
i->getTerminator()->eraseFromParent();
// Update switchVar and jump to the end of loop
new StoreInst(sel, load->getPointerOperand(), i);
BranchInst::Create(loopEnd, i);
continue;
}
首先会把两个跳转分支都取出来,先判断两个分支是否都能够找到,如果都不为null 的话,那么取出原来的跳转指令,根据br的两个分支条件,去创建一个SelectInst然后再删除原来指令,创建一个store指令,再去创建一个跳转指令跳转到loopend。
原来:
NodeBlock8: ; preds = %entry
%Pivot9 = icmp slt i32 %2, 2
br i1 %Pivot9, label %LeafBlock, label %NodeBlock
改变后:
NodeBlock8: ; preds = %entry
%Pivot9 = icmp slt i32 %2, 2
%3 = select i1 %Pivot9, i32 -1519555718, i32 241816174
store i32 %3, i32* %switchVar
br label %loopEnd
目前代码
define dso_local i32 @main(i32 %argc, i8** %argv) #0 {
entry:
%.reg2mem = alloca i32
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%a = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
%1 = load i8*, i8** %arrayidx, align 8
%call = call i32 @atoi(i8* %1) #3
store i32 %call, i32* %a, align 4
%2 = load i32, i32* %a, align 4
store i32 %2, i32* %.reg2mem
%switchVar = alloca i32
store i32 157301900, i32* %switchVar
br label %loopEntry
loopEntry: ; preds = %entry, %loopEnd
%switchVar10 = load i32, i32* %switchVar
switch i32 %switchVar10, label %switchDefault [
i32 157301900, label %NodeBlock8
i32 241816174, label %NodeBlock
i32 1003739776, label %LeafBlock6
i32 -1519555718, label %LeafBlock
i32 -749093422, label %sw.bb
i32 1599617141, label %sw.bb2
i32 1815329037, label %sw.bb4
i32 1738940479, label %NewDefault
i32 -282945350, label %sw.default
i32 387774014, label %sw.epilog
i32 1681741611, label %if.then
i32 347219667, label %if.else
i32 -618048859, label %return
]
switchDefault: ; preds = %loopEntry
br label %loopEnd
NodeBlock8: ; preds = %entry
%Pivot9 = icmp slt i32 %2, 2
%3 = select i1 %Pivot9, i32 -1519555718, i32 241816174
store i32 %3, i32* %switchVar
br label %loopEnd
NodeBlock: ; preds = %NodeBlock8
%Pivot = icmp slt i32 %2, 3
%4 = select i1 %Pivot, i32 1599617141, i32 1003739776
store i32 %4, i32* %switchVar
br label %loopEnd
LeafBlock6: ; preds = %NodeBlock
%SwitchLeaf7 = icmp eq i32 %2, 3
%5 = select i1 %SwitchLeaf7, i32 1815329037, i32 1738940479
store i32 %5, i32* %switchVar
br label %loopEnd
LeafBlock: ; preds = %NodeBlock8
%SwitchLeaf = icmp eq i32 %2, 1
%6 = select i1 %SwitchLeaf, i32 -749093422, i32 1738940479
store i32 %6, i32* %switchVar
br label %loopEnd
sw.bb: ; preds = %LeafBlock
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
store i32 387774014, i32* %switchVar
br label %loopEnd
sw.bb2: ; preds = %NodeBlock
%call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0))
store i32 387774014, i32* %switchVar
br label %loopEnd
sw.bb4: ; preds = %LeafBlock6
%call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i64 0, i64 0))
store i32 387774014, i32* %switchVar
br label %loopEnd
NewDefault: ; preds = %LeafBlock6, %LeafBlock
store i32 -282945350, i32* %switchVar
br label %loopEnd
sw.default: ; preds = %NewDefault
store i32 387774014, i32* %switchVar
br label %loopEnd
sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb2, %sw.bb
%3 = load i32, i32* %a, align 4
%cmp = icmp eq i32 %3, 0
%8 = select i1 %cmp, i32 1681741611, i32 347219667
store i32 %8, i32* %switchVar
br label %loopEnd
if.then: ; preds = %sw.epilog
store i32 1, i32* %retval, align 4
store i32 -618048859, i32* %switchVar
br label %loopEnd
if.else: ; preds = %sw.epilog
store i32 10, i32* %retval, align 4
store i32 -618048859, i32* %switchVar
br label %loopEnd
return: ; preds = %if.else, %if.then
%4 = load i32, i32* %retval, align 4
ret i32 %4
loopEnd: ; preds = %if.else, %if.then, %sw.epilog, %sw.default, %NewDefault, %sw.bb4, %sw.bb2, %sw.bb, %LeafBlock, %LeafBlock6, %NodeBlock, %NodeBlock8, %switchDefault
br label %loopEntry
}
附带一句,进行控制流平坦化之前刚开始会把switch语句给它全部进行改为if else目的主要是为了进行多次平坦化做准备(进行平坦化时里面是可以填次数)撒花撒花🎉
魔改平坦化
正常流程就是首先进入entry,entry会给一个case常量值,然后进入loopentry,loopentry根据这个常量值进行switch分发跳转到case常量对应的基本块,基本块执行完又会赋值一个case常量值,跳转到loopend,loopend又会跳转到loopentry进行下一次分发。(感觉特征点的话就是entry和loopend都会跳转到loopentry):
正常的控制流平坦化源码已经被研究透了,大部分的方法都是基于定位loopend基本块,实际上,这种简单的IR结构编译成汇编代码也是这个样子,很容易定位到loopend基本块。在这里,我们的魔改方法就直接在loopend基本块后面加一个新的switchInst,然后跳转到基本块,相当于从开始的switch转移到后面来了。
只需要在原来的代码后面新增对loopEnd基本块的操作即可:抹除原来的跳转(或者就是在准备骨架的时候让loopEnd不建立跳转到loopEntry的指令),插入LoadInst加载switchVar,再用于SwitchInst跳转:
// Give loopEnd its own dispatcher: reload switchVar and switch on it directly,
// instead of jumping back to the first switch.
LoadInst *sw_var=new LoadInst(switchVar,"",loopEnd);
// The first flattened block doubles as the default destination of the new switch.
SwitchInst *sw2=SwitchInst::Create(sw_var,*origBB.begin(),0,loopEnd);
for(std::vector<BasicBlock*>::iterator b=origBB.begin();b!=origBB.end();b++){
    BasicBlock *bb=*b;
    // `sw` is the switch built earlier; reuse the case number it already
    // assigned to this block so both switches agree.
    ConstantInt *tmp=sw->findCaseDest(bb);
    sw2->addCase(tmp,bb);
}
std::vector<PHINode *>tmpPhi;
魔改后确实是平坦化的逻辑,但CFG变得难以分类了,并且loopend难以用上面的那种思维去定位了,但是仍然可以通过basicblock的入度最多的来进行定位loopend,然后去找到所有的真实块,这里的话就可以去使用多个loopend,使得真实块分配跳转到不同的loopend基本块处,这样就难找到所有的loopend和真实块了。
魔改源码:
/**
 * Control-flow flattening variant that uses several "LoopEnd" dispatcher blocks.
 *
 * Every basic block except the entry becomes a case of a switch placed in
 * "LoopBegin". Each flattened block stores the case number of its successor
 * into `switchVar` and branches to one of the `loopEndNum` "LoopEnd" blocks;
 * every "LoopEnd" block carries its own switch over the same case numbers.
 * Finally all PHI nodes and values used outside their defining block are
 * demoted to stack slots.
 *
 * @param f           function to transform in place
 * @param seed        value handed to srand(); it fixes the rand() sequence used here
 * @param loopEndNum  number of "LoopEnd" blocks to create
 *
 * Returns without touching `f` when loopEndNum is not positive, when the
 * function has at most one basic block, or when it has fewer blocks than
 * loopEndNum.
 */
void DoFlatten(Function *f,int seed,int loopEndNum)
{
    srand(seed);
    std::vector<BasicBlock*> origBB;
    getBlocks(f,&origBB);
    // loopEndNum<=0 would leave loopEndBlocks empty (it is dereferenced below)
    // and make the per-dispatcher quota a division by zero.
    if(loopEndNum<=0 || origBB.size()<=1 || origBB.size()<static_cast<std::size_t>(loopEndNum))
        return ;
    Function::iterator tmp=f->begin();
    BasicBlock *oldEntry=&*tmp;
    origBB.erase(origBB.begin());
    BranchInst *firstBr=NULL;
    if(isa<BranchInst>(oldEntry->getTerminator()))
        firstBr=cast<BranchInst>(oldEntry->getTerminator());
    // Split the first basic block whenever its terminator has more than one
    // successor. The terminator is replaced by a plain branch below, so an
    // unsplit multi-way terminator would silently lose its other targets.
    if((firstBr!=NULL && firstBr->isConditional()) || oldEntry->getTerminator()->getNumSuccessors()>1)
    {
        BasicBlock::iterator iter=oldEntry->end();
        iter--;
        if(oldEntry->size()>1)
            iter--;
        BasicBlock *splited=oldEntry->splitBasicBlock(iter,Twine("FirstBB"));
        origBB.insert(origBB.begin(),splited);
    }

    // Prepare the skeleton blocks.
    BasicBlock *newEntry=oldEntry;
    BasicBlock *loopBegin=BasicBlock::Create(f->getContext(),"LoopBegin",f,newEntry);
    BasicBlock *defaultCase=BasicBlock::Create(f->getContext(),"DefaultCase",f,newEntry);
    std::vector<BasicBlock*> loopEndBlocks;
    for(int i=0;i<loopEndNum;i++)
        loopEndBlocks.push_back(BasicBlock::Create(f->getContext(),"LoopEnd",f,newEntry));
    newEntry->moveBefore(loopBegin);

    // Link the skeleton: DefaultCase jumps to a randomly chosen flattened block,
    // the entry jumps to LoopBegin.
    BranchInst::Create(origBB.at(rand()%origBB.size()),defaultCase);
    newEntry->getTerminator()->eraseFromParent();
    BranchInst::Create(loopBegin,newEntry);

    // Create the switch variable in the entry block and the first dispatcher.
    AllocaInst *switchVar=new AllocaInst(Type::getInt32Ty(f->getContext()),0,Twine("switchVar"),newEntry->getTerminator());
    LoadInst *value=new LoadInst(switchVar,"cmd",loopBegin);
    SwitchInst *sw=SwitchInst::Create(value,defaultCase,0,loopBegin);

    // Put every remaining basic block into the switch under a unique random number.
    std::vector<unsigned int> rand_list;
    unsigned int startNum=0;
    for(std::vector<BasicBlock *>::iterator b=origBB.begin();b!=origBB.end();b++)
    {
        BasicBlock *block=*b;
        block->moveBefore(*loopEndBlocks.begin());
        unsigned int num=getUniqueNumber(&rand_list);
        rand_list.push_back(num);
        if(b==origBB.begin())
            startNum=num;
        ConstantInt *numCase=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
        sw->addCase(numCase,block);
    }

    // The entry block selects the first flattened block.
    ConstantInt *startVal=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),startNum));
    new StoreInst(startVal,switchVar,newEntry->getTerminator());
    errs()<<"Put Block Into Switch\n";

    // Number of consecutive blocks routed to each LoopEnd block. It is at least
    // one so that every iteration below can make progress through the list.
    std::size_t every=origBB.size()/static_cast<std::size_t>(loopEndNum);
    if(every==0)
        every=1;
    std::size_t counter=0;
    std::vector<BasicBlock *>::iterator end_iter=loopEndBlocks.begin();
    for(std::vector<BasicBlock *>::iterator b=origBB.begin();b!=origBB.end();b++) //Handle successors
    {
        BasicBlock *block=*b;
        if(counter==every)
        {
            counter=0;
            // When the blocks do not divide evenly the remainder stays on the
            // last LoopEnd block instead of stepping past the end of the list.
            if(end_iter+1!=loopEndBlocks.end())
                ++end_iter;
        }
        BasicBlock *loopEnd=*end_iter;
        if(block->getTerminator()->getNumSuccessors()==1)
        {
            errs()<<"This block has 1 successor\n";
            BasicBlock *succ=block->getTerminator()->getSuccessor(0);
            ConstantInt *caseNum=sw->findCaseDest(succ);
            if(caseNum==NULL)
            {
                // Successor is not a case of the switch: store a fresh number,
                // which the dispatchers route to their default destination.
                unsigned int num=getUniqueNumber(&rand_list);
                rand_list.push_back(num);
                caseNum=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
            }
            block->getTerminator()->eraseFromParent();
            new StoreInst(caseNum,switchVar,block);
            BranchInst::Create(loopEnd,block);
        }
        else if(block->getTerminator()->getNumSuccessors()==2)
        {
            errs()<<"This block has 2 successors\n";
            BasicBlock *succTrue=block->getTerminator()->getSuccessor(0);
            BasicBlock *succFalse=block->getTerminator()->getSuccessor(1);
            ConstantInt *numTrue=sw->findCaseDest(succTrue);
            ConstantInt *numFalse=sw->findCaseDest(succFalse);
            if(numTrue==NULL)
            {
                unsigned int num=getUniqueNumber(&rand_list);
                rand_list.push_back(num);
                numTrue=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
            }
            if(numFalse==NULL)
            {
                unsigned int num=getUniqueNumber(&rand_list);
                rand_list.push_back(num);
                numFalse=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),num));
            }
            // Replace the conditional branch by a select of the next case number.
            BranchInst *oldBr=cast<BranchInst>(block->getTerminator());
            SelectInst *select=SelectInst::Create(oldBr->getCondition(),numTrue,numFalse,Twine("choice"),block->getTerminator());
            block->getTerminator()->eraseFromParent();
            new StoreInst(select,switchVar,block);
            BranchInst::Create(loopEnd,block);
        }
        // Blocks with no successor (returns) keep their terminator untouched.
        counter++;
    }

    // Each LoopEnd block dispatches on switchVar itself, with a randomly chosen
    // flattened block as its default destination.
    for(std::vector<BasicBlock*>::iterator x=loopEndBlocks.begin();x!=loopEndBlocks.end();x++)
    {
        BasicBlock *loopEnd=*x;
        LoadInst *sw_val=new LoadInst(switchVar,"",loopEnd);
        SwitchInst *sw2=SwitchInst::Create(sw_val,origBB.at(rand()%origBB.size()),0,loopEnd);
        for(std::vector<BasicBlock*>::iterator b=origBB.begin();b!=origBB.end();b++)
        {
            BasicBlock *bb=*b;
            ConstantInt *tmp=sw->findCaseDest(bb);
            sw2->addCase(tmp,bb);
        }
    }

    // Demote PHI nodes and escaping registers to the stack, repeating until a
    // full scan of the function finds nothing left to demote.
    std::vector<PHINode *> tmpPhi;
    std::vector<Instruction *> tmpReg;
    BasicBlock *bbEntry = &*f->begin();
    do
    {
        tmpPhi.clear();
        tmpReg.clear();
        for(Function::iterator i = f->begin();i!=f->end();i++)
        {
            for( BasicBlock::iterator j=i->begin();j!=i->end();j++)
            {
                if(isa<PHINode>(j))
                {
                    PHINode *phi=cast<PHINode>(j);
                    tmpPhi.push_back(phi);
                    continue;
                }
                // Allocas that already live in the entry block are left alone.
                if (!(isa<AllocaInst>(j) && j->getParent()==bbEntry) && (valueEscapes(&*j) || j->isUsedOutsideOfBlock(&*i)))
                {
                    tmpReg.push_back(&*j);
                    continue;
                }
            }
        }
        for(unsigned int i=0;i<tmpReg.size();i++)
            DemoteRegToStack(*tmpReg.at(i),f->begin()->getTerminator());
        for(unsigned int i=0;i<tmpPhi.size();i++)
            DemotePHIToStack(tmpPhi.at(i),f->begin()->getTerminator());
    }
    while(tmpReg.size()!= 0 || tmpPhi.size()!= 0);
    errs()<<"Finish\n";
}
函数合并pass
如何去实现这样的功能,我们可以写一个ModulePass,模块pass来处理,首先得定位哪些函数需要被合并,然后加入到一个List之中进行处理,先创建一个函数MixFunction,它的参数将所有的函数的参数合并,然后将List中的所有函数CloneInto这个MixFunction之中,然后修正一下各个函数对参数的引用,这时的函数内部的逻辑并不正确,需要修正一下,具体的修正过程就是添加个switch指令,根据MixFunction添加的最后一个参数来确定选用哪个原函数的BasicBlock块
此时MixFunction已经被修正了,可以作为单独的函数了,但是源程序的所有Call指令Call的都是原来的函数,MixFunction并没有被调用,所以我们需要遍历所有指令,然后替换掉这些Call指令,让他变成Call我们的MixFunction,传入的最后一个参数标识使用的MixFunction的哪个函数部分
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/CFG.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include<vector>
#include<algorithm>
#include<map>
#include<ctime>
#include<cstdlib>
using namespace llvm;
namespace
{
struct CombineFunction : public ModulePass
{
static char ID;
CombineFunction() : ModulePass(ID) {}
// Replaces the contents of `lists` with pointers to every basic block of
// `function`, in function order, and returns `lists`.
std::vector<BasicBlock*> *getBlocks(Function *function,std::vector<BasicBlock*> *lists)
{
    lists->clear();
    for(Function::iterator it=function->begin(),last=function->end();it!=last;++it)
    {
        BasicBlock *current=&*it;
        lists->push_back(current);
    }
    return lists;
}
// Replaces the contents of `lists` with pointers to every function of
// `module`, in module order, and returns `lists`.
std::vector<Function*> *getFunctions(Module *module,std::vector<Function*> *lists)
{
    lists->clear();
    for(Module::iterator it=module->begin(),last=module->end();it!=last;++it)
    {
        Function *current=&*it;
        lists->push_back(current);
    }
    return lists;
}
// Borrowed from OLLVM. Collects the annotation strings attached to `f` in the
// module's "llvm.global.annotations" array. Every matching annotation is
// lower-cased and appended to the result followed by one space; the result is
// empty when nothing matches. Used to decide which functions are merged.
std::string readAnnotate(Function *f)
{
    std::string annotation = "";

    /* Get annotation variable */
    GlobalVariable *glob=f->getParent()->getGlobalVariable( "llvm.global.annotations" );
    if ( glob == NULL )
        return(annotation);

    /* Get the array */
    ConstantArray *entries = dyn_cast<ConstantArray>( glob->getInitializer() );
    if ( entries == NULL )
        return(annotation);

    for ( unsigned idx = 0; idx < entries->getNumOperands(); ++idx )
    {
        /* Get the struct */
        ConstantStruct *record = dyn_cast<ConstantStruct>( entries->getOperand( idx ) );
        if ( record == NULL )
            continue;

        ConstantExpr *subject = dyn_cast<ConstantExpr>( record->getOperand( 0 ) );
        if ( subject == NULL )
            continue;

        /* Only a bitcast of the current function marks an annotation for it */
        if ( subject->getOpcode() != Instruction::BitCast || subject->getOperand( 0 ) != f )
            continue;

        /* A GetElementPtr here points at the variable holding the annotation text */
        ConstantExpr *note = cast<ConstantExpr>( record->getOperand( 1 ) );
        if ( note->getOpcode() != Instruction::GetElementPtr )
            continue;

        GlobalVariable *annoteStr = dyn_cast<GlobalVariable>( note->getOperand( 0 ) );
        if ( annoteStr == NULL )
            continue;

        ConstantDataSequential *data = dyn_cast<ConstantDataSequential>( annoteStr->getInitializer() );
        if ( data == NULL || !data->isString() )
            continue;

        annotation += data->getAsString().lower() + " ";
    }
    return(annotation);
}
// Creates a new function called `name` whose parameter list is the
// concatenation of the parameters of every function in `func_list` plus one
// trailing i32 selector, then clones the bodies of all those functions into it.
// The result is not yet a valid function: nothing dispatches on the selector.
//
// VMap        receives the argument mappings used for cloning (and whatever
//             CloneFunctionInto records in it).
// argPosList  receives, per input function, the index of its first parameter
//             inside the merged parameter list.
//
// Returns NULL when the list is empty, when any function is a declaration,
// has available_externally linkage or is variadic, or when return type,
// parent module or linkage differ from those of the first function.
Function *Combine(std::vector<Function*> *func_list,ValueToValueMapTy* VMap,Twine name,std::vector<unsigned int> *argPosList)
{
    // Nothing to merge when the list is empty.
    if(func_list->empty())
        return NULL;

    errs()<<"Check Function Type\n";
    for(Function *candidate:*func_list)
    {
        // Functions without a body and variadic functions cannot be handled here.
        if(candidate->isDeclaration())
            return NULL;
        if(candidate->hasAvailableExternallyLinkage())
            return NULL;
        if(candidate->getFunctionType()->isVarArg())
            return NULL;
    }
    errs()<<" Done\n";

    errs()<<"Prepare Function Type\n";
    // Parameter types of all functions, in list order, form the merged signature.
    std::vector<Type*> ArgTypes;
    for(Function *candidate:*func_list)
        for(Argument &param:candidate->args())
            ArgTypes.push_back(param.getType());
    errs()<<" Done\n";

    errs()<<"Check Function Return Type\n";
    Function *first=*func_list->begin();
    // Trailing i32 parameter that identifies which original function to run.
    ArgTypes.push_back(Type::getInt32Ty(first->getParent()->getContext()));
    for(Function *candidate:*func_list)
    {
        const bool sameReturn=candidate->getFunctionType()->getReturnType()==first->getFunctionType()->getReturnType();
        if(!sameReturn)
            return NULL;
        const bool sameModule=candidate->getParent()==first->getParent();
        if(!sameModule)
            return NULL;
        const bool sameLinkage=candidate->getLinkage()==first->getLinkage();
        if(!sameLinkage)
            return NULL;
    }

    // Declare the merged function type and create the function itself.
    FunctionType *fty=FunctionType::get(first->getFunctionType()->getReturnType(),ArgTypes,false);
    Function *result=Function::Create(fty,first->getLinkage(),first->getAddressSpace(),name,first->getParent());
    Function::arg_iterator mergedArg=result->arg_begin();
    errs()<<" Done\n";

    errs()<<"Start Working\n";
    unsigned int index=0;
    for(Function *candidate:*func_list)
    {
        // Remember where this function's parameters start in the merged list.
        argPosList->push_back(index);
        for(Argument &param:candidate->args())
        {
            // Map the original argument to the merged one for CloneFunctionInto.
            (*VMap)[&param]=&*mergedArg;
            ++mergedArg;
            ++index;
        }
    }

    SmallVector<ReturnInst*,8> returns;
    ClonedCodeInfo CodeInfo;
    // Clone the original functions into the merged function one by one.
    for(Function *candidate:*func_list)
        CloneFunctionInto(result,candidate,*VMap,candidate->getSubprogram()!= nullptr,returns,"",&CodeInfo);
    errs()<<" Done\n";
    return result;
}
// Draws values from rand() until one is found that does not occur in
// `rand_list`, and returns it. The list itself is not modified; callers append
// the returned value themselves.
unsigned int getUniqueNumber(std::vector<unsigned int> *rand_list)
{
    unsigned int candidate=rand();
    while(std::find(rand_list->begin(),rand_list->end(),candidate)!=rand_list->end())
        candidate=rand();
    return candidate;
}
// Wires the cloned bodies inside `target` together so that the merged function
// is executable: a new "Entry" block stores the last argument (the selector)
// into a stack slot, a "Selector" block switches on it to the cloned entry
// block of the chosen original function, and "DefaultEnd" returns a null value
// (or plain `ret void`) for unknown selectors.
//
// orig_list  the functions that were cloned into `target`
// VMap       the map filled while cloning; used to find the cloned entry blocks
// valueList  receives, per original function, the selector value of its case
//
// Returns false when a cloned entry block cannot be found or when orig_list is
// empty; `target` is not modified in that case.
bool FixFunction(Function *target,std::vector<Function*> *orig_list,ValueToValueMapTy *VMap,std::vector<unsigned int> *valueList)
{
    std::vector<BasicBlock*> entryBlocks;
    std::vector<BasicBlock*> bodyBlock;
    errs()<<"Get all entry blocks\n";
    for(Function *func:*orig_list)
    {
        BasicBlock *entry=&*func->begin();
        // Find the block inside target that corresponds to this original entry
        // block. The lookup yields null when the block was never mapped.
        Value *mapped=VMap->lookup(entry);
        BasicBlock *cloned=dyn_cast_or_null<BasicBlock>(mapped);
        if(cloned==nullptr)
            return false;
        entryBlocks.push_back(cloned);
    }
    // The new blocks are positioned relative to the first cloned entry block.
    if(entryBlocks.empty())
        return false;
    // Snapshot the cloned blocks before any helper block is added.
    getBlocks(target,&bodyBlock);
    errs()<<" Done\n";

    errs()<<"Build switch\n";
    BasicBlock *entry=BasicBlock::Create(target->getContext(),"Entry",target);
    BasicBlock *selector=BasicBlock::Create(target->getContext(),"Selector",target);
    // "Entry" must be the first block of the function to act as its entry point.
    entry->moveBefore(entryBlocks.front());
    selector->moveBefore(entryBlocks.front());
    // Entry: spill the selector (last argument) into a stack slot, go to Selector.
    AllocaInst *var=new AllocaInst(Type::getInt32Ty(target->getContext()),0,Twine("switchVar"),entry);
    Function::arg_iterator iter=target->arg_end();
    Value *controlArg=--iter;
    new StoreInst(controlArg,var,entry);
    BranchInst::Create(selector,entry);
    // Selector: reload the selector and switch on it.
    LoadInst *load=new LoadInst(var,Twine(""),selector);
    // DefaultEnd: fallback return for selector values that match no case.
    BasicBlock *endBlock=BasicBlock::Create(target->getContext(),"DefaultEnd",target);
    Type *retTy=target->getFunctionType()->getReturnType();
    // A void function has no null value to return; emit `ret void` instead.
    Value *defaultRet=retTy->isVoidTy()?nullptr:Constant::getNullValue(retTy);
    ReturnInst::Create(target->getContext(),defaultRet,endBlock);
    SwitchInst *sw=SwitchInst::Create(load,endBlock,0,selector);

    // One case with a unique random value per original function.
    std::vector<unsigned int> rand_list;
    for(BasicBlock *clonedEntry:entryBlocks)
    {
        unsigned int val=getUniqueNumber(&rand_list);
        rand_list.push_back(val);
        ConstantInt *numCase=cast<ConstantInt>(ConstantInt::get(sw->getCondition()->getType(),val));
        valueList->push_back(val);
        sw->addCase(numCase,clonedEntry);
    }
    errs()<<" Done\n";

    errs()<<"Add useless code\n";
    // Optional obfuscation: turn every unconditional branch of the cloned code
    // into a conditional one with a bogus extra target.
    for(BasicBlock *basicBlock:bodyBlock)
    {
        BranchInst *br=dyn_cast<BranchInst>(basicBlock->getTerminator());
        if(br==nullptr || !br->isUnconditional())
            continue;
        BasicBlock *rand_target=entryBlocks.at(rand()%entryBlocks.size());
        BasicBlock *right=br->getSuccessor(0);
        br->eraseFromParent();
        // `val` differs from every case value generated above, so for those
        // selector values the comparison is false and control continues to
        // `right`; the edge to rand_target is never taken.
        unsigned int val=getUniqueNumber(&rand_list);
        rand_list.push_back(val);
        LoadInst *cmpValA=new LoadInst(var,Twine(""),basicBlock);
        ConstantInt *cmpValB=ConstantInt::get(Type::getInt32Ty(target->getContext()),val);
        ICmpInst *condition=new ICmpInst(*basicBlock,ICmpInst::ICMP_EQ,cmpValA,cmpValB);
        BranchInst::Create(rand_target,right,condition,basicBlock);
    }
    errs()<<" Done\n";
    return true;
}
// Rewrites every direct call to one of the original functions into a call to
// the merged function `target`.
//
// orig_list   the original functions, in the order they were merged
// VMap        unused here; kept so the signature matches the other helpers
// valueList   per original function, the selector value of its switch case
// argPosList  per original function, the index of its first parameter inside
//             the parameter list of `target`
//
// Returns false, without changing anything, when the three lists do not have
// the same length; true otherwise.
bool FixCallInst(Function *target,std::vector<Function*> *orig_list,ValueToValueMapTy *VMap,std::vector<unsigned int> *valueList,std::vector<unsigned int> *argPosList)
{
    // The lists are walked in lockstep below.
    if(valueList->size()!=orig_list->size() || argPosList->size()!=orig_list->size())
        return false;
    std::vector<unsigned int>::iterator v=valueList->begin(),a=argPosList->begin();
    std::vector<CallInst*> remove_list;
    for(std::vector<Function *>::iterator f=orig_list->begin();f!=orig_list->end();f++,v++,a++)
    {
        unsigned int val=*v,argPos=*a;
        Function *ff=*f;
        // Scan every instruction of the module that contains ff.
        for(Function &func:*ff->getParent())
            for(BasicBlock &bb:func)
                for(Instruction &ii:bb)
                {
                    CallInst *callInst=dyn_cast<CallInst>(&ii);
                    if(callInst==nullptr || callInst->getCalledFunction()!=ff)
                        continue;
                    std::vector<Value *> arg_list;
                    Function::arg_iterator itera=target->arg_begin();   // parameters of the merged function
                    User::op_iterator iterb=callInst->arg_begin();      // arguments of the old call
                    // Build the new argument list, leaving out the trailing selector.
                    for(size_t i=0;i<target->arg_size()-1;i++,itera++)
                    {
                        if(i>=argPos && i<argPos+callInst->arg_size())
                        {
                            // Slot belongs to ff: forward the original argument.
                            arg_list.push_back(*iterb);
                            iterb++;
                        }
                        else
                            // Slot belongs to another merged function: pass a null value.
                            arg_list.push_back(Constant::getNullValue((*itera).getType()));
                    }
                    // Selector telling the merged function which body to run.
                    arg_list.push_back(ConstantInt::get(Type::getInt32Ty(target->getContext()),val));
                    CallInst *newCall=CallInst::Create(target,arg_list,Twine(""),callInst);
                    // Erasing is deferred so the instruction iteration stays valid.
                    remove_list.push_back(callInst);
                    callInst->replaceAllUsesWith(newCall);
                }
    }
    for(std::vector<CallInst *>::iterator c=remove_list.begin();c!=remove_list.end();c++)
        (*c)->eraseFromParent();
    return true;
}
// Pass entry point. Collects every function whose annotations contain
// "combine", merges them into one function named "MixFunction", redirects the
// calls to it and erases the original functions.
// Returns true once the module has been modified, false when nothing was done.
bool runOnModule(Module &module) override
{
    std::vector<Function*> func_list;
    getFunctions(&module,&func_list);
    std::vector<Function*> work_list;
    errs()<<"Function List:\n";
    for(std::vector<Function *>::iterator f=func_list.begin();f!=func_list.end();f++)
    {
        Function *func=*f;
        errs()<<" ";
        errs().write_escaped(func->getName()) << '\n';
        // find() returns npos when the text is absent; comparing against npos
        // also accepts "combine" when it is not the first annotation.
        if(readAnnotate(func).find("combine")!=std::string::npos)
        {
            errs()<<" -Add to work list\n";
            work_list.push_back(func);
        }
    }
    ValueToValueMapTy VMap;
    std::vector<unsigned int> values,argPos;
    Function *target=Combine(&work_list,&VMap,"MixFunction",&argPos);
    if(target==NULL)
    {
        // Combine returns NULL before creating anything, so the module is untouched.
        errs()<<"Combine Fail\n";
        return false;
    }
    // From here on the module contains the new function, so it counts as modified.
    if(!FixFunction(target,&work_list,&VMap,&values))
    {
        errs()<<"FixFunction Fail\n";
        return true;
    }
    if(!FixCallInst(target,&work_list,&VMap,&values,&argPos))
    {
        errs()<<"FixCallInst Fail\n";
        return true;
    }
    // The annotation array still refers to the original functions; drop it
    // before erasing them.
    if(GlobalVariable *annotations=module.getGlobalVariable("llvm.global.annotations"))
        annotations->eraseFromParent();
    for(std::vector<Function *>::iterator f=work_list.begin();f!=work_list.end();f++)
    {
        Function *func=*f;
        func->eraseFromParent();
    }
    return true;
}
};
}
// Out-of-class definition of the static pass identifier declared in CombineFunction.
char CombineFunction ::ID=0; // standard pass boilerplate
// Registers the pass under the argument name "combine" with the description "MyCombine".
static RegisterPass<CombineFunction > X("combine", "MyCombine");
// Register for clang
// Adds a CombineFunction instance to the pass manager at the
// EP_EarlyAsPossible extension point.
static RegisterStandardPasses Y(PassManagerBuilder::EP_EarlyAsPossible,
[](const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) {
PM.add(new CombineFunction ());
});
魔改pass部分原文
https://www.52pojie.cn/thread-1369130-1-1.html