Android逆向之旅—解析编译之后的AndroidManifest文件格式

最新推荐文章于 2025-06-20 00:46:10 发布

西子云齐

最新推荐文章于 2025-06-20 00:46:10 发布

阅读量1k

点赞数

分类专栏：安卓逆向

安卓逆向专栏收录该内容

20 篇文章

订阅专栏

本文详细介绍了如何解析AndroidManifest.xml文件中的各种Chunk，包括StringChunk、ResourceIdChunk、StartNamespaceChunk和StartTagChunk等内容。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

转载自：
http://www.wjdiankong.cn/android%E9%80%86%E5%90%91%E4%B9%8B%E6%97%85-%E8%A7%A3%E6%9E%90%E7%BC%96%E8%AF%91%E4%B9%8B%E5%90%8E%E7%9A%84androidmanifest%E6%96%87%E4%BB%B6%E6%A0%BC%E5%BC%8F/

格式解析

第一、头部信息

1、文件魔数：四个字节
2、文件大小：四个字节

第二、String Chunk内容

1、ChunkType：StringChunk的类型，固定四个字节：0x001C0001
2、ChunkSize：StringChunk的大小，四个字节
3、StringCount：StringChunk中字符串的个数，四个字节
4、StyleCount：StringChunk中样式的个数，四个字节，但是在实际解析过程中，这个值一直是0x00000000
5、Unknown：未知区域，四个字节，在解析的过程中，这里需要略过四个字节
6、StringPoolOffset：字符串池的偏移值，四个字节，这个偏移值是相对于StringChunk的头部位置
7、StylePoolOffset：样式池的偏移值，四个字节，这里没有Style,所以这个字段可忽略
8、StringOffsets：每个字符串的偏移值，所以他的大小应该是：StringCount*4个字节
9、StyleOffsets：每个样式的偏移值，所以他的大小应该是StyleCount*4个字节

1、首先我们需要把AndroidManifest.xml文件读入到一个byte数组中：

// Read the whole compiled AndroidManifest.xml into memory; byteSrc stays
// null when the file cannot be read.
byte[] byteSrc = null;
FileInputStream fis = null;
ByteArrayOutputStream bos = null;
try{
    fis = new FileInputStream("xmltest/AndroidManifest1.xml");
    bos = new ByteArrayOutputStream();
    byte[] buffer = new byte[1024];
    int len = 0;
    while((len=fis.read(buffer)) != -1){
        bos.write(buffer, 0, len);
    }
    byteSrc = bos.toByteArray();
}catch(Exception e){
    System.out.println("parse xml error:"+e.toString());
}finally{
    // Close each stream on its own: a stream that was never opened is
    // skipped explicitly (instead of relying on a swallowed
    // NullPointerException), and a failure closing one stream no longer
    // prevents the other from being closed.
    if(fis != null){
        try{
            fis.close();
        }catch(Exception e){
            System.out.println("close input stream error:"+e.toString());
        }
    }
    if(bos != null){
        try{
            bos.close();
        }catch(Exception e){
            System.out.println("close output stream error:"+e.toString());
        }
    }
}

2、下面我们就来看看解析头部信息：

/**
 * Prints the two 4-byte header fields of the binary XML file and starts the
 * generated XML text.
 *
 * The bytes at offset 0 are printed as the magic number and the bytes at
 * offset 4 as the file size, both as hex strings. The XML declaration line
 * is then appended to {@code xmlSb}.
 *
 * @param byteSrc raw bytes of the compiled AndroidManifest.xml
 */
public static void parseXmlHeader(byte[] byteSrc){
    // The header is two consecutive 4-byte fields, printed in file order.
    String[] fieldLabels = {"magic number:", "xml size:"};
    for(int field=0;field<fieldLabels.length;field++){
        byte[] fieldBytes = Utils.copyByte(byteSrc, field*4, 4);
        System.out.println(fieldLabels[field]+Utils.bytesToHexString(fieldBytes));
    }

    // Every decoded document begins with the XML declaration.
    xmlSb.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
    xmlSb.append("\n");
}

3、解析StringChunk信息

/**
 * Parses the String Chunk and stores every string in the global
 * {@code stringContentList}, then records where the next chunk starts in
 * {@code resourceChunkOffset}.
 *
 * All header fields are read relative to {@code stringChunkOffset}:
 * +0 chunk type, +4 chunk size, +8 string count, +12 style count,
 * +16 unknown, +20 string pool offset, +24 style pool offset.
 *
 * Only the UTF-16 layout is handled: each string is a 2-byte character
 * count, the characters (two bytes each), and a 2-byte 0x0000 terminator.
 * Strings are assumed to be stored back to back in the pool; the per-string
 * offset table is not consulted.
 * NOTE(review): pools of compiled resource XML files appear to use a
 * different (UTF-8) layout, which this method does not decode.
 *
 * @param byteSrc raw bytes of the compiled AndroidManifest.xml
 */
public static void parseStringChunk(byte[] byteSrc){
    // Chunk type tag
    byte[] chunkTagByte = Utils.copyByte(byteSrc, stringChunkOffset, 4);
    System.out.println("string chunktag:"+Utils.bytesToHexString(chunkTagByte));

    // Chunk size (covers the whole chunk, header included)
    byte[] chunkSizeByte = Utils.copyByte(byteSrc, stringChunkOffset+4, 4);
    int chunkSize = Utils.byte2int(chunkSizeByte);
    System.out.println("chunk size:"+chunkSize);

    // Number of strings in the pool
    byte[] chunkStringCountByte = Utils.copyByte(byteSrc, stringChunkOffset+8, 4);
    int chunkStringCount = Utils.byte2int(chunkStringCountByte);
    System.out.println("count:"+chunkStringCount);

    stringContentList = new ArrayList<String>(chunkStringCount);

    // The style count (+12) and the unknown field (+16) are skipped.
    // The string pool offset (+20) is relative to the start of this chunk.
    byte[] chunkStringOffsetByte = Utils.copyByte(byteSrc, stringChunkOffset+20, 4);
    int stringPoolOffset = Utils.byte2int(chunkStringOffsetByte);
    int stringContentStart = stringChunkOffset + stringPoolOffset;
    System.out.println("start:"+stringContentStart);

    // Copy only the bytes from the pool start to the end of this chunk, so
    // the read never runs past the chunk boundary.
    byte[] chunkStringContentByte = Utils.copyByte(byteSrc, stringContentStart, chunkSize - stringPoolOffset);

    // The characters are UTF-16LE; decoding them with that charset keeps
    // non-ASCII text intact instead of stripping zero bytes afterwards.
    java.nio.charset.Charset utf16le = java.nio.charset.Charset.forName("UTF-16LE");

    int endStringIndex = 0;
    while(stringContentList.size() < chunkStringCount){
        // The length prefix counts characters; each one occupies two bytes.
        int stringSize = Utils.byte2Short(Utils.copyByte(chunkStringContentByte, endStringIndex, 2))*2;
        String str = "";
        if(stringSize > 0){
            str = new String(Utils.copyByte(chunkStringContentByte, endStringIndex+2, stringSize), utf16le);
        }
        if(stringContentList.isEmpty()){
            System.out.println("size:"+stringSize);
            System.out.println("first string:"+str);
        }else{
            System.out.println("str:"+str);
        }
        stringContentList.add(str);
        // Skip the length prefix, the characters and the 0x0000 terminator.
        endStringIndex += (2+stringSize+2);
    }

    // The next chunk starts right after this one.
    resourceChunkOffset = stringChunkOffset + chunkSize;

}

这里我们需要解释几个点：

1、忽略过Unknown字段
2、字符串内容的结束符是：0x0000
3、每个字符串开始的前两个字节是字符串的长度
所以我们有了每个字符串的偏移值和大小，那么解析字符串内容就简单了：
这里写图片描述

这里我们看到0x000B(高位和低位相反)就是字符串的大小，结尾是0x0000

这里写图片描述

一个字符对应的是两个字节，而且这里有一个方法：Utils.filterStringNull(firstStringContent)：

/**
 * Removes every NUL character (U+0000) from the given string.
 *
 * The filtering is done on characters rather than on platform-charset
 * bytes, so the result does not depend on the default charset and
 * characters that charset cannot encode are left untouched.
 *
 * @param str the string to clean; may be null or empty
 * @return {@code str} itself when it is null or empty, otherwise a copy of
 *         {@code str} with all NUL characters removed
 */
public static String filterStringNull(String str){
    if(str == null || str.length() == 0){
        return str;
    }
    StringBuilder filtered = new StringBuilder(str.length());
    for(int i=0;i<str.length();i++){
        char c = str.charAt(i);
        if(c != '\0'){
            filtered.append(c);
        }
    }
    return filtered.toString();
}

其实逻辑很简单，就是过滤空字符：在C语言中是NULL，在Java中就是00，如果不过滤的话，会出现下面的这种情况：
这里写图片描述
每个字符都是宽字符，很难看，其实原因就是每个字符后面多了一个00，所以过滤之后就可以了

上面我们就解析了AndroidManifest.xml中所有的字符串内容。这里我们需要用一个全局的字符列表，用来存储这些字符串的值，后面会用索引来获取这些字符串的值。

第三、解析ResourceIdChunk
这个Chunk主要存放的是AndroidManifest中用到的系统属性值对应的资源Id，比如android:versionCode中的versionCode属性，android是前缀，后面会说到。

1、ChunkType：ResourceIdChunk的类型，固定四个字节：0x00080108
2、ChunkSize：ResourceChunk的大小，四个字节
3、ResourceIds：ResourceId的内容，每个Id占四个字节，所以Id的个数是：(ChunkSize - 8) / 4，其中8个字节是头部(ChunkType和ChunkSize)的大小

/**
 * Parses the Resource ID chunk located at {@code resourceChunkOffset}.
 *
 * Prints the chunk tag and the chunk size, then prints every 4-byte
 * resource id found after the 8-byte header in decimal and hex. Finally
 * {@code nextChunkOffset} is set to the first byte after this chunk.
 *
 * @param byteSrc raw bytes of the compiled AndroidManifest.xml
 */
public static void parseResourceChunk(byte[] byteSrc){
    // The chunk tag and the chunk size fields together form the header.
    final int headerLength = 8;

    byte[] tagBytes = Utils.copyByte(byteSrc, resourceChunkOffset, 4);
    System.out.println(Utils.bytesToHexString(tagBytes));

    int chunkSize = Utils.byte2int(Utils.copyByte(byteSrc, resourceChunkOffset+4, 4));
    System.out.println("chunk size:"+chunkSize);

    // chunkSize includes the header, so the id payload is what is left.
    byte[] idBytes = Utils.copyByte(byteSrc, resourceChunkOffset+headerLength, chunkSize-headerLength);
    ArrayList<Integer> resourceIdList = new ArrayList<Integer>(idBytes.length/4);
    int cursor = 0;
    while(cursor < idBytes.length){
        int resId = Utils.byte2int(Utils.copyByte(idBytes, cursor, 4));
        String hex = Utils.bytesToHexString(Utils.copyByte(idBytes, cursor, 4));
        System.out.println("id:"+resId+",hex:"+hex);
        resourceIdList.add(resId);
        cursor += 4;
    }

    nextChunkOffset = resourceChunkOffset+chunkSize;

}

解析结果：
这里写图片描述

第四、解析StartNamespaceChunk

1、ChunkType：Chunk的类型，固定四个字节：0x00100100

2、ChunkSize：Chunk的大小，四个字节

3、LineNumber：在AndroidManifest文件中的行号，四个字节

4、Unknown：未知区域，四个字节

5、Prefix：命名空间的前缀(在字符串中的索引值)，比如：android

6、Uri：命名空间的uri(在字符串中的索引值)：比如：http://schemas.android.com/apk/res/android

解析结果如下：
这里写图片描述

第五、StartTagChunk

这个Chunk主要是存放了AndroidManifest.xml中的标签信息了，也是最核心的内容，当然也是最复杂的内容。
1、ChunkType：Chunk的类型，固定四个字节：0x00100102

2、ChunkSize：Chunk的大小，固定四个字节

3、LineNumber：对应于AndroidManifest中的行号，四个字节

4、Unknown：未知领域，四个字节

5、NamespaceUri：这个标签用到的命名空间的Uri,比如用到了android这个前缀，那么就需要用http://schemas.android.com/apk/res/android这个Uri去获取，四个字节

6、Name：标签名称(在字符串中的索引值)，四个字节

7、Flags：标签的类型，四个字节，比如是开始标签还是结束标签等

8、AttributeCount：标签包含的属性个数，四个字节

9、ClassAttribute：标签包含的类属性，四个字节

10、Attributes：属性内容，每个属性算是一个Entry，这个Entry固定由5个四字节的整数组成：

[NamespaceUri，Name，ValueString，Type，Data]，我们在解析的时候需要注意第四个值(Type)，要做一次处理：需要右移24位。所以这个字段的大小是：属性个数*5*4个字节。

/**
 * Parses one StartTag chunk, prints its fields and appends the start tag
 * to {@code xmlSb}.
 *
 * All fields are read from fixed offsets counted from index 0 of
 * {@code byteSrc}, so {@code byteSrc} is expected to begin at the chunk's
 * type field: 0 type, 4 size, 8 line number, 12 unknown, 16 namespace uri,
 * 20 tag name, 24 flags, 28 attribute count, 32 class attribute,
 * 36 attribute entries (20 bytes each).
 *
 * String indexes are looked up in the global {@code stringContentList};
 * an index of -1 means "absent". When the tag name is absent or out of
 * range, nothing is appended to {@code xmlSb}.
 *
 * @param byteSrc bytes of the StartTag chunk
 */
public static void parseStartTagChunk(byte[] byteSrc){
    // Chunk type tag
    byte[] chunkTagByte = Utils.copyByte(byteSrc, 0, 4);
    System.out.println(Utils.bytesToHexString(chunkTagByte));

    // Chunk size
    byte[] chunkSizeByte = Utils.copyByte(byteSrc, 4, 4);
    int chunkSize = Utils.byte2int(chunkSizeByte);
    System.out.println("chunk size:"+chunkSize);

    // Line number
    byte[] lineNumberByte = Utils.copyByte(byteSrc, 8, 4);
    int lineNumber = Utils.byte2int(lineNumberByte);
    System.out.println("line number:"+lineNumber);

    // The field after the line number. It used to be read from offset 8,
    // which re-read the line number and used it as a string index.
    // NOTE(review): in AOSP's ResXMLTree_node this field is the comment
    // string index (normally -1); the "prefix" label is kept for output
    // compatibility - confirm the intended meaning.
    byte[] prefixByte = Utils.copyByte(byteSrc, 12, 4);
    int prefixIndex = Utils.byte2int(prefixByte);
    if(prefixIndex >= 0 && prefixIndex<stringContentList.size()){
        System.out.println("prefix:"+prefixIndex);
        System.out.println("prefix str:"+stringContentList.get(prefixIndex));
    }else{
        System.out.println("prefix null");
    }

    // Namespace uri (the bounds check now tests uriIndex, not prefixIndex)
    byte[] uriByte = Utils.copyByte(byteSrc, 16, 4);
    int uriIndex = Utils.byte2int(uriByte);
    if(uriIndex >= 0 && uriIndex<stringContentList.size()){
        System.out.println("uri:"+uriIndex);
        System.out.println("uri str:"+stringContentList.get(uriIndex));
    }else{
        System.out.println("uri null");
    }

    // Tag name: validate the index before touching the string list.
    byte[] tagNameByte = Utils.copyByte(byteSrc, 20, 4);
    System.out.println(Utils.bytesToHexString(tagNameByte));
    int tagNameIndex = Utils.byte2int(tagNameByte);
    String tagName = null;
    if(tagNameIndex >= 0 && tagNameIndex<stringContentList.size()){
        tagName = stringContentList.get(tagNameIndex);
        System.out.println("tag name index:"+tagNameIndex);
        System.out.println("tag name str:"+tagName);
    }else{
        System.out.println("tag name null");
    }

    // Attribute count (the four bytes at offset 24, 14001400, are skipped).
    // Per AOSP's ResXMLTree_attrExt the count is the 16-bit field at offset
    // 28 and the following 16 bits are idIndex, so only the low 16 bits of
    // the 32-bit value are kept (assumes byte2int decodes little-endian,
    // like the other chunk fields - confirm against Utils).
    byte[] attrCountByte = Utils.copyByte(byteSrc, 28, 4);
    int attrCount = Utils.byte2int(attrCountByte) & 0xFFFF;
    System.out.println("attr count:"+attrCount);

    // Each attribute entry holds five 4-byte values:
    // namespaceuri, name, valuestring, type, data
    ArrayList<AttributeData> attrList = new ArrayList<AttributeData>(attrCount);
    for(int i=0;i<attrCount;i++){
        int entryStart = 36+i*20;
        AttributeData attrData = new AttributeData();
        attrData.nameSpaceUri = Utils.byte2int(Utils.copyByte(byteSrc, entryStart, 4));
        attrData.name = Utils.byte2int(Utils.copyByte(byteSrc, entryStart+4, 4));
        attrData.valueString = Utils.byte2int(Utils.copyByte(byteSrc, entryStart+8, 4));
        // The type lives in the top byte; the unsigned shift keeps it
        // non-negative even when that byte is 0x80 or above.
        attrData.type = Utils.byte2int(Utils.copyByte(byteSrc, entryStart+12, 4)) >>> 24;
        attrData.data = Utils.byte2int(Utils.copyByte(byteSrc, entryStart+16, 4));
        attrList.add(attrData);
    }

    for(int i=0;i<attrCount;i++){
        AttributeData attr = attrList.get(i);
        if(attr.nameSpaceUri != -1){
            System.out.println("nameSpaceUri:"+stringContentList.get(attr.nameSpaceUri));
        }else{
            System.out.println("nameSpaceUri == null");
        }
        if(attr.name != -1){
            System.out.println("name:"+stringContentList.get(attr.name));
        }else{
            System.out.println("name == null");
        }
        if(attr.valueString != -1){
            System.out.println("valueString:"+stringContentList.get(attr.valueString));
        }else{
            System.out.println("valueString == null");
        }
        System.out.println("type:"+AttributeType.getAttrType(attr.type));
        System.out.println("data:"+AttributeType.getAttributeData(attr));
    }

    // Build the XML text for this tag; a tag without a name is skipped.
    if(tagName != null){
        xmlSb.append(createStartTagXml(tagName, attrList));
    }

}