此文件似乎是二进制XML格式.这种格式是什么,以及如何以编程方式解析它(而不是使用SDK中的aapt dump工具)?
此二进制格式未在此处的文档中讨论.
注意:我想从Android环境外部访问此信息,最好是从Java访问.
有一个应用程序可以读取apk文件并将XML解码为几乎原始的形式.
用法:
apktool d Gmail.apk && cat Gmail/AndroidManifest.xml
检查android-apktool以获取更多信息
下面这个在Android上运行的Java方法记录了.apk包中AndroidManifest.xml文件的二进制格式(仅限我能够解读的部分).第二个代码框显示了如何调用decompressXML,以及如何从设备上的应用包文件加载byte[].(有些字段的用途我不明白;如果你知道它们的含义,请告诉我,我会更新信息.)
// decompressXML -- Parse the 'compressed' binary form of Android XML docs // such as for AndroidManifest.xml in .apk files public static int endDocTag = 0x00100101; public static int startTag = 0x00100102; public static int endTag = 0x00100103; public void decompressXML(byte[] xml) { // Compressed XML file/bytes starts with 24x bytes of data, // 9 32 bit words in little endian order (LSB first): // 0th word is 03 00 08 00 // 3rd word SEEMS TO BE: Offset at then of StringTable // 4th word is: Number of strings in string table // WARNING: Sometime I indiscriminently display or refer to word in // little endian storage format, or in integer format (ie MSB first). int numbStrings = LEW(xml, 4*4); // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets // of the length/string data in the StringTable. int sitOff = 0x24; // Offset of start of StringIndexTable // StringTable, each string is represented with a 16 bit little endian // character count, followed by that number of 16 bit (LE) (Unicode) chars. int stOff = sitOff + numbStrings*4; // StringTable follows StrIndexTable // XMLTags, The XML tag tree starts after some unknown content after the // StringTable. There is some unknown data after the StringTable, scan // forward from this point to the flag for the start of an XML start tag. int xmlTagOff = LEW(xml, 3*4); // Start from the offset in the 3rd word. 
// Scan forward until we find the bytes: 0x02011000(x00100102 in normal int) for (int ii=xmlTagOff; ii"); indent++; } else if (tag0 == endTag) { // XML END TAG indent--; off += 6*4; // Skip over 6 words of endTag data String name = compXmlString(xml, sitOff, stOff, nameSi); prtIndent(indent, ""+name+"> (line "+startTagLineNo+"-"+lineNo+")"); //tr.parent(); // Step back up the NobTree } else if (tag0 == endDocTag) { // END OF XML DOC TAG break; } else { prt(" Unrecognized tag code '"+Integer.toHexString(tag0) +"' at offset "+off); break; } } // end of while loop scanning tags and attributes of XML tree prt(" end at offset "+off); } // end of decompressXML public String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) { if (strInd < 0) return null; int strOff = stOff + LEW(xml, sitOff+strInd*4); return compXmlStringAt(xml, strOff); } public static String spaces = " "; public void prtIndent(int indent, String str) { prt(spaces.substring(0, Math.min(indent*2, spaces.length()))+str); } // compXmlStringAt -- Return the string stored in StringTable format at // offset strOff. This offset points to the 16 bit string length, which // is followed by that number of 16 bit (Unicode) chars. public String compXmlStringAt(byte[] arr, int strOff) { int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff; byte[] chars = new byte[strLen]; for (int ii=0; ii 此方法将AndroidManifest读入byte []进行处理:
public void getIntents(String path) { try { JarFile jf = new JarFile(path); InputStream is = jf.getInputStream(jf.getEntry("AndroidManifest.xml")); byte[] xml = new byte[is.available()]; int br = is.read(xml); //Tree tr = TrunkFactory.newTree(); decompressXML(xml); //prt("XML\n"+tr.list()); } catch (Exception ex) { console.log("getIntents, ex: "+ex); ex.printStackTrace(); } } // end of getIntents大多数应用程序都存储在/ system/app中,这些应用程序在没有root我的Evo的情况下可读,其他应用程序在/ data/app中,我需要root才能看到.上面的'path'参数类似于:"/ system/app /Weather.apk"
+1可以在Android之外使用的工具.我把它包装成一个工作的命令行Java工具; 见http://pastebin.com/c53DuqMt.
3> Paolo Rovell..:如何在Python(或其他)脚本中使用Android SDK自带的Android资产打包工具(aapt)?
实际上,通过aapt(http://elinux.org/Android_aapt),您可以检索有关.apk包及其AndroidManifest.xml文件的信息.特别是,您可以通过'dump'子命令提取.apk包的各个元素的值.例如,您可以通过以下方式在.apk包中的AndroidManifest.xml文件中提取用户权限:
$ aapt dump permissions package.apk
其中package.apk是您的apk包.
此外,您可以使用Unix管道命令清除输出.例如:
$ aapt dump permissions package.apk | sed 1d | awk '{ print $NF }'
下面是一个以编程方式执行相同操作的Python脚本:
import os import subprocess #Current directory and file name: curpath = os.path.dirname( os.path.realpath(__file__) ) filepath = os.path.join(curpath, "package.apk") #Extract the AndroidManifest.xml permissions: command = "aapt dump permissions " + filepath + " | sed 1d | awk '{ print $NF }'" process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=None, shell=True) permissions = process.communicate()[0] print permissions以类似的方式,您可以提取AndroidManifest.xml的其他信息(例如包,应用程序名称等):
#Extract the APK package info: shellcommand = "aapt dump badging " + filepath process = subprocess.Popen(shellcommand, stdout=subprocess.PIPE, stderr=None, shell=True) apkInfo = process.communicate()[0].splitlines() for info in apkInfo: #Package info: if string.find(info, "package:", 0) != -1: print "App Package: " + findBetween(info, "name='", "'") print "App Version: " + findBetween(info, "versionName='", "'") continue #App name: if string.find(info, "application:", 0) != -1: print "App Name: " + findBetween(info, "label='", "'") continue def findBetween(s, prefix, suffix): try: start = s.index(prefix) + len(prefix) end = s.index(suffix, start) return s[start:end] except ValueError: return ""如果您想要解析整个AndroidManifest XML树,您可以使用xmltree命令以类似的方式执行此操作:
aapt dump xmltree package.apk AndroidManifest.xml
像以前一样使用Python:
# Extract the AndroidManifest XML tree.
# aapt is started with an argument list rather than a shell command string,
# so a path containing spaces or shell metacharacters is passed through intact.
process = subprocess.Popen(["aapt", "dump", "xmltree", filepath, "AndroidManifest.xml"],
                           stdout=subprocess.PIPE, stderr=None,
                           universal_newlines=True)
xmlTree = process.communicate()[0]

# NOTE(review): str.count matches the word anywhere in the dump, not only in
# element names, so these figures may over-count -- confirm against real output.
print("Number of Activities: " + str(xmlTree.count("activity")))
print("Number of Services: " + str(xmlTree.count("service")))
print("Number of BroadcastReceivers: " + str(xmlTree.count("receiver")))
4> Shonzilla..:你可以使用在android-random项目中开发的axml2xml.pl工具.它将从二进制文件生成文本清单文件(AndroidManifest.xml).
我说的是"文本"而不是"原始",因为像许多逆向工程工具一样,这个工具并不完美,结果也不完整.我认为它要么从未达到功能完整,要么只是不向前兼容(无法处理更新的二进制编码方案).无论什么原因,axml2xml.pl工具都无法正确提取所有属性值.这些属性包括minSdkVersion,targetSdkVersion以及基本上所有引用资源的属性(如字符串,图标等),也就是说,只有类名(活动,服务等)被正确提取.
但是,您仍然可以通过在原始Android应用程序文件(.apk)上运行aapt工具找到这些缺失的信息:
aapt l -a [app.apk]
5> Kerem Kusmez..:检查以下WPF项目,它正确解码属性.
6> 小智..:使用最新的SDK-Tools,您现在可以使用名为apkanalyzer的工具打印出APK的AndroidManifest.xml(以及其他部分,如资源).
[android sdk]/tools/bin/apkanalyzer manifest print [app.apk]
apkanalyzer
7> Liu Dong..:apk-parser(https://github.com/caoqianli/apk-parser)是一个轻量级的Java实现,不依赖aapt或其他二进制文件,适用于解析二进制xml文件和其他apk信息.
ApkParser apkParser = new ApkParser(new File(filePath)); // set a locale to translate resource tag into specific strings in language the locale specified, you set locale to Locale.ENGLISH then get apk title 'WeChat' instead of '@string/app_name' for example apkParser.setPreferredLocale(locale); String xml = apkParser.getManifestXml(); System.out.println(xml); String xml2 = apkParser.transBinaryXml(xmlPathInApk); System.out.println(xml2); ApkMeta apkMeta = apkParser.getApkMeta(); System.out.println(apkMeta); Setlocales = apkParser.getLocales(); for (Locale l : locales) { System.out.println(l); } apkParser.close();
8> CatShoes..:如果您使用Python或使用Androguard,Androguard Androaxml功能将为您执行此转换.此博客文章中详细介绍了此功能,此处提供了其他文档和源代码.
用法:
$ ./androaxml.py -h Usage: androaxml.py [options] Options: -h, --help show this help message and exit -i INPUT, --input=INPUT filename input (APK or android's binary xml) -o OUTPUT, --output=OUTPUT filename output of the xml -v, --version version of the API $ ./androaxml.py -i yourfile.apk -o output.xml $ ./androaxml.py -i AndroidManifest.xml -o output.xml
9> Jonathan Pot..:如果它有用,这里是Ribo发布的Java片段的C++版本:
struct decompressXML { // decompressXML -- Parse the 'compressed' binary form of Android XML docs // such as for AndroidManifest.xml in .apk files enum { endDocTag = 0x00100101, startTag = 0x00100102, endTag = 0x00100103 }; decompressXML(const BYTE* xml, int cb) { // Compressed XML file/bytes starts with 24x bytes of data, // 9 32 bit words in little endian order (LSB first): // 0th word is 03 00 08 00 // 3rd word SEEMS TO BE: Offset at then of StringTable // 4th word is: Number of strings in string table // WARNING: Sometime I indiscriminently display or refer to word in // little endian storage format, or in integer format (ie MSB first). int numbStrings = LEW(xml, cb, 4*4); // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets // of the length/string data in the StringTable. int sitOff = 0x24; // Offset of start of StringIndexTable // StringTable, each string is represented with a 16 bit little endian // character count, followed by that number of 16 bit (LE) (Unicode) chars. int stOff = sitOff + numbStrings*4; // StringTable follows StrIndexTable // XMLTags, The XML tag tree starts after some unknown content after the // StringTable. There is some unknown data after the StringTable, scan // forward from this point to the flag for the start of an XML start tag. int xmlTagOff = LEW(xml, cb, 3*4); // Start from the offset in the 3rd word. 
// Scan forward until we find the bytes: 0x02011000(x00100102 in normal int) for (int ii=xmlTagOff; ii"); indent++; } else if (tag0 == endTag) { // XML END TAG indent--; off += 6*4; // Skip over 6 words of endTag data std::string name = compXmlString(xml, cb, sitOff, stOff, nameSi); prtIndent(indent, ""+name+"> (line "+toIntString(startTagLineNo)+"-"+toIntString(lineNo)+")"); //tr.parent(); // Step back up the NobTree } else if (tag0 == endDocTag) { // END OF XML DOC TAG break; } else { prt(" Unrecognized tag code '"+toHexString(tag0) +"' at offset "+toIntString(off)); break; } } // end of while loop scanning tags and attributes of XML tree prt(" end at offset "+off); } // end of decompressXML std::string compXmlString(const BYTE* xml, int cb, int sitOff, int stOff, int strInd) { if (strInd < 0) return std::string(""); int strOff = stOff + LEW(xml, cb, sitOff+strInd*4); return compXmlStringAt(xml, cb, strOff); } void prt(std::string str) { printf("%s", str.c_str()); } void prtIndent(int indent, std::string str) { char spaces[46]; memset(spaces, ' ', sizeof(spaces)); spaces[min(indent*2, sizeof(spaces) - 1)] = 0; prt(spaces); prt(str); prt("\n"); } // compXmlStringAt -- Return the string stored in StringTable format at // offset strOff. This offset points to the 16 bit string length, which // is followed by that number of 16 bit (Unicode) chars. std::string compXmlStringAt(const BYTE* arr, int cb, int strOff) { if (cb < strOff + 2) return std::string(""); int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff; char* chars = new char[strLen + 1]; chars[strLen] = 0; for (int ii=0; ii off + 3) ? ( arr[off+3]<<24&0xff000000 | arr[off+2]<<16&0xff0000 | arr[off+1]<<8&0xff00 | arr[off]&0xFF ) : 0; } // end of LEW std::string toHexString(DWORD attrResId) { char ch[20]; sprintf_s(ch, 20, "%lx", attrResId); return std::string(ch); } std::string toIntString(int i) { char ch[20]; sprintf_s(ch, 20, "%ld", i); return std::string(ch); } };