Class: ApkXml

Inherits:
Object
  • Object
show all
Defined in:
lib/apktools/apkxml.rb

Overview

Class to parse an APK’s binary XML format back into textual XML

Defined Under Namespace

Classes: ChunkHeader, ResTypeEntry, StringPool, XmlAttribute, XmlElement, XmlResourceMap, XmlTreeHeader

Constant Summary collapse

DEBUG =

:nodoc:

false

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(apk_file) ⇒ ApkXml

Create a new ApkXml instance from the specified apk_file

This opens and parses the contents of the APK’s resources.arsc file.



115
116
117
118
# File 'lib/apktools/apkxml.rb', line 115

# Build a parser bound to +apk_file+.
#
# apk_file: stored as the archive that parse_xml reads from, and handed to
# ApkResources.new to create the resource resolver for this instance.
def initialize(apk_file)
  @current_apk = apk_file
  @apk_resources = ApkResources.new(@current_apk)
end

Instance Attribute Details

#apk_resources ⇒ Object (readonly)

ApkResources instance used to resolve resources in this APK



107
108
109
# File 'lib/apktools/apkxml.rb', line 107

# Reader for the ApkResources instance created in the constructor and used to
# resolve resource references while parsing.
def apk_resources; @apk_resources; end

#current_apk ⇒ Object (readonly)

APK file where parser will search for XML



105
106
107
# File 'lib/apktools/apkxml.rb', line 105

# Reader for the APK file given to the constructor; parse_xml searches this
# archive for the requested XML file.
def current_apk; @current_apk; end

#xml_elements ⇒ Object (readonly)

Array of XmlElements from the last parse operation



109
110
111
# File 'lib/apktools/apkxml.rb', line 109

# Reader for the XmlElements collected by the most recent parse_xml call.
# The constructor does not set it, so it is nil until parse_xml has run.
def xml_elements; @xml_elements; end

Instance Method Details

#parse_xml(xml_file, pretty = false, resolve_resources = false) ⇒ Object

Read the requested XML file from inside the APK and parse out into readable textual XML. Returns a string of the parsed XML.

xml_file: Name of the XML file inside the APK (e.g. AndroidManifest.xml); it is matched as a pattern against the name of each entry in the archive.
pretty: Optionally format the XML output as human readable.
resolve_resources: Optionally, where possible, resolve resource references to their default value.

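Resource references found in the XML are looked up through the ApkResources instance created by the constructor, which is what opens and parses the contents of the APK’s resources.arsc file.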



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/apktools/apkxml.rb', line 129

# Read an XML file out of the APK and convert its binary XML into text.
#
# xml_file:: matched against every archive entry name with String#match, so a
#            String is treated as a pattern; when several entries match, the
#            last one read is used.
# pretty:: when true, put every element and attribute on its own indented line.
# resolve_resources:: when true, replace resource references with their default
#                     value where one is found; otherwise the resource key (or
#                     a "res:0x..." marker) is emitted.
#
# Returns the textual XML as a String and leaves the parsed XmlElements in
# @xml_elements. Raises RuntimeError when no archive entry matches xml_file.
#
# NOTE: attribute values and CDATA text are written verbatim (no XML escaping).
def parse_xml(xml_file, pretty = false, resolve_resources = false)
	# Reset state left over from a previous parse
	@xml_elements = []
	xml_output = "".dup # unfrozen buffer so output can be appended in place
	indent = 0
	data = nil

	# Get the XML from the APK file
	Zip::ZipFile.foreach(@current_apk) do |f|
		data = f.get_input_stream.read if f.name.match(xml_file)
	end
	# Fail with a clear message instead of crashing inside the byte readers
	raise "XML file '#{xml_file}' not found in #{@current_apk}" if data.nil?

	# Parse the Header Chunk
	header = ChunkHeader.new(read_short(data, HEADER_START),
			read_short(data, HEADER_START + 2),
			read_word(data, HEADER_START + 4))

	# Parse the StringPool Chunk
	startoffset_pool = HEADER_START + header.size
	puts "Parse Main StringPool Chunk" if DEBUG
	stringpool_main = parse_stringpool(data, startoffset_pool)
	puts "#{stringpool_main.values.length} strings found" if DEBUG

	# Parse the remainder of the file chunks based on type
	namespaces = {} # namespace URI => prefix
	current = startoffset_pool + stringpool_main.header.chunk_size
	puts "Parse Remaining Chunks" if DEBUG
	while current < data.length
		## Parse Header
		header = ChunkHeader.new(read_short(data, current),
				read_short(data, current + 2),
				read_word(data, current + 4))
		## Check Type
		if header.type == TYPE_XML_RESOURCEMAP
			## Maps resource ids to strings in the pool.
			## Nothing below consumes the map, so the chunk is simply skipped.
		elsif header.type == TYPE_XML_STARTNAMESPACE
			tree_header = parse_tree_header(header, data, current)
			body_start = current + header.size
			prefix = stringpool_main.values[read_word(data, body_start)]
			uri = stringpool_main.values[read_word(data, body_start + 4)]
			namespaces[uri] = prefix
			puts "NAMESPACE_START: xmlns:#{prefix} = '#{uri}'" if DEBUG
		elsif header.type == TYPE_XML_ENDNAMESPACE
			tree_header = parse_tree_header(header, data, current)
			body_start = current + header.size
			prefix = stringpool_main.values[read_word(data, body_start)]
			uri = stringpool_main.values[read_word(data, body_start + 4)]
			puts "NAMESPACE_END: xmlns:#{prefix} = '#{uri}'" if DEBUG
		elsif header.type == TYPE_XML_STARTELEMENT
			tree_header = parse_tree_header(header, data, current)
			body_start = current + header.size
			# Parse the element/attribute data
			namespace = nil
			namespace_idx = read_word(data, body_start)
			namespace = stringpool_main.values[namespace_idx] if namespace_idx != OFFSET_NO_ENTRY
			name = stringpool_main.values[read_word(data, body_start + 4)]

			attribute_offset = read_short(data, body_start + 8)
			attribute_size = read_short(data, body_start + 10)
			attribute_count = read_short(data, body_start + 12)
			id_idx = read_short(data, body_start + 14)
			class_idx = read_short(data, body_start + 16)
			style_idx = read_short(data, body_start + 18)

			attributes = []
			attribute_count.times do |i|
				index_offset = i * attribute_size + (body_start + attribute_offset)

				# Attribute namespaces are stored as URIs; translate to the prefix
				attr_namespace = nil
				attr_uri_idx = read_word(data, index_offset)
				if attr_uri_idx != OFFSET_NO_ENTRY
					attr_namespace = namespaces[stringpool_main.values[attr_uri_idx]]
				end
				attr_name = stringpool_main.values[read_word(data, index_offset + 4)]

				attr_raw = nil
				attr_raw_idx = read_word(data, index_offset + 8)
				if attr_raw_idx != OFFSET_NO_ENTRY
					# Attribute has a raw value, use it
					attr_raw = stringpool_main.values[attr_raw_idx]
				end
				entry = ResTypeEntry.new(0, nil, read_byte(data, index_offset + 15), read_word(data, index_offset + 16))

				attr_value = nil
				if !attr_raw.nil? # Use raw value
					attr_value = attr_raw
				elsif entry.data_type == 1 # Value is a reference to a resource
					# Find the resource
					default_res = apk_resources.get_default_resource_value(entry.data)
					if resolve_resources && !default_res.nil?
						# Use the default resource value
						attr_value = default_res.data
					else
						key_value = apk_resources.get_resource_key(entry.data, true)
						# Use the key string, or the raw id marked as a resource
						attr_value = key_value.nil? ? "res:0x#{entry.data.to_s(16)}" : key_value
					end
				else # Value is a constant
					attr_value = "0x#{entry.data.to_s(16)}"
				end

				attributes << XmlAttribute.new(attr_namespace, attr_name, attr_value)
			end

			# The element counts as root when nothing has been written yet
			element = XmlElement.new(tree_header, namespace, name, id_idx, class_idx, style_idx, attributes, xml_output.empty?)

			# Print the element/attribute data
			puts "ELEMENT_START: #{element.namespace} #{element.name}" if DEBUG
			# Translate the namespace to its declared prefix the same way
			# attributes do; fall back to the stored value when none is declared.
			element_prefix = element.namespace.nil? ? nil : (namespaces[element.namespace] || element.namespace)
			display_name = element_prefix.nil? ? element.name : "#{element_prefix}:#{element.name}"

			if pretty
				xml_output << "\n" << ("  " * indent)
				indent += 1
			end
			xml_output << "<#{display_name} "
			# Only print namespaces on the root element
			if element.is_root
				keys = namespaces.keys
				keys.each do |key|
					xml_output << "xmlns:#{namespaces[key]}=\"#{key}\" "
					xml_output << "\n" << ("  " * indent) if pretty && key != keys.last
				end
			end

			element.attributes.each do |attr|
				puts "---ATTRIBUTE: #{attr.namespace} #{attr.name} #{attr.value}" if DEBUG
				attr_display = attr.namespace.nil? ? attr.name : "#{attr.namespace}:#{attr.name}"
				xml_output << "\n" << ("  " * indent) if pretty
				xml_output << "#{attr_display}=\"#{attr.value}\" "
			end

			xml_output << ">"

			# Push every new element onto the array
			@xml_elements << element
		elsif header.type == TYPE_XML_ENDELEMENT
			tree_header = parse_tree_header(header, data, current)
			body_start = current + header.size
			namespace = nil
			namespace_idx = read_word(data, body_start)
			namespace = stringpool_main.values[namespace_idx] if namespace_idx != OFFSET_NO_ENTRY
			name = stringpool_main.values[read_word(data, body_start + 4)]

			puts "ELEMENT END: #{namespace} #{name}" if DEBUG
			# Same prefix translation as the start tag so both tags agree
			element_prefix = namespace.nil? ? nil : (namespaces[namespace] || namespace)
			display_name = element_prefix.nil? ? name : "#{element_prefix}:#{name}"
			if pretty
				indent = [indent - 1, 0].max
				xml_output << "\n" << ("  " * indent)
			end
			xml_output << "</#{display_name}>"
		elsif header.type == TYPE_XML_CDATA
			tree_header = parse_tree_header(header, data, current)
			body_start = current + header.size

			cdata = stringpool_main.values[read_word(data, body_start)]
			# The value type is a single byte, read like the attribute type above
			cdata_type = read_byte(data, body_start + 7)
			cdata_value = read_word(data, body_start + 8)
			puts "CDATA: #{cdata} #{cdata_type} #{cdata_value}" if DEBUG

			cdata.split(/\r?\n/).each do |item|
				xml_output << "\n" << ("  " * indent) if pretty
				xml_output << "<![CDATA[#{item.strip}]]>"
			end
		else
			puts "Unknown Chunk Found: #{header.type} #{header.size}" if DEBUG
			## End Immediately
			break
		end

		# A chunk that reports no size would never move the cursor forward;
		# stop here rather than loop forever on malformed input.
		break if header.chunk_size <= 0
		current += header.chunk_size
	end

	xml_output
end