001/*
002 * Copyright (C) 2009 The Android Open Source Project
003 * Copyright (C) 2015-2017 Keepsafe Software
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package com.android.dexdeps;
019
020import java.io.IOException;
021import java.io.RandomAccessFile;
022import java.nio.charset.StandardCharsets;
023import java.util.Arrays;
024
025/**
026 * Data extracted from a DEX file.
027 */
028public class DexData {
029    private RandomAccessFile mDexFile;
030    private HeaderItem mHeaderItem;
031    private String[] mStrings;              // strings from string_data_*
032    private TypeIdItem[] mTypeIds;
033    private ProtoIdItem[] mProtoIds;
034    private FieldIdItem[] mFieldIds;
035    private MethodIdItem[] mMethodIds;
036    private ClassDefItem[] mClassDefs;
037
038    private byte tmpBuf[] = new byte[4];
039    private boolean isBigEndian = false;
040
041    /**
042     * Constructs a new DexData for this file.
043     */
044    public DexData(RandomAccessFile raf) {
045        mDexFile = raf;
046    }
047
048    /**
049     * Loads the contents of the DEX file into our data structures.
050     *
051     * @throws IOException if we encounter a problem while reading
052     * @throws DexDataException if the DEX contents look bad
053     */
054    public void load() throws IOException {
055        parseHeaderItem();
056
057        loadStrings();
058        loadTypeIds();
059        loadProtoIds();
060        loadFieldIds();
061        loadMethodIds();
062        loadClassDefs();
063
064        markInternalClasses();
065    }
066
067    /**
068     * Verifies the given magic number.
069     */
070    private static boolean verifyMagic(byte[] magic) {
071        return Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v035) ||
072            Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v037) ||
073            Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v038) ||
074            Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC_v039);
075    }
076
077    /**
078     * Parses the interesting bits out of the header.
079     */
080    void parseHeaderItem() throws IOException {
081        mHeaderItem = new HeaderItem();
082
083        seek(0);
084
085        byte[] magic = new byte[8];
086        readBytes(magic);
087        if (!verifyMagic(magic)) {
088            System.err.println("Magic number is wrong -- are you sure " +
089                "this is a DEX file?");
090            throw new DexDataException();
091        }
092
093        /*
094         * Read the endian tag, so we properly swap things as we read
095         * them from here on.
096         */
097        seek(8+4+20+4+4);
098        mHeaderItem.endianTag = readInt();
099        if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) {
100            /* do nothing */
101        } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){
102            /* file is big-endian (!), reverse future reads */
103            isBigEndian = true;
104        } else {
105            System.err.println("Endian constant has unexpected value " +
106                Integer.toHexString(mHeaderItem.endianTag));
107            throw new DexDataException();
108        }
109
110        seek(8+4+20);  // magic, checksum, signature
111        mHeaderItem.fileSize = readInt();
112        mHeaderItem.headerSize = readInt();
113        /*mHeaderItem.endianTag =*/ readInt();
114        /*mHeaderItem.linkSize =*/ readInt();
115        /*mHeaderItem.linkOff =*/ readInt();
116        /*mHeaderItem.mapOff =*/ readInt();
117        mHeaderItem.stringIdsSize = readInt();
118        mHeaderItem.stringIdsOff = readInt();
119        mHeaderItem.typeIdsSize = readInt();
120        mHeaderItem.typeIdsOff = readInt();
121        mHeaderItem.protoIdsSize = readInt();
122        mHeaderItem.protoIdsOff = readInt();
123        mHeaderItem.fieldIdsSize = readInt();
124        mHeaderItem.fieldIdsOff = readInt();
125        mHeaderItem.methodIdsSize = readInt();
126        mHeaderItem.methodIdsOff = readInt();
127        mHeaderItem.classDefsSize = readInt();
128        mHeaderItem.classDefsOff = readInt();
129        /*mHeaderItem.dataSize =*/ readInt();
130        /*mHeaderItem.dataOff =*/ readInt();
131    }
132
133    /**
134     * Loads the string table out of the DEX.
135     *
136     * First we read all of the string_id_items, then we read all of the
137     * string_data_item.  Doing it this way should allow us to avoid
138     * seeking around in the file.
139     */
140    void loadStrings() throws IOException {
141        int count = mHeaderItem.stringIdsSize;
142        int stringOffsets[] = new int[count];
143
144        //System.out.println("reading " + count + " strings");
145
146        seek(mHeaderItem.stringIdsOff);
147        for (int i = 0; i < count; i++) {
148            stringOffsets[i] = readInt();
149        }
150
151        mStrings = new String[count];
152
153        seek(stringOffsets[0]);
154        for (int i = 0; i < count; i++) {
155            seek(stringOffsets[i]);         // should be a no-op
156            mStrings[i] = readString();
157            //System.out.println("STR: " + i + ": " + mStrings[i]);
158        }
159    }
160
161    /**
162     * Loads the type ID list.
163     */
164    void loadTypeIds() throws IOException {
165        int count = mHeaderItem.typeIdsSize;
166        mTypeIds = new TypeIdItem[count];
167
168        //System.out.println("reading " + count + " typeIds");
169        seek(mHeaderItem.typeIdsOff);
170        for (int i = 0; i < count; i++) {
171            mTypeIds[i] = new TypeIdItem();
172            mTypeIds[i].descriptorIdx = readInt();
173
174            //System.out.println(i + ": " + mTypeIds[i].descriptorIdx +
175            //    " " + mStrings[mTypeIds[i].descriptorIdx]);
176        }
177    }
178
179    /**
180     * Loads the proto ID list.
181     */
182    void loadProtoIds() throws IOException {
183        int count = mHeaderItem.protoIdsSize;
184        mProtoIds = new ProtoIdItem[count];
185
186        //System.out.println("reading " + count + " protoIds");
187        seek(mHeaderItem.protoIdsOff);
188
189        /*
190         * Read the proto ID items.
191         */
192        for (int i = 0; i < count; i++) {
193            mProtoIds[i] = new ProtoIdItem();
194            mProtoIds[i].shortyIdx = readInt();
195            mProtoIds[i].returnTypeIdx = readInt();
196            mProtoIds[i].parametersOff = readInt();
197
198            //System.out.println(i + ": " + mProtoIds[i].shortyIdx +
199            //    " " + mStrings[mProtoIds[i].shortyIdx]);
200        }
201
202        /*
203         * Go back through and read the type lists.
204         */
205        for (int i = 0; i < count; i++) {
206            ProtoIdItem protoId = mProtoIds[i];
207
208            int offset = protoId.parametersOff;
209
210            if (offset == 0) {
211                protoId.types = new int[0];
212                continue;
213            } else {
214                seek(offset);
215                int size = readInt();       // #of entries in list
216                protoId.types = new int[size];
217
218                for (int j = 0; j < size; j++) {
219                    protoId.types[j] = readShort() & 0xffff;
220                }
221            }
222        }
223    }
224
225    /**
226     * Loads the field ID list.
227     */
228    void loadFieldIds() throws IOException {
229        int count = mHeaderItem.fieldIdsSize;
230        mFieldIds = new FieldIdItem[count];
231
232        //System.out.println("reading " + count + " fieldIds");
233        seek(mHeaderItem.fieldIdsOff);
234        for (int i = 0; i < count; i++) {
235            mFieldIds[i] = new FieldIdItem();
236            mFieldIds[i].classIdx = readShort() & 0xffff;
237            mFieldIds[i].typeIdx = readShort() & 0xffff;
238            mFieldIds[i].nameIdx = readInt();
239
240            //System.out.println(i + ": " + mFieldIds[i].nameIdx +
241            //    " " + mStrings[mFieldIds[i].nameIdx]);
242        }
243    }
244
245    /**
246     * Loads the method ID list.
247     */
248    void loadMethodIds() throws IOException {
249        int count = mHeaderItem.methodIdsSize;
250        mMethodIds = new MethodIdItem[count];
251
252        //System.out.println("reading " + count + " methodIds");
253        seek(mHeaderItem.methodIdsOff);
254        for (int i = 0; i < count; i++) {
255            mMethodIds[i] = new MethodIdItem();
256            mMethodIds[i].classIdx = readShort() & 0xffff;
257            mMethodIds[i].protoIdx = readShort() & 0xffff;
258            mMethodIds[i].nameIdx = readInt();
259
260            //System.out.println(i + ": " + mMethodIds[i].nameIdx +
261            //    " " + mStrings[mMethodIds[i].nameIdx]);
262        }
263    }
264
265    /**
266     * Loads the class defs list.
267     */
268    void loadClassDefs() throws IOException {
269        int count = mHeaderItem.classDefsSize;
270        mClassDefs = new ClassDefItem[count];
271
272        //System.out.println("reading " + count + " classDefs");
273        seek(mHeaderItem.classDefsOff);
274        for (int i = 0; i < count; i++) {
275            mClassDefs[i] = new ClassDefItem();
276            mClassDefs[i].classIdx = readInt();
277
278            /* access_flags = */ readInt();
279            /* superclass_idx = */ readInt();
280            /* interfaces_off = */ readInt();
281            /* source_file_idx = */ readInt();
282            /* annotations_off = */ readInt();
283            /* class_data_off = */ readInt();
284            /* static_values_off = */ readInt();
285
286            //System.out.println(i + ": " + mClassDefs[i].classIdx + " " +
287            //    mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]);
288        }
289    }
290
291    /**
292     * Sets the "internal" flag on type IDs which are defined in the
293     * DEX file or within the VM (e.g. primitive classes and arrays).
294     */
295    void markInternalClasses() {
296        for (int i = mClassDefs.length -1; i >= 0; i--) {
297            mTypeIds[mClassDefs[i].classIdx].internal = true;
298        }
299
300        for (int i = 0; i < mTypeIds.length; i++) {
301            String className = mStrings[mTypeIds[i].descriptorIdx];
302
303            if (className.length() == 1) {
304                // primitive class
305                mTypeIds[i].internal = true;
306            } else if (className.charAt(0) == '[') {
307                mTypeIds[i].internal = true;
308            }
309
310            //System.out.println(i + " " +
311            //    (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " +
312            //    mStrings[mTypeIds[i].descriptorIdx]);
313        }
314    }
315
316
317    /*
318     * =======================================================================
319     *      Queries
320     * =======================================================================
321     */
322
323    /**
324     * Returns the class name, given an index into the type_ids table.
325     */
326    private String classNameFromTypeIndex(int idx) {
327        return mStrings[mTypeIds[idx].descriptorIdx];
328    }
329
330    /**
331     * Returns an array of method argument type strings, given an index
332     * into the proto_ids table.
333     */
334    private String[] argArrayFromProtoIndex(int idx) {
335        ProtoIdItem protoId = mProtoIds[idx];
336        String[] result = new String[protoId.types.length];
337
338        for (int i = 0; i < protoId.types.length; i++) {
339            result[i] = mStrings[mTypeIds[protoId.types[i]].descriptorIdx];
340        }
341
342        return result;
343    }
344
345    /**
346     * Returns a string representing the method's return type, given an
347     * index into the proto_ids table.
348     */
349    private String returnTypeFromProtoIndex(int idx) {
350        ProtoIdItem protoId = mProtoIds[idx];
351        return mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx];
352    }
353
354    /**
355     * Returns an array with all of the class references that don't
356     * correspond to classes in the DEX file.  Each class reference has
357     * a list of the referenced fields and methods associated with
358     * that class.
359     */
360    public ClassRef[] getExternalReferences() {
361        // create a sparse array of ClassRef that parallels mTypeIds
362        ClassRef[] sparseRefs = new ClassRef[mTypeIds.length];
363
364        // create entries for all externally-referenced classes
365        int count = 0;
366        for (int i = 0; i < mTypeIds.length; i++) {
367            if (!mTypeIds[i].internal) {
368                sparseRefs[i] =
369                    new ClassRef(mStrings[mTypeIds[i].descriptorIdx]);
370                count++;
371            }
372        }
373
374        // add fields and methods to the appropriate class entry
375        addExternalFieldReferences(sparseRefs);
376        addExternalMethodReferences(sparseRefs);
377
378        // crunch out the sparseness
379        ClassRef[] classRefs = new ClassRef[count];
380        int idx = 0;
381        for (int i = 0; i < mTypeIds.length; i++) {
382            if (sparseRefs[i] != null)
383                classRefs[idx++] = sparseRefs[i];
384        }
385
386        assert idx == count;
387
388        return classRefs;
389    }
390
391    /**
392     * Runs through the list of field references, inserting external
393     * references into the appropriate ClassRef.
394     */
395    private void addExternalFieldReferences(ClassRef[] sparseRefs) {
396        for (int i = 0; i < mFieldIds.length; i++) {
397            if (!mTypeIds[mFieldIds[i].classIdx].internal) {
398                FieldIdItem fieldId = mFieldIds[i];
399                FieldRef newFieldRef = new FieldRef(
400                        classNameFromTypeIndex(fieldId.classIdx),
401                        classNameFromTypeIndex(fieldId.typeIdx),
402                        mStrings[fieldId.nameIdx]);
403                sparseRefs[mFieldIds[i].classIdx].addField(newFieldRef);
404            }
405        }
406    }
407
408    /**
409     * Runs through the list of method references, inserting external
410     * references into the appropriate ClassRef.
411     */
412    private void addExternalMethodReferences(ClassRef[] sparseRefs) {
413        for (int i = 0; i < mMethodIds.length; i++) {
414            if (!mTypeIds[mMethodIds[i].classIdx].internal) {
415                MethodIdItem methodId = mMethodIds[i];
416                MethodRef newMethodRef = new MethodRef(
417                        classNameFromTypeIndex(methodId.classIdx),
418                        argArrayFromProtoIndex(methodId.protoIdx),
419                        returnTypeFromProtoIndex(methodId.protoIdx),
420                        mStrings[methodId.nameIdx]);
421                sparseRefs[mMethodIds[i].classIdx].addMethod(newMethodRef);
422            }
423        }
424    }
425
426    /*
427     * BEGIN MODIFIED SECTION
428     */
429
430    /**
431     * Returns the list of all method references.
432     * @return method refs
433     */
434    public MethodRef[] getMethodRefs() {
435        MethodRef[] methodRefs = new MethodRef[mMethodIds.length];
436        for (int i = 0; i < mMethodIds.length; i++) {
437            MethodIdItem methodId = mMethodIds[i];
438            methodRefs[i] = new MethodRef(
439                    classNameFromTypeIndex(methodId.classIdx),
440                    argArrayFromProtoIndex(methodId.protoIdx),
441                    returnTypeFromProtoIndex(methodId.protoIdx),
442                    mStrings[methodId.nameIdx]);
443        }
444        return methodRefs;
445    }
446
447    public FieldRef[] getFieldRefs() {
448        FieldRef[] fieldRefs = new FieldRef[mFieldIds.length];
449        for (int i = 0; i < mFieldIds.length; i++) {
450            FieldIdItem fieldId = mFieldIds[i];
451            fieldRefs[i] = new FieldRef(
452                    classNameFromTypeIndex(fieldId.classIdx),
453                    classNameFromTypeIndex(fieldId.typeIdx),
454                    mStrings[fieldId.nameIdx]);
455        }
456        return fieldRefs;
457    }
458
459    /*
460     * END MODIFIED SECTION
461     */
462
463
464
465    /*
466     * =======================================================================
467     *      Basic I/O functions
468     * =======================================================================
469     */
470
471    /**
472     * Seeks the DEX file to the specified absolute position.
473     */
474    void seek(int position) throws IOException {
475        mDexFile.seek(position);
476    }
477
478    /**
479     * Fills the buffer by reading bytes from the DEX file.
480     */
481    void readBytes(byte[] buffer) throws IOException {
482        mDexFile.readFully(buffer);
483    }
484
485    /**
486     * Reads a single signed byte value.
487     */
488    byte readByte() throws IOException {
489        mDexFile.readFully(tmpBuf, 0, 1);
490        return tmpBuf[0];
491    }
492
493    /**
494     * Reads a signed 16-bit integer, byte-swapping if necessary.
495     */
496    short readShort() throws IOException {
497        mDexFile.readFully(tmpBuf, 0, 2);
498        if (isBigEndian) {
499            return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8));
500        } else {
501            return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8));
502        }
503    }
504
505    /**
506     * Reads a signed 32-bit integer, byte-swapping if necessary.
507     */
508    int readInt() throws IOException {
509        mDexFile.readFully(tmpBuf, 0, 4);
510
511        if (isBigEndian) {
512            return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) |
513                   ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24);
514        } else {
515            return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) |
516                   ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24);
517        }
518    }
519
520    /**
521     * Reads a variable-length unsigned LEB128 value.  Does not attempt to
522     * verify that the value is valid.
523     *
524     * @throws java.io.EOFException if we run off the end of the file
525     */
526    int readUnsignedLeb128() throws IOException {
527        int result = 0;
528        byte val;
529
530        do {
531            val = readByte();
532            result = (result << 7) | (val & 0x7f);
533        } while (val < 0);
534
535        return result;
536    }
537
538    /**
539     * Reads a UTF-8 string.
540     *
541     * We don't know how long the UTF-8 string is, so we have to read one
542     * byte at a time.  We could make an educated guess based on the
543     * utf16_size and seek back if we get it wrong, but seeking backward
544     * may cause the underlying implementation to reload I/O buffers.
545     */
546    String readString() throws IOException {
547        int utf16len = readUnsignedLeb128();
548        byte inBuf[] = new byte[utf16len * 3];      // worst case
549        int idx;
550
551        for (idx = 0; idx < inBuf.length; idx++) {
552            byte val = readByte();
553            if (val == 0)
554                break;
555            inBuf[idx] = val;
556        }
557
558        return new String(inBuf, 0, idx, "UTF-8");
559    }
560
561
562    /*
563     * =======================================================================
564     *      Internal "structure" declarations
565     * =======================================================================
566     */
567
568    /**
569     * Holds the contents of a header_item.
570     */
571    static class HeaderItem {
572        public int fileSize;
573        public int headerSize;
574        public int endianTag;
575        public int stringIdsSize, stringIdsOff;
576        public int typeIdsSize, typeIdsOff;
577        public int protoIdsSize, protoIdsOff;
578        public int fieldIdsSize, fieldIdsOff;
579        public int methodIdsSize, methodIdsOff;
580        public int classDefsSize, classDefsOff;
581
582        /* expected magic values */
583        public static final byte[] DEX_FILE_MAGIC_v035 =
584            "dex\n035\0".getBytes(StandardCharsets.US_ASCII);
585
586        // Dex version 036 skipped because of an old dalvik bug on some versions
587        // of android where dex files with that version number would erroneously
588        // be accepted and run. See: art/runtime/dex_file.cc
589
590        // V037 was introduced in API LEVEL 24
591        public static final byte[] DEX_FILE_MAGIC_v037 =
592            "dex\n037\0".getBytes(StandardCharsets.US_ASCII);
593
594        // V038 was introduced in API LEVEL 26
595        public static final byte[] DEX_FILE_MAGIC_v038 =
596            "dex\n038\0".getBytes(StandardCharsets.US_ASCII);
597
598        // V039 was introduced in API LEVEL 28
599        public static final byte[] DEX_FILE_MAGIC_v039 =
600            "dex\n039\0".getBytes(StandardCharsets.US_ASCII);
601
602        public static final int ENDIAN_CONSTANT = 0x12345678;
603        public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412;
604    }
605
606    /**
607     * Holds the contents of a type_id_item.
608     *
609     * This is chiefly a list of indices into the string table.  We need
610     * some additional bits of data, such as whether or not the type ID
611     * represents a class defined in this DEX, so we use an object for
612     * each instead of a simple integer.  (Could use a parallel array, but
613     * since this is a desktop app it's not essential.)
614     */
615    static class TypeIdItem {
616        public int descriptorIdx;       // index into string_ids
617
618        public boolean internal;        // defined within this DEX file?
619    }
620
621    /**
622     * Holds the contents of a proto_id_item.
623     */
624    static class ProtoIdItem {
625        public int shortyIdx;           // index into string_ids
626        public int returnTypeIdx;       // index into type_ids
627        public int parametersOff;       // file offset to a type_list
628
629        public int types[];             // contents of type list
630    }
631
632    /**
633     * Holds the contents of a field_id_item.
634     */
635    static class FieldIdItem {
636        public int classIdx;            // index into type_ids (defining class)
637        public int typeIdx;             // index into type_ids (field type)
638        public int nameIdx;             // index into string_ids
639    }
640
641    /**
642     * Holds the contents of a method_id_item.
643     */
644    static class MethodIdItem {
645        public int classIdx;            // index into type_ids
646        public int protoIdx;            // index into proto_ids
647        public int nameIdx;             // index into string_ids
648    }
649
650    /**
651     * Holds the contents of a class_def_item.
652     *
653     * We don't really need a class for this, but there's some stuff in
654     * the class_def_item that we might want later.
655     */
656    static class ClassDefItem {
657        public int classIdx;            // index into type_ids
658    }
659}