Friday, August 02, 2013

A Dalvik Bytecode Reference for Android Hackers (1)

This post is for Android hackers who want to reverse engineer Android applications (APKs). It helps you understand what Dalvik bytecode instructions (in Android) look like by showing a series of examples. Each example is a triple: a piece of Java code, the corresponding Java bytecode instructions, and the corresponding Dalvik bytecode instructions, all of which have the same meaning.


1. Constants, Local variables, Control structure


// Java (int)
int i;
for (i = 0; i < 100; i++) {
   ;  // Loop body is empty
}

// Java Bytecode
0   iconst_0  // Push int constant 0
1   istore_1  // Store into local variable 1 (i=0)
2   goto 8  // First time through don't increment
5   iinc 1 1  // Increment local variable 1 by 1 (i++)
8   iload_1  // Push local variable 1 (i)
9   bipush 100  // Push int constant 100
11  if_icmplt 5  // Compare and loop if less than (i < 100)
14  return  // Return void when done

// DEX Bytecode
 .registers 3
    .prologue
    .line 64
    const/4 v0, 0x0
    .local v0, i:I
    :goto_1
    const/16 v1, 0x64
    if-lt v0, v1, :cond_6
    .line 67
    return-void
    .line 64
    :cond_6
    add-int/lit8 v0, v0, 0x1
    goto :goto_1



// Java (double)
double i;
for (i = 0.0; i < 100.0; i++) {
  ;   // Loop body is empty
}

// Java Bytecode
 0  dconst_0  // Push double constant 0.0
 1  dstore_1  // Store into local variables 1 and 2
 2  goto 9  // First time through don't increment
 5  dload_1  // Push local variables 1 and 2
 6  dconst_1  // Push double constant 1.0
 7  dadd  // Add; there is no dinc instruction
 8  dstore_1  // Store result in local variables 1 and 2
 9  dload_1  // Push local variables 1 and 2
10  ldc2_w #4   // Push double constant 100.0
13  dcmpg  // There is no if_dcmplt instruction
14  iflt 5  // Compare and loop if less than (i < 100.0)
17  return  // Return void when done

// DEX Bytecode
 .registers 5
    .prologue
    .line 110
    const-wide/16 v0, 0x0
    .local v0, i:D
    :goto_2
    const-wide/high16 v2, 0x4059
    cmpg-double v2, v0, v2
    if-ltz v2, :cond_9
    .line 113
    return-void
    .line 110
    :cond_9
    const-wide/high16 v2, 0x3ff0
    add-double/2addr v0, v2
    goto :goto_2


// Java (short)
short i;
for (i = 0; i < 100; i++) {
  ;   // Loop body is empty
}

// Java Bytecode
 0   iconst_0
 1   istore_1
 2   goto 10
 5   iload_1  // The short is treated as though an int
 6   iconst_1
 7  iadd
 8   i2s  // Truncate int to short
 9   istore_1
10   iload_1
11   bipush 100
13   if_icmplt 5
16   return

// DEX Bytecode
 .registers 3
    .prologue
    .line 159
    const/4 v0, 0x0
    .local v0, i:S
    :goto_1
    const/16 v1, 0x64
    if-lt v0, v1, :cond_6
    .line 162
    return-void
    .line 159
    :cond_6
    add-int/lit8 v1, v0, 0x1
    int-to-short v0, v1
    goto :goto_1


2. Arithmetic Expressions

// Java
int align2grain(int i, int grain) {
  return ((i + grain-1) & ~(grain-1));
}

// Java Bytecode
Method int align2grain(int,int)
 0   iload_1
 1   iload_2
 2   iadd
 3   iconst_1
 4   isub
 5   iload_2
 6   iconst_1
 7   isub
 8   iconst_m1
 9   ixor
10  iand
11  ireturn

// DEX Bytecode
.method align2grain(II)I
    .registers 5
    .parameter "i"
    .parameter "grain"
    .prologue
    .line 192
    add-int v0, p1, p2
    add-int/lit8 v0, v0, -0x1
    add-int/lit8 v1, p2, -0x1
    xor-int/lit8 v1, v1, -0x1
    and-int/2addr v0, v1
    return v0
.end method


3. Runtime Constant Pools

// Java
void useManyNumeric() {
  int i = 100;
  int j = 1000000;
  long l1 = 1;
  long l2 = 0xffffffff;
  double d = 2.2;
  //...do some calculations...
}

// Java Bytecode
Method void useManyNumeric()
 0  bipush 100  // Push a small int with bipush
 2  istore_1
 3   ldc #1   // Push int constant 1000000; a larger int
  // value uses ldc
 5   istore_2
 6   lconst_1  // A tiny long value uses short, fast lconst_1
 7   lstore_3
 8  ldc2_w #6   // Push long 0xffffffff (that is, an int -1); any
  // long constant value can be pushed using ldc2_w
11  lstore 5
13   ldc2_w #8   // Push double constant 2.200000; uncommon
  // double values are also pushed using ldc2_w
16   dstore 7
//...do those calculations...


// DEX Bytecode
.method useManyNumeric()V
    .registers 9
    .prologue
    .line 236
    const/16 v2, 0x64
    .line 237
    .local v2, i:I
    const v3, 0xf4240
    .line 238
    .local v3, j:I
    const-wide/16 v4, 0x1
    .line 239
    .local v4, l1:J
    const-wide/16 v6, -0x1
    .line 240
    .local v6, l2:J
    const-wide v0, 0x400199999999999aL
    .line 242
    .local v0, d:D
    return-void
.end method


4. Control Structure

// Java (int)
void whileInt() {
  int i = 0;
  while (i < 100) {
  i++;
  }
}

// Java Bytecode
Method void whileInt()
 0   iconst_0
 1   istore_1
 2   goto 8
 5   iinc 1 1
 8   iload_1
 9   bipush 100
11   if_icmplt 5
14   return

// DEX Bytecode
.method whileInt()V
    .registers 3
    .prologue
    .line 273
    const/4 v0, 0x0
    .line 274
    .local v0, i:I
    :goto_1
    const/16 v1, 0x64
    if-lt v0, v1, :cond_6
    .line 277
    return-void
    .line 275
    :cond_6
    add-int/lit8 v0, v0, 0x1
    goto :goto_1
.end method



// Java (double)
void whileDouble() {
  double i = 0.0;
  while (i < 100.1) {
  i++;
  }
}

// Java Bytecode
Method void whileDouble()
 0   dconst_0
 1   dstore_1
 2   goto 9
 5   dload_1
 6   dconst_1
 7   dadd
 8   dstore_1
 9   dload_1
10   ldc2_w #4   // Push double constant 100.1
13   dcmpg  // To do the compare and branch we have to use...
14   iflt 5  // ...two instructions
17   return

// DEX Bytecode
.method whileDouble()V
    .registers 5
    .prologue
    .line 298
    const-wide/16 v0, 0x0
    .line 299
    .local v0, i:D
    :goto_2
    const-wide v2, 0x4059066666666666L
    cmpg-double v2, v0, v2
    if-ltz v2, :cond_c
    .line 302
    return-void
    .line 300
    :cond_c
    const-wide/high16 v2, 0x3ff0
    add-double/2addr v0, v2
    goto :goto_2
.end method


// Java (if, double)
int lessThan100(double d) {
  if (d < 100.0) {
  return 1; 
  } else {
  return -1; 
  }
}

// Java Bytecode
Method int lessThan100(double)
 0   dload_1
 1   ldc2_w #4   // Push double constant 100.0
 4   dcmpg  // Push 1 if d is NaN or d > 100.0;
  // push 0 if d == 100.0
 5   ifge 10  // Branch on 0 or 1
 8   iconst_1
 9   ireturn
10   iconst_m1
11   ireturn

// DEX Bytecode
.method lessThan100(D)I
    .registers 5
    .parameter "d"
    .prologue
    .line 327
    const-wide/high16 v0, 0x4059
    cmpg-double v0, p1, v0
    if-gez v0, :cond_8
    .line 328
    const/4 v0, 0x1
    .line 330
    :goto_7
    return v0
    :cond_8
    const/4 v0, -0x1
    goto :goto_7
.end method


5. Method Arguments

// Java
int addTwo(int i, int j) {
  return i + j;
}

// Java Bytecode
Method int addTwo(int,int)
 0  iload_1  // Push value of local variable 1 (i)
 1  iload_2  // Push value of local variable 2 (j)
 2  iadd  // Add; leave int result on operand stack
 3   ireturn  // Return int result

// DEX Bytecode
.method addTwo(II)I
    .registers 4
    .parameter "i"
    .parameter "j"
    .prologue
    .line 386
    add-int v0, p1, p2
    return v0
.end method


// Java (Static method arguments)
static int addTwoStatic(int i, int j) {
  return i + j;
}

// Java Bytecode
Method int addTwoStatic(int,int)
 0   iload_0
 1  iload_1
 2   iadd
 3   ireturn

// DEX Bytecode
.method static addTwoStatic(II)I
    .registers 3
    .parameter "i"
    .parameter "j"
    .prologue
    .line 404
    add-int v0, p0, p1
    return v0
.end method


6. Method Invocation

// Java
int add12and13() {
  return addTwo(12, 13);
}
int addTwo(int x, int y)
{
  return x + y;
}

// Java Bytecode
Method int add12and13()
 0   aload_0  // Push local variable 0 (this)
 1   bipush 12  // Push int constant 12
 3   bipush 13  // Push int constant 13
 5   invokevirtual #4  // Method Example.addTwo(II)I
 8  ireturn  // Return int on top of operand stack; it is
  // the int result of addTwo()

// DEX Bytecode
.method add12and13()I
    .registers 3
    .prologue
    .line 427
    const/16 v0, 0xc
    const/16 v1, 0xd
    invoke-virtual {p0, v0, v1}, Lcom/java/test/Section7_7_1;->addTwo(II)I
    move-result v0
    return v0
.end method

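// Java (static method invocation). Note that there is an important error here: addTwoStatic below is declared without the static modifier, so the DEX listing uses invoke-virtual {p0, ...} instead of the invoke-static that would correspond to the invokestatic in the Java bytecode.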
int add12and13() {
  return addTwoStatic(12, 13);
}
int addTwoStatic(int x, int y)
{
  return x + y;
}

// Java Bytecode
Method int add12and13()
 0   bipush 12
 2   bipush 13
 4   invokestatic #3   // Method Example.addTwoStatic(II)I
 7   ireturn

// DEX Bytecode
.method add12and13()I
    .registers 3
    .prologue
    .line 458
    const/16 v0, 0xc
    const/16 v1, 0xd
    invoke-virtual {p0, v0, v1}, Lcom/java/test/Section7_7_2;->addTwoStatic(II)I
    move-result v0
    return v0
.end method



// Java (super)
class Near {
  int it;
  public int getItNear() {
  return getIt();
  }
  private int getIt() {
  return it;
  }
}
class Far extends Near {
  int getItFar() {
  return super.getItNear();
  }
}

// Java Bytecode
Method int getItNear()
 0   aload_0
 1   invokespecial #5   // Method Near.getIt()I
 4   ireturn
Method int getItFar()
 0   aload_0
 1   invokespecial #4  // Method Near.getItNear()I
 4  ireturn

// DEX Bytecode
.method public getItNear()I
    .registers 2
    .prologue
    .line 483
    invoke-direct {p0}, Lcom/java/test/Section7_7_3$Near;->getIt()I
    move-result v0
    return v0
.end method
.method getItFar()I
    .registers 2
    .prologue
    .line 491
    invoke-super {p0}, Lcom/java/test/Section7_7_3$Near;->getItNear()I
    move-result v0
    return v0
.end method


7. Object 

// Java
Object create() {
  return new Object();
}

// Java Bytecode
Method java.lang.Object create()
 0   new #1   // Class java.lang.Object
 3   dup
 4   invokespecial #4   // Method java.lang.Object.<init>()V
 7   areturn

// DEX Bytecode
.method create()Ljava/lang/Object;
    .registers 2
    .prologue
    .line 521
    new-instance v0, Ljava/lang/Object;
    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
    return-object v0
.end method


// Java
int i;  // An instance variable
MyObj example() {
  MyObj o = new MyObj();
  return silly(o);
}
MyObj silly(MyObj o) {
  if (o != null) {
  return o;
  } else {
  return o;
  }
}

// Java Bytecode
Method MyObj example()
 0   new #2   // Class MyObj
 3   dup
 4   invokespecial #5   // Method MyObj.<init>()V
 7   astore_1
 8   aload_0
 9   aload_1
10   invokevirtual #4   // Method Example.silly(LMyObj;)LMyObj;
13   areturn
Method MyObj silly(MyObj)
 0   aload_1
 1   ifnull 6
 4  aload_1
 5   areturn
 6   aload_1
 7   areturn

// DEX Bytecode
.method example()Lcom/java/test/MyObj;
    .registers 3
    .prologue
    .line 539
    new-instance v0, Lcom/java/test/MyObj;
    invoke-direct {v0}, Lcom/java/test/MyObj;-><init>()V
    .line 540
    .local v0, o:Lcom/java/test/MyObj;
    invoke-virtual {p0, v0}, Lcom/java/test/Section7_8_2;->silly(Lcom/java/test/MyObj;)Lcom/java/test/MyObj;
    move-result-object v1
    return-object v1
.end method

.method silly(Lcom/java/test/MyObj;)Lcom/java/test/MyObj;
    .registers 2
    .parameter "o"
    .prologue
    .line 543
    if-eqz p1, :cond_2
    .line 546
    :cond_2
    return-object p1
.end method



// Java
int i;
void setIt(int value) {
  i = value;
}
int getIt() {
  return i;
}

// Java Bytecode
Method void setIt(int)
 0   aload_0
 1   iload_1
 2   putfield #4   // Field Example.i I
 5  return
Method int getIt()
 0   aload_0
 1   getfield #4   // Field Example.i I
 4  ireturn

// DEX Bytecode
.method getIt()I
    .registers 2
    .prologue
    .line 585
    iget v0, p0, Lcom/java/test/Section7_8_3;->i:I
    return v0
.end method
.method setIt(I)V
    .registers 2
    .parameter "value"
    .prologue
    .line 582
    iput p1, p0, Lcom/java/test/Section7_8_3;->i:I
    .line 583
    return-void
.end method





No comments: