iBoot/lib/lzss/arm/lzssdec.S

204 lines
4.7 KiB
ArmAsm

/*
* Copyright (C) 2011-2012, 2014 Apple Inc. All rights reserved.
*
* This document is the property of Apple Inc.
* It is considered confidential and proprietary.
*
* This document may not be reproduced or transmitted in any form,
* in whole or in part, without the express written permission of
* Apple Inc.
*/
#ifdef __arm__
// Everything from here to the matching #endif is assembled only when the
// preprocessor defines __arm__.
.text
// Select the unified ARM/Thumb assembler syntax for the code below.
.syntax unified
// NOTE(review): this repeats the .text directive above and looks redundant.
.text
.align 2
// LZSS parameters shared with the decoder below. A back-reference token
// carries a 12-bit ring-buffer offset and a 4-bit length field; the decoder
// copies (length field + THRESHOLD + 1) bytes, i.e. between 3 and F bytes.
#define N 4096 /* size of ring buffer - must be power of 2 */
#define F 18 /* upper limit for match_length */
#define THRESHOLD 2 /* encode string into position and length if match_length is greater than this */
/*
 * _decompress_lzss_vec - LZSS decoder using an N-byte ring buffer on the stack.
 *
 * C-equivalent prototype:
 *	int decompress_lzss(u_int8_t *dst, u_int32_t dstlen,
 *			    u_int8_t *src, u_int32_t srclen);
 *
 * In:	r0 = dst	output buffer
 *	r1 = dstlen	ignored (r1 is reused as scratch and then as flags)
 *	r2 = src	compressed input
 *	r3 = srclen	length of the compressed input in bytes
 * Out:	r0 = number of bytes written to dst
 *
 * Saved and restored: r4-r8, r10, r11 (and the caller sp, parked in lr).
 * Clobbered: r1-r3, r12, condition flags, and q0 when WITH_VFP is non-zero.
 * r9 is not touched.
 * Stack: 32 bytes of saved registers, up to 15 bytes of alignment padding,
 * and N bytes for the ring buffer.
 *
 * NOTE(review): dstlen is never checked, so the output size is bounded only
 * by the contents of the compressed stream. Callers must size dst for the
 * worst case; confirm this matches what callers expect.
 *
 * NOTE(review): the two-byte back-reference token is fetched with a single
 * ldrh from a possibly odd address, so unaligned halfword loads must be
 * permitted on the target (unchanged from the previous implementation).
 *
 * Reference algorithm:
 *	r = N - F; flags = 0;
 *	for ( ; ; ) {
 *		if (((flags >>= 1) & 0x100) == 0) {
 *			if (src < srcend) c = *src++; else break;
 *			flags = c | 0xFF00;
 *		}
 *		if (flags & 1) {
 *			if (src < srcend) c = *src++; else break;
 *			*dst++ = c; text_buf[r++] = c; r &= (N - 1);
 *		} else {
 *			if (src < srcend) i = *src++; else break;
 *			if (src < srcend) j = *src++; else break;
 *			i |= ((j & 0xF0) << 4);
 *			j = (j & 0x0F) + THRESHOLD;
 *			for (k = 0; k <= j; k++) {
 *				c = text_buf[(i + k) & (N - 1)];
 *				*dst++ = c; text_buf[r++] = c; r &= (N - 1);
 *			}
 *		}
 *	}
 *	return dst - dststart;
 */
	.globl	_decompress_lzss_vec

// Register roles inside the decode loop.
#define dststart	r12	/* dst as passed in, for the return value */
#define dst		r0	/* output cursor */
#define flags		r1	/* control byte with 0xFF00 sentinel bits */
#define src		r2	/* input cursor */
#define srcend		r3	/* one past the last input byte */
#define r		r4	/* ring-buffer write index */
#define c		r5	/* byte being moved */
#define i		r6	/* back-reference offset */
#define j		r7	/* back-reference length - 1 */
#define t		r8	/* scratch */
#define k		r10	/* copy counter */
#define text_buf	sp	/* base of the ring buffer */
#define Nm1		r11	/* N - 1, ring index mask */

_decompress_lzss_vec:
	mov	dststart, dst			// remember where the output starts
	push	{r4-r8,r10-r11,lr}
	mov	lr, sp				// park the unaligned sp; restored on exit
	mov	r4, #0xfffffff0
	and	sp, r4				// round sp down to a 16-byte boundary
	mov	r5, #32				// 0x20, ASCII space
	sub	sp, #N				// ring buffer = sp .. sp + N - 1
	add	r5, r5, r5, lsl #8		// r5 = 0x2020
	mov	r4, sp				// r4 = fill cursor, starts at text_buf[0]
	add	r5, r5, r5, lsl #16		// r5 = 0x20202020

	// Pre-fill the complete ring with spaces, 16 bytes per pass. The
	// reference only needs text_buf[0 .. N-F-1]; filling all N bytes keeps
	// the pass count whole and guarantees that a malformed back-reference
	// can never copy stale stack contents into the output.
	mov	r1, #N				// bytes left to fill
#if WITH_VFP
	vdup.32	q0, r5
#else
	mov	r6, r5
	mov	r7, r5
	mov	r8, r5
#endif
loop0:
#if WITH_VFP
	vst1.f32 {q0}, [r4,:128]!
#else
	stmia	r4!, {r5-r8}
#endif
	subs	r1, #16
	bgt	loop0

	mov	r, #N
	sub	Nm1, r, #1			// Nm1 = N - 1
	sub	r, #F				// r = N - F
	mov	flags, #0			// forces a control-byte fetch on the first pass
	add	srcend, src, r3			// srcend = src + srclen (r3 held srclen)

	// Invariant from here on: src <= srcend. All input bounds checks are
	// unsigned because they compare addresses.
for_loop:
	lsr	flags, #1			// flags >>= 1
	tst	flags, #0x100			// sentinel bit still present?
	bne	skip1				// yes: control bits remain
	cmp	src, srcend
	bhs	break_for_loop			// input exhausted
	ldrb	c, [src], #1			// c = *src++
	orr	flags, c, #0x00ff00		// flags = c | 0xFF00
skip1:
	tst	flags, #1
	beq	skip2				// bit clear: back-reference token

	// Literal byte: copy it to the output and into the ring.
	cmp	src, srcend
	bhs	break_for_loop			// input exhausted
	ldrb	c, [src], #1			// c = *src++
	strb	c, [text_buf, r]		// text_buf[r] = c
	add	r, #1				// r++
	strb	c, [dst], #1			// *dst++ = c
	and	r, Nm1				// r &= (N - 1)
	b	for_loop

skip2:
	// Back-reference: needs two more input bytes. Verify both are
	// available before touching memory so nothing past srcend is read.
	sub	t, srcend, src			// t = bytes remaining (no underflow, src <= srcend)
	cmp	t, #2
	blo	break_for_loop			// fewer than two bytes left
	ldrh	i, [src], #2			// low byte = first token byte, high byte = second
	lsr	j, i, #8			// j = second byte
	and	i, i, #0xff			// i = first byte
	and	t, j, #0xf0			// t = j & 0xF0
	and	j, #0x0f			// j &= 0x0F
	orr	i, i, t, lsl #4			// i |= ((j & 0xF0) << 4)
	add	j, #THRESHOLD			// j = (j & 0x0F) + THRESHOLD
	mov	k, #0

	// Copy j + 1 bytes out of the ring. The source and destination ranges
	// may overlap, so the copy must stay byte-by-byte.
k_loop:
	add	t, i, k				// t = i + k
	and	t, Nm1				// t = (i + k) & (N - 1)
	ldrb	c, [text_buf, t]		// c = text_buf[t]
	strb	c, [dst], #1			// *dst++ = c
	strb	c, [text_buf, r]		// text_buf[r] = c
	add	r, #1				// r++
	and	r, Nm1				// r &= (N - 1)
	add	k, #1				// k++
	cmp	k, j
	ble	k_loop				// while (k <= j)
	b	for_loop

break_for_loop:
	sub	dst, dststart			// return dst - dststart
	mov	sp, lr				// drop ring buffer and alignment padding
	pop	{r4-r8,r10-r11,pc}
#endif // __arm__