204 lines
4.7 KiB
ArmAsm
204 lines
4.7 KiB
ArmAsm
/*
|
|
* Copyright (C) 2011-2012, 2014 Apple Inc. All rights reserved.
|
|
*
|
|
* This document is the property of Apple Inc.
|
|
* It is considered confidential and proprietary.
|
|
*
|
|
* This document may not be reproduced or transmitted in any form,
|
|
* in whole or in part, without the express written permission of
|
|
* Apple Inc.
|
|
*/
|
|
|
|
#ifdef __arm__

	.syntax unified
	.text
	.p2align 2

/* LZSS tuning constants shared by the code below. */
#define N		4096	/* ring buffer size in bytes; has to be a power of two */
#define F		18	/* largest value match_length may take */
#define THRESHOLD	2	/* a string becomes a (position, length) pair only when match_length exceeds this */
|
|
|
|
// -----------------------------------------------------------------------
// int decompress_lzss(u_int8_t *dst, u_int32_t dstlen,
//                     u_int8_t *src, u_int32_t srclen)
//
// LZSS decoder. The N-byte sliding window (text_buf) lives on the stack.
//
// In:    r0 = dst      output buffer
//        r1 = dstlen   not consulted; r1 is reused to hold the flag bits
//        r2 = src      compressed input
//        r3 = srclen   input length in bytes (turned into srcend)
// Out:   r0 = number of bytes written (dst - dststart)
// Regs:  r4-r8, r10, r11 and lr are pushed on entry and popped on exit.
//        lr carries the pre-alignment sp while the window is live.
//        r12 is overwritten (dststart); q0 is overwritten when WITH_VFP.
// Stack: 32 bytes of saved registers, then an N-byte window whose base is
//        rounded down to a 16-byte boundary.
//
// NOTE(review): nothing compares dst against dstlen, so the amount of
// output is bounded only by the input stream. Callers must size dst for
// the worst case; consider adding a bound if the input can be untrusted.
//
// Reference C, kept next to the matching instructions below:
//
//	u_int8_t text_buf[N + F - 1];
//	u_int8_t * dststart = dst;
//	const u_int8_t * srcend = src + srclen;
//	int i, j, k, r, c;
//	unsigned int flags;
// -----------------------------------------------------------------------

	.globl _decompress_lzss_vec
_decompress_lzss_vec:

	// dst = dststart;
	mov	r12, r0

	push	{r4-r8,r10-r11,lr}

	// Carve the window out of the stack. The aligned address is computed
	// in r4 and then moved into sp, because writing sp directly from a
	// logical instruction is deprecated in ARM state and UNPREDICTABLE in
	// Thumb-2. N bytes are enough: the extra F-1 bytes of the C array are
	// never touched because every index is masked with N-1.
	mov	lr, sp				// remember sp for the epilogue
	mov	r4, sp
	bic	r4, r4, #15			// round down to 16 bytes
	sub	r4, r4, #N
	mov	sp, r4				// sp = r4 -> text_buf[0]

	// for (i = 0; i < N; i++) text_buf[i] = 0x20;
	// The reference code only fills the first N-F bytes. Filling all N
	// keeps every window byte defined before a back-reference can read it
	// and keeps the byte count a multiple of the 16-byte store.
	mov	r5, #32
	add	r5, r5, r5, lsl #8
	add	r5, r5, r5, lsl #16		// r5 = 0x20202020
	mov	r1, #N				// bytes left to fill

#if WITH_VFP
	vdup.32	q0, r5
#else
	mov	r6, r5
	mov	r7, r5
	mov	r8, r5
#endif

loop0:
#if WITH_VFP
	vst1.f32 {q0}, [r4,:128]!
#else
	stmia	r4!, {r5-r8}
#endif
	subs	r1, #16
	bgt	loop0

	// r = N - F;
	// flags = 0;
	mov	r4, #N
	sub	r11, r4, #1			// r11 = N-1, mask for wrapping window indices
	sub	r4, #F				// r = N-F
	mov	r1, #0				// flags = 0

#define dststart	r12
#define dst		r0
#define flags		r1
#define src		r2
#define srcend		r3
#define r		r4
#define c		r5
#define i		r6
#define j		r7
#define t		r8
#define k		r10
#define text_buf	sp
#define Nm1		r11

	add	srcend, src, r3			// srcend = src + srclen

	// for ( ; ; ) {
for_loop:

	//	if (((flags >>= 1) & 0x100) == 0) {
	//		if (src < srcend) c = *src++; else break;
	//		flags = c | 0xFF00;
	//	}
	lsr	flags, #1			// flags >>= 1
	tst	flags, #0x100			// marker bit still present?
	bne	skip1				// yes: bits of the current flag byte remain
	cmp	src, srcend
	bhs	break_for_loop			// unsigned: these are addresses
	ldrb	c, [src], #1			// c = *src++
	orr	flags, c, #0x00ff00		// flags = c | 0xFF00

skip1:
	//	if (flags & 1) {
	//		if (src < srcend) c = *src++; else break;
	//		*dst++ = c;
	//		text_buf[r++] = c;
	//		r &= (N - 1);
	tst	flags, #1
	beq	skip2
	cmp	src, srcend
	bhs	break_for_loop			// unsigned: these are addresses
	ldrb	c, [src], #1			// c = *src++
	strb	c, [text_buf, r]		// text_buf[r] = c
	add	r, #1				// r++
	strb	c, [dst], #1			// *dst++ = c
	and	r, Nm1				// r &= (N - 1)
	b	for_loop

	//	} else {
skip2:
	//		if (src < srcend) i = *src++; else break;
	//		if (src < srcend) j = *src++; else break;
	// Both bytes are fetched with one halfword load, so make sure two
	// bytes really remain BEFORE loading; src never exceeds srcend here,
	// so the subtraction cannot go negative.
	// NOTE(review): src may be odd, so this relies on unaligned halfword
	// loads being permitted, and on little-endian data access.
	sub	t, srcend, src			// bytes of input left
	cmp	t, #2
	blo	break_for_loop
	ldrh	i, [src], #2			// low byte = i, high byte = j
	lsr	j, i, #8
	and	i, i, #0xff

	//		i |= ((j & 0xF0) << 4);
	//		j = (j & 0x0F) + THRESHOLD;
	and	t, j, #0xf0			// j & 0xf0
	and	j, #0x0f			// j & 0x0f
	orr	i, i, t, lsl #4			// i |= ((j & 0xF0) << 4)
	add	j, #THRESHOLD			// j = (j & 0x0F) + THRESHOLD

	//		for (k = 0; k <= j; k++) {
	//			c = text_buf[(i + k) & (N - 1)];
	//			*dst++ = c;
	//			text_buf[r++] = c;
	//			r &= (N - 1);
	//		}
	mov	k, #0

k_loop:
	add	t, i, k				// i + k
	and	t, Nm1				// (i + k) & (N - 1)
	ldrb	c, [text_buf, t]		// c = text_buf[(i + k) & (N - 1)]
	strb	c, [dst], #1			// *dst++ = c
	strb	c, [text_buf, r]		// text_buf[r] = c
	add	r, #1				// r++
	and	r, Nm1				// r &= (N - 1)
	add	k, #1				// k++
	cmp	k, j
	ble	k_loop				// repeat while k <= j
	b	for_loop

	//	}
	// }

break_for_loop:
	// return dst - dststart;
	sub	dst, dststart

	mov	sp, lr				// drop the window and the alignment padding
	pop	{r4-r8,r10-r11,pc}
|
|
|
|
#endif // __arm__
|
|
|