180 lines
4.3 KiB
ArmAsm
180 lines
4.3 KiB
ArmAsm
|
/*
|
||
|
* Copyright (C) 2013-2014 Apple Inc. All rights reserved.
|
||
|
*
|
||
|
* This document is the property of Apple Inc.
|
||
|
* It is considered confidential and proprietary.
|
||
|
*
|
||
|
* This document may not be reproduced or transmitted in any form,
|
||
|
* in whole or in part, without the express written permission of
|
||
|
* Apple Inc.
|
||
|
*/
|
||
|
|
||
|
#ifdef __arm64__
|
||
|
|
||
|
// Code section; instructions are 4-byte (2^2) aligned.
        .text
        .p2align 2

// LZSS parameters shared by the decoder below.
#define N           4096    /* size of ring buffer - must be power of 2 */
#define F           18      /* upper limit for match_length */
#define THRESHOLD   2       /* encode string into position and length if match_length is greater than this */
|
||
|
|
||
|
//-----------------------------------------------------------------------
// int decompress_lzss(u_int8_t *dst, u_int32_t dstlen,
//                     u_int8_t *src, u_int32_t srclen)
//
// LZSS decoder using an N-byte ring buffer that lives on the stack.
//
// In:    x0 = dst     output pointer
//        w1 = dstlen  never read (x1 is reused as scratch / flags)
//        x2 = src     input pointer
//        w3 = srclen  input length, used once to compute srcend
// Out:   x0 = number of bytes written (dst - dststart)
// Clobb: x1-x12, v0, NZCV
// Stack: N bytes below the 16-byte-rounded entry sp; sp is restored
//        before returning. Leaf function, no calls.
//
// NOTE(review): dstlen is ignored, so output is not bounded by the
// destination size. Confirm every caller sizes dst for the worst case.
//
// Reference C implementation:
//
//      u_int8_t text_buf[N + F - 1];
//      u_int8_t *dststart = dst;
//      const u_int8_t *srcend = src + srclen;
//      int i, j, k, r, c;
//      unsigned int flags;
//
//      for (i = 0; i < N - F; i++) text_buf[i] = 0x20;
//      r = N - F;
//      flags = 0;
//      for ( ; ; ) {
//          if (((flags >>= 1) & 0x100) == 0) {
//              if (src < srcend) c = *src++; else break;
//              flags = c | 0xFF00;
//          }
//          if (flags & 1) {
//              if (src < srcend) c = *src++; else break;
//              *dst++ = c;
//              text_buf[r++] = c;
//              r &= (N - 1);
//          } else {
//              if (src < srcend) i = *src++; else break;
//              if (src < srcend) j = *src++; else break;
//              i |= ((j & 0xF0) << 4);
//              j = (j & 0x0F) + THRESHOLD;
//              for (k = 0; k <= j; k++) {
//                  c = text_buf[(i + k) & (N - 1)];
//                  *dst++ = c;
//                  text_buf[r++] = c;
//                  r &= (N - 1);
//              }
//          }
//      }
//      return dst - dststart;
//-----------------------------------------------------------------------

        .globl  _decompress_lzss_vec
_decompress_lzss_vec:

// Register roles
#define dst         x0      /* output cursor */
#define flags       w1      /* control byte plus 0xFF00 sentinel bits */
#define src         x2      /* input cursor */
#define srcend      x3      /* one past the last input byte */
#define r           w4      /* ring buffer write index */
#define c           w5      /* current byte */
#define i           w6      /* match position in the ring buffer */
#define j           w7      /* match length - 1 */
#define t           w8      /* scratch */
#define spsave      x9      /* sp on entry */
#define k           w10     /* copy loop counter */
#define Nm1         w11     /* N - 1, ring index mask */
#define dststart    x12     /* dst on entry */
#define text_buf    sp      /* ring buffer base */

        mov     spsave, sp                      // sp is moved below, keep the entry value
        mov     dststart, dst

        // Carve the ring buffer out of the stack. Only N bytes are needed
        // because every index is masked with N - 1 (the reference code
        // reserves N + F - 1 but the extra bytes are never addressed here).
        and     x1, spsave, #0xfffffffffffffff0 // round down to a 16-byte boundary
        sub     sp, x1, #N

        // Pre-fill text_buf with 0x20 (ASCII space). N-F+2 = 4080 bytes is
        // 255 stores of 16 bytes; this covers the N-F bytes the reference
        // fills plus 2 more so the count is a multiple of 16.
        movi.16b v0, #0x20
        mov     x4, sp                          // x4 = fill cursor (r is not live yet)
        mov     x1, #(N-F+2)                    // x1 = bytes left to fill
0:
        st1.4s  {v0}, [x4], #16
        subs    x1, x1, #16
        b.gt    0b

        mov     r, #(N-F)                       // r = N - F
        mov     Nm1, #(N-1)                     // mask for circular indexing
        mov     flags, #0                       // flags = 0

        // srclen is a 32-bit argument: zero-extend w3 rather than trusting
        // the upper half of x3.
        add     srcend, src, w3, uxtw           // srcend = src + srclen

L_for_loop:
        // Shift out the bit just consumed. When the sentinel bit 8 has
        // been shifted away, all 8 control bits are used: fetch a new one.
        lsr     flags, flags, #1
        tbnz    flags, #8, L_skip1

        cmp     src, srcend
        b.hs    L_break_for_loop                // out of input (unsigned pointer compare)
        ldrb    c, [src], #1                    // c = *src++
        orr     flags, c, #0xff00               // flags = c | 0xFF00

L_skip1:
        tbz     flags, #0, L_skip2              // control bit 0 -> match, 1 -> literal

        // Literal: copy one input byte to the output and the ring buffer.
        cmp     src, srcend
        b.hs    L_break_for_loop
        ldrb    c, [src], #1                    // c = *src++
        strb    c, [text_buf, r, uxtw]          // text_buf[r] = c
        add     r, r, #1                        // r++
        strb    c, [dst], #1                    // *dst++ = c
        and     r, r, Nm1                       // r &= (N - 1)
        b       L_for_loop

L_skip2:
        // Match: needs two input bytes. Check before loading so nothing
        // is read at or beyond srcend. src <= srcend always holds here,
        // so the subtraction cannot wrap.
        sub     x8, srcend, src                 // x8 = input bytes remaining
        cmp     x8, #2
        b.lo    L_break_for_loop

        ldrh    t, [src], #2                    // t = byte0 | (byte1 << 8)
        and     i, t, #0xff                     // i = byte0
        lsr     j, t, #8                        // j = byte1

        and     t, j, #0xf0                     // high nibble of byte1
        and     j, j, #0x0f                     // low nibble of byte1
        orr     i, i, t, lsl #4                 // i |= ((j & 0xF0) << 4)
        add     j, j, #THRESHOLD                // j = (j & 0x0F) + THRESHOLD

        // Copy j + 1 bytes from the ring buffer, one at a time so that a
        // source range overlapping the write index sees the new bytes.
        mov     k, #0
0:
        add     t, i, k
        and     t, t, Nm1                       // (i + k) & (N - 1)
        ldrb    c, [text_buf, t, uxtw]          // c = text_buf[(i + k) & (N - 1)]
        strb    c, [dst], #1                    // *dst++ = c
        strb    c, [text_buf, r, uxtw]          // text_buf[r] = c
        add     r, r, #1                        // r++
        and     r, r, Nm1                       // r &= (N - 1)
        add     k, k, #1                        // k++
        cmp     k, j
        b.ls    0b                              // while (k <= j)
        b       L_for_loop

L_break_for_loop:
        sub     x0, dst, dststart               // return dst - dststart
        mov     sp, spsave                      // release the ring buffer
        ret
|
||
|
|
||
|
#endif // __arm64__
|
||
|
|