/*
 * Copyright (C) 2006 Apple Computer, Inc. All rights reserved.
 *
 * This document is the property of Apple Computer, Inc.
 * It is considered confidential and proprietary.
 *
 * This document may not be reproduced or transmitted in any form,
 * in whole or in part, without the express written permission of
 * Apple Computer, Inc.
 */

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
 * cores.
 *
 * The algorithm is to align the destination pointer on a 32 byte boundary and then
 * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
 */

	.text
	.align 2

	.globl _memset
/* void *memset(void *ptr, int c, size_t len); */
_memset:
	/* move len into r1, unpack c into r2 */
	mov		r3, r2
	and		r1, r1, #0xff
	orr		r1, r1, r1, lsl #8
	orr		r2, r1, r1, lsl #16
	mov		r1, r3
	b		Lbzeroengine

	.globl _bzero
/* void bzero(void *ptr, size_t len); */
_bzero:
	/* zero out r2 so we can be just like memset(0) */
	mov		r2, #0

Lbzeroengine:
	/* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
	mov		r12, r0

	/* copy r2 into r3 for 64-bit stores */
	mov		r3, r2

	/* check for zero len */
	cmp		r1, #0
	bxeq	lr

	/* fall back to a bytewise store for less than 32 bytes */
	cmp		r1, #32
	blt		L_bytewise

	/* check for 32 byte unaligned ptr */
	tst		r12, #0x1f
	bne		L_unaligned

	/* make sure we have more than 64 bytes to zero */
	cmp		r1, #64
	blt		L_lessthan64aligned

/* >= 64 bytes of len, 32 byte aligned */
L_64ormorealigned:

	/* we need some registers, avoid r9 */
	stmfd	sp!, { r4-r8, r10 }
	mov		r4, r2
	mov		r5, r2
	mov		r6, r2
	mov		r7, r2
	mov		r8, r2
	mov		r10, r2

	/* pre-subtract 64 from the len to avoid an extra compare in the loop */
	sub		r1, r1, #64

L_64loop:
	stmia	r12!, { r2-r8, r10 }
	subs	r1, r1, #64
	stmia	r12!, { r2-r8, r10 }
	bge		L_64loop

	/* restore the saved regs */
	ldmfd	sp!, { r4-r8, r10 }

	/* check for completion (had previously subtracted an extra 64 from len) */
	adds	r1, r1, #64
	bxeq	lr

L_lessthan64aligned:
	/* do we have 16 or more bytes left */
	cmp		r1, #16
	stmiage	r12!, { r2-r3 }
	stmiage	r12!, { r2-r3 }
	subsge	r1, r1, #16
	bgt		L_lessthan64aligned
	bxeq	lr

L_lessthan16aligned:
	/* store 0 to 15 bytes */
	mov		r1, r1, lsl #28		/* move the remaining len bits [3:0] to the flags area of cpsr */
	msr		cpsr_f, r1

	stmiami	r12!, { r2-r3 }		/* n is set, store 8 bytes */
	streq	r2, [r12], #4		/* z is set, store 4 bytes */
	strhcs	r2, [r12], #2		/* c is set, store 2 bytes */
	strbvs	r2, [r12], #1		/* v is set, store 1 byte */
	bx		lr

L_bytewise:
	/* bytewise copy, 2 bytes at a time, alignment not guaranteed */
	subs	r1, r1, #2
	strb	r2, [r12], #1
	strbpl	r2, [r12], #1
	bhi		L_bytewise
	bx		lr

L_unaligned:
	/* unaligned on 32 byte boundary, store 1-15 bytes until we're 16 byte aligned */
	mov		r3, r12, lsl #28
	rsb		r3, r3, #0x00000000
	msr		cpsr_f, r3

	strbvs	r2, [r12], #1		/* v is set, unaligned in the 1s column */
	strhcs	r2, [r12], #2		/* c is set, unaligned in the 2s column */
	streq	r2, [r12], #4		/* z is set, unaligned in the 4s column */

	strmi	r2, [r12], #4		/* n is set, unaligned in the 8s column */
	strmi	r2, [r12], #4

	subs	r1, r1, r3, lsr #28
	bxeq	lr

	/* we had previously trashed r3, restore it */
	mov		r3, r2

	/* now make sure we're 32 byte aligned */
	tst		r12, #(1 << 4)
	stmiane	r12!, { r2-r3 }
	stmiane	r12!, { r2-r3 }
	subsne	r1, r1, #16

	/* we're now aligned, check for >= 64 bytes left */
	cmp		r1, #64
	bge		L_64ormorealigned
	b		L_lessthan64aligned
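
/*
 * Illustrative only, not assembled: a rough C-equivalent sketch of the strategy
 * described in the header comment (align the destination to 32 bytes, then fill
 * 64 bytes per loop iteration). The function name memset_sketch and the plain
 * byte loops are hypothetical stand-ins for the stmia bursts above; this is a
 * reading aid, not the implementation.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void *memset_sketch(void *dst, int c, size_t len)
 *	{
 *		unsigned char *p = dst;
 *		unsigned char byte = (unsigned char)c;
 *
 *		if (len < 32) {				// short fills: bytewise, as in L_bytewise
 *			while (len--)
 *				*p++ = byte;
 *			return dst;
 *		}
 *		while ((uintptr_t)p & 0x1f) {		// align to a 32 byte boundary (L_unaligned)
 *			*p++ = byte;
 *			len--;
 *		}
 *		while (len >= 64) {			// main loop: 64 bytes per pass (L_64loop)
 *			for (int i = 0; i < 64; i++)	// done above as two 8-register stmia stores
 *				p[i] = byte;
 *			p += 64;
 *			len -= 64;
 *		}
 *		while (len--)				// trailing 0-63 bytes (L_lessthan64aligned and below)
 *			*p++ = byte;
 *		return dst;
 *	}
 */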