/*
 * Copyright (C) 2006 Apple Computer, Inc. All rights reserved.
 *
 * This document is the property of Apple Computer, Inc.
 * It is considered confidential and proprietary.
 *
 * This document may not be reproduced or transmitted in any form,
 * in whole or in part, without the express written permission of
 * Apple Computer, Inc.
 */

	.text
	.align 2

	.globl _memcpy
	.globl _bcopy
	.globl _memmove

_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
	/* swap the first two arguments into memcpy's (dest, src) order */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3

_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
	/* check for zero len or if the pointers are the same */
	cmp	r2, #0
	cmpne	r0, r1
	bxeq	lr

	/* save r0 (return value), r4 (scratch), and r5 (scratch) */
	stmfd	sp!, { r0, r4, r5 }

	/* check for overlap. r3 <- distance between src & dest */
	subhs	r3, r0, r1
	sublo	r3, r1, r0
	cmp	r3, r2			/* if distance(src, dest) < len, we have overlap */
	blo	Loverlap

Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? */
	mov	r12, r0, lsl #30
	cmp	r12, r1, lsl #30
	bne	Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp	r2, #64
	blt	Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment */
	tst	r0, #0xf
	bne	Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r7, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop:
	/* copy 64 bytes at a time */
	ldmia	r1!, { r3, r4, r5, r6, r7, r10, r11, r12 }
#if ARCH_ARMv7
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r7, r10, r11, r12 }
	ldmia	r1!, { r3, r4, r5, r6, r7, r10, r11, r12 }
	subs	r2, r2, #64
#if ARCH_ARMv7
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r7, r10, r11, r12 }
	bge	L64loop

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r7, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp	r2, #16
	ldmiage	r1!, { r3, r4, r5, r12 }
	stmiage	r0!, { r3, r4, r5, r12 }
	subsge	r2, r2, #16
	bgt	Llessthan64_aligned
	beq	Lexit

Llessthan16_aligned:
	/* shift the remaining len (0-15) into the N/Z/C/V flags: N copies 8 bytes, Z copies 4, C copies 2, V copies 1 */
	mov	r2, r2, lsl #28
	msr	cpsr_f, r2

	ldmiami	r1!, { r2, r3 }
	ldreq	r4, [r1], #4
	ldrhcs	r5, [r1], #2
	ldrbvs	r12, [r1], #1

	stmiami	r0!, { r2, r3 }
	streq	r4, [r0], #4
	strhcs	r5, [r0], #2
	strbvs	r12, [r0], #1
	b	Lexit

Lsimilarlyunaligned:
	/* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
	mov	r12, r0, lsl #28
	rsb	r12, r12, #0
	msr	cpsr_f, r12

	ldrbvs	r3, [r1], #1
	ldrhcs	r4, [r1], #2
	ldreq	r5, [r1], #4

	strbvs	r3, [r0], #1
	strhcs	r4, [r0], #2
	streq	r5, [r0], #4

	ldmiami	r1!, { r3, r4 }
	stmiami	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28
	beq	Lexit

Lunaligned_32:
	/* bring up to dest 32 byte alignment */
	tst	r0, #(1 << 4)
	ldmiane	r1!, { r3, r4, r5, r12 }
	stmiane	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned
	b	Llessthan64_aligned

Lbytewise2:
	/* copy 2 bytes at a time */
	subs	r2, r2, #2

	ldrb	r3, [r1], #1
	ldrbpl	r4, [r1], #1

	strb	r3, [r0], #1
	strbpl	r4, [r0], #1

	bhi	Lbytewise2
	b	Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bne	Lbytewise
	b	Lexit

Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
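	/*
	 * fewer than 4 bytes go straight to the 2 byte copier; otherwise copy
	 * single bytes until src is word aligned, then finish with the aligned
	 * 16 byte and flag-driven tail copiers above.
	 */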
	cmp	r2, #4
	blt	Lbytewise2

	/* bytewise copy until word aligned */
	tst	r1, #3
Lwordalignloop:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop

	cmp	r2, #16
	bge	Llessthan64_aligned
	blt	Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp	r0, r1			/* if dest > src */
	bhi	Loverlap_srclower

Loverlap_destlower:
	/* dest < src, see if we can still do a fast forward copy or fall back to the slow forward copy */
	cmp	r3, #64
	bge	Lnormalforwardcopy	/* overlap is greater than one stride of the copy, use normal copy */

	cmp	r3, #2
	bge	Lbytewise2
	b	Lbytewise

/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap */
	/* src += len; dest += len; */
	add	r0, r0, r2
	add	r1, r1, r2

	/* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
	cmp	r2, #64			/* less than 64 bytes to copy? */
	cmpgt	r3, #64			/* less than 64 bytes of nonoverlap? */
	blt	Lbytewise_reverse

	/* test if src and dest are word aligned dissimilarly */
	mov	r3, r0, lsl #30
	cmp	r3, r1, lsl #30
	bne	Lbytewise_reverse

	/* test if src and dest are not word aligned, or dest is not 16 byte aligned */
	tst	r0, #0xf
	bne	Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32_reverse_similarly

	/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r7, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time */
	ldmdb	r1!, { r3, r4, r5, r6, r7, r10, r11, r12 }
#if ARCH_ARMv7
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r7, r10, r11, r12 }
	ldmdb	r1!, { r3, r4, r5, r6, r7, r10, r11, r12 }
	subs	r2, r2, #64
#if ARCH_ARMv7
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r7, r10, r11, r12 }
	bge	L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r7, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Lbytewise_reverse:
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	subs	r2, r2, #1
	bne	Lbytewise_reverse
	b	Lexit

Lunaligned_reverse_similarly:
	/* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
	mov	r12, r0, lsl #28
	msr	cpsr_f, r12

	ldrbvs	r3, [r1, #-1]!
	ldrhcs	r4, [r1, #-2]!
	ldreq	r5, [r1, #-4]!

	strbvs	r3, [r0, #-1]!
	strhcs	r4, [r0, #-2]!
	streq	r5, [r0, #-4]!
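	/* N flag is bit 3 of the dest misalignment: one more 8 byte chunk brings dest down to a 16 byte boundary */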
	ldmdbmi	r1!, { r3, r4 }
	stmdbmi	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28
	beq	Lexit

Lunaligned_32_reverse_similarly:
	/* bring up to dest 32 byte alignment */
	tst	r0, #(1 << 4)
	ldmdbne	r1!, { r3, r4, r5, r12 }
	stmdbne	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned_reverse
	b	Lbytewise_reverse

/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp	r2, #8
	blt	Lbytewise2		/* not worth the effort with less than 8 bytes total */

	/* bytewise copy until src word aligned */
	tst	r1, #3
Lwordalignloop2:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop2

	/* figure out how the src and dest are unaligned */
	and	r3, r0, #3
	cmp	r3, #2
	blt	Lalign1_forward
	beq	Lalign2_forward
	bgt	Lalign3_forward

Lalign1_forward:
	/* the dest pointer is 1 byte off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #1

	/* prime the copy */
	ldrb	r4, [r0]		/* load D[7:0] */

Lalign1_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #24		/* D[7:0] = S[31:24] */
	subs	r12, r12, #1
	bne	Lalign1_forward_loop

	/* finish the copy off */
	strb	r4, [r0], #1		/* save D[7:0] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign2_forward:
	/* the dest pointer is 2 bytes off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #2

	/* prime the copy */
	ldrh	r4, [r0]		/* load D[15:0] */

Lalign2_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #16		/* D[15:0] = S[31:16] */
	subs	r12, r12, #1
	bne	Lalign2_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign3_forward:
	/* the dest pointer is 3 bytes off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #3

	/* prime the copy */
	ldr	r4, [r0]
	and	r4, r4, #0x00ffffff	/* load D[23:0] */

Lalign3_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #8		/* D[23:0] = S[31:8] */
	subs	r12, r12, #1
	bne	Lalign3_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */
	mov	r4, r4, lsr #16
	strb	r4, [r0], #1		/* save D[23:16] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lexit:
	ldmfd	sp!, { r0, r4, r5 }
	bx	lr
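/*
 * Note on the return value: Lexit pops the r0 that was pushed on entry (the
 * original dest pointer), so _memcpy and _memmove return their first argument.
 * _bcopy returns void, so callers simply ignore whatever is left in r0.
 */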