/* * Copyright (c) 2008 ARM Ltd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "arm_asm.h" #include <_ansi.h> #include <string.h> #ifdef __thumb2__ #define magic1(REG) "#0x01010101" #define magic2(REG) "#0x80808080" #else #define magic1(REG) #REG #define magic2(REG) #REG ", lsl #7" #endif char* __attribute__((naked)) strcpy (char* dst, const char* src) { asm ( ".syntax unified\n\t" #if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ (defined (__thumb__) && !defined (__thumb2__))) #ifdef _ISA_ARM_7 "pld [r1]\n\t" #endif "eor r2, r0, r1\n\t" "mov ip, r0\n\t" "tst r2, #3\n\t" "bne 4f\n\t" "tst r1, #3\n\t" "bne 3f\n" "5:\n\t" #ifndef __thumb2__ "str r5, [sp, #-4]!\n\t" "mov r5, #0x01\n\t" "orr r5, r5, r5, lsl #8\n\t" "orr r5, r5, r5, lsl #16\n\t" #endif "str r4, [sp, #-4]!\n\t" "tst r1, #4\n\t" "ldr r3, [r1], #4\n\t" "beq 2f\n\t" "sub r2, r3, "magic1(r5)"\n\t" "bics r2, r2, r3\n\t" "tst r2, "magic2(r5)"\n\t" "itt eq\n\t" "streq r3, [ip], #4\n\t" "ldreq r3, [r1], #4\n" "bne 1f\n\t" /* Inner loop. We now know that r1 is 64-bit aligned, so we can safely fetch up to two words. This allows us to avoid load stalls. */ ".p2align 2\n" "2:\n\t" #ifdef _ISA_ARM_7 "pld [r1, #8]\n\t" #endif "ldr r4, [r1], #4\n\t" "sub r2, r3, "magic1(r5)"\n\t" "bics r2, r2, r3\n\t" "tst r2, "magic2(r5)"\n\t" "sub r2, r4, "magic1(r5)"\n\t" "bne 1f\n\t" "str r3, [ip], #4\n\t" "bics r2, r2, r4\n\t" "tst r2, "magic2(r5)"\n\t" "itt eq\n\t" "ldreq r3, [r1], #4\n\t" "streq r4, [ip], #4\n\t" "beq 2b\n\t" "mov r3, r4\n" "1:\n\t" #ifdef __ARMEB__ "rors r3, r3, #24\n\t" #endif "strb r3, [ip], #1\n\t" "tst r3, #0xff\n\t" #ifdef __ARMEL__ "ror r3, r3, #8\n\t" #endif "bne 1b\n\t" "ldr r4, [sp], #4\n\t" #ifndef __thumb2__ "ldr r5, [sp], #4\n\t" #endif "bx lr\n" /* Strings have the same offset from word alignment, but it's not zero. */ "3:\n\t" "tst r1, #1\n\t" "beq 1f\n\t" "ldrb r2, [r1], #1\n\t" "strb r2, [ip], #1\n\t" "cmp r2, #0\n\t" "it eq\n" "bxeq lr\n" "1:\n\t" "tst r1, #2\n\t" "beq 5b\n\t" "ldrh r2, [r1], #2\n\t" #ifdef __ARMEB__ "tst r2, #0xff00\n\t" "iteet ne\n\t" "strhne r2, [ip], #2\n\t" "lsreq r2, r2, #8\n\t" "strbeq r2, [ip]\n\t" "tstne r2, #0xff\n\t" #else "tst r2, #0xff\n\t" "itet ne\n\t" "strhne r2, [ip], #2\n\t" "strbeq r2, [ip]\n\t" "tstne r2, #0xff00\n\t" #endif "bne 5b\n\t" "bx lr\n" /* src and dst do not have a common word-alignement. Fall back to byte copying. */ "4:\n\t" "ldrb r2, [r1], #1\n\t" "strb r2, [ip], #1\n\t" "cmp r2, #0\n\t" "bne 4b\n\t" "bx lr\n\t" #elif !defined (__thumb__) || defined (__thumb2__) "mov r3, r0\n\t" "1:\n\t" "ldrb r2, [r1], #1\n\t" "strb r2, [r3], #1\n\t" "cmp r2, #0\n\t" "bne 1b\n\t" "bx lr\n\t" #else "movs r3, r0\n\t" "1:\n\t" "ldrb r2, [r1]\n\t" "adds r1, #1\n\t" "strb r2, [r3]\n\t" "adds r3, #1\n\t" "cmp r2, #0\n\t" "bne 1b\n\t" "bx lr\n\t" #endif ); }