/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/proc_reg.h>

/*
 * Overlap-safe memory copy routines for 32-bit ARM.
 *
 *   void  bcopy(const void *src, void *dest, size_t len);
 *   void  ovbcopy(const void *src, void *dest, size_t len);
 *   void *memcpy(void *dest, const void *src, size_t len);
 *   void *memmove(void *dest, const void *src, size_t len);
 *
 * All four entry points share one body: bcopy/ovbcopy swap their first two
 * arguments and fall into memcpy/memmove.  Register roles throughout:
 *   r0       = dest (advanced during the copy; original value is returned)
 *   r1       = src  (advanced during the copy)
 *   r2       = bytes remaining
 *   r3, r12  = scratch
 *   r4-r6, r8, r10, r11 = extra scratch for block copies (callee-saved,
 *                         preserved on the stack where used)
 *
 * NOTE(review): the Lalign*_forward paths merge bytes into words with
 * orr/lsl and therefore assume little-endian data layout -- confirm if
 * this is ever built big-endian.
 */

.syntax unified
.text
.align 2

	.globl _ovbcopy
	.globl _memcpy
	.globl _bcopy
	.globl _memmove

_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
_ovbcopy:
	/* bcopy takes (src, dest); swap r0/r1 so the shared body below
	 * always sees r0 = dest, r1 = src */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3

_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
	/* trivial cases: len == 0, or dest == src -- nothing to copy */
	cmp	r2, #0
	cmpne	r0, r1
	bxeq	lr

	/* save r0 (so Lexit can return the original dest), scratch r4/r5,
	 * and set up a frame pointer chain in r7/lr */
	stmfd	sp!, { r0, r4, r5, r7, lr }
	add	r7, sp, #12

	/* r3 <- |dest - src|.  The hs/lo conditions reuse the flags from
	 * the "cmpne r0, r1" above (stmfd/add do not touch the flags). */
	subhs	r3, r0, r1
	sublo	r3, r1, r0
	cmp	r3, r2			/* distance(src, dest) < len => overlap */
	blo	Loverlap

Lnormalforwardcopy:
	/* are src and dest word-aligned differently? (compare low 2 bits) */
	mov	r12, r0, lsl #30
	cmp	r12, r1, lsl #30
	bne	Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp	r2, #64
	blt	Lsmallforwardcopy

	/* check for 16-byte dest (and, since alignments match, src)
	 * unalignment */
	tst	r0, #0xf
	bne	Lsimilarlyunaligned

	/* check for 32-byte dest unalignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare
	 * in the loop (bge below tests the post-subtract count) */
	sub	r2, r2, #64

L64loop:
	/* copy 64 bytes at a time: two 8-register (32-byte) bursts, with a
	 * preload hint ahead of each store burst */
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	pld	[r1, #32]
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
	pld	[r1, #32]
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (we pre-subtracted an extra 64 from it)
	 * and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time while at least 16 bytes remain */
	cmp	r2, #16
	ldmiage	r1!, { r3, r4, r5, r12 }
	stmiage	r0!, { r3, r4, r5, r12 }
	subsge	r2, r2, #16
	bgt	Llessthan64_aligned
	beq	Lexit
	/* fall through with 0 < r2 < 16 */

Llessthan16_aligned:
	/* Shift the low 4 bits of the remaining count into the CPSR flags:
	 * bit3->N, bit2->Z, bit1->C, bit0->V.  Each conditional transfer
	 * below then moves 8/4/2/1 bytes exactly when its bit is set. */
	mov	r2, r2, lsl #28
	msr	cpsr_f, r2

	ldmiami	r1!, { r2, r3 }		/* N set: 8 bytes */
	ldreq	r4, [r1], #4		/* Z set: 4 bytes */
	ldrhcs	r5, [r1], #2		/* C set: 2 bytes */
	ldrbvs	r12, [r1], #1		/* V set: 1 byte  */

	stmiami	r0!, { r2, r3 }
	streq	r4, [r0], #4
	strhcs	r5, [r0], #2
	strbvs	r12, [r0], #1
	b	Lexit

Lsimilarlyunaligned:
	/* src and dest share the same misalignment: copy 16 - (dest & 0xf)
	 * leading bytes so dest becomes 16-byte aligned.  r12 = -(dest<<28):
	 * its top nibble is 16 - (dest & 0xf), and loading it into the
	 * flags drives the conditional 1/2/4/8-byte transfers (V=1 byte,
	 * C=2, Z=4, N=8), mirroring Llessthan16_aligned. */
	mov	r12, r0, lsl #28
	rsb	r12, r12, #0
	msr	cpsr_f, r12

	ldrbvs	r3, [r1], #1
	ldrhcs	r4, [r1], #2
	ldreq	r5, [r1], #4

	strbvs	r3, [r0], #1
	strhcs	r4, [r0], #2
	streq	r5, [r0], #4

	ldmiami	r1!, { r3, r4 }
	stmiami	r0!, { r3, r4 }

	/* r12 lsr #28 = number of alignment bytes just copied */
	subs	r2, r2, r12, lsr #28
	beq	Lexit
	/* fall through: dest is now 16-byte aligned */

Lunaligned_32:
	/* bring dest up to 32-byte alignment with one 16-byte chunk */
	tst	r0, #(1 << 4)
	ldmiane	r1!, { r3, r4, r5, r12 }
	stmiane	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned; pick the copy method by remaining len */
	cmp	r2, #64
	bge	Lmorethan64_aligned
	b	Llessthan64_aligned

Lbytewise2:
	/* forward copy, 2 bytes per iteration; an odd trailing byte is
	 * handled by the pl condition (subs left N clear => 2 bytes were
	 * available) */
	subs	r2, r2, #2

	ldrb	r3, [r1], #1
	ldrbpl	r4, [r1], #1		/* second byte only if >= 2 remained */

	strb	r3, [r0], #1
	strbpl	r4, [r0], #1

	bhi	Lbytewise2		/* loop while result was > 0 */
	b	Lexit

Lbytewise:
	/* simple bytewise forward copy (len > 0 guaranteed by callers) */
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bne	Lbytewise
	b	Lexit

Lsmallforwardcopy:
	/* src and dest are word-aligned similarly, fewer than 64 bytes */
	cmp	r2, #4
	blt	Lbytewise2

	/* bytewise copy until src (and hence dest) is word aligned */
	tst	r1, #3
Lwordalignloop:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop

	cmp	r2, #16
	bge	Llessthan64_aligned
	blt	Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0;
	 * r3 still holds |dest - src| from the entry sequence */
	cmp	r0, r1
	bhi	Loverlap_srclower	/* dest above src => copy backwards */

Loverlap_destlower:
	/* dest < src: a forward copy is safe since loads stay ahead of
	 * stores; take the fast path only when the overlap distance covers
	 * one full 64-byte stride of the block copy */
	cmp	r3, #64
	bge	Lnormalforwardcopy

	/* distance >= 2 permits the 2-bytes-per-iteration copy */
	cmp	r3, #2
	bge	Lbytewise2
	b	Lbytewise

	/* the following routines copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap: must copy backwards.
	 * Point r0/r1 one past the end of each buffer. */
	add	r0, r0, r2
	add	r1, r1, r2

	/* use the 64-byte reverse block copy only when there are at least
	 * 64 bytes to move AND at least 64 bytes of non-overlap distance
	 * (cmpgt only runs -- and so only re-sets the flags -- when
	 * len > 64) */
	cmp	r2, #64			/* fewer than 64 bytes to copy? */
	cmpgt	r3, #64			/* fewer than 64 bytes of non-overlap? */
	blt	Lbytewise_reverse

	/* test whether src and dest are word-aligned differently */
	mov	r3, r0, lsl #30
	cmp	r3, r1, lsl #30
	bne	Lbytewise_reverse

	/* test whether dest is non 16-byte aligned */
	tst	r0, #0xf
	bne	Lunaligned_reverse_similarly

	/* test for dest 32-byte alignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32_reverse_similarly

	/* 64-byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare
	 * in the loop */
	sub	r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time, descending */
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#if ARCH_ARMv5 || ARCH_ARMv5e || ARCH_ARMv6
	/* NOTE(review): only this pld is conditionally assembled; the other
	 * pld uses in this file are unconditional -- confirm intent */
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
	pld	[r1, #-32]
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (we pre-subtracted an extra 64 from it)
	 * and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Lbytewise_reverse:
	/* simple bytewise reverse copy (pre-decrement addressing) */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	subs	r2, r2, #1
	bne	Lbytewise_reverse
	b	Lexit

Lunaligned_reverse_similarly:
	/* src and dest share the same misalignment: copy (dest & 0xf)
	 * trailing bytes backwards so dest drops to a 16-byte boundary.
	 * r12's top nibble = dest & 0xf; loaded into the flags it drives
	 * the conditional 1/2/4/8-byte reverse transfers (V=1 byte, C=2,
	 * Z=4, N=8). */
	mov	r12, r0, lsl #28
	msr	cpsr_f, r12

	ldrbvs	r3, [r1, #-1]!
	ldrhcs	r4, [r1, #-2]!
	ldreq	r5, [r1, #-4]!

	strbvs	r3, [r0, #-1]!
	strhcs	r4, [r0, #-2]!
	streq	r5, [r0, #-4]!

	ldmdbmi	r1!, { r3, r4 }
	stmdbmi	r0!, { r3, r4 }

	/* r12 lsr #28 = number of alignment bytes just copied */
	subs	r2, r2, r12, lsr #28
	beq	Lexit
	/* fall through: dest is now 16-byte aligned */

Lunaligned_32_reverse_similarly:
	/* bring dest down to 32-byte alignment with one 16-byte chunk */
	tst	r0, #(1 << 4)
	ldmdbne	r1!, { r3, r4, r5, r12 }
	stmdbne	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned; pick the copy method by remaining len */
	cmp	r2, #64
	bge	Lmorethan64_aligned_reverse
	b	Lbytewise_reverse

	/* the following routines deal with dissimilarly word-aligned
	 * forward copies */
Lnonwordaligned_forward:
	cmp	r2, #8
	blt	Lbytewise2		/* too short to amortize the word-merge
					 * setup below (the original comment
					 * said 24 bytes; the threshold coded
					 * here is 8) */

	/* bytewise copy until src is word aligned; consumes at most 3
	 * bytes, so at least 5 (>= one whole word) remain afterwards */
	tst	r1, #3
Lwordalignloop2:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop2

	/* dispatch on how dest is unaligned relative to the word-aligned
	 * src (dest & 3 is 1, 2, or 3 here -- 0 would have taken the
	 * similarly-aligned path) */
	and	r3, r0, #3
	cmp	r3, #2
	blt	Lalign1_forward
	beq	Lalign2_forward
	bgt	Lalign3_forward

Lalign1_forward:
	/* src word aligned, (dest & 3) == 1: copy a word at a time, merging
	 * bytes so all stores go to the word-aligned address dest-1
	 * (little-endian layout assumed) */
	mov	r12, r2, lsr #2		/* number of whole words to copy (>= 1) */
	sub	r0, r0, #1		/* back up dest to its word boundary */

	/* prime the copy: reload the byte already stored at dest-1 so it
	 * can be merged into the first full word */
	ldrb	r4, [r0]		/* load D[7:0] */

Lalign1_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #24		/* D[7:0] = S[31:24] */
	subs	r12, r12, #1
	bne	Lalign1_forward_loop

	/* finish the copy off: flush the carried byte */
	strb	r4, [r0], #1		/* save D[7:0] */

	/* handle the 0-3 leftover bytes */
	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign2_forward:
	/* src word aligned, (dest & 3) == 2: same scheme, carrying a
	 * halfword between iterations */
	mov	r12, r2, lsr #2		/* number of whole words to copy */
	sub	r0, r0, #2		/* back up dest to its word boundary */

	/* prime the copy: reload the halfword already stored at dest-2 */
	ldrh	r4, [r0]		/* load D[15:0] */

Lalign2_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #16		/* D[15:0] = S[31:16] */
	subs	r12, r12, #1
	bne	Lalign2_forward_loop

	/* finish the copy off: flush the carried halfword */
	strh	r4, [r0], #2		/* save D[15:0] */

	/* handle the 0-3 leftover bytes */
	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign3_forward:
	/* src word aligned, (dest & 3) == 3: same scheme, carrying three
	 * bytes between iterations */
	mov	r12, r2, lsr #2		/* number of whole words to copy */
	sub	r0, r0, #3		/* back up dest to its word boundary */

	/* prime the copy: reload the 3 bytes already stored at dest-3 */
	ldr	r4, [r0]
	and	r4, r4, #0x00ffffff	/* keep D[23:0] */

Lalign3_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #8		/* D[23:0] = S[31:8] */
	subs	r12, r12, #1
	bne	Lalign3_forward_loop

	/* finish the copy off: flush the carried 3 bytes */
	strh	r4, [r0], #2		/* save D[15:0] */
	mov	r4, r4, lsr #16
	strb	r4, [r0], #1		/* save D[23:16] */

	/* handle the 0-3 leftover bytes */
	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lexit:
	/* restore the original dest into r0 (the memcpy/memmove return
	 * value), restore scratch r4/r5 and the frame, and return */
	ldmfd	sp!, { r0, r4, r5, r7, pc }
