mirror of
https://github.com/Atmosphere-NX/Atmosphere.git
synced 2025-05-29 05:55:16 -04:00
kernel_ldr: use unoptimized memcpy before mmu bringup (closes #1102)
Before the MMU is up, all reads/writes must be aligned; the optimized memcpy implementation does not guarantee all reads/writes it performs are aligned. This commit splits the libc impl to be separate for kernel/kernel_ldr, and so now only kernel will use the optimized impl. This is safe, as the MMU is brought up before kernel begins executing.
This commit is contained in:
parent
a82914d58e
commit
29358dc593
10 changed files with 723 additions and 0 deletions
31
mesosphere/kernel/source/libc/arch/arm64/asmdefs.h
Normal file
31
mesosphere/kernel/source/libc/arch/arm64/asmdefs.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Macros for asm code.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef _ASMDEFS_H
|
||||
#define _ASMDEFS_H
|
||||
|
||||
#define ENTRY_ALIGN(name, alignment) \
|
||||
.global name; \
|
||||
.type name,%function; \
|
||||
.align alignment; \
|
||||
name: \
|
||||
.cfi_startproc;
|
||||
|
||||
#define ENTRY(name) ENTRY_ALIGN(name, 6)
|
||||
|
||||
#define ENTRY_ALIAS(name) \
|
||||
.global name; \
|
||||
.type name,%function; \
|
||||
name:
|
||||
|
||||
#define END(name) \
|
||||
.cfi_endproc; \
|
||||
.size name, .-name;
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
#endif
|
133
mesosphere/kernel/source/libc/arch/arm64/memcmp.arch.arm64.s
Normal file
133
mesosphere/kernel/source/libc/arch/arm64/memcmp.arch.arm64.s
Normal file
|
@ -0,0 +1,133 @@
|
|||
/* memcmp - compare memory
|
||||
*
|
||||
* Copyright (c) 2013, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses.
|
||||
*/
|
||||
|
||||
#include "asmdefs.h"
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define limit x2
|
||||
#define result w0
|
||||
|
||||
/* Internal variables. */
|
||||
#define data1 x3
|
||||
#define data1w w3
|
||||
#define data1h x4
|
||||
#define data2 x5
|
||||
#define data2w w5
|
||||
#define data2h x6
|
||||
#define tmp1 x7
|
||||
#define tmp2 x8
|
||||
|
||||
ENTRY (memcmp)
|
||||
subs limit, limit, 8
|
||||
b.lo L(less8)
|
||||
|
||||
ldr data1, [src1], 8
|
||||
ldr data2, [src2], 8
|
||||
cmp data1, data2
|
||||
b.ne L(return)
|
||||
|
||||
subs limit, limit, 8
|
||||
b.gt L(more16)
|
||||
|
||||
ldr data1, [src1, limit]
|
||||
ldr data2, [src2, limit]
|
||||
b L(return)
|
||||
|
||||
L(more16):
|
||||
ldr data1, [src1], 8
|
||||
ldr data2, [src2], 8
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
|
||||
/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
|
||||
strings. */
|
||||
subs limit, limit, 16
|
||||
b.ls L(last_bytes)
|
||||
|
||||
/* We overlap loads between 0-32 bytes at either side of SRC1 when we
|
||||
try to align, so limit it only to strings larger than 128 bytes. */
|
||||
cmp limit, 96
|
||||
b.ls L(loop16)
|
||||
|
||||
/* Align src1 and adjust src2 with bytes not yet done. */
|
||||
and tmp1, src1, 15
|
||||
add limit, limit, tmp1
|
||||
sub src1, src1, tmp1
|
||||
sub src2, src2, tmp1
|
||||
|
||||
/* Loop performing 16 bytes per iteration using aligned src1.
|
||||
Limit is pre-decremented by 16 and must be larger than zero.
|
||||
Exit if <= 16 bytes left to do or if the data is not equal. */
|
||||
.p2align 4
|
||||
L(loop16):
|
||||
ldp data1, data1h, [src1], 16
|
||||
ldp data2, data2h, [src2], 16
|
||||
subs limit, limit, 16
|
||||
ccmp data1, data2, 0, hi
|
||||
ccmp data1h, data2h, 0, eq
|
||||
b.eq L(loop16)
|
||||
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
mov data1, data1h
|
||||
mov data2, data2h
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
|
||||
/* Compare last 1-16 bytes using unaligned access. */
|
||||
L(last_bytes):
|
||||
add src1, src1, limit
|
||||
add src2, src2, limit
|
||||
ldp data1, data1h, [src1]
|
||||
ldp data2, data2h, [src2]
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
mov data1, data1h
|
||||
mov data2, data2h
|
||||
cmp data1, data2
|
||||
|
||||
/* Compare data bytes and set return value to 0, -1 or 1. */
|
||||
L(return):
|
||||
#ifndef __AARCH64EB__
|
||||
rev data1, data1
|
||||
rev data2, data2
|
||||
#endif
|
||||
cmp data1, data2
|
||||
L(ret_eq):
|
||||
cset result, ne
|
||||
cneg result, result, lo
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Compare up to 8 bytes. Limit is [-8..-1]. */
|
||||
L(less8):
|
||||
adds limit, limit, 4
|
||||
b.lo L(less4)
|
||||
ldr data1w, [src1], 4
|
||||
ldr data2w, [src2], 4
|
||||
cmp data1w, data2w
|
||||
b.ne L(return)
|
||||
sub limit, limit, 4
|
||||
L(less4):
|
||||
adds limit, limit, 4
|
||||
beq L(ret_eq)
|
||||
L(byte_loop):
|
||||
ldrb data1w, [src1], 1
|
||||
ldrb data2w, [src2], 1
|
||||
subs limit, limit, 1
|
||||
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
|
||||
b.eq L(byte_loop)
|
||||
sub result, data1w, data2w
|
||||
ret
|
||||
|
||||
END (memcmp)
|
239
mesosphere/kernel/source/libc/arch/arm64/memcpy.arch.arm64.s
Normal file
239
mesosphere/kernel/source/libc/arch/arm64/memcpy.arch.arm64.s
Normal file
|
@ -0,0 +1,239 @@
|
|||
/*
|
||||
* memcpy - copy memory area
|
||||
*
|
||||
* Copyright (c) 2012-2020, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asmdefs.h"
|
||||
|
||||
#define dstin x0
|
||||
#define src x1
|
||||
#define count x2
|
||||
#define dst x3
|
||||
#define srcend x4
|
||||
#define dstend x5
|
||||
#define A_l x6
|
||||
#define A_lw w6
|
||||
#define A_h x7
|
||||
#define B_l x8
|
||||
#define B_lw w8
|
||||
#define B_h x9
|
||||
#define C_l x10
|
||||
#define C_lw w10
|
||||
#define C_h x11
|
||||
#define D_l x12
|
||||
#define D_h x13
|
||||
#define E_l x14
|
||||
#define E_h x15
|
||||
#define F_l x16
|
||||
#define F_h x17
|
||||
#define G_l count
|
||||
#define G_h dst
|
||||
#define H_l src
|
||||
#define H_h srcend
|
||||
#define tmp1 x14
|
||||
|
||||
/* This implementation handles overlaps and supports both memcpy and memmove
|
||||
from a single entry point. It uses unaligned accesses and branchless
|
||||
sequences to keep the code small, simple and improve performance.
|
||||
|
||||
Copies are split into 3 main cases: small copies of up to 32 bytes, medium
|
||||
copies of up to 128 bytes, and large copies. The overhead of the overlap
|
||||
check is negligible since it is only required for large copies.
|
||||
|
||||
Large copies use a software pipelined loop processing 64 bytes per iteration.
|
||||
The destination pointer is 16-byte aligned to minimize unaligned accesses.
|
||||
The loop tail is handled by always copying 64 bytes from the end.
|
||||
*/
|
||||
|
||||
ENTRY (memcpy)
|
||||
ENTRY_ALIAS (memmove)
|
||||
add srcend, src, count
|
||||
add dstend, dstin, count
|
||||
cmp count, 128
|
||||
b.hi L(copy_long)
|
||||
cmp count, 32
|
||||
b.hi L(copy32_128)
|
||||
|
||||
/* Small copies: 0..32 bytes. */
|
||||
cmp count, 16
|
||||
b.lo L(copy16)
|
||||
ldp A_l, A_h, [src]
|
||||
ldp D_l, D_h, [srcend, -16]
|
||||
stp A_l, A_h, [dstin]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
/* Copy 8-15 bytes. */
|
||||
L(copy16):
|
||||
tbz count, 3, L(copy8)
|
||||
ldr A_l, [src]
|
||||
ldr A_h, [srcend, -8]
|
||||
str A_l, [dstin]
|
||||
str A_h, [dstend, -8]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
/* Copy 4-7 bytes. */
|
||||
L(copy8):
|
||||
tbz count, 2, L(copy4)
|
||||
ldr A_lw, [src]
|
||||
ldr B_lw, [srcend, -4]
|
||||
str A_lw, [dstin]
|
||||
str B_lw, [dstend, -4]
|
||||
ret
|
||||
|
||||
/* Copy 0..3 bytes using a branchless sequence. */
|
||||
L(copy4):
|
||||
cbz count, L(copy0)
|
||||
lsr tmp1, count, 1
|
||||
ldrb A_lw, [src]
|
||||
ldrb C_lw, [srcend, -1]
|
||||
ldrb B_lw, [src, tmp1]
|
||||
strb A_lw, [dstin]
|
||||
strb B_lw, [dstin, tmp1]
|
||||
strb C_lw, [dstend, -1]
|
||||
L(copy0):
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Medium copies: 33..128 bytes. */
|
||||
L(copy32_128):
|
||||
ldp A_l, A_h, [src]
|
||||
ldp B_l, B_h, [src, 16]
|
||||
ldp C_l, C_h, [srcend, -32]
|
||||
ldp D_l, D_h, [srcend, -16]
|
||||
cmp count, 64
|
||||
b.hi L(copy128)
|
||||
stp A_l, A_h, [dstin]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstend, -32]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Copy 65..128 bytes. */
|
||||
L(copy128):
|
||||
ldp E_l, E_h, [src, 32]
|
||||
ldp F_l, F_h, [src, 48]
|
||||
cmp count, 96
|
||||
b.ls L(copy96)
|
||||
ldp G_l, G_h, [srcend, -64]
|
||||
ldp H_l, H_h, [srcend, -48]
|
||||
stp G_l, G_h, [dstend, -64]
|
||||
stp H_l, H_h, [dstend, -48]
|
||||
L(copy96):
|
||||
stp A_l, A_h, [dstin]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp E_l, E_h, [dstin, 32]
|
||||
stp F_l, F_h, [dstin, 48]
|
||||
stp C_l, C_h, [dstend, -32]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Copy more than 128 bytes. */
|
||||
L(copy_long):
|
||||
/* Use backwards copy if there is an overlap. */
|
||||
sub tmp1, dstin, src
|
||||
cbz tmp1, L(copy0)
|
||||
cmp tmp1, count
|
||||
b.lo L(copy_long_backwards)
|
||||
|
||||
/* Copy 16 bytes and then align dst to 16-byte alignment. */
|
||||
|
||||
ldp D_l, D_h, [src]
|
||||
and tmp1, dstin, 15
|
||||
bic dst, dstin, 15
|
||||
sub src, src, tmp1
|
||||
add count, count, tmp1 /* Count is now 16 too large. */
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp D_l, D_h, [dstin]
|
||||
ldp B_l, B_h, [src, 32]
|
||||
ldp C_l, C_h, [src, 48]
|
||||
ldp D_l, D_h, [src, 64]!
|
||||
subs count, count, 128 + 16 /* Test and readjust count. */
|
||||
b.ls L(copy64_from_end)
|
||||
|
||||
L(loop64):
|
||||
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
ldp B_l, B_h, [src, 32]
|
||||
stp C_l, C_h, [dst, 48]
|
||||
ldp C_l, C_h, [src, 48]
|
||||
stp D_l, D_h, [dst, 64]!
|
||||
ldp D_l, D_h, [src, 64]!
|
||||
subs count, count, 64
|
||||
b.hi L(loop64)
|
||||
|
||||
/* Write the last iteration and copy 64 bytes from the end. */
|
||||
L(copy64_from_end):
|
||||
ldp E_l, E_h, [srcend, -64]
|
||||
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [srcend, -48]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
stp C_l, C_h, [dst, 48]
|
||||
ldp C_l, C_h, [srcend, -16]
|
||||
stp D_l, D_h, [dst, 64]
|
||||
stp E_l, E_h, [dstend, -64]
|
||||
stp A_l, A_h, [dstend, -48]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
stp C_l, C_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
||||
/* Large backwards copy for overlapping copies.
|
||||
Copy 16 bytes and then align dst to 16-byte alignment. */
|
||||
L(copy_long_backwards):
|
||||
ldp D_l, D_h, [srcend, -16]
|
||||
and tmp1, dstend, 15
|
||||
sub srcend, srcend, tmp1
|
||||
sub count, count, tmp1
|
||||
ldp A_l, A_h, [srcend, -16]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
ldp C_l, C_h, [srcend, -48]
|
||||
ldp D_l, D_h, [srcend, -64]!
|
||||
sub dstend, dstend, tmp1
|
||||
subs count, count, 128
|
||||
b.ls L(copy64_from_start)
|
||||
|
||||
L(loop64_backwards):
|
||||
stp A_l, A_h, [dstend, -16]
|
||||
ldp A_l, A_h, [srcend, -16]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
stp C_l, C_h, [dstend, -48]
|
||||
ldp C_l, C_h, [srcend, -48]
|
||||
stp D_l, D_h, [dstend, -64]!
|
||||
ldp D_l, D_h, [srcend, -64]!
|
||||
subs count, count, 64
|
||||
b.hi L(loop64_backwards)
|
||||
|
||||
/* Write the last iteration and copy 64 bytes from the start. */
|
||||
L(copy64_from_start):
|
||||
ldp G_l, G_h, [src, 48]
|
||||
stp A_l, A_h, [dstend, -16]
|
||||
ldp A_l, A_h, [src, 32]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
ldp B_l, B_h, [src, 16]
|
||||
stp C_l, C_h, [dstend, -48]
|
||||
ldp C_l, C_h, [src]
|
||||
stp D_l, D_h, [dstend, -64]
|
||||
stp G_l, G_h, [dstin, 48]
|
||||
stp A_l, A_h, [dstin, 32]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstin]
|
||||
ret
|
||||
|
||||
END (memcpy)
|
172
mesosphere/kernel/source/libc/arch/arm64/memset.arch.arm64.s
Normal file
172
mesosphere/kernel/source/libc/arch/arm64/memset.arch.arm64.s
Normal file
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* memset - fill memory with a constant byte
|
||||
*
|
||||
* Copyright (c) 2012-2020, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asmdefs.h"
|
||||
|
||||
#define DC_ZVA_THRESHOLD 512
|
||||
|
||||
#define dstin x0
|
||||
#define val x1
|
||||
#define valw w1
|
||||
#define count x2
|
||||
#define dst x3
|
||||
#define dstend x4
|
||||
#define zva_val x5
|
||||
|
||||
ENTRY (memset)
|
||||
|
||||
bfi valw, valw, 8, 8
|
||||
bfi valw, valw, 16, 16
|
||||
bfi val, val, 32, 32
|
||||
|
||||
add dstend, dstin, count
|
||||
|
||||
cmp count, 96
|
||||
b.hi L(set_long)
|
||||
cmp count, 16
|
||||
b.hs L(set_medium)
|
||||
|
||||
/* Set 0..15 bytes. */
|
||||
tbz count, 3, 1f
|
||||
str val, [dstin]
|
||||
str val, [dstend, -8]
|
||||
ret
|
||||
1: tbz count, 2, 2f
|
||||
str valw, [dstin]
|
||||
str valw, [dstend, -4]
|
||||
ret
|
||||
2: cbz count, 3f
|
||||
strb valw, [dstin]
|
||||
tbz count, 1, 3f
|
||||
strh valw, [dstend, -2]
|
||||
3: ret
|
||||
|
||||
/* Set 16..96 bytes. */
|
||||
.p2align 4
|
||||
L(set_medium):
|
||||
stp val, val, [dstin]
|
||||
tbnz count, 6, L(set96)
|
||||
stp val, val, [dstend, -16]
|
||||
tbz count, 5, 1f
|
||||
stp val, val, [dstin, 16]
|
||||
stp val, val, [dstend, -32]
|
||||
1: ret
|
||||
|
||||
.p2align 4
|
||||
/* Set 64..96 bytes. Write 64 bytes from the start and
|
||||
32 bytes from the end. */
|
||||
L(set96):
|
||||
stp val, val, [dstin, 16]
|
||||
stp val, val, [dstin, 32]
|
||||
stp val, val, [dstin, 48]
|
||||
stp val, val, [dstend, -32]
|
||||
stp val, val, [dstend, -16]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(set_long):
|
||||
stp val, val, [dstin]
|
||||
#if DC_ZVA_THRESHOLD
|
||||
cmp count, DC_ZVA_THRESHOLD
|
||||
ccmp val, 0, 0, cs
|
||||
bic dst, dstin, 15
|
||||
b.eq L(zva_64)
|
||||
#else
|
||||
bic dst, dstin, 15
|
||||
#endif
|
||||
/* Small-size or non-zero memset does not use DC ZVA. */
|
||||
sub count, dstend, dst
|
||||
|
||||
/*
|
||||
* Adjust count and bias for loop. By substracting extra 1 from count,
|
||||
* it is easy to use tbz instruction to check whether loop tailing
|
||||
* count is less than 33 bytes, so as to bypass 2 unneccesary stps.
|
||||
*/
|
||||
sub count, count, 64+16+1
|
||||
|
||||
#if DC_ZVA_THRESHOLD
|
||||
/* Align loop on 16-byte boundary, this might be friendly to i-cache. */
|
||||
nop
|
||||
#endif
|
||||
|
||||
1: stp val, val, [dst, 16]
|
||||
stp val, val, [dst, 32]
|
||||
stp val, val, [dst, 48]
|
||||
stp val, val, [dst, 64]!
|
||||
subs count, count, 64
|
||||
b.hs 1b
|
||||
|
||||
tbz count, 5, 1f /* Remaining count is less than 33 bytes? */
|
||||
stp val, val, [dst, 16]
|
||||
stp val, val, [dst, 32]
|
||||
1: stp val, val, [dstend, -32]
|
||||
stp val, val, [dstend, -16]
|
||||
ret
|
||||
|
||||
#if DC_ZVA_THRESHOLD
|
||||
.p2align 4
|
||||
L(zva_64):
|
||||
stp val, val, [dst, 16]
|
||||
stp val, val, [dst, 32]
|
||||
stp val, val, [dst, 48]
|
||||
bic dst, dst, 63
|
||||
|
||||
/*
|
||||
* Previous memory writes might cross cache line boundary, and cause
|
||||
* cache line partially dirty. Zeroing this kind of cache line using
|
||||
* DC ZVA will incur extra cost, for it requires loading untouched
|
||||
* part of the line from memory before zeoring.
|
||||
*
|
||||
* So, write the first 64 byte aligned block using stp to force
|
||||
* fully dirty cache line.
|
||||
*/
|
||||
stp val, val, [dst, 64]
|
||||
stp val, val, [dst, 80]
|
||||
stp val, val, [dst, 96]
|
||||
stp val, val, [dst, 112]
|
||||
|
||||
sub count, dstend, dst
|
||||
/*
|
||||
* Adjust count and bias for loop. By substracting extra 1 from count,
|
||||
* it is easy to use tbz instruction to check whether loop tailing
|
||||
* count is less than 33 bytes, so as to bypass 2 unneccesary stps.
|
||||
*/
|
||||
sub count, count, 128+64+64+1
|
||||
add dst, dst, 128
|
||||
nop
|
||||
|
||||
/* DC ZVA sets 64 bytes each time. */
|
||||
1: dc zva, dst
|
||||
add dst, dst, 64
|
||||
subs count, count, 64
|
||||
b.hs 1b
|
||||
|
||||
/*
|
||||
* Write the last 64 byte aligned block using stp to force fully
|
||||
* dirty cache line.
|
||||
*/
|
||||
stp val, val, [dst, 0]
|
||||
stp val, val, [dst, 16]
|
||||
stp val, val, [dst, 32]
|
||||
stp val, val, [dst, 48]
|
||||
|
||||
tbz count, 5, 1f /* Remaining count is less than 33 bytes? */
|
||||
stp val, val, [dst, 64]
|
||||
stp val, val, [dst, 80]
|
||||
1: stp val, val, [dstend, -32]
|
||||
stp val, val, [dstend, -16]
|
||||
ret
|
||||
#endif
|
||||
|
||||
|
||||
END (memset)
|
24
mesosphere/kernel/source/libc/kern_libc_config.arch.arm64.h
Normal file
24
mesosphere/kernel/source/libc/kern_libc_config.arch.arm64.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
/* Definitions for libc genericity. */
|
||||
#define MESOSPHERE_LIBC_MEMCPY_GENERIC 0
|
||||
#define MESOSPHERE_LIBC_MEMCMP_GENERIC 0
|
||||
#define MESOSPHERE_LIBC_MEMMOVE_GENERIC 0
|
||||
#define MESOSPHERE_LIBC_MEMSET_GENERIC 0
|
||||
#define MESOSPHERE_LIBC_STRNCPY_GENERIC 1
|
||||
#define MESOSPHERE_LIBC_STRNCMP_GENERIC 1
|
26
mesosphere/kernel/source/libc/kern_libc_config.h
Normal file
26
mesosphere/kernel/source/libc/kern_libc_config.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#if defined(ATMOSPHERE_ARCH_ARM64)
|
||||
|
||||
#include "kern_libc_config.arch.arm64.h"
|
||||
|
||||
#else
|
||||
|
||||
#error "Unknown architecture for libc"
|
||||
|
||||
#endif
|
673
mesosphere/kernel/source/libc/kern_libc_generic.c
Normal file
673
mesosphere/kernel/source/libc/kern_libc_generic.c
Normal file
|
@ -0,0 +1,673 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include "kern_libc_config.h"
|
||||
|
||||
/* Note: copied from newlib */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
FUNCTION
|
||||
<<memmove>>---move possibly overlapping memory
|
||||
INDEX
|
||||
memmove
|
||||
SYNOPSIS
|
||||
#include <string.h>
|
||||
void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
|
||||
DESCRIPTION
|
||||
This function moves <[length]> characters from the block of
|
||||
memory starting at <<*<[src]>>> to the memory starting at
|
||||
<<*<[dst]>>>. <<memmove>> reproduces the characters correctly
|
||||
at <<*<[dst]>>> even if the two areas overlap.
|
||||
RETURNS
|
||||
The function returns <[dst]> as passed.
|
||||
PORTABILITY
|
||||
<<memmove>> is ANSI C.
|
||||
<<memmove>> requires no supporting OS subroutines.
|
||||
QUICKREF
|
||||
memmove ansi pure
|
||||
*/
|
||||
|
||||
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
|
||||
#define UNALIGNED(X, Y) \
|
||||
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
|
||||
|
||||
/* How many bytes are copied each iteration of the 4X unrolled loop. */
|
||||
#define BIGBLOCKSIZE (sizeof (long) << 2)
|
||||
|
||||
/* How many bytes are copied each iteration of the word copy loop. */
|
||||
#define LITTLEBLOCKSIZE (sizeof (long))
|
||||
|
||||
/* Threshhold for punting to the byte copier. */
|
||||
#undef TOO_SMALL
|
||||
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
|
||||
|
||||
#if MESOSPHERE_LIBC_MEMMOVE_GENERIC
|
||||
|
||||
/*SUPPRESS 20*/
|
||||
void *
|
||||
//__inhibit_loop_to_libcall
|
||||
__attribute__((weak))
|
||||
memmove (void *dst_void,
|
||||
const void *src_void,
|
||||
size_t length)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
char *dst = dst_void;
|
||||
const char *src = src_void;
|
||||
|
||||
if (src < dst && dst < src + length)
|
||||
{
|
||||
/* Have to copy backwards */
|
||||
src += length;
|
||||
dst += length;
|
||||
while (length--)
|
||||
{
|
||||
*--dst = *--src;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (length--)
|
||||
{
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
return dst_void;
|
||||
#else
|
||||
char *dst = dst_void;
|
||||
const char *src = src_void;
|
||||
long *aligned_dst;
|
||||
const long *aligned_src;
|
||||
|
||||
if (src < dst && dst < src + length)
|
||||
{
|
||||
/* Destructive overlap...have to copy backwards */
|
||||
src += length;
|
||||
dst += length;
|
||||
while (length--)
|
||||
{
|
||||
*--dst = *--src;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Use optimizing algorithm for a non-destructive copy to closely
|
||||
match memcpy. If the size is small or either SRC or DST is unaligned,
|
||||
then punt into the byte copy loop. This should be rare. */
|
||||
if (!TOO_SMALL(length) && !UNALIGNED (src, dst))
|
||||
{
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* Copy 4X long words at a time if possible. */
|
||||
while (length >= BIGBLOCKSIZE)
|
||||
{
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
length -= BIGBLOCKSIZE;
|
||||
}
|
||||
|
||||
/* Copy one long word at a time if possible. */
|
||||
while (length >= LITTLEBLOCKSIZE)
|
||||
{
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
length -= LITTLEBLOCKSIZE;
|
||||
}
|
||||
|
||||
/* Pick up any residual with a byte copier. */
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (length--)
|
||||
{
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
return dst_void;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
||||
#endif /* MESOSPHERE_LIBC_MEMMOVE_GENERIC */
|
||||
|
||||
/*
|
||||
FUNCTION
|
||||
<<memcpy>>---copy memory regions
|
||||
SYNOPSIS
|
||||
#include <string.h>
|
||||
void* memcpy(void *restrict <[out]>, const void *restrict <[in]>,
|
||||
size_t <[n]>);
|
||||
DESCRIPTION
|
||||
This function copies <[n]> bytes from the memory region
|
||||
pointed to by <[in]> to the memory region pointed to by
|
||||
<[out]>.
|
||||
If the regions overlap, the behavior is undefined.
|
||||
RETURNS
|
||||
<<memcpy>> returns a pointer to the first byte of the <[out]>
|
||||
region.
|
||||
PORTABILITY
|
||||
<<memcpy>> is ANSI C.
|
||||
<<memcpy>> requires no supporting OS subroutines.
|
||||
QUICKREF
|
||||
memcpy ansi pure
|
||||
*/
|
||||
|
||||
#if MESOSPHERE_LIBC_MEMCPY_GENERIC
|
||||
|
||||
void *
|
||||
__attribute__((weak))
|
||||
memcpy (void * dst0,
|
||||
const void * __restrict src0,
|
||||
size_t len0)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
char *dst = (char *) dst0;
|
||||
char *src = (char *) src0;
|
||||
|
||||
void *save = dst0;
|
||||
|
||||
while (len0--)
|
||||
{
|
||||
*dst++ = *src++;
|
||||
}
|
||||
|
||||
return save;
|
||||
#else
|
||||
char *dst = dst0;
|
||||
const char *src = src0;
|
||||
long *aligned_dst;
|
||||
const long *aligned_src;
|
||||
|
||||
/* If the size is small, or either SRC or DST is unaligned,
|
||||
then punt into the byte copy loop. This should be rare. */
|
||||
if (!TOO_SMALL(len0) && !UNALIGNED (src, dst))
|
||||
{
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* Copy 4X long words at a time if possible. */
|
||||
while (len0 >= BIGBLOCKSIZE)
|
||||
{
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
len0 -= BIGBLOCKSIZE;
|
||||
}
|
||||
|
||||
/* Copy one long word at a time if possible. */
|
||||
while (len0 >= LITTLEBLOCKSIZE)
|
||||
{
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
len0 -= LITTLEBLOCKSIZE;
|
||||
}
|
||||
|
||||
/* Pick up any residual with a byte copier. */
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (len0--)
|
||||
*dst++ = *src++;
|
||||
|
||||
return dst0;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
||||
#endif /* MESOSPHERE_LIBC_MEMCPY_GENERIC */
|
||||
|
||||
/*
|
||||
FUNCTION
|
||||
<<memset>>---set an area of memory
|
||||
INDEX
|
||||
memset
|
||||
SYNOPSIS
|
||||
#include <string.h>
|
||||
void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
|
||||
DESCRIPTION
|
||||
This function converts the argument <[c]> into an unsigned
|
||||
char and fills the first <[length]> characters of the array
|
||||
pointed to by <[dst]> to the value.
|
||||
RETURNS
|
||||
<<memset>> returns the value of <[dst]>.
|
||||
PORTABILITY
|
||||
<<memset>> is ANSI C.
|
||||
<<memset>> requires no supporting OS subroutines.
|
||||
QUICKREF
|
||||
memset ansi pure
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#undef LBLOCKSIZE
|
||||
#undef UNALIGNED
|
||||
#undef TOO_SMALL
|
||||
|
||||
#define LBLOCKSIZE (sizeof(long))
|
||||
#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1))
|
||||
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
|
||||
|
||||
#if MESOSPHERE_LIBC_MEMSET_GENERIC
|
||||
|
||||
void *
|
||||
__attribute__((weak))
|
||||
memset (void *m,
|
||||
int c,
|
||||
size_t n)
|
||||
{
|
||||
char *s = (char *) m;
|
||||
|
||||
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
|
||||
unsigned int i;
|
||||
unsigned long buffer;
|
||||
unsigned long *aligned_addr;
|
||||
unsigned int d = c & 0xff; /* To avoid sign extension, copy C to an
|
||||
unsigned variable. */
|
||||
|
||||
while (UNALIGNED (s))
|
||||
{
|
||||
if (n--)
|
||||
*s++ = (char) c;
|
||||
else
|
||||
return m;
|
||||
}
|
||||
|
||||
if (!TOO_SMALL (n))
|
||||
{
|
||||
/* If we get this far, we know that n is large and s is word-aligned. */
|
||||
aligned_addr = (unsigned long *) s;
|
||||
|
||||
/* Store D into each char sized location in BUFFER so that
|
||||
we can set large blocks quickly. */
|
||||
buffer = (d << 8) | d;
|
||||
buffer |= (buffer << 16);
|
||||
for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
|
||||
buffer = (buffer << i) | buffer;
|
||||
|
||||
/* Unroll the loop. */
|
||||
while (n >= LBLOCKSIZE*4)
|
||||
{
|
||||
*aligned_addr++ = buffer;
|
||||
*aligned_addr++ = buffer;
|
||||
*aligned_addr++ = buffer;
|
||||
*aligned_addr++ = buffer;
|
||||
n -= 4*LBLOCKSIZE;
|
||||
}
|
||||
|
||||
while (n >= LBLOCKSIZE)
|
||||
{
|
||||
*aligned_addr++ = buffer;
|
||||
n -= LBLOCKSIZE;
|
||||
}
|
||||
/* Pick up the remainder with a bytewise loop. */
|
||||
s = (char*)aligned_addr;
|
||||
}
|
||||
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
|
||||
while (n--)
|
||||
*s++ = (char) c;
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
#endif /* MESOSPHERE_LIBC_MEMSET_GENERIC */
|
||||
|
||||
/*
|
||||
FUNCTION
|
||||
<<memcmp>>---compare two memory areas
|
||||
INDEX
|
||||
memcmp
|
||||
SYNOPSIS
|
||||
#include <string.h>
|
||||
int memcmp(const void *<[s1]>, const void *<[s2]>, size_t <[n]>);
|
||||
DESCRIPTION
|
||||
This function compares not more than <[n]> characters of the
|
||||
object pointed to by <[s1]> with the object pointed to by <[s2]>.
|
||||
RETURNS
|
||||
The function returns an integer greater than, equal to or
|
||||
less than zero according to whether the object pointed to by
|
||||
<[s1]> is greater than, equal to or less than the object
|
||||
pointed to by <[s2]>.
|
||||
PORTABILITY
|
||||
<<memcmp>> is ANSI C.
|
||||
<<memcmp>> requires no supporting OS subroutines.
|
||||
QUICKREF
|
||||
memcmp ansi pure
|
||||
*/
|
||||
|
||||
#undef LBLOCKSIZE
|
||||
#undef UNALIGNED
|
||||
#undef TOO_SMALL
|
||||
|
||||
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
|
||||
#define UNALIGNED(X, Y) \
|
||||
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
|
||||
|
||||
/* How many bytes are copied each iteration of the word copy loop. */
|
||||
#define LBLOCKSIZE (sizeof (long))
|
||||
|
||||
/* Threshhold for punting to the byte copier. */
|
||||
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
|
||||
|
||||
#if MESOSPHERE_LIBC_MEMCMP_GENERIC
|
||||
|
||||
int
|
||||
__attribute__((weak))
|
||||
memcmp (const void *m1,
|
||||
const void *m2,
|
||||
size_t n)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
unsigned char *s1 = (unsigned char *) m1;
|
||||
unsigned char *s2 = (unsigned char *) m2;
|
||||
|
||||
while (n--)
|
||||
{
|
||||
if (*s1 != *s2)
|
||||
{
|
||||
return *s1 - *s2;
|
||||
}
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
unsigned char *s1 = (unsigned char *) m1;
|
||||
unsigned char *s2 = (unsigned char *) m2;
|
||||
unsigned long *a1;
|
||||
unsigned long *a2;
|
||||
|
||||
/* If the size is too small, or either pointer is unaligned,
|
||||
then we punt to the byte compare loop. Hopefully this will
|
||||
not turn up in inner loops. */
|
||||
if (!TOO_SMALL(n) && !UNALIGNED(s1,s2))
|
||||
{
|
||||
/* Otherwise, load and compare the blocks of memory one
|
||||
word at a time. */
|
||||
a1 = (unsigned long*) s1;
|
||||
a2 = (unsigned long*) s2;
|
||||
while (n >= LBLOCKSIZE)
|
||||
{
|
||||
if (*a1 != *a2)
|
||||
break;
|
||||
a1++;
|
||||
a2++;
|
||||
n -= LBLOCKSIZE;
|
||||
}
|
||||
|
||||
/* check m mod LBLOCKSIZE remaining characters */
|
||||
|
||||
s1 = (unsigned char*)a1;
|
||||
s2 = (unsigned char*)a2;
|
||||
}
|
||||
|
||||
while (n--)
|
||||
{
|
||||
if (*s1 != *s2)
|
||||
return *s1 - *s2;
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
||||
#endif /* MESOSPHERE_LIBC_MEMCMP_GENERIC */
|
||||
|
||||
/*
|
||||
FUNCTION
|
||||
<<strncpy>>---counted copy string
|
||||
INDEX
|
||||
strncpy
|
||||
SYNOPSIS
|
||||
#include <string.h>
|
||||
char *strncpy(char *restrict <[dst]>, const char *restrict <[src]>,
|
||||
size_t <[length]>);
|
||||
DESCRIPTION
|
||||
<<strncpy>> copies not more than <[length]> characters from the
|
||||
the string pointed to by <[src]> (including the terminating
|
||||
null character) to the array pointed to by <[dst]>. If the
|
||||
string pointed to by <[src]> is shorter than <[length]>
|
||||
characters, null characters are appended to the destination
|
||||
array until a total of <[length]> characters have been
|
||||
written.
|
||||
RETURNS
|
||||
This function returns the initial value of <[dst]>.
|
||||
PORTABILITY
|
||||
<<strncpy>> is ANSI C.
|
||||
<<strncpy>> requires no supporting OS subroutines.
|
||||
QUICKREF
|
||||
strncpy ansi pure
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
/*SUPPRESS 560*/
|
||||
/*SUPPRESS 530*/
|
||||
|
||||
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
|
||||
#define UNALIGNED(X, Y) \
|
||||
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
|
||||
|
||||
#if LONG_MAX == 2147483647L
|
||||
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
|
||||
#else
|
||||
#if LONG_MAX == 9223372036854775807L
|
||||
/* Nonzero if X (a long int) contains a NULL byte. */
|
||||
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
|
||||
#else
|
||||
#error long int is not a 32bit or 64bit type.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DETECTNULL
|
||||
#error long int is not a 32bit or 64bit byte
|
||||
#endif
|
||||
|
||||
#undef TOO_SMALL
|
||||
#define TOO_SMALL(LEN) ((LEN) < sizeof (long))
|
||||
|
||||
#if MESOSPHERE_LIBC_STRNCMP_GENERIC
|
||||
|
||||
char *
|
||||
strncpy (char *__restrict dst0,
|
||||
const char *__restrict src0,
|
||||
size_t count)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
char *dscan;
|
||||
const char *sscan;
|
||||
|
||||
dscan = dst0;
|
||||
sscan = src0;
|
||||
while (count > 0)
|
||||
{
|
||||
--count;
|
||||
if ((*dscan++ = *sscan++) == '\0')
|
||||
break;
|
||||
}
|
||||
while (count-- > 0)
|
||||
*dscan++ = '\0';
|
||||
|
||||
return dst0;
|
||||
#else
|
||||
char *dst = dst0;
|
||||
const char *src = src0;
|
||||
long *aligned_dst;
|
||||
const long *aligned_src;
|
||||
|
||||
/* If SRC and DEST is aligned and count large enough, then copy words. */
|
||||
if (!UNALIGNED (src, dst) && !TOO_SMALL (count))
|
||||
{
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* SRC and DEST are both "long int" aligned, try to do "long int"
|
||||
sized copies. */
|
||||
while (count >= sizeof (long int) && !DETECTNULL(*aligned_src))
|
||||
{
|
||||
count -= sizeof (long int);
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
}
|
||||
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (count > 0)
|
||||
{
|
||||
--count;
|
||||
if ((*dst++ = *src++) == '\0')
|
||||
break;
|
||||
}
|
||||
|
||||
while (count-- > 0)
|
||||
*dst++ = '\0';
|
||||
|
||||
return dst0;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
||||
#endif /* MESOSPHERE_LIBC_STRNCPY_GENERIC */
|
||||
|
||||
/*
|
||||
FUNCTION
|
||||
<<strncmp>>---character string compare
|
||||
|
||||
INDEX
|
||||
strncmp
|
||||
SYNOPSIS
|
||||
#include <string.h>
|
||||
int strncmp(const char *<[a]>, const char * <[b]>, size_t <[length]>);
|
||||
DESCRIPTION
|
||||
<<strncmp>> compares up to <[length]> characters
|
||||
from the string at <[a]> to the string at <[b]>.
|
||||
RETURNS
|
||||
If <<*<[a]>>> sorts lexicographically after <<*<[b]>>>,
|
||||
<<strncmp>> returns a number greater than zero. If the two
|
||||
strings are equivalent, <<strncmp>> returns zero. If <<*<[a]>>>
|
||||
sorts lexicographically before <<*<[b]>>>, <<strncmp>> returns a
|
||||
number less than zero.
|
||||
PORTABILITY
|
||||
<<strncmp>> is ANSI C.
|
||||
<<strncmp>> requires no supporting OS subroutines.
|
||||
QUICKREF
|
||||
strncmp ansi pure
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
|
||||
#define UNALIGNED(X, Y) \
|
||||
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
|
||||
|
||||
/* DETECTNULL returns nonzero if (long)X contains a NULL byte. */
|
||||
#if LONG_MAX == 2147483647L
|
||||
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
|
||||
#else
|
||||
#if LONG_MAX == 9223372036854775807L
|
||||
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
|
||||
#else
|
||||
#error long int is not a 32bit or 64bit type.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DETECTNULL
|
||||
#error long int is not a 32bit or 64bit byte
|
||||
#endif
|
||||
|
||||
#if MESOSPHERE_LIBC_STRNCMP_GENERIC
|
||||
|
||||
int
|
||||
strncmp (const char *s1,
|
||||
const char *s2,
|
||||
size_t n)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
while (n-- != 0 && *s1 == *s2)
|
||||
{
|
||||
if (n == 0 || *s1 == '\0')
|
||||
break;
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
|
||||
return (*(unsigned char *) s1) - (*(unsigned char *) s2);
|
||||
#else
|
||||
unsigned long *a1;
|
||||
unsigned long *a2;
|
||||
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
/* If s1 or s2 are unaligned, then compare bytes. */
|
||||
if (!UNALIGNED (s1, s2))
|
||||
{
|
||||
/* If s1 and s2 are word-aligned, compare them a word at a time. */
|
||||
a1 = (unsigned long*)s1;
|
||||
a2 = (unsigned long*)s2;
|
||||
while (n >= sizeof (long) && *a1 == *a2)
|
||||
{
|
||||
n -= sizeof (long);
|
||||
|
||||
/* If we've run out of bytes or hit a null, return zero
|
||||
since we already know *a1 == *a2. */
|
||||
if (n == 0 || DETECTNULL (*a1))
|
||||
return 0;
|
||||
|
||||
a1++;
|
||||
a2++;
|
||||
}
|
||||
|
||||
/* A difference was detected in last few bytes of s1, so search bytewise */
|
||||
s1 = (char*)a1;
|
||||
s2 = (char*)a2;
|
||||
}
|
||||
|
||||
while (n-- > 0 && *s1 == *s2)
|
||||
{
|
||||
/* If we've run out of bytes or hit a null, return zero
|
||||
since we already know *s1 == *s2. */
|
||||
if (n == 0 || *s1 == '\0')
|
||||
return 0;
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
return (*(unsigned char *) s1) - (*(unsigned char *) s2);
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
||||
#endif /* MESOSPHERE_LIBC_STRNCMP_GENERIC */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue