kernel_ldr: use unoptimized memcpy before mmu bringup (closes #1102)

Before the MMU is up, all reads/writes must be aligned; the optimized
memcpy implementation does not guarantee all reads/writes it performs
are aligned.

This commit splits the libc implementation so that kernel and
kernel_ldr are configured separately; only kernel now uses the
optimized implementation. This is safe, as the MMU is brought up
before kernel begins executing.
Author: Michael Scire
Date:   2020-08-02 14:30:06 -07:00
Parent: a82914d58e
Commit: 29358dc593

10 changed files with 723 additions and 0 deletions
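
For reference, the alignment constraint exists because, with the MMU off, AArch64 data accesses are treated as Device memory and must be naturally aligned, while the optimized routines added below freely issue unaligned ldr/ldp at arbitrary offsets (e.g. loading from srcend - 16). A minimal C sketch (not the code in this commit) of the kind of copy that is always safe in that environment:

#include <stddef.h>
#include <stdint.h>

/* Illustration only: a byte-at-a-time copy never performs an access wider
 * than one byte, so every access is trivially aligned even with the MMU off.
 * The generic newlib memcpy included in this commit only widens to word
 * accesses when both pointers are already word-aligned. */
static void *copy_bytes(void *dst, const void *src, size_t n) {
    uint8_t *d = (uint8_t *)dst;
    const uint8_t *s = (const uint8_t *)src;
    while (n--) {
        *d++ = *s++;
    }
    return dst;
}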


@@ -0,0 +1,31 @@
/*
* Macros for asm code.
*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _ASMDEFS_H
#define _ASMDEFS_H
#define ENTRY_ALIGN(name, alignment) \
.global name; \
.type name,%function; \
.align alignment; \
name: \
.cfi_startproc;
#define ENTRY(name) ENTRY_ALIGN(name, 6)
#define ENTRY_ALIAS(name) \
.global name; \
.type name,%function; \
name:
#define END(name) \
.cfi_endproc; \
.size name, .-name;
#define L(l) .L ## l
#endif


@@ -0,0 +1,133 @@
/* memcmp - compare memory
*
* Copyright (c) 2013, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, unaligned accesses.
*/
#include "asmdefs.h"
/* Parameters and result. */
#define src1 x0
#define src2 x1
#define limit x2
#define result w0
/* Internal variables. */
#define data1 x3
#define data1w w3
#define data1h x4
#define data2 x5
#define data2w w5
#define data2h x6
#define tmp1 x7
#define tmp2 x8
ENTRY (memcmp)
subs limit, limit, 8
b.lo L(less8)
ldr data1, [src1], 8
ldr data2, [src2], 8
cmp data1, data2
b.ne L(return)
subs limit, limit, 8
b.gt L(more16)
ldr data1, [src1, limit]
ldr data2, [src2, limit]
b L(return)
L(more16):
ldr data1, [src1], 8
ldr data2, [src2], 8
cmp data1, data2
bne L(return)
/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
strings. */
subs limit, limit, 16
b.ls L(last_bytes)
/* We overlap loads between 0-32 bytes at either side of SRC1 when we
try to align, so limit it only to strings larger than 128 bytes. */
cmp limit, 96
b.ls L(loop16)
/* Align src1 and adjust src2 with bytes not yet done. */
and tmp1, src1, 15
add limit, limit, tmp1
sub src1, src1, tmp1
sub src2, src2, tmp1
/* Loop performing 16 bytes per iteration using aligned src1.
Limit is pre-decremented by 16 and must be larger than zero.
Exit if <= 16 bytes left to do or if the data is not equal. */
.p2align 4
L(loop16):
ldp data1, data1h, [src1], 16
ldp data2, data2h, [src2], 16
subs limit, limit, 16
ccmp data1, data2, 0, hi
ccmp data1h, data2h, 0, eq
b.eq L(loop16)
cmp data1, data2
bne L(return)
mov data1, data1h
mov data2, data2h
cmp data1, data2
bne L(return)
/* Compare last 1-16 bytes using unaligned access. */
L(last_bytes):
add src1, src1, limit
add src2, src2, limit
ldp data1, data1h, [src1]
ldp data2, data2h, [src2]
cmp data1, data2
bne L(return)
mov data1, data1h
mov data2, data2h
cmp data1, data2
/* Compare data bytes and set return value to 0, -1 or 1. */
L(return):
#ifndef __AARCH64EB__
rev data1, data1
rev data2, data2
#endif
cmp data1, data2
L(ret_eq):
cset result, ne
cneg result, result, lo
ret
.p2align 4
/* Compare up to 8 bytes. Limit is [-8..-1]. */
L(less8):
adds limit, limit, 4
b.lo L(less4)
ldr data1w, [src1], 4
ldr data2w, [src2], 4
cmp data1w, data2w
b.ne L(return)
sub limit, limit, 4
L(less4):
adds limit, limit, 4
beq L(ret_eq)
L(byte_loop):
ldrb data1w, [src1], 1
ldrb data2w, [src2], 1
subs limit, limit, 1
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
b.eq L(byte_loop)
sub result, data1w, data2w
ret
END (memcmp)
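
For reference, the subtle part of the file above is the L(return) sequence: on a little-endian configuration the 64-bit chunks were loaded little-endian, so rev byte-reverses them to make the lowest-addressed byte most significant before the unsigned compare. A C model of that step (sketch only; cmp_chunk is a hypothetical name, not part of the source):

#include <stdint.h>

/* Model of memcmp.S's L(return): byte-swap both chunks so the
 * lowest-addressed byte becomes most significant, then an unsigned compare
 * yields the bytewise (memcmp) ordering as -1, 0 or 1. */
static int cmp_chunk(uint64_t data1, uint64_t data2) {
    data1 = __builtin_bswap64(data1);   /* rev data1 */
    data2 = __builtin_bswap64(data2);   /* rev data2 */
    if (data1 == data2)
        return 0;                       /* cset result, ne */
    return (data1 < data2) ? -1 : 1;    /* cneg result, result, lo */
}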


@@ -0,0 +1,239 @@
/*
* memcpy - copy memory area
*
* Copyright (c) 2012-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, unaligned accesses.
*
*/
#include "asmdefs.h"
#define dstin x0
#define src x1
#define count x2
#define dst x3
#define srcend x4
#define dstend x5
#define A_l x6
#define A_lw w6
#define A_h x7
#define B_l x8
#define B_lw w8
#define B_h x9
#define C_l x10
#define C_lw w10
#define C_h x11
#define D_l x12
#define D_h x13
#define E_l x14
#define E_h x15
#define F_l x16
#define F_h x17
#define G_l count
#define G_h dst
#define H_l src
#define H_h srcend
#define tmp1 x14
/* This implementation handles overlaps and supports both memcpy and memmove
from a single entry point. It uses unaligned accesses and branchless
sequences to keep the code small, simple and improve performance.
Copies are split into 3 main cases: small copies of up to 32 bytes, medium
copies of up to 128 bytes, and large copies. The overhead of the overlap
check is negligible since it is only required for large copies.
Large copies use a software pipelined loop processing 64 bytes per iteration.
The destination pointer is 16-byte aligned to minimize unaligned accesses.
The loop tail is handled by always copying 64 bytes from the end.
*/
ENTRY (memcpy)
ENTRY_ALIAS (memmove)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
b.hi L(copy_long)
cmp count, 32
b.hi L(copy32_128)
/* Small copies: 0..32 bytes. */
cmp count, 16
b.lo L(copy16)
ldp A_l, A_h, [src]
ldp D_l, D_h, [srcend, -16]
stp A_l, A_h, [dstin]
stp D_l, D_h, [dstend, -16]
ret
/* Copy 8-15 bytes. */
L(copy16):
tbz count, 3, L(copy8)
ldr A_l, [src]
ldr A_h, [srcend, -8]
str A_l, [dstin]
str A_h, [dstend, -8]
ret
.p2align 3
/* Copy 4-7 bytes. */
L(copy8):
tbz count, 2, L(copy4)
ldr A_lw, [src]
ldr B_lw, [srcend, -4]
str A_lw, [dstin]
str B_lw, [dstend, -4]
ret
/* Copy 0..3 bytes using a branchless sequence. */
L(copy4):
cbz count, L(copy0)
lsr tmp1, count, 1
ldrb A_lw, [src]
ldrb C_lw, [srcend, -1]
ldrb B_lw, [src, tmp1]
strb A_lw, [dstin]
strb B_lw, [dstin, tmp1]
strb C_lw, [dstend, -1]
L(copy0):
ret
.p2align 4
/* Medium copies: 33..128 bytes. */
L(copy32_128):
ldp A_l, A_h, [src]
ldp B_l, B_h, [src, 16]
ldp C_l, C_h, [srcend, -32]
ldp D_l, D_h, [srcend, -16]
cmp count, 64
b.hi L(copy128)
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret
.p2align 4
/* Copy 65..128 bytes. */
L(copy128):
ldp E_l, E_h, [src, 32]
ldp F_l, F_h, [src, 48]
cmp count, 96
b.ls L(copy96)
ldp G_l, G_h, [srcend, -64]
ldp H_l, H_h, [srcend, -48]
stp G_l, G_h, [dstend, -64]
stp H_l, H_h, [dstend, -48]
L(copy96):
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp E_l, E_h, [dstin, 32]
stp F_l, F_h, [dstin, 48]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret
.p2align 4
/* Copy more than 128 bytes. */
L(copy_long):
/* Use backwards copy if there is an overlap. */
sub tmp1, dstin, src
cbz tmp1, L(copy0)
cmp tmp1, count
b.lo L(copy_long_backwards)
/* Copy 16 bytes and then align dst to 16-byte alignment. */
ldp D_l, D_h, [src]
and tmp1, dstin, 15
bic dst, dstin, 15
sub src, src, tmp1
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
ldp B_l, B_h, [src, 32]
ldp C_l, C_h, [src, 48]
ldp D_l, D_h, [src, 64]!
subs count, count, 128 + 16 /* Test and readjust count. */
b.ls L(copy64_from_end)
L(loop64):
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [src, 32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [src, 48]
stp D_l, D_h, [dst, 64]!
ldp D_l, D_h, [src, 64]!
subs count, count, 64
b.hi L(loop64)
/* Write the last iteration and copy 64 bytes from the end. */
L(copy64_from_end):
ldp E_l, E_h, [srcend, -64]
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [srcend, -48]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [srcend, -32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [srcend, -16]
stp D_l, D_h, [dst, 64]
stp E_l, E_h, [dstend, -64]
stp A_l, A_h, [dstend, -48]
stp B_l, B_h, [dstend, -32]
stp C_l, C_h, [dstend, -16]
ret
.p2align 4
/* Large backwards copy for overlapping copies.
Copy 16 bytes and then align dst to 16-byte alignment. */
L(copy_long_backwards):
ldp D_l, D_h, [srcend, -16]
and tmp1, dstend, 15
sub srcend, srcend, tmp1
sub count, count, tmp1
ldp A_l, A_h, [srcend, -16]
stp D_l, D_h, [dstend, -16]
ldp B_l, B_h, [srcend, -32]
ldp C_l, C_h, [srcend, -48]
ldp D_l, D_h, [srcend, -64]!
sub dstend, dstend, tmp1
subs count, count, 128
b.ls L(copy64_from_start)
L(loop64_backwards):
stp A_l, A_h, [dstend, -16]
ldp A_l, A_h, [srcend, -16]
stp B_l, B_h, [dstend, -32]
ldp B_l, B_h, [srcend, -32]
stp C_l, C_h, [dstend, -48]
ldp C_l, C_h, [srcend, -48]
stp D_l, D_h, [dstend, -64]!
ldp D_l, D_h, [srcend, -64]!
subs count, count, 64
b.hi L(loop64_backwards)
/* Write the last iteration and copy 64 bytes from the start. */
L(copy64_from_start):
ldp G_l, G_h, [src, 48]
stp A_l, A_h, [dstend, -16]
ldp A_l, A_h, [src, 32]
stp B_l, B_h, [dstend, -32]
ldp B_l, B_h, [src, 16]
stp C_l, C_h, [dstend, -48]
ldp C_l, C_h, [src]
stp D_l, D_h, [dstend, -64]
stp G_l, G_h, [dstin, 48]
stp A_l, A_h, [dstin, 32]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstin]
ret
END (memcpy)
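
For reference, the "copy from both ends" idea described in the header comment is what lets the small and medium cases run without a length-dependent loop. A C model of the 16..32-byte path (sketch only; copy16_32 is a hypothetical name):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Two 16-byte chunks, one anchored at src and one at src + count - 16,
 * overlap as needed and together cover every count in [16, 32]; both loads
 * happen before both stores, mirroring the ldp/ldp/stp/stp order above. */
static void copy16_32(uint8_t *dst, const uint8_t *src, size_t count) {
    uint8_t head[16], tail[16];
    memcpy(head, src, 16);                /* ldp A_l, A_h, [src]         */
    memcpy(tail, src + count - 16, 16);   /* ldp D_l, D_h, [srcend, -16] */
    memcpy(dst, head, 16);                /* stp A_l, A_h, [dstin]       */
    memcpy(dst + count - 16, tail, 16);   /* stp D_l, D_h, [dstend, -16] */
}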


@@ -0,0 +1,172 @@
/*
* memset - fill memory with a constant byte
*
* Copyright (c) 2012-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
*
*/
#include "asmdefs.h"
#define DC_ZVA_THRESHOLD 512
#define dstin x0
#define val x1
#define valw w1
#define count x2
#define dst x3
#define dstend x4
#define zva_val x5
ENTRY (memset)
bfi valw, valw, 8, 8
bfi valw, valw, 16, 16
bfi val, val, 32, 32
add dstend, dstin, count
cmp count, 96
b.hi L(set_long)
cmp count, 16
b.hs L(set_medium)
/* Set 0..15 bytes. */
tbz count, 3, 1f
str val, [dstin]
str val, [dstend, -8]
ret
1: tbz count, 2, 2f
str valw, [dstin]
str valw, [dstend, -4]
ret
2: cbz count, 3f
strb valw, [dstin]
tbz count, 1, 3f
strh valw, [dstend, -2]
3: ret
/* Set 16..96 bytes. */
.p2align 4
L(set_medium):
stp val, val, [dstin]
tbnz count, 6, L(set96)
stp val, val, [dstend, -16]
tbz count, 5, 1f
stp val, val, [dstin, 16]
stp val, val, [dstend, -32]
1: ret
.p2align 4
/* Set 64..96 bytes. Write 64 bytes from the start and
32 bytes from the end. */
L(set96):
stp val, val, [dstin, 16]
stp val, val, [dstin, 32]
stp val, val, [dstin, 48]
stp val, val, [dstend, -32]
stp val, val, [dstend, -16]
ret
.p2align 4
L(set_long):
stp val, val, [dstin]
#if DC_ZVA_THRESHOLD
cmp count, DC_ZVA_THRESHOLD
ccmp val, 0, 0, cs
bic dst, dstin, 15
b.eq L(zva_64)
#else
bic dst, dstin, 15
#endif
/* Small-size or non-zero memset does not use DC ZVA. */
sub count, dstend, dst
/*
* Adjust count and bias for loop. By subtracting an extra 1 from count,
* it is easy to use the tbz instruction to check whether the loop tail
* count is less than 33 bytes, so as to bypass 2 unnecessary stps.
*/
sub count, count, 64+16+1
#if DC_ZVA_THRESHOLD
/* Align the loop on a 16-byte boundary; this might be friendly to the i-cache. */
nop
#endif
1: stp val, val, [dst, 16]
stp val, val, [dst, 32]
stp val, val, [dst, 48]
stp val, val, [dst, 64]!
subs count, count, 64
b.hs 1b
tbz count, 5, 1f /* Remaining count is less than 33 bytes? */
stp val, val, [dst, 16]
stp val, val, [dst, 32]
1: stp val, val, [dstend, -32]
stp val, val, [dstend, -16]
ret
#if DC_ZVA_THRESHOLD
.p2align 4
L(zva_64):
stp val, val, [dst, 16]
stp val, val, [dst, 32]
stp val, val, [dst, 48]
bic dst, dst, 63
/*
* Previous memory writes might cross a cache line boundary and leave a
* cache line partially dirty. Zeroing that kind of cache line using
* DC ZVA incurs extra cost, because it requires loading the untouched
* part of the line from memory before zeroing.
*
* So, write the first 64-byte aligned block using stp to force a
* fully dirty cache line.
*/
stp val, val, [dst, 64]
stp val, val, [dst, 80]
stp val, val, [dst, 96]
stp val, val, [dst, 112]
sub count, dstend, dst
/*
* Adjust count and bias for loop. By subtracting an extra 1 from count,
* it is easy to use the tbz instruction to check whether the loop tail
* count is less than 33 bytes, so as to bypass 2 unnecessary stps.
*/
sub count, count, 128+64+64+1
add dst, dst, 128
nop
/* DC ZVA sets 64 bytes each time. */
1: dc zva, dst
add dst, dst, 64
subs count, count, 64
b.hs 1b
/*
* Write the last 64-byte aligned block using stp to force a fully
* dirty cache line.
*/
stp val, val, [dst, 0]
stp val, val, [dst, 16]
stp val, val, [dst, 32]
stp val, val, [dst, 48]
tbz count, 5, 1f /* Remaining count is less than 33 bytes? */
stp val, val, [dst, 64]
stp val, val, [dst, 80]
1: stp val, val, [dstend, -32]
stp val, val, [dstend, -16]
ret
#endif
END (memset)
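
For reference, the three bfi instructions at the top of memset replicate the fill byte across all eight bytes of val so that each stp stores sixteen copies of it. A C model of the broadcast (sketch only; broadcast_byte is a hypothetical name):

#include <stdint.h>

/* Replicate the low byte of c into every byte of a 64-bit value,
 * equivalent to (c & 0xff) * 0x0101010101010101. */
static uint64_t broadcast_byte(int c) {
    uint64_t v = (uint8_t)c;
    v |= v << 8;     /* bfi valw, valw, 8, 8   */
    v |= v << 16;    /* bfi valw, valw, 16, 16 */
    v |= v << 32;    /* bfi val, val, 32, 32   */
    return v;
}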


@@ -0,0 +1,24 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
/* Definitions for libc genericity. */
#define MESOSPHERE_LIBC_MEMCPY_GENERIC 0
#define MESOSPHERE_LIBC_MEMCMP_GENERIC 0
#define MESOSPHERE_LIBC_MEMMOVE_GENERIC 0
#define MESOSPHERE_LIBC_MEMSET_GENERIC 0
#define MESOSPHERE_LIBC_STRNCPY_GENERIC 1
#define MESOSPHERE_LIBC_STRNCMP_GENERIC 1
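
For reference, these flags feed the #if guards in the shared libc source below: 0 compiles out the generic newlib C routine so the optimized assembly is linked instead, while 1 keeps the generic C fallback, which only widens to word accesses when both pointers are already aligned. The kernel_ldr counterpart is not shown in this excerpt; presumably it enables the generic routines throughout. A hypothetical sketch of such a configuration (an assumption, not the actual file):

/* Hypothetical kernel_ldr configuration (assumption -- the real file is not
 * shown in this excerpt): use only the generic, alignment-safe C routines. */
#define MESOSPHERE_LIBC_MEMCPY_GENERIC  1
#define MESOSPHERE_LIBC_MEMCMP_GENERIC  1
#define MESOSPHERE_LIBC_MEMMOVE_GENERIC 1
#define MESOSPHERE_LIBC_MEMSET_GENERIC  1
#define MESOSPHERE_LIBC_STRNCPY_GENERIC 1
#define MESOSPHERE_LIBC_STRNCMP_GENERIC 1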


@@ -0,0 +1,26 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#if defined(ATMOSPHERE_ARCH_ARM64)
#include "kern_libc_config.arch.arm64.h"
#else
#error "Unknown architecture for libc"
#endif


@@ -0,0 +1,673 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include <stddef.h>
#include <limits.h>
#include "kern_libc_config.h"
/* Note: copied from newlib */
#ifdef __cplusplus
extern "C" {
#endif
/*
FUNCTION
<<memmove>>---move possibly overlapping memory
INDEX
memmove
SYNOPSIS
#include <string.h>
void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
DESCRIPTION
This function moves <[length]> characters from the block of
memory starting at <<*<[src]>>> to the memory starting at
<<*<[dst]>>>. <<memmove>> reproduces the characters correctly
at <<*<[dst]>>> even if the two areas overlap.
RETURNS
The function returns <[dst]> as passed.
PORTABILITY
<<memmove>> is ANSI C.
<<memmove>> requires no supporting OS subroutines.
QUICKREF
memmove ansi pure
*/
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE (sizeof (long) << 2)
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof (long))
/* Threshold for punting to the byte copier. */
#undef TOO_SMALL
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
#if MESOSPHERE_LIBC_MEMMOVE_GENERIC
/*SUPPRESS 20*/
void *
//__inhibit_loop_to_libcall
__attribute__((weak))
memmove (void *dst_void,
const void *src_void,
size_t length)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dst = dst_void;
const char *src = src_void;
if (src < dst && dst < src + length)
{
/* Have to copy backwards */
src += length;
dst += length;
while (length--)
{
*--dst = *--src;
}
}
else
{
while (length--)
{
*dst++ = *src++;
}
}
return dst_void;
#else
char *dst = dst_void;
const char *src = src_void;
long *aligned_dst;
const long *aligned_src;
if (src < dst && dst < src + length)
{
/* Destructive overlap...have to copy backwards */
src += length;
dst += length;
while (length--)
{
*--dst = *--src;
}
}
else
{
/* Use optimizing algorithm for a non-destructive copy to closely
match memcpy. If the size is small or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL(length) && !UNALIGNED (src, dst))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (length >= BIGBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
length -= BIGBLOCKSIZE;
}
/* Copy one long word at a time if possible. */
while (length >= LITTLEBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
length -= LITTLEBLOCKSIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (length--)
{
*dst++ = *src++;
}
}
return dst_void;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_MEMMOVE_GENERIC */
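/* Note (illustrative, not in the upstream newlib source): unlike memcpy,
 * memmove is well-defined for overlapping regions, e.g.
 *     char buf[8] = "abcdef";
 *     memmove(buf + 2, buf, 4);    -> buf now holds "ababcd"
 * The backwards-copy branch above is what makes this case work. */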
/*
FUNCTION
<<memcpy>>---copy memory regions
SYNOPSIS
#include <string.h>
void* memcpy(void *restrict <[out]>, const void *restrict <[in]>,
size_t <[n]>);
DESCRIPTION
This function copies <[n]> bytes from the memory region
pointed to by <[in]> to the memory region pointed to by
<[out]>.
If the regions overlap, the behavior is undefined.
RETURNS
<<memcpy>> returns a pointer to the first byte of the <[out]>
region.
PORTABILITY
<<memcpy>> is ANSI C.
<<memcpy>> requires no supporting OS subroutines.
QUICKREF
memcpy ansi pure
*/
#if MESOSPHERE_LIBC_MEMCPY_GENERIC
void *
__attribute__((weak))
memcpy (void * dst0,
const void * __restrict src0,
size_t len0)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dst = (char *) dst0;
char *src = (char *) src0;
void *save = dst0;
while (len0--)
{
*dst++ = *src++;
}
return save;
#else
char *dst = dst0;
const char *src = src0;
long *aligned_dst;
const long *aligned_src;
/* If the size is small, or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL(len0) && !UNALIGNED (src, dst))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (len0 >= BIGBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
len0 -= BIGBLOCKSIZE;
}
/* Copy one long word at a time if possible. */
while (len0 >= LITTLEBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
len0 -= LITTLEBLOCKSIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (len0--)
*dst++ = *src++;
return dst0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_MEMCPY_GENERIC */
/*
FUNCTION
<<memset>>---set an area of memory
INDEX
memset
SYNOPSIS
#include <string.h>
void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
DESCRIPTION
This function converts the argument <[c]> into an unsigned
char and fills the first <[length]> characters of the array
pointed to by <[dst]> to the value.
RETURNS
<<memset>> returns the value of <[dst]>.
PORTABILITY
<<memset>> is ANSI C.
<<memset>> requires no supporting OS subroutines.
QUICKREF
memset ansi pure
*/
#include <string.h>
#undef LBLOCKSIZE
#undef UNALIGNED
#undef TOO_SMALL
#define LBLOCKSIZE (sizeof(long))
#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1))
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
#if MESOSPHERE_LIBC_MEMSET_GENERIC
void *
__attribute__((weak))
memset (void *m,
int c,
size_t n)
{
char *s = (char *) m;
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
unsigned int i;
unsigned long buffer;
unsigned long *aligned_addr;
unsigned int d = c & 0xff; /* To avoid sign extension, copy C to an
unsigned variable. */
while (UNALIGNED (s))
{
if (n--)
*s++ = (char) c;
else
return m;
}
if (!TOO_SMALL (n))
{
/* If we get this far, we know that n is large and s is word-aligned. */
aligned_addr = (unsigned long *) s;
/* Store D into each char sized location in BUFFER so that
we can set large blocks quickly. */
buffer = (d << 8) | d;
buffer |= (buffer << 16);
for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
buffer = (buffer << i) | buffer;
/* Unroll the loop. */
while (n >= LBLOCKSIZE*4)
{
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
n -= 4*LBLOCKSIZE;
}
while (n >= LBLOCKSIZE)
{
*aligned_addr++ = buffer;
n -= LBLOCKSIZE;
}
/* Pick up the remainder with a bytewise loop. */
s = (char*)aligned_addr;
}
#endif /* not PREFER_SIZE_OVER_SPEED */
while (n--)
*s++ = (char) c;
return m;
}
#endif /* MESOSPHERE_LIBC_MEMSET_GENERIC */
/*
FUNCTION
<<memcmp>>---compare two memory areas
INDEX
memcmp
SYNOPSIS
#include <string.h>
int memcmp(const void *<[s1]>, const void *<[s2]>, size_t <[n]>);
DESCRIPTION
This function compares not more than <[n]> characters of the
object pointed to by <[s1]> with the object pointed to by <[s2]>.
RETURNS
The function returns an integer greater than, equal to or
less than zero according to whether the object pointed to by
<[s1]> is greater than, equal to or less than the object
pointed to by <[s2]>.
PORTABILITY
<<memcmp>> is ANSI C.
<<memcmp>> requires no supporting OS subroutines.
QUICKREF
memcmp ansi pure
*/
#undef LBLOCKSIZE
#undef UNALIGNED
#undef TOO_SMALL
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* How many bytes are copied each iteration of the word copy loop. */
#define LBLOCKSIZE (sizeof (long))
/* Threshold for punting to the byte copier. */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
#if MESOSPHERE_LIBC_MEMCMP_GENERIC
int
__attribute__((weak))
memcmp (const void *m1,
const void *m2,
size_t n)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
unsigned char *s1 = (unsigned char *) m1;
unsigned char *s2 = (unsigned char *) m2;
while (n--)
{
if (*s1 != *s2)
{
return *s1 - *s2;
}
s1++;
s2++;
}
return 0;
#else
unsigned char *s1 = (unsigned char *) m1;
unsigned char *s2 = (unsigned char *) m2;
unsigned long *a1;
unsigned long *a2;
/* If the size is too small, or either pointer is unaligned,
then we punt to the byte compare loop. Hopefully this will
not turn up in inner loops. */
if (!TOO_SMALL(n) && !UNALIGNED(s1,s2))
{
/* Otherwise, load and compare the blocks of memory one
word at a time. */
a1 = (unsigned long*) s1;
a2 = (unsigned long*) s2;
while (n >= LBLOCKSIZE)
{
if (*a1 != *a2)
break;
a1++;
a2++;
n -= LBLOCKSIZE;
}
/* check n mod LBLOCKSIZE remaining characters */
s1 = (unsigned char*)a1;
s2 = (unsigned char*)a2;
}
while (n--)
{
if (*s1 != *s2)
return *s1 - *s2;
s1++;
s2++;
}
return 0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_MEMCMP_GENERIC */
/*
FUNCTION
<<strncpy>>---counted copy string
INDEX
strncpy
SYNOPSIS
#include <string.h>
char *strncpy(char *restrict <[dst]>, const char *restrict <[src]>,
size_t <[length]>);
DESCRIPTION
<<strncpy>> copies not more than <[length]> characters from the
the string pointed to by <[src]> (including the terminating
null character) to the array pointed to by <[dst]>. If the
string pointed to by <[src]> is shorter than <[length]>
characters, null characters are appended to the destination
array until a total of <[length]> characters have been
written.
RETURNS
This function returns the initial value of <[dst]>.
PORTABILITY
<<strncpy>> is ANSI C.
<<strncpy>> requires no supporting OS subroutines.
QUICKREF
strncpy ansi pure
*/
#include <string.h>
#include <limits.h>
/*SUPPRESS 560*/
/*SUPPRESS 530*/
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
#if LONG_MAX == 2147483647L
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
#ifndef DETECTNULL
#error long int is not a 32bit or 64bit type.
#endif
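/* Note (illustrative, not in the upstream newlib source): DETECTNULL sets the
 * high bit of every byte position that held zero in X, with no false
 * positives. 32-bit worked example with X = 0x61620063 (one zero byte):
 *     X - 0x01010101          = 0x6060FF62
 *     (X - 0x01010101) & ~X   = 0x0000FF00
 *     ...        & 0x80808080 = 0x00008000   (nonzero -> NUL byte present)
 * For X = 0x61626364 (no zero byte) the same expression evaluates to 0. */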
#undef TOO_SMALL
#define TOO_SMALL(LEN) ((LEN) < sizeof (long))
#if MESOSPHERE_LIBC_STRNCPY_GENERIC
char *
strncpy (char *__restrict dst0,
const char *__restrict src0,
size_t count)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dscan;
const char *sscan;
dscan = dst0;
sscan = src0;
while (count > 0)
{
--count;
if ((*dscan++ = *sscan++) == '\0')
break;
}
while (count-- > 0)
*dscan++ = '\0';
return dst0;
#else
char *dst = dst0;
const char *src = src0;
long *aligned_dst;
const long *aligned_src;
/* If SRC and DEST are aligned and count is large enough, then copy words. */
if (!UNALIGNED (src, dst) && !TOO_SMALL (count))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* SRC and DEST are both "long int" aligned, try to do "long int"
sized copies. */
while (count >= sizeof (long int) && !DETECTNULL(*aligned_src))
{
count -= sizeof (long int);
*aligned_dst++ = *aligned_src++;
}
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (count > 0)
{
--count;
if ((*dst++ = *src++) == '\0')
break;
}
while (count-- > 0)
*dst++ = '\0';
return dst0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_STRNCPY_GENERIC */
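/* Note (illustrative, not in the upstream newlib source): strncpy pads with
 * NULs but does not guarantee NUL-termination when src has count or more
 * characters:
 *     char dst[4];
 *     strncpy(dst, "ab", 4);      -> { 'a', 'b', '\0', '\0' }
 *     strncpy(dst, "abcdef", 4);  -> { 'a', 'b', 'c', 'd' }   (no terminator) */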
/*
FUNCTION
<<strncmp>>---character string compare
INDEX
strncmp
SYNOPSIS
#include <string.h>
int strncmp(const char *<[a]>, const char * <[b]>, size_t <[length]>);
DESCRIPTION
<<strncmp>> compares up to <[length]> characters
from the string at <[a]> to the string at <[b]>.
RETURNS
If <<*<[a]>>> sorts lexicographically after <<*<[b]>>>,
<<strncmp>> returns a number greater than zero. If the two
strings are equivalent, <<strncmp>> returns zero. If <<*<[a]>>>
sorts lexicographically before <<*<[b]>>>, <<strncmp>> returns a
number less than zero.
PORTABILITY
<<strncmp>> is ANSI C.
<<strncmp>> requires no supporting OS subroutines.
QUICKREF
strncmp ansi pure
*/
#include <string.h>
#include <limits.h>
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* DETECTNULL returns nonzero if (long)X contains a NULL byte. */
#if LONG_MAX == 2147483647L
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
#ifndef DETECTNULL
#error long int is not a 32bit or 64bit type.
#endif
#if MESOSPHERE_LIBC_STRNCMP_GENERIC
int
strncmp (const char *s1,
const char *s2,
size_t n)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
if (n == 0)
return 0;
while (n-- != 0 && *s1 == *s2)
{
if (n == 0 || *s1 == '\0')
break;
s1++;
s2++;
}
return (*(unsigned char *) s1) - (*(unsigned char *) s2);
#else
unsigned long *a1;
unsigned long *a2;
if (n == 0)
return 0;
/* If s1 or s2 are unaligned, then compare bytes. */
if (!UNALIGNED (s1, s2))
{
/* If s1 and s2 are word-aligned, compare them a word at a time. */
a1 = (unsigned long*)s1;
a2 = (unsigned long*)s2;
while (n >= sizeof (long) && *a1 == *a2)
{
n -= sizeof (long);
/* If we've run out of bytes or hit a null, return zero
since we already know *a1 == *a2. */
if (n == 0 || DETECTNULL (*a1))
return 0;
a1++;
a2++;
}
/* A difference was detected in the last few bytes of s1, so search bytewise. */
s1 = (char*)a1;
s2 = (char*)a2;
}
while (n-- > 0 && *s1 == *s2)
{
/* If we've run out of bytes or hit a null, return zero
since we already know *s1 == *s2. */
if (n == 0 || *s1 == '\0')
return 0;
s1++;
s2++;
}
return (*(unsigned char *) s1) - (*(unsigned char *) s2);
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_STRNCMP_GENERIC */
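/* Note (illustrative, not in the upstream newlib source): only the first n
 * characters participate in the comparison:
 *     strncmp("abcdef", "abcxyz", 3) == 0
 *     strncmp("abcdef", "abcxyz", 4) <  0    ('d' < 'x') */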
#ifdef __cplusplus
} /* extern "C" */
#endif