forked from Mirror/GodMode9
revert seqmemcpy and memcpy reimplementation
This commit is contained in:
parent
c644820e41
commit
1e2b45941b
@ -10,7 +10,7 @@ INCDIRS := source source/common source/filesys source/crypto source/fatfs source
|
|||||||
INCLUDE := $(foreach dir,$(INCDIRS),-I"$(shell pwd)/$(dir)")
|
INCLUDE := $(foreach dir,$(INCDIRS),-I"$(shell pwd)/$(dir)")
|
||||||
|
|
||||||
ASFLAGS += $(SUBARCH) $(INCLUDE)
|
ASFLAGS += $(SUBARCH) $(INCLUDE)
|
||||||
CFLAGS += $(SUBARCH) $(INCLUDE)
|
CFLAGS += $(SUBARCH) $(INCLUDE) -fno-builtin-memcpy
|
||||||
LDFLAGS += $(SUBARCH) -Wl,-Map,$(TARGET).map
|
LDFLAGS += $(SUBARCH) -Wl,-Map,$(TARGET).map
|
||||||
|
|
||||||
include ../Makefile.common
|
include ../Makefile.common
|
||||||
|
@ -41,9 +41,6 @@
|
|||||||
#define countof(x) \
|
#define countof(x) \
|
||||||
(sizeof(x) / sizeof((x)[0]))
|
(sizeof(x) / sizeof((x)[0]))
|
||||||
|
|
||||||
#define seqmemcpy(d,o,s) \
|
|
||||||
for(u32 i = 0; i < (s); i++) ((u8*)(void*)(d))[i] = ((u8*)(void*)(o))[i]
|
|
||||||
|
|
||||||
#define bkpt \
|
#define bkpt \
|
||||||
asm("bkpt\n\t")
|
asm("bkpt\n\t")
|
||||||
|
|
||||||
|
72
arm9/source/common/memcpy.s
Normal file
72
arm9/source/common/memcpy.s
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
@ memcpy_arm946e-s - hand written reimplementation of memcpy to be sequential
|
||||||
|
@ Written in 2019 by luigoalma <luigoalma at gmail dot com>
|
||||||
|
@ To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
|
||||||
|
@ For a copy of CC0 Public Domain Dedication, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
||||||
|
.cpu arm946e-s
|
||||||
|
.arch armv5te
|
||||||
|
.arm
|
||||||
|
.section .text.memcpy, "ax", %progbits
|
||||||
|
.align 2
|
||||||
|
.global memcpy
|
||||||
|
.syntax unified
|
||||||
|
.type memcpy, %function
|
||||||
|
memcpy:
    @ C view: void *memcpy(void *dest, const void *src, size_t length)
    @   in:  r0 = dest, r1 = src, r2 = length in bytes
    @   out: r0 = dest as passed in (stacked on entry, popped on every exit)
    @   scratch: r3, r12; r4-r9 and lr are saved on the stack and restored
    @ Every load/store below uses post-increment or ldm/stm with writeback,
    @ so memory is always walked from low to high addresses.

    @ zero-length copy: return immediately, nothing has been stacked yet
    cmp     r2, #0
    bxeq    lr

    push    {r0, r4-r9, lr}
    @ hint the first source line before starting
    pld     [r1]

    @ both pointers already word aligned? then go straight to the bulk paths
    orr     r12, r0, r1
    ands    r12, r12, #3
    beq     .Lblock_check

    @ at least one pointer is misaligned: compare the low two bits of each.
    @ if they differ the pointers can never be word aligned together,
    @ so the whole copy is done by the byte loop.
    and     r4, r0, #3
    and     r5, r1, #3
    cmp     r4, r5
    bne     .Lbyte_copy

    @ same misalignment on both sides: r4 = bytes up to the next word boundary
    rsb     r4, r4, #4
.Lalign_copy:
    ldrb    r3, [r1], #1
    strb    r3, [r0], #1
    subs    r2, r2, #1
    popeq   {r0, r4-r9, pc}         @ length ran out while aligning
    subs    r4, r4, #1
    bne     .Lalign_copy

.Lblock_check:
    @ r3 = number of whole 32-byte blocks; skip the block loop when there are none
    lsrs    r3, r2, #5
    beq     .Lword_check
.Lblock_copy:
    @ eight registers per iteration = 32 bytes; lr is free as scratch because
    @ the real return address is on the stack
    ldm     r1!, {r4-r9, r12, lr}
    stm     r0!, {r4-r9, r12, lr}
    subs    r3, r3, #1
    bne     .Lblock_copy
    @ keep only the tail (length mod 32); done if it is empty
    ands    r2, r2, #0x1F
    popeq   {r0, r4-r9, pc}

.Lword_check:
    @ r3 = number of whole words left; fewer than 4 bytes goes to the byte loop
    lsrs    r3, r2, #2
    beq     .Lbyte_copy
.Lword_copy:
    ldr     r12, [r1], #4
    str     r12, [r0], #4
    subs    r3, r3, #1
    bne     .Lword_copy
    @ keep only the tail (length mod 4); Z set here means the copy is complete
    ands    r2, r2, #0x3

.Lbyte_check:
    @ shared exit test: taken with Z set from the ands above or the subs below
    popeq   {r0, r4-r9, pc}
.Lbyte_copy:
    @ plain byte-at-a-time copy of whatever is left in r2 (always non-zero here)
    ldrb    r3, [r1], #1
    strb    r3, [r0], #1
    subs    r2, r2, #1
    b       .Lbyte_check

    .size   memcpy, .-memcpy
|
@ -16,6 +16,12 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To ensure correct functionality, the builtin memcpy should perform a sequential copy.
|
||||||
|
* If it does not, it should be replaced with a reimplementation that is guaranteed to copy sequentially.
|
||||||
|
* Alternatively, provide a dedicated sequential copy under its own name (say, seqmemcpy) and replace the memcpy calls with it.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "rsa.h"
|
#include "rsa.h"
|
||||||
#include "sha.h"
|
#include "sha.h"
|
||||||
@ -76,7 +82,7 @@ bool RSA_setKey2048(u8 keyslot, const u8 *const mod, u32 exp)
|
|||||||
REG_RSA_EXP[(0x100>>2) - 1] = exp;
|
REG_RSA_EXP[(0x100>>2) - 1] = exp;
|
||||||
|
|
||||||
if(slot->REG_RSA_SLOTSIZE != RSA_SLOTSIZE_2048) return false;
|
if(slot->REG_RSA_SLOTSIZE != RSA_SLOTSIZE_2048) return false;
|
||||||
seqmemcpy((void*)REG_RSA_MOD, mod, 0x100);
|
memcpy((void*)REG_RSA_MOD, mod, 0x100);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -88,11 +94,11 @@ bool RSA_decrypt2048(void *const decSig, const void *const encSig)
|
|||||||
if(!(rsaSlots[keyslot].REG_RSA_SLOTCNT & RSA_KEY_STAT_SET)) return false;
|
if(!(rsaSlots[keyslot].REG_RSA_SLOTCNT & RSA_KEY_STAT_SET)) return false;
|
||||||
|
|
||||||
REG_RSA_CNT |= RSA_INPUT_NORMAL | RSA_INPUT_BIG;
|
REG_RSA_CNT |= RSA_INPUT_NORMAL | RSA_INPUT_BIG;
|
||||||
seqmemcpy((void*)REG_RSA_TXT, encSig, 0x100);
|
memcpy((void*)REG_RSA_TXT, encSig, 0x100);
|
||||||
|
|
||||||
REG_RSA_CNT |= RSA_ENABLE;
|
REG_RSA_CNT |= RSA_ENABLE;
|
||||||
rsaWaitBusy();
|
rsaWaitBusy();
|
||||||
seqmemcpy(decSig, (void*)REG_RSA_TXT, 0x100);
|
memcpy(decSig, (void*)REG_RSA_TXT, 0x100);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,15 @@
|
|||||||
|
/*
|
||||||
|
* To ensure correct functionality, the builtin memcpy should perform a sequential copy.
|
||||||
|
* If it does not, it should be replaced with a reimplementation that is guaranteed to copy sequentially.
|
||||||
|
* Alternatively, provide a dedicated sequential copy under its own name (say, seqmemcpy) and replace the memcpy calls with it, as well as the line "*((_sha_block*)REG_SHAINFIFO) = *((const _sha_block*)src32);" inside sha_update, which should then call seqmemcpy explicitly, e.g. "seqmemcpy(REG_SHAINFIFO, src32, 0x40);".
|
||||||
|
*/
|
||||||
#include "sha.h"
|
#include "sha.h"
|
||||||
|
|
||||||
|
/* One SHA input block wrapped in a struct (16 u32 words; presumably 0x40 bytes,
 * assuming u32 is 4 bytes wide), so that sha_update can move a whole block into
 * REG_SHAINFIFO with a single struct assignment. */
typedef struct
|
||||||
|
{
|
||||||
|
u32 data[16];
|
||||||
|
} _sha_block;
|
||||||
|
|
||||||
void sha_init(u32 mode)
|
void sha_init(u32 mode)
|
||||||
{
|
{
|
||||||
while(*REG_SHACNT & 1);
|
while(*REG_SHACNT & 1);
|
||||||
@ -12,12 +22,12 @@ void sha_update(const void* src, u32 size)
|
|||||||
|
|
||||||
while(size >= 0x40) {
|
while(size >= 0x40) {
|
||||||
while(*REG_SHACNT & 1);
|
while(*REG_SHACNT & 1);
|
||||||
seqmemcpy((void*)REG_SHAINFIFO, src32, 0x40);
|
*((_sha_block*)REG_SHAINFIFO) = *((const _sha_block*)src32);
|
||||||
src32 += 16;
|
src32 += 16;
|
||||||
size -= 0x40;
|
size -= 0x40;
|
||||||
}
|
}
|
||||||
while(*REG_SHACNT & 1);
|
while(*REG_SHACNT & 1);
|
||||||
if(size) seqmemcpy((void*)REG_SHAINFIFO, src32, size);
|
if(size) memcpy((void*)REG_SHAINFIFO, src32, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sha_get(void* res) {
|
void sha_get(void* res) {
|
||||||
@ -26,7 +36,7 @@ void sha_get(void* res) {
|
|||||||
*REG_SHACNT = (*REG_SHACNT & ~SHA_NORMAL_ROUND) | SHA_FINAL_ROUND;
|
*REG_SHACNT = (*REG_SHACNT & ~SHA_NORMAL_ROUND) | SHA_FINAL_ROUND;
|
||||||
while(*REG_SHACNT & SHA_FINAL_ROUND);
|
while(*REG_SHACNT & SHA_FINAL_ROUND);
|
||||||
while(*REG_SHACNT & 1);
|
while(*REG_SHACNT & 1);
|
||||||
if (hash_size) seqmemcpy(res, (void*)REG_SHAHASH, hash_size);
|
if (hash_size) memcpy(res, (void*)REG_SHAHASH, hash_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sha_quick(void* res, const void* src, u32 size, u32 mode) {
|
void sha_quick(void* res, const void* src, u32 size, u32 mode) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user