diff --git a/arm9/source/common/asmfunc.h b/arm9/source/common/asmfunc.h
new file mode 100644
index 0000000..0c528f0
--- /dev/null
+++ b/arm9/source/common/asmfunc.h
@@ -0,0 +1,32 @@
+#pragma once
+
+/*
+ * This file is part of fastboot 3DS
+ * Copyright (C) 2017 derrek, profi200
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !__ASSEMBLER__
+ #error Only include this in assembly files!
+#endif
+
+@ ASM_FUNC name: starts a global function symbol called name, placed in its own .text.name section.
+.macro ASM_FUNC name
+ .section .text.\name, "ax", %progbits @ "ax" = allocatable + executable; % is used because @ starts a comment in this syntax
+ .global \name @ make the symbol visible to other object files
+ .type \name %function @ mark the ELF symbol as a function
+ .align 2 @ on ARM the operand is a power of two, so this is 4-byte alignment
+\name:
+.endm
diff --git a/arm9/source/common/mmio.h b/arm9/source/common/mmio.h
new file mode 100644
index 0000000..780221c
--- /dev/null
+++ b/arm9/source/common/mmio.h
@@ -0,0 +1,44 @@
+#pragma once
+
+/*
+ * This file is part of fastboot 3DS
+ * Copyright (C) 2019 Aurora Wright, TuxSH, derrek, profi200
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// Based on https://github.com/AuroraWright/Luma3DS/blob/master/arm9/source/alignedseqmemcpy.s
+
+#include "common.h"
+
+
+/**
+ * @brief Copies size bytes from src to dst with accesses in strictly ascending address order.
+ *        Implemented in mmio.s as 8-word transfers, then single words, then at most one halfword and one byte.
+ *
+ * @param dst Destination (memory or I/O registers). Expected to be 32-bit aligned.
+ * @param src Source (memory or I/O registers). Expected to be 32-bit aligned and must not overlap dst.
+ * @param size Number of bytes to copy.
+ */
+void iomemcpy(vu32 *restrict dst, const vu32 *restrict src, u32 size);
+
+/**
+ * @brief Fills size bytes at ptr with a 32-bit value using stores in strictly ascending address order.
+ *        Implemented in mmio.s as 8-word stores, then single words, then at most one halfword and one byte.
+ *
+ * @param ptr Destination (memory or I/O registers). Expected to be 32-bit aligned.
+ * @param value Word written repeatedly. A trailing halfword/byte takes the low 16/8 bits of value.
+ * @param size Number of bytes to fill.
+ */
+void iomemset(vu32 *ptr, u32 value, u32 size);
diff --git a/arm9/source/common/mmio.s b/arm9/source/common/mmio.s
new file mode 100644
index 0000000..fb0e5ab
--- /dev/null
+++ b/arm9/source/common/mmio.s
@@ -0,0 +1,87 @@
+/*
+ * This file is part of fastboot 3DS
+ * Copyright (C) 2019 Aurora Wright, TuxSH, derrek, profi200
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ @ Based on https://github.com/AuroraWright/Luma3DS/blob/master/arm9/source/alignedseqmemcpy.s
+
+#include "asmfunc.h"
+
+.arm
+.cpu arm946e-s
+.fpu softvfp
+
+
+@ Copies size bytes from src to dst in ascending address order: 32-byte blocks, then words, then one halfword, then one byte.
+@ void iomemcpy(vu32 *restrict dst, const vu32 *restrict src, u32 size)
+ASM_FUNC iomemcpy
+ bics r12, r2, #31 @ r12 = size with the low 5 bits cleared (bytes to move as 32-byte blocks)
+ beq iomemcpy_test_words @ no full block, skip the block loop
+ stmfd sp!, {r4-r10} @ r4-r10 are overwritten by the block loop; keep the caller values on the stack
+ iomemcpy_blocks_lp:
+ ldmia r1!, {r3-r10} @ load 8 words from src, src advances by 32
+ stmia r0!, {r3-r10} @ store them to dst, dst advances by 32
+ subs r12, #32
+ bne iomemcpy_blocks_lp @ repeat until all block bytes are moved
+ ldmfd sp!, {r4-r10} @ restore the caller values
+iomemcpy_test_words:
+ ands r12, r2, #28 @ r12 = size bits 2-4, the bytes left to move as whole words
+ beq iomemcpy_halfword_byte
+ iomemcpy_words_lp:
+ ldr r3, [r1], #4 @ one word per iteration, post-incrementing src
+ str r3, [r0], #4 @ and dst
+ subs r12, #4
+ bne iomemcpy_words_lp
+iomemcpy_halfword_byte:
+ tst r2, #2 @ size bit 1 set: copy one trailing halfword
+ ldrneh r3, [r1], #2
+ strneh r3, [r0], #2
+ tst r2, #1 @ size bit 0 set: copy one trailing byte
+ ldrneb r3, [r1]
+ strneb r3, [r0]
+ bx lr
+
+@ Fills size bytes at ptr with the 32-bit value using ascending stores: 32-byte blocks, then words, then one halfword, then one byte.
+@ void iomemset(vu32 *ptr, u32 value, u32 size)
+ASM_FUNC iomemset
+ bics r12, r2, #31 @ r12 = size with the low 5 bits cleared (bytes to store as 32-byte blocks)
+ beq iomemset_test_words @ no full block, skip the block loop
+ stmfd sp!, {r4-r9} @ r4-r9 are overwritten below; keep the caller values on the stack
+ mov r3, r1 @ replicate value into r3-r9 so that r1 and r3-r9 hold 8 copies
+ mov r4, r1
+ mov r5, r1
+ mov r6, r1
+ mov r7, r1
+ mov r8, r1
+ mov r9, r1
+ iomemset_blocks_lp:
+ stmia r0!, {r1, r3-r9} @ store 8 words, ptr advances by 32
+ subs r12, #32
+ bne iomemset_blocks_lp
+ ldmfd sp!, {r4-r9} @ restore the caller values
+iomemset_test_words:
+ ands r12, r2, #28 @ r12 = size bits 2-4, the bytes left to store as whole words
+ beq iomemset_halfword_byte
+ iomemset_words_lp:
+ str r1, [r0], #4 @ one word per iteration, post-incrementing ptr
+ subs r12, #4
+ bne iomemset_words_lp
+iomemset_halfword_byte:
+ tst r2, #2 @ size bit 1 set: store the low halfword of value
+ strneh r1, [r0], #2
+ tst r2, #1 @ size bit 0 set: store the low byte of value
+ strneb r1, [r0]
+ bx lr
diff --git a/arm9/source/crypto/rsa.c b/arm9/source/crypto/rsa.c
index faba2f7..e18342d 100644
--- a/arm9/source/crypto/rsa.c
+++ b/arm9/source/crypto/rsa.c
@@ -16,15 +16,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/*
- * To ensure correct functionality, the builtin memcpy should perform a sequential copy.
- * If not, it should be replaced with a different reimplementation that does for sure act sequential.
- * Or alternatively, it's own memcpy with it's own name, say perhaps seqmemcpy, and memcpy calls replaced.
- */
-
#include "common.h"
#include "rsa.h"
#include "sha.h"
+#include "mmio.h"
@@ -40,9 +35,9 @@
#define REGs_RSA_SLOT2 ((vu32*)(RSA_REGS_BASE + 0x120))
#define REGs_RSA_SLOT3 ((vu32*)(RSA_REGS_BASE + 0x130))
#define rsaSlots ((RsaSlot*)(RSA_REGS_BASE + 0x100))
-#define REG_RSA_EXP ((vu32*)(RSA_REGS_BASE + 0x200))
-#define REG_RSA_MOD ( (RSA_REGS_BASE + 0x400))
-#define REG_RSA_TXT ( (RSA_REGS_BASE + 0x800))
+#define REGs_RSA_EXP ((vu32*)(RSA_REGS_BASE + 0x200))
+#define REGs_RSA_MOD ((vu32*)(RSA_REGS_BASE + 0x400))
+#define REGs_RSA_TXT ((vu32*)(RSA_REGS_BASE + 0x800))
typedef struct
{
@@ -69,7 +64,7 @@ void RSA_selectKeyslot(u8 keyslot)
REG_RSA_CNT = (REG_RSA_CNT & ~RSA_KEYSLOT(0xFu)) | RSA_KEYSLOT(keyslot);
}
-bool RSA_setKey2048(u8 keyslot, const u8 *const mod, u32 exp)
+bool RSA_setKey2048(u8 keyslot, const u32 *const mod, u32 exp)
{
RsaSlot *slot = &rsaSlots[keyslot];
rsaWaitBusy();
@@ -78,35 +73,35 @@ bool RSA_setKey2048(u8 keyslot, const u8 *const mod, u32 exp)
if(!(slot->REG_RSA_SLOTCNT & RSA_KEY_UNK_BIT31)) slot->REG_RSA_SLOTCNT &= ~RSA_KEY_STAT_SET;
REG_RSA_CNT = RSA_INPUT_NORMAL | RSA_INPUT_BIG | RSA_KEYSLOT(keyslot);
- memset((void*)REG_RSA_EXP, 0, 0x100 - 4);
- REG_RSA_EXP[(0x100>>2) - 1] = exp;
+ iomemset(REGs_RSA_EXP, 0, 0x100 - 4);
+ REGs_RSA_EXP[(0x100>>2) - 1] = exp;
if(slot->REG_RSA_SLOTSIZE != RSA_SLOTSIZE_2048) return false;
- memcpy((void*)REG_RSA_MOD, mod, 0x100);
+ iomemcpy(REGs_RSA_MOD, mod, 0x100);
return true;
}
-bool RSA_decrypt2048(void *const decSig, const void *const encSig)
+bool RSA_decrypt2048(u32 *const decSig, const u32 *const encSig)
{
const u8 keyslot = RSA_GET_KEYSLOT;
rsaWaitBusy();
if(!(rsaSlots[keyslot].REG_RSA_SLOTCNT & RSA_KEY_STAT_SET)) return false;
REG_RSA_CNT |= RSA_INPUT_NORMAL | RSA_INPUT_BIG;
- memcpy((void*)REG_RSA_TXT, encSig, 0x100);
+ iomemcpy(REGs_RSA_TXT, encSig, 0x100); // 0x100 bytes (2048 bits) written to the text registers in ascending order
REG_RSA_CNT |= RSA_ENABLE;
rsaWaitBusy();
- memcpy(decSig, (void*)REG_RSA_TXT, 0x100);
+ iomemcpy(decSig, REGs_RSA_TXT, 0x100); // read the 0x100-byte result back from the same registers
return true;
}
bool RSA_verify2048(const u32 *const encSig, const u32 *const data, u32 size)
{
- u8 decSig[0x100];
- if(!RSA_decrypt2048(decSig, encSig)) return false;
+ alignas(4) u8 decSig[0x100];
+ if(!RSA_decrypt2048((u32*)(void*)decSig, encSig)) return false;
if(decSig[0] != 0x00 || decSig[1] != 0x01) return false;
diff --git a/arm9/source/crypto/rsa.h b/arm9/source/crypto/rsa.h
index 3f230e1..be1813f 100644
--- a/arm9/source/crypto/rsa.h
+++ b/arm9/source/crypto/rsa.h
@@ -66,7 +66,7 @@ void RSA_selectKeyslot(u8 keyslot);
*
* @return Returns true on success, false otherwise.
*/
-bool RSA_setKey2048(u8 keyslot, const u8 *const mod, u32 exp);
+bool RSA_setKey2048(u8 keyslot, const u32 *const mod, u32 exp);
/**
* @brief Decrypts a RSA 2048 signature.
@@ -76,7 +76,7 @@ bool RSA_setKey2048(u8 keyslot, const u8 *const mod, u32 exp);
*
* @return Returns true on success, false otherwise.
*/
-bool RSA_decrypt2048(void *const decSig, const void *const encSig);
+bool RSA_decrypt2048(u32 *const decSig, const u32 *const encSig);
/**
* @brief Verifies a RSA 2048 SHA 256 signature.
diff --git a/arm9/source/crypto/sha.c b/arm9/source/crypto/sha.c
index 7f8fe48..cd2df7c 100644
--- a/arm9/source/crypto/sha.c
+++ b/arm9/source/crypto/sha.c
@@ -1,9 +1,5 @@
-/*
- * To ensure correct functionality, the builtin memcpy should perform a sequential copy.
- * If not, it should be replaced with a different reimplementation that does for sure act sequential.
- * Or alternatively, it's own memcpy with it's own name, say perhaps seqmemcpy, and memcpy calls replaced, as well the line "*((_sha_block*)REG_SHAINFIFO) = *((const _sha_block*)src32);" inside sha_update, to call explicitly seqmemcpy like, "seqmemcpy(REG_SHAINFIFO, src32, 0x40);".
- */
#include "sha.h"
+#include "mmio.h"
typedef struct
{
@@ -27,7 +23,7 @@ void sha_update(const void* src, u32 size)
size -= 0x40;
}
while(*REG_SHACNT & 1);
- if(size) memcpy((void*)REG_SHAINFIFO, src32, size);
+ if(size) iomemcpy((void*)REG_SHAINFIFO, src32, size);
}
void sha_get(void* res) {
@@ -36,7 +32,7 @@ void sha_get(void* res) {
*REG_SHACNT = (*REG_SHACNT & ~SHA_NORMAL_ROUND) | SHA_FINAL_ROUND;
while(*REG_SHACNT & SHA_FINAL_ROUND);
while(*REG_SHACNT & 1);
- if (hash_size) memcpy(res, (void*)REG_SHAHASH, hash_size);
+ if (hash_size) iomemcpy(res, (void*)REG_SHAHASH, hash_size);
}
void sha_quick(void* res, const void* src, u32 size, u32 mode) {
diff --git a/arm9/source/game/cert.c b/arm9/source/game/cert.c
index 3b01f0f..313dd71 100644
--- a/arm9/source/game/cert.c
+++ b/arm9/source/game/cert.c
@@ -1,7 +1,7 @@
#include "cert.h"
#include "ff.h"
-u32 LoadCertFromCertDb(u64 offset, Certificate* cert, u8* mod, u32* exp) {
+u32 LoadCertFromCertDb(u64 offset, Certificate* cert, u32* mod, u32* exp) {
Certificate cert_local;
FIL db;
UINT bytes_read;
diff --git a/arm9/source/game/cert.h b/arm9/source/game/cert.h
index ad7e73d..3ef3604 100644
--- a/arm9/source/game/cert.h
+++ b/arm9/source/game/cert.h
@@ -19,4 +19,4 @@ typedef struct {
u8 padding1[0x34];
} __attribute__((packed)) Certificate;
-u32 LoadCertFromCertDb(u64 offset, Certificate* cert, u8* mod, u32* exp);
+u32 LoadCertFromCertDb(u64 offset, Certificate* cert, u32* mod, u32* exp);
diff --git a/arm9/source/game/ticket.c b/arm9/source/game/ticket.c
index f128029..239c7c1 100644
--- a/arm9/source/game/ticket.c
+++ b/arm9/source/game/ticket.c
@@ -17,7 +17,7 @@ u32 ValidateTicket(Ticket* ticket) {
u32 ValidateTicketSignature(Ticket* ticket) {
static bool got_modexp = false;
- static u8 mod[0x100] = { 0 };
+ static u32 mod[0x100 / 0x4] = { 0 };
static u32 exp = 0;
if (!got_modexp) {
diff --git a/arm9/source/game/tmd.c b/arm9/source/game/tmd.c
index d2ddbdc..2eef622 100644
--- a/arm9/source/game/tmd.c
+++ b/arm9/source/game/tmd.c
@@ -16,7 +16,7 @@ u32 ValidateTmd(TitleMetaData* tmd) {
u32 ValidateTmdSignature(TitleMetaData* tmd) {
static bool got_modexp = false;
- static u8 mod[0x100] = { 0 };
+ static u32 mod[0x100 / 4] = { 0 };
static u32 exp = 0;
if (!got_modexp) {