Skip to content

Commit

Permalink
Increase speeds of same70 by using Tightly Coupled Memory and DCache …
Browse files Browse the repository at this point in the history
…(#6708) (#410)

* docs: Add step rate benchmark for same70
Signed-off-by: Luke Vuksta <wulfstawulfsta@gmail.com>

* atsam: Add data memory barrier to USB driver
Signed-off-by: Luke Vuksta <wulfstawulfsta@gmail.com>

* atsam: Enable TCM and cache for atsame70
Signed-off-by: Luke Vuksta <wulfstawulfsta@gmail.com>

---------

Co-authored-by: Wulfsta <wulfstawulfsta@gmail.com>
  • Loading branch information
rogerlz and Wulfsta authored Oct 29, 2024
1 parent 863b428 commit e6c9f25
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/Benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ config_stepper oid=2 step_pin=PC28 dir_pin=PA4 invert_step=-1 step_pulse_ticks=0
finalize_config crc=0
```


The test was last run on commit `34e9ea55` with gcc version
`arm-none-eabi-gcc (NixOS 10.3-2021.10) 10.3.1` on a SAME70Q20B
micro-controller.
Expand Down
1 change: 1 addition & 0 deletions docs/Features.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ represent total number of steps per second on the micro-controller.
| AR100 | 3529K | 2507K |
| STM32F407 | 3652K | 2459K |
| STM32F446 | 3913K | 2634K |
| SAME70 | 6667K | 4737K |
| STM32H743 | 9091K | 6061K |

If unsure of the micro-controller on a particular board, find the
Expand Down
22 changes: 19 additions & 3 deletions src/atsam/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ config CLOCK_FREQ

config FLASH_SIZE
hex
default 0x20000 if MACH_SAME70
default 0x80000

config FLASH_BOOT_ADDRESS
Expand All @@ -84,18 +85,33 @@ config RAM_START
config RAM_SIZE
hex
default 0x18000 if MACH_SAM3X
default 0x20000 if MACH_SAM4
default 0x40000 if MACH_SAME70
default 0x20000 if MACH_SAM4 || MACH_SAME70

config STACK_SIZE
int
default 512

config FLASH_APPLICATION_ADDRESS
hex
default 0x400000 if MACH_SAM4 || MACH_SAME70
default 0x0 if MACH_SAME70
default 0x400000 if MACH_SAM4
default 0x80000

# Load address of the program image on the SAME70. The SAME70 linker
# script places the .text image here (AT(...)), and SystemInit() uses it
# as the source address of the DMA copy to CONFIG_FLASH_APPLICATION_ADDRESS.
# NOTE(review): presumably this is where the internal flash is mapped -
# confirm against the datasheet.
config ARMCM_ITCM_FLASH_MIRROR_START
depends on MACH_SAME70
hex
default 0x400000

# Origin of the "dtcm" memory region declared in the SAME70 linker script.
config ARMCM_DTCM_START
depends on MACH_SAME70
hex
default 0x20000000

# Length of the "dtcm" memory region declared in the SAME70 linker script
# (0x20000 bytes = 128KiB, matching the TCM size SystemInit() configures).
config ARMCM_DTCM_SIZE
depends on MACH_SAME70
hex
default 0x20000

choice
prompt "Communication interface"
config ATSAM_USB
Expand Down
7 changes: 4 additions & 3 deletions src/atsam/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ CFLAGS-$(CONFIG_MACH_SAM4E) += -Ilib/sam4e/include
CFLAGS-$(CONFIG_MACH_SAME70) += -Ilib/same70b/include
CFLAGS += $(CFLAGS-y) -D__$(MCU)__ -mthumb -Ilib/cmsis-core -Ilib/fast-hash

CFLAGS_klipper.elf += -nostdlib -lgcc -lc_nano
CFLAGS_klipper.elf += -T $(OUT)src/generic/armcm_link.ld
$(OUT)klipper.elf: $(OUT)src/generic/armcm_link.ld
samlink-y := $(OUT)src/generic/armcm_link.ld
samlink-$(CONFIG_MACH_SAME70) := $(OUT)src/atsam/same70_link.ld
CFLAGS_klipper.elf += -nostdlib -lgcc -lc_nano -T $(samlink-y)
$(OUT)klipper.elf: $(samlink-y)

# Add source files
src-y += atsam/main.c atsam/gpio.c atsam/i2c.c atsam/spi.c
Expand Down
4 changes: 2 additions & 2 deletions src/atsam/fdcan.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ struct fdcan_msg_ram {
struct fdcan_fifo TXFIFO[3];
};

// Message ram is in regular memory
static struct fdcan_msg_ram MSG_RAM;
// Message ram is in DTCM - locate it there to avoid cache.
static struct fdcan_msg_ram MSG_RAM __section(".dtcm.bss");


/****************************************************************
Expand Down
1 change: 1 addition & 0 deletions src/atsam/sam3_usb.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ usb_write_packet(uint32_t ep, const uint8_t *data, uint32_t len)
uint8_t *dest = usb_fifo(ep);
while (len--)
*dest++ = *data++;
__DMB();
}

static void
Expand Down
81 changes: 81 additions & 0 deletions src/atsam/same70_link.lds.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// ARM Cortex-M linker script for the SAME70 (code runs from ITCM, with a DTCM region)
//
// Copyright (C) 2019-2024 Kevin O'Connor <kevin@koconnor.net>
// Copyright (C) 2024 Luke Vuksta <wulfstawulfsta@gmail.com>
//
// This file may be distributed under the terms of the GNU GPLv3 license.

#include "autoconf.h" // CONFIG_FLASH_APPLICATION_ADDRESS

OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm")
OUTPUT_ARCH(arm)

// Memory regions used for section placement below.
//  rom  - where code and read-only data are linked to execute from. The
//         .text image is *loaded* elsewhere (see the AT() on .text);
//         same70_sysinit.c DMA-copies it from
//         CONFIG_ARMCM_ITCM_FLASH_MIRROR_START to
//         CONFIG_FLASH_APPLICATION_ADDRESS and then enables the ITCM.
//  ram  - regular RAM holding .data, .bss and the stack.
//  dtcm - Data Tightly Coupled Memory; only the .dtcm_bss output section
//         is placed here.
MEMORY
{
rom (rx) : ORIGIN = CONFIG_FLASH_APPLICATION_ADDRESS , LENGTH = CONFIG_FLASH_SIZE
ram (rwx) : ORIGIN = CONFIG_RAM_START , LENGTH = CONFIG_RAM_SIZE
dtcm (rw) : ORIGIN = CONFIG_ARMCM_DTCM_START , LENGTH = CONFIG_ARMCM_DTCM_SIZE
}

// Output section layout:
//  .text      - runs from "rom", loaded at the flash mirror address
//  .data      - runs from "ram", loaded directly after the .text image
//  .bss       - "ram", not part of the loaded image
//  .stack     - last CONFIG_STACK_SIZE bytes of "ram"
//  .dtcm_bss  - "dtcm", not part of the loaded image
SECTIONS
{
// Vector table, code and read-only data. The execution address (VMA) is
// in "rom"; the load address (LMA) is forced to
// CONFIG_ARMCM_ITCM_FLASH_MIRROR_START by the AT() clause.
.text : AT (CONFIG_ARMCM_ITCM_FLASH_MIRROR_START) {
. = ALIGN(4);
_text_vectortable_start = .;
KEEP(*(.vector_table))
_text_vectortable_end = .;
*(.text .text.*)
*(.ramfunc .ramfunc.*)
*(.rodata .rodata*)
} > rom

// Size in bytes of the .text output section, exported as a symbol value.
// same70_sysinit.c declares it "extern" and takes its address to obtain
// the number when programming the DMA copy length.
_text_size = SIZEOF (.text);
// Compute where the .data initializers are loaded: immediately after the
// .text image at the flash mirror address, rounded up to 4 bytes.
. = CONFIG_ARMCM_ITCM_FLASH_MIRROR_START + _text_size;
. = ALIGN(4);
_data_flash = .;

// Initialized data: lives in "ram" at run time, load image at _data_flash.
// NOTE(review): presumably the startup code copies _data_flash to
// _data_start.._data_end - that code is not in this file.
.data : AT (_data_flash)
{
. = ALIGN(4);
_data_start = .;
*(.data .data.*)
. = ALIGN(4);
_data_end = .;
} > ram

// Zero-initialized data; NOLOAD keeps it out of the loaded image.
// NOTE(review): presumably cleared by the startup code using
// _bss_start/_bss_end - that code is not in this file.
.bss (NOLOAD) :
{
. = ALIGN(4);
_bss_start = .;
*(.bss .bss.*)
*(COMMON)
. = ALIGN(4);
_bss_end = .;
} > ram

// Reserve the final CONFIG_STACK_SIZE bytes of "ram" for the stack, so
// _stack_end equals CONFIG_RAM_START + CONFIG_RAM_SIZE.
_stack_start = CONFIG_RAM_START + CONFIG_RAM_SIZE - CONFIG_STACK_SIZE ;
.stack _stack_start (NOLOAD) :
{
. = . + CONFIG_STACK_SIZE;
_stack_end = .;
} > ram

// Zero-initialized data placed in DTCM. Only input sections named exactly
// ".dtcm.bss" are collected (no ".dtcm.bss.*" wildcard). SystemInit() in
// same70_sysinit.c clears _dtcm_bss_start.._dtcm_bss_end itself, after it
// has verified the TCM GPNVM bits.
.dtcm_bss (NOLOAD) :
{
. = ALIGN(4);
_dtcm_bss_start = .;
*(.dtcm.bss)
. = ALIGN(4);
_dtcm_bss_end = .;
} > dtcm

/DISCARD/ : {
// The .init/.fini sections are used by __libc_init_array(), but
// that isn't needed so no need to include them in the binary.
*(.init)
*(.fini)
// Don't include exception tables
*(.ARM.extab)
*(.ARM.exidx)
}
}
87 changes: 82 additions & 5 deletions src/atsam/same70_sysinit.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// This code is from lib/sam4e/gcc/system_sam4e.c and modified for the SAM E70

#include <string.h> // memset
#include "internal.h"

/* Clock Settings (300MHz) */
Expand All @@ -9,13 +10,17 @@
| CKGR_PLLAR_PLLACOUNT(0x3fU) \
| CKGR_PLLAR_DIVA_BYPASS)
#define SYS_BOARD_MCKR (PMC_MCKR_MDIV_PCK_DIV2 | PMC_MCKR_CSS_PLLA_CLK)
#define RST_PARAMS ((0xA5 << RSTC_CR_KEY_Pos) | RSTC_CR_PROCRST)
#define GPNVM_TCM_MASK ((1 << 7) | (1 << 8))

/* Key to unlock MOR register */
#define SYS_CKGR_MOR_KEY_VALUE CKGR_MOR_KEY(0x37)

uint32_t SystemCoreClock = CHIP_FREQ_MAINCK_RC_12MHZ;
extern uint32_t _text_size;
extern uint32_t _dtcm_bss_start, _dtcm_bss_end;

void SystemInit( void )
void
SystemInit( void )
{
/* Set FWS according to SYS_BOARD_MCKR configuration */
EFC->EEFC_FMR = EEFC_FMR_FWS(6) | EEFC_FMR_CLOE;
Expand Down Expand Up @@ -66,12 +71,84 @@ void SystemInit( void )
{
}

SystemCoreClock = CHIP_FREQ_CPU_MAX;

// Configure PCK6 for TC use
/* Configure PCK6 for TC use */
PMC->PMC_PCK[6] = PMC_PCK_CSS_MCK | PMC_PCK_PRES(2);
while ( !(PMC->PMC_SR & PMC_SR_PCKRDY6) )
{
}
PMC->PMC_SCER |= PMC_SCER_PCK6;

/* Check Tightly Coupled Memory (TCM) bits. */
EFC->EEFC_FCR = (EEFC_FCR_FKEY_PASSWD | EEFC_FCR_FCMD_GGPB);
while ((EFC->EEFC_FSR & EEFC_FSR_FRDY) == 0)
;
if ((EFC->EEFC_FRR & GPNVM_TCM_MASK) != GPNVM_TCM_MASK)
{
/* Configure TCM sizes to 128KiB (set GPNVM7 and GPNVM8). */
EFC->EEFC_FCR = (EEFC_FCR_FKEY_PASSWD | EEFC_FCR_FCMD_SGPB
| EEFC_FCR_FARG(7));
while ((EFC->EEFC_FSR & EEFC_FSR_FRDY) == 0)
;
EFC->EEFC_FCR = (EEFC_FCR_FKEY_PASSWD| EEFC_FCR_FCMD_SGPB
| EEFC_FCR_FARG(8));
while ((EFC->EEFC_FSR & EEFC_FSR_FRDY) == 0)
;
/* Reboot required, but bits are set now and we will not
* return down this path. */
__DSB();
__ISB();
RSTC->RSTC_CR = RST_PARAMS;
for (;;)
;
}

/* Clear Data Tightly Coupled Memory (DTCM) bss segment - this has to happen
* after we check that the DTCM is enabled. */
memset(&_dtcm_bss_start, 0, (&_dtcm_bss_end - &_dtcm_bss_start) * 4);

/* DMA copy flash to Instruction Tightly Coupled Memory (ITCM). Just use
* channel 0 since we have not done anything yet. */
enable_pclock(ID_XDMAC);
/* Clear pending interrupts. */
(void)REG_XDMAC_CIS0;
REG_XDMAC_CSA0 = CONFIG_ARMCM_ITCM_FLASH_MIRROR_START;
REG_XDMAC_CDA0 = CONFIG_FLASH_APPLICATION_ADDRESS;
REG_XDMAC_CUBC0 = XDMAC_CUBC_UBLEN((int)&_text_size);

REG_XDMAC_CC0 =
XDMAC_CC_TYPE_MEM_TRAN | XDMAC_CC_MBSIZE_SINGLE |
XDMAC_CC_CSIZE_CHK_1 | XDMAC_CC_DWIDTH_WORD |
XDMAC_CC_SAM_INCREMENTED_AM | XDMAC_CC_DAM_INCREMENTED_AM |
XDMAC_CC_SIF_AHB_IF1 | XDMAC_CC_DIF_AHB_IF0;

REG_XDMAC_CNDA0 = 0;
REG_XDMAC_CNDC0 = 0;
REG_XDMAC_CBC0 = 0;
REG_XDMAC_CDS_MSP0 = 0;
REG_XDMAC_CSUS0 = 0;
REG_XDMAC_CDUS0 = 0;

REG_XDMAC_CIE0 = XDMAC_CIE_BIE;

__DSB();
__ISB();
XDMAC->XDMAC_GE = XDMAC_GE_EN0;
while ( XDMAC->XDMAC_GS & XDMAC_GS_ST0 )
;

while ( !(REG_XDMAC_CIS0 & XDMAC_CIS_BIS) )
;

/* Enable ITCM. DTCM is enabled by default. */
__DSB();
__ISB();
SCB->ITCMCR = (SCB_ITCMCR_EN_Msk | SCB_ITCMCR_RMW_Msk
| SCB_ITCMCR_RETEN_Msk);
__DSB();
__ISB();

/* Use data cache rather than DTCM for two reasons:
* 1. It is hard to flash this device with GPNVM bits enabled.
* 2. It is about as fast. */
SCB_EnableDCache();
}

0 comments on commit e6c9f25

Please sign in to comment.