Skip to content

Commit

Permalink
Add fast single-precision add/sub/mul for Hazard3 (#1883)
Browse files Browse the repository at this point in the history
* Add fast single-precision add/sub/mul for Hazard3

* Make test output less noisy. Map -nan to -inf in vector gen. Move random vectors to separate files.

* Re-disable USB stdout for pico_float_test by default...

* Disable pico/float.h exports on RISC-V as these functions aren't implemented

* Add hazard3 instructions to asm_helper. Split hazard3.h to support this.

You can still include hazard3.h to get everything. This just allows you
to pull in less.
  • Loading branch information
Wren6991 authored Aug 30, 2024
1 parent 876f331 commit d886df6
Show file tree
Hide file tree
Showing 12 changed files with 2,896 additions and 164 deletions.
34 changes: 9 additions & 25 deletions src/rp2350/pico_platform/include/pico/asm_helper.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@

#include "pico.h"

#ifdef __riscv
// Get macros for convenient use of Hazard3 instructions without binutils support
#include "hardware/hazard3/instructions.h"
#endif

#if !PICO_ASSEMBLER_IS_CLANG
#define apsr_nzcv r15
#endif
# note we don't do this by default in this file for backwards compatibility with user code
# that may include this file, but not use unified syntax. Note that this macro does equivalent
# setup to the pico_default_asm macro for inline assembly in C code.
// note we don't do this by default in this file for backwards compatibility with user code
// that may include this file, but not use unified syntax. Note that this macro does equivalent
// setup to the pico_default_asm macro for inline assembly in C code.
.macro pico_default_asm_setup
#ifndef __riscv
.syntax unified
Expand Down Expand Up @@ -60,28 +65,7 @@ weak_func WRAPPER_FUNC_NAME(\x)
.word \func + \offset
.endm

# backwards compatibility
// backwards compatibility
// __pre_init func, priority_string1
//   func             - symbol forwarded as the first argument
//   priority_string1 - priority string forwarded as the third argument
// Forwards to __pre_init_with_offset with an offset of 0.
// Macro arguments have to be referenced with a leading backslash; without it
// the literal tokens "func" and "priority_string1" are forwarded instead of
// the values supplied by the caller.
.macro __pre_init func, priority_string1
__pre_init_with_offset \func, 0, \priority_string1
.endm

#ifdef __riscv
// h3.bextm rd, rs1, rs2, nbits
// rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits)
// nbits is an assemble-time constant in the range 1..8; anything else hits .err
//
// R-format on opcode 0x0b with (nbits - 1) placed in funct7[3:1]. funct3 has
// to be 0x0: funct3 0x4 on this opcode is the I-format h3.bextmi, whose size
// field (imm[8:6]) occupies the same instruction bits as funct7[3:1], so an
// R-format word with funct3 0x4 is the h3.bextmi encoding with the register
// number of rs2 acting as shamt.
.macro h3.bextm rd rs1 rs2 nbits
.if (\nbits < 1) || (\nbits > 8)
.err
.endif
    .insn r 0x0b, 0x0, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2
.endm

// h3.bextmi rd, rs1, shamt, nbits
// rd = (rs1 >> shamt) & ~(-1 << nbits)
// shamt (0..31) and nbits (1..8) are assemble-time constants; an out-of-range
// value for either one hits .err
.macro h3.bextmi rd rs1 shamt nbits
.if (\shamt < 0) || (\shamt > 31)
.err
.endif
.if (\nbits < 1) || (\nbits > 8)
.err
.endif
    // I-format immediate: (nbits - 1) in bits 8:6, shamt in bits 4:0
    .insn i 0x0b, 0x4, \rd, \rs1, (((\nbits - 1) & 0x7 ) << 6) | (\shamt & 0x1f)
.endm
#endif
136 changes: 8 additions & 128 deletions src/rp2_common/hardware_hazard3/include/hardware/hazard3.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,101 +4,26 @@
* SPDX-License-Identifier: BSD-3-Clause
*/

#ifndef _HARDWARE_HAZARD3_
#define _HARDWARE_HAZARD3_
#ifndef _HARDWARE_HAZARD3_H
#define _HARDWARE_HAZARD3_H

#include "pico.h"
#include "hardware/riscv.h"

// This includes both standard and Hazard3 custom CSRs:
#include "hardware/regs/rvcsr.h"

#include "hardware/hazard3/features.h"
#include "hardware/hazard3/instructions.h"

/** \file hardware/hazard3.h
* \defgroup hardware_hazard3 hardware_hazard3
*
* \brief Accessors for Hazard3-specific RISC-V CSRs, and intrinsics for Hazard3 custom instructions
*
*/

// Feature detection macros for Hazard3 custom extensions.
// Each macro is defined (with no value) when PICO_RP2350 is non-zero. The
// instruction macros in this header only test them with #ifdef.
#if PICO_RP2350
#define __hazard3_extension_xh3power
#define __hazard3_extension_xh3bextm
#define __hazard3_extension_xh3irq
#define __hazard3_extension_xh3pmpm
#endif

#ifdef __ASSEMBLER__

// Assembly language instruction macros for Hazard3 custom instructions

// h3.bextm rd, rs1, rs2, nbits
//
// Extract up to 8 consecutive bits from register rs1. The index of the first
// bit comes from rs2 and the bit count is the immediate nbits (1..8, checked
// at assembly time with .err). Pseudocode:
//
// rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits)
//
// R-format on opcode 0x0b with (nbits - 1) in funct7[3:1]. funct3 has to be
// 0x0, matching the __hazard3_bextm C intrinsic: funct3 0x4 is the I-format
// h3.bextmi, whose size field (imm[8:6]) lands on the same instruction bits
// as funct7[3:1], so an R-format word with funct3 0x4 is the h3.bextmi
// encoding with the register number of rs2 acting as shamt.
//
// Without the Xh3bextm extension the macro expands to an srl/andi pair.

.macro h3.bextm rd rs1 rs2 nbits
.if (\nbits < 1) || (\nbits > 8)
.err
.endif
#ifdef __hazard3_extension_xh3bextm
    .insn r 0x0b, 0x0, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2
#else
    srl \rd, \rs1, \rs2
    andi \rd, \rd, ((1 << \nbits) - 1)
#endif
.endm

// h3.bextmi rd, rs1, shamt, nbits
//
// Immediate form of the multi-bit extract: pulls up to 8 consecutive bits out
// of rs1, where both the first bit index (shamt, 0..31) and the bit count
// (nbits, 1..8) are assemble-time constants. I-format instruction. Pseudocode:
//
// rd = (rs1 >> shamt) & ~(-1 << nbits)
//
// Without the Xh3bextm extension the macro expands to an srli/andi pair.

.macro h3.bextmi rd rs1 shamt nbits
.if (\shamt < 0) || (\shamt > 31)
.err
.endif
.if (\nbits < 1) || (\nbits > 8)
.err
.endif
#ifndef __hazard3_extension_xh3bextm
    srli \rd, \rs1, \shamt
    andi \rd, \rd, ((1 << \nbits) - 1)
#else
    // I-format immediate: (nbits - 1) in bits 8:6, shamt in bits 4:0
    .insn i 0x0b, 0x4, \rd, \rs1, (((\nbits - 1) & 0x7 ) << 6) | (\shamt & 0x1f)
#endif
.endm

// h3.block: put this processor into an idle state. It leaves that state when
// another processor in the same multiprocessor complex executes h3.unblock,
// or when this processor is interrupted. If an h3.unblock has been received
// since this processor last executed h3.block, execution falls straight
// through. On RP2350 a processor also has its own h3.unblock signal
// reflected back to it.
//
// Without the Xh3power extension the macro assembles to a plain nop.

.macro h3.block
#ifndef __hazard3_extension_xh3power
    nop
#else
    slt x0, x0, x0
#endif
.endm

// h3.unblock: tell the other processors in the same multiprocessor complex
// to leave the idle state they entered with h3.block. On RP2350 the signal is
// also reflected back to the issuing processor, so its own next h3.block
// falls through immediately.
//
// Without the Xh3power extension the macro assembles to a plain nop.

.macro h3.unblock
#ifndef __hazard3_extension_xh3power
    nop
#else
    slt x0, x0, x1
#endif
.endm

#else // !__ASSEMBLER__
#ifndef __ASSEMBLER__

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -128,51 +53,6 @@ extern "C" {
#define hazard3_irqarray_clear(csr, index, data) static_assert(false, "Not supported: Xh3irq extension")
#endif


// __hazard3_bextm(nbits, rs1, rs2): take nbits (1..8) consecutive bits of rs1,
// starting at the bit index given by the low five bits of rs2.
// nbits must be a constant expression.
#ifdef __hazard3_extension_xh3bextm
#define __hazard3_bextm(nbits, rs1, rs2) ({\
    uint32_t __h3_bextm_out; \
    asm (".insn r 0x0b, 0, %3, %0, %1, %2"\
        : "=r" (__h3_bextm_out) \
        : "r" (rs1), "r" (rs2), "i" ((((nbits) - 1) & 0x7) << 1)\
    ); \
    __h3_bextm_out; \
})
#else
// Portable fallback: shift, then keep the low ((nbits - 1) & 7) + 1 bits.
#define __hazard3_bextm(nbits, rs1, rs2) (((rs1) >> ((rs2) & 0x1f)) & ((1u << ((((nbits) - 1) & 0x7) + 1)) - 1u))
#endif

// __hazard3_bextmi(nbits, rs1, shamt): take nbits (1..8) consecutive bits of
// rs1, starting at bit index shamt (low five bits used).
// nbits and shamt must be constant expressions.
#ifdef __hazard3_extension_xh3bextm
#define __hazard3_bextmi(nbits, rs1, shamt) ({\
    uint32_t __h3_bextmi_rd; \
    asm (".insn i 0x0b, 0x4, %0, %1, %2"\
        : "=r" (__h3_bextmi_rd) \
        : "r" (rs1), "i" ((((nbits) - 1) & 0x7) << 6 | ((shamt) & 0x1f)) \
    ); \
    __h3_bextmi_rd; \
})
#else
// Portable fallback. This has to define __hazard3_bextmi with a shamt
// parameter; defining __hazard3_bextm here would leave __hazard3_bextmi
// undefined and make the expansion refer to an undeclared shamt.
#define __hazard3_bextmi(nbits, rs1, shamt) (((rs1) >> ((shamt) & 0x1f)) & (0xffu >> (7 - (((nbits) - 1) & 0x7))))
#endif

// __hazard3_block() / __hazard3_unblock(): C forms of the h3.block and
// h3.unblock instructions. Both expand to empty statements when the Xh3power
// extension is not available.
#ifndef __hazard3_extension_xh3power
#define __hazard3_block() do {} while (0)
#define __hazard3_unblock() do {} while (0)
#else
#define __hazard3_block() asm volatile ("slt x0, x0, x0" : : : "memory")
#define __hazard3_unblock() asm volatile ("slt x0, x0, x1" : : : "memory")
#endif

#ifdef __cplusplus
}
#endif

#endif // !__ASSEMBLER__

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright (c) 2024 Raspberry Pi Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#ifndef _HARDWARE_HAZARD3_FEATURES_H
#define _HARDWARE_HAZARD3_FEATURES_H

#include "pico.h"

/** \file hardware/hazard3/features.h
* \addtogroup hardware_hazard3
*
* \brief Sets macros for supported Hazard3 custom extensions (features) based on PICO_PLATFORM macros
*
*/

// Feature detection macros for Hazard3 custom extensions.
// A macro is defined only when the extension is available, so it can be
// tested with #ifdef; its value carries the extension version.
#if PICO_RP2350
// Version 1.0 of these four extensions
// (encoded as major * 100 + minor, so 1.0 -> 100)
#define __hazard3_extension_xh3power 100
#define __hazard3_extension_xh3bextm 100
#define __hazard3_extension_xh3irq 100
#define __hazard3_extension_xh3pmpm 100
#endif

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Copyright (c) 2024 Raspberry Pi Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#ifndef _HARDWARE_HAZARD3_INSTRUCTIONS_H
#define _HARDWARE_HAZARD3_INSTRUCTIONS_H

#include "pico.h"

// Get list of supported extensions based on platform:
#include "hardware/hazard3/features.h"

/** \file hardware/hazard3/instructions.h
* \addtogroup hardware_hazard3
*
* \brief Intrinsics and asm macros for Hazard3 custom instructions
*
* The implementation of these intrinsics depends on the feature macros
* defined in hardware/hazard3/features.h. When the relevant feature is not
* present, the intrinsics fall back on an RV32I equivalent if possible.
*
*/

#ifdef __ASSEMBLER__

// Assembly language instruction macros for Hazard3 custom instructions

// h3.bextm rd, rs1, rs2, nbits
//
// Extract up to 8 consecutive bits from register rs1. The index of the first
// bit comes from rs2 and the bit count is the immediate nbits (1..8, checked
// at assembly time with .err). Pseudocode:
//
// rd = (rs1 >> rs2[4:0]) & ~(-1 << nbits)
//
// R-format on opcode 0x0b with (nbits - 1) in funct7[3:1]. funct3 has to be
// 0x0, matching the __hazard3_bextm C intrinsic: funct3 0x4 is the I-format
// h3.bextmi, whose size field (imm[8:6]) lands on the same instruction bits
// as funct7[3:1], so an R-format word with funct3 0x4 is the h3.bextmi
// encoding with the register number of rs2 acting as shamt.
//
// Without the Xh3bextm extension the macro expands to an srl/andi pair.

.macro h3.bextm rd rs1 rs2 nbits
.if (\nbits < 1) || (\nbits > 8)
.err
.endif
#ifdef __hazard3_extension_xh3bextm
    .insn r 0x0b, 0x0, (((\nbits - 1) & 0x7 ) << 1), \rd, \rs1, \rs2
#else
    srl \rd, \rs1, \rs2
    andi \rd, \rd, ((1 << \nbits) - 1)
#endif
.endm

// h3.bextmi rd, rs1, shamt, nbits
//
// Immediate form of the multi-bit extract: pulls up to 8 consecutive bits out
// of rs1, where both the first bit index (shamt, 0..31) and the bit count
// (nbits, 1..8) are assemble-time constants. I-format instruction. Pseudocode:
//
// rd = (rs1 >> shamt) & ~(-1 << nbits)
//
// Without the Xh3bextm extension the macro expands to an srli/andi pair.

.macro h3.bextmi rd rs1 shamt nbits
.if (\shamt < 0) || (\shamt > 31)
.err
.endif
.if (\nbits < 1) || (\nbits > 8)
.err
.endif
#ifndef __hazard3_extension_xh3bextm
    srli \rd, \rs1, \shamt
    andi \rd, \rd, ((1 << \nbits) - 1)
#else
    // I-format immediate: (nbits - 1) in bits 8:6, shamt in bits 4:0
    .insn i 0x0b, 0x4, \rd, \rs1, (((\nbits - 1) & 0x7 ) << 6) | (\shamt & 0x1f)
#endif
.endm

// h3.block: put this processor into an idle state. It leaves that state when
// another processor in the same multiprocessor complex executes h3.unblock,
// or when this processor is interrupted. If an h3.unblock has been received
// since this processor last executed h3.block, execution falls straight
// through. On RP2350 a processor also has its own h3.unblock signal
// reflected back to it.
//
// Without the Xh3power extension the macro assembles to a plain nop.

.macro h3.block
#ifndef __hazard3_extension_xh3power
    nop
#else
    slt x0, x0, x0
#endif
.endm

// h3.unblock: tell the other processors in the same multiprocessor complex
// to leave the idle state they entered with h3.block. On RP2350 the signal is
// also reflected back to the issuing processor, so its own next h3.block
// falls through immediately.
//
// Without the Xh3power extension the macro assembles to a plain nop.

.macro h3.unblock
#ifndef __hazard3_extension_xh3power
    nop
#else
    slt x0, x0, x1
#endif
.endm

#else // !__ASSEMBLER__

// C language instruction macros for Hazard3 custom instructions

#ifdef __cplusplus
extern "C" {
#endif

// __hazard3_bextm(nbits, rs1, rs2): take nbits (1..8) consecutive bits of rs1,
// starting at the bit index given by the low five bits of rs2.
// nbits must be a constant expression.
#ifdef __hazard3_extension_xh3bextm
#define __hazard3_bextm(nbits, rs1, rs2) ({\
    uint32_t __h3_bextm_out; \
    asm (".insn r 0x0b, 0, %3, %0, %1, %2"\
        : "=r" (__h3_bextm_out) \
        : "r" (rs1), "r" (rs2), "i" ((((nbits) - 1) & 0x7) << 1)\
    ); \
    __h3_bextm_out; \
})
#else
// Portable fallback: shift, then keep the low ((nbits - 1) & 7) + 1 bits.
#define __hazard3_bextm(nbits, rs1, rs2) (((rs1) >> ((rs2) & 0x1f)) & ((1u << ((((nbits) - 1) & 0x7) + 1)) - 1u))
#endif

// __hazard3_bextmi(nbits, rs1, shamt): take nbits (1..8) consecutive bits of
// rs1, starting at bit index shamt (low five bits used).
// nbits and shamt must be constant expressions.
#ifdef __hazard3_extension_xh3bextm
#define __hazard3_bextmi(nbits, rs1, shamt) ({\
    uint32_t __h3_bextmi_rd; \
    asm (".insn i 0x0b, 0x4, %0, %1, %2"\
        : "=r" (__h3_bextmi_rd) \
        : "r" (rs1), "i" ((((nbits) - 1) & 0x7) << 6 | ((shamt) & 0x1f)) \
    ); \
    __h3_bextmi_rd; \
})
#else
// Portable fallback. This has to define __hazard3_bextmi with a shamt
// parameter; defining __hazard3_bextm here would leave __hazard3_bextmi
// undefined and make the expansion refer to an undeclared shamt.
#define __hazard3_bextmi(nbits, rs1, shamt) (((rs1) >> ((shamt) & 0x1f)) & (0xffu >> (7 - (((nbits) - 1) & 0x7))))
#endif

// __hazard3_block() / __hazard3_unblock(): C forms of the h3.block and
// h3.unblock instructions. Both expand to empty statements when the Xh3power
// extension is not available.
#ifndef __hazard3_extension_xh3power
#define __hazard3_block() do {} while (0)
#define __hazard3_unblock() do {} while (0)
#else
#define __hazard3_block() asm volatile ("slt x0, x0, x0" : : : "memory")
#define __hazard3_unblock() asm volatile ("slt x0, x0, x1" : : : "memory")
#endif

#ifdef __cplusplus
}
#endif

#endif // !__ASSEMBLER__

#endif
Loading

0 comments on commit d886df6

Please sign in to comment.