From e42ae37e0c1e6dbaf193749db955e9272c561205 Mon Sep 17 00:00:00 2001
From: Jacek Maksymowicz
Date: Tue, 22 Oct 2024 12:47:04 +0200
Subject: [PATCH] hal/armv7m: allow for more granular memory maps

Use the MPU region overlap feature to allow for more granularity when
translating memory maps to MPU regions. With this feature, the code can
now create an MPU region larger than the map, then "mask off" the
excess with another region.

Improve MPU region code to handle more edge cases.
Replace inline assembly with builtins for the `ctz` function.

JIRA: RTOS-950
---
 hal/armv7m/mpu.c | 229 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 148 insertions(+), 81 deletions(-)

diff --git a/hal/armv7m/mpu.c b/hal/armv7m/mpu.c
index e9e6f650..5f237f5e 100644
--- a/hal/armv7m/mpu.c
+++ b/hal/armv7m/mpu.c
@@ -20,99 +20,181 @@ static mpu_common_t mpu_common;
 
 
+/* clang-format off */
 enum { mpu_type, mpu_ctrl, mpu_rnr, mpu_rbar, mpu_rasr, mpu_rbar_a1, mpu_rasr_a1, mpu_rbar_a2, mpu_rasr_a2, mpu_rbar_a3, mpu_rasr_a3 };
+/* clang-format on */
 
 
-/* Binary count trailing zeros */
-__attribute__((always_inline)) static inline u8 _ctz(u32 val)
+/* Removes all RASR attribute bits except ENABLE */
+#define HOLE_ATTR(rasrAttr) (0 | ((rasrAttr) & 0x1))
+
+
+/* Setup single MPU region entry in a local MPU context */
+static int mpu_regionSet(unsigned int *idx, u32 base_addr, u8 srdMask, u8 sizeBit, u32 rasrAttr)
 {
-	asm volatile(" \
-		rbit %0, %0; \
-		clz %0, %0"
-		: "+l"(val));
-	return val;
+	if ((sizeBit < 5) || (*idx >= mpu_common.regMax)) {
+		return -EPERM;
+	}
+
+	mpu_common.region[*idx].rbar =
+		base_addr |
+		(1ul << 4) | /* mark region as valid */
+		(*idx & 0xful);
+
+	mpu_common.region[*idx].rasr =
+		rasrAttr |
+		(((u32)srdMask) << 8) |
+		((((u32)sizeBit - 1) & 0x1ful) << 1);
+
+	*idx += 1;
+	return EOK;
 }
 
 
-/* Setup single MPU region entry in a local MPU context */
-static int mpu_regionSet(u8 idx, addr_t addr, u8 sizeBit, u32 attr, unsigned int enable, u8 srdMask)
+/* Translate memory map attributes to RASR attribute bits */
+static u32 mpu_regionAttrs(u32 attr, unsigned int enable)
 {
 	u8 tex = 0;
 	u8 ap = 1; /* privileged read-write access, unprivileged no access */
 
-	if (sizeBit < 5 || idx >= mpu_common.regMax)
-		return -EPERM;
-
-	if (attr & mAttrRead)
+	if ((attr & mAttrRead) != 0) {
 		ap = 2; /* privileged read-write access, unprivileged read only access */
+	}
 
-	if (attr & mAttrWrite)
+	if ((attr & mAttrWrite) != 0) {
 		ap = 3; /* privileged read-write access, unprivileged read and write access */
+	}
 
-	mpu_common.region[idx].rbar =
-		((u32)addr & (0x7fffffful << 5)) |
-		(1ul << 4) | /* mark region as valid */
-		(idx & 0xful);
-
-	mpu_common.region[idx].rasr =
-		((((attr & mAttrExec) == 0) & 1ul) << 28) |
-		((ap & 0x7ul) << 24) |
-		((tex & 0x7ul) << 19) |
-		((((attr & mAttrShareable) != 0) & 1ul) << 18) |
-		((((attr & mAttrCacheable) != 0) & 1ul) << 17) |
-		((((attr & mAttrBufferable) != 0) & 1ul) << 16) |
-		((srdMask & 0xfful) << 8) |
-		(((sizeBit - 1) & 0x1ful) << 1) |
-		(enable != 0);
-
-	return EOK;
+	return ((((attr & mAttrExec) == 0) & 1ul) << 28) |
+		((ap & 0x7ul) << 24) |
+		((tex & 0x7ul) << 19) |
+		((((attr & mAttrShareable) != 0) & 1ul) << 18) |
+		((((attr & mAttrCacheable) != 0) & 1ul) << 17) |
+		((((attr & mAttrBufferable) != 0) & 1ul) << 16) |
+		(enable != 0);
 }
 
 
-/* Find best alignment for map and setup a region */
-static int mpu_regionBestFit(u8 idx, addr_t addr, size_t size, u32 attr, unsigned int enable, size_t *allocSize)
+static int mpu_checkOverlap(unsigned int idx, u32 start, u32 end)
 {
-	size_t srSize;
-	addr_t srBase, alignMask;
+	end -= 1;
+	for (int i = 0; i < idx; i++) {
+		if (((mpu_common.region[i].rbar & 0x10ul) == 0) || ((mpu_common.region[i].rasr & 0x1ul) == 0)) {
+			continue;
+		}
+
+		u32 sizeBit = ((mpu_common.region[i].rasr >> 1) & 0x1ful) + 1;
+		u32 regionMask = ~((1ul << sizeBit) - 1);
+		u32 sr_start = mpu_common.region[i].rbar & regionMask;
+		u32 subregions = (mpu_common.region[i].rasr >> 8) & 0xfful;
+		for (int j = 0; j < 8; j++) {
+			u32 sr_end = sr_start + (1ul << (sizeBit - 3)) - 1;
+			if (((subregions & (1ul << j)) == 0) && (start <= sr_end) && (sr_start <= end)) {
+				return 1;
+			}
+
+			sr_start = sr_end + 1;
+		}
+	}
+
+	return 0;
+}
-	u8 alignBits = _ctz(addr);
-	u8 srdMask = 0, bit = _ctz(size);
-	if (bit < alignBits)
-		alignBits = bit;
 
+
+static int mpu_regionCalculateAndSet(unsigned int *idx, addr_t addr, addr_t end, u8 sizeBit, u32 rasrAttr)
+{
+	/* RBAR contains all MSBs that are the same */
+	u32 base_addr = addr & ~((1ul << sizeBit) - 1);
+	/* Extract first and past-the-end subregion that needs to be enabled */
+	u8 sr_start = (addr >> (sizeBit - 3)) & 7ul;
+	u8 sr_end = (end >> (sizeBit - 3)) & 7ul;
+	sr_end = (sr_end == 0) ? 8 : sr_end;
+	/* Bit set means disable region - negate result */
+	u8 srdMask = ~(((1ul << sr_end) - 1) & (0xfful << sr_start));
+	return mpu_regionSet(idx, base_addr, srdMask, sizeBit, rasrAttr);
+}
+
-	alignMask = alignBits < 32 ? ~((1u << alignBits) - 1) : 0;
 
+/* Create up to 2 regions that will represent a given map */
+static int mpu_regionGenerate(unsigned int *idx, addr_t start, addr_t end, u32 rasrAttr)
+{
+	/* Allow end == 0, this means end of address range */
+	if ((end != 0) && (end <= start)) {
+		return -EINVAL;
+	}
-	if (alignBits < 5) {
-		*allocSize = 1u << alignBits;
+	/* Check if size is power of 2 and start is aligned - necessary for handling
+	   small regions (below 256 bytes) */
+	const u32 size = (end - start) & 0xfffffffful;
+	if (size == 0) {
+		return mpu_regionSet(idx, 0, 0, 32, rasrAttr);
 	}
-	else {
-		alignBits += 3;
-		if (alignBits >= 32) {
-			alignBits = 32;
-			alignMask = 0;
-		}
-		else {
-			alignMask = ~((1u << alignBits) - 1);
+	if ((size == (1 << __builtin_ctz(size))) && ((start & (size - 1)) == 0)) {
+		if (size < 32) {
+			/* Not supported by MPU */
+			return -EINVAL;
 		}
-		*allocSize = 0;
-		srBase = addr & alignMask;
-		srSize = 1u << (alignBits - 3);
+		return mpu_regionSet(idx, start, 0, __builtin_ctz(size), rasrAttr);
+	}
 
-		for (bit = 0; bit < 8; bit++) {
-			if (srBase < addr || srBase + srSize > addr + size || *allocSize + srSize > size)
-				srdMask |= 1u << bit;
-			else
-				*allocSize += srSize;
+	const int common_trailing_zeroes = __builtin_ctz(start | end);
+	if (common_trailing_zeroes < 5) {
+		/* This would require subregions smaller than 32 bytes to represent - not supported by MPU */
+		return -EINVAL;
+	}
 
-			srBase += srSize;
-		}
+	const u8 commonMsb = 32 - __builtin_clz(start ^ ((end - 1) & 0xfffffffful));
+	const int sigbits = commonMsb - common_trailing_zeroes;
+	if (sigbits <= 3) {
+		/* Can be represented with one region + 8 subregions */
+		const u8 sizeBit = common_trailing_zeroes + 3;
+		return mpu_regionCalculateAndSet(idx, start, end, sizeBit, rasrAttr);
+	}
+	else if (sigbits == 4) {
+		/* Can be represented with 2 regions + up to 8 subregions each */
+		const u8 sizeBit = common_trailing_zeroes + 3;
+		const u32 diff_mask = (1ull << sizeBit) - 1;
+		const u32 reg1_end = (start & (~diff_mask)) + diff_mask + 1;
+		int ret = mpu_regionCalculateAndSet(idx, start, reg1_end, sizeBit, rasrAttr);
+		return (ret == EOK) ? mpu_regionCalculateAndSet(idx, reg1_end, end, sizeBit, rasrAttr) : ret;
+	}
+	else if (rasrAttr == HOLE_ATTR(rasrAttr)) {
+		/* Cannot attempt another cutout - we are already trying to make a hole */
+		return -EPERM;
 	}
 
-	return mpu_regionSet(idx, addr & alignMask, alignBits, attr, enable, srdMask);
+	/* Attempt to allocate larger region and mask start or end with another region */
+	const u32 diff_mask = (1ul << (commonMsb - 3)) - 1;
+	u32 aligned_start, aligned_end, hole_start, hole_end;
+	if ((start & (~diff_mask)) == start) {
+		/* Start aligned - try cutting from the end */
+		aligned_start = start;
+		aligned_end = (end & (~diff_mask)) + diff_mask + 1;
+		hole_start = end;
+		hole_end = aligned_end;
+	}
+	else if ((end & (~diff_mask)) == end) {
+		/* End aligned - try cutting from the start */
+		aligned_start = start & (~diff_mask);
+		aligned_end = end;
+		hole_start = aligned_start;
+		hole_end = start;
+	}
+	else {
+		/* Would need cutting from both ends - not supported (we limit to 2 regions per map) */
+		return -EPERM;
+	}
+
+	/* First check if our "hole" overrides any existing mappings. This would lead to unintuitive behaviors. */
+	if (mpu_checkOverlap(*idx, hole_start, hole_end) != 0) {
+		return -EPERM;
+	}
+
+	int ret = mpu_regionCalculateAndSet(idx, aligned_start, aligned_end, commonMsb, rasrAttr);
+	return (ret == EOK) ? mpu_regionGenerate(idx, hole_start, hole_end, HOLE_ATTR(rasrAttr)) : ret;
 }
@@ -168,28 +250,13 @@ void mpu_init(void)
 int mpu_regionAlloc(addr_t addr, addr_t end, u32 attr, u32 mapId, unsigned int enable)
 {
 	int res = EOK;
-	size_t size, allocSize = 0;
-	u8 regCur = mpu_common.regCnt;
-
-	if (addr > end)
-		return -ERANGE;
-
-	size = end - addr;
-
-	while (size > 0 && (regCur - mpu_common.regCnt) < 2) {
-		if ((res = mpu_regionBestFit(regCur++, addr, size, attr, enable, &allocSize)) < 0)
-			break;
-
-		if (allocSize > size)
-			break;
-
-		addr += allocSize;
-		size -= allocSize;
-	}
+	unsigned int regCur = mpu_common.regCnt;
 
-	if (res != EOK || size != 0) {
+	u32 rasrAttr = mpu_regionAttrs(attr, enable);
+	res = mpu_regionGenerate(&regCur, addr, end, rasrAttr);
+	if (res != EOK) {
 		mpu_regionInvalidate(mpu_common.regCnt, regCur);
-		return res == EOK ? -EPERM : res;
+		return res;
 	}
 
 	mpu_regionAssignMap(mpu_common.regCnt, regCur, mapId);
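
Illustration (not part of the patch): a minimal host-side sketch of the subregion arithmetic that mpu_regionCalculateAndSet() applies, useful for sanity-checking a map's RBAR base, region size and SRD mask off target. The helper name coverMap is made up for this example, standard C types stand in for the kernel's u8/u32, and a hosted toolchain with stdio is assumed.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch of the SRD computation: cover map [start, end) with one 2^sizeBit
 * region and disable the subregions that fall outside the map. */
static void coverMap(uint32_t start, uint32_t end, unsigned int sizeBit)
{
	/* Region base keeps the address bits shared by the whole map */
	uint32_t base = start & ~((1u << sizeBit) - 1u);
	/* First and past-the-end subregion index; each subregion is 2^(sizeBit - 3) bytes */
	unsigned int srStart = (start >> (sizeBit - 3)) & 7u;
	unsigned int srEnd = (end >> (sizeBit - 3)) & 7u;
	srEnd = (srEnd == 0) ? 8 : srEnd;
	/* In SRD a set bit disables a subregion, so build the enable mask and negate it */
	unsigned int srd = (~(((1u << srEnd) - 1u) & (0xffu << srStart))) & 0xffu;

	printf("map [0x%08" PRIx32 ", 0x%08" PRIx32 "): base 0x%08" PRIx32 ", size 2^%u, SRD 0x%02x\n",
		start, end, base, sizeBit, srd);
}

int main(void)
{
	/* 96 KB map inside a 256 KB naturally aligned region: 32 KB subregions */
	coverMap(0x20010000u, 0x20028000u, 18);
	return 0;
}

For this input the sketch prints base 0x20000000, size 2^18 and SRD 0xe3, i.e. subregions 2-4 stay enabled and the other five are masked off, which is the kind of single-region layout the patch programs into RBAR/RASR when the map's significant bits fit within three bits above the common trailing zeroes.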