From 0405e128110d47a40443936e68dc32d7bc4ccc0b Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan
Date: Mon, 26 Aug 2024 12:12:17 +0530
Subject: [PATCH 01/26] powerpc/xmon: Fix tmpstr length check in scanhex

If a function name is longer than 63 characters, the xmon command may
not find it. For example, here is a test that executed an illegal
instruction in a kernel function, where one of the functions in the
call stack has a name longer than 63 characters:

 cpu 0x0: Vector: 700 (Program Check) at [c00000000a6577e0]
     pc: c0000000001aacb8: check__allowed__function__name__for__symbol__r4+0x8/0x10
     lr: c00000000019c1e0: check__allowed__function__name__for__symbol__r1+0x20/0x40
     sp: c00000000a657a80
    msr: 800000000288b033
   current = 0xc00000000a439900
   paca    = 0xc000000003e90000   irqmask: 0x03   irq_happened: 0x01
 .....
 [link register   ] c00000000019c1e0 check__allowed__function__name__for__symbol__r1+0x20/0x40
 [c00000000a657a80] c00000000a439900 (unreliable)
 [c00000000a657aa0] c0000000001021d8 check__allowed__function__name__for__symbol__r2_resolution_symbol+0x38/0x4c
 [c00000000a657ac0] c00000000019b424 power_pmu_event_init+0xa4/0xa50

and when executing a dump instruction (di) command for a long function
name, xmon fails to find the function symbol:

 0:mon> di $check__allowed__function__name__for__symbol__r2_resolution_symbol
 unknown symbol 'check__allowed__function__name__for__symbol__r2_resolution_symb'
 0000000000000000  ********

This is because in scanhex(), the tmpstr loop index is checked only
against an upper bound of 63. Fix it by replacing the upper bound with
(KSYM_NAME_LEN - 1).

With the fix:

 0:mon> di $check__allowed__function__name__for__symbol__r2_resolution_symbol
 c0000000001021a0 3c4c0249	addis   r2,r12,585
 c0000000001021a4 3842ae60	addi    r2,r2,-20896
 c0000000001021a8 7c0802a6	mflr    r0
 c0000000001021ac 60000000	nop
 .....

Reported-by: Miguel Ojeda
Closes: https://lore.kernel.org/linuxppc-dev/CANiq72=QeTgtZL4k9=4CJP6C_Hv=rh3fsn3B9S3KFoPXkyWk3w@mail.gmail.com/
Signed-off-by: Madhavan Srinivasan
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240826064217.46658-1-maddy@linux.ibm.com
---
 arch/powerpc/xmon/xmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index bd4813bad317..e6cddbb2305f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3543,7 +3543,7 @@ scanhex(unsigned long *vp)
 		}
 	} else if (c == '$') {
 		int i;
-		for (i=0; i<63; i++) {
+		for (i = 0; i < (KSYM_NAME_LEN - 1); i++) {
 			c = inchar();
 			if (isspace(c) || c == '\0') {
 				termch = c;

From d6b34416b08895a7457c53630595ce84e4aa904c Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Tue, 20 Aug 2024 16:57:05 +1000
Subject: [PATCH 02/26] powerpc/configs/64s: Enable DEFERRED_STRUCT_PAGE_INIT

It can speed up initialisation of page structs at boot on large
machines.

Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240820065705.660812-1-mpe@ellerman.id.au
---
 arch/powerpc/configs/ppc64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 544a65fda77b..6001d580c0dd 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -93,6 +93,7 @@ CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_ZONE_DEVICE=y
 CONFIG_NET=y
 CONFIG_PACKET=y

From 8ae4f16f7d7b59cca55aeca6db7c9636ffe7fbaa Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 21 Aug 2024 18:07:29 +1000
Subject: [PATCH 03/26] powerpc/64s/mm: Move __real_pte stubs into hash-4k.h

The stub versions of __real_pte() etc. are only used with HPT & 4K
pages, so move them into the hash-4k.h header.

Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240821080729.872034-1-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h | 20 +++++++++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 26 --------------------
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index c654c376ef8b..c3efacab4b94 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -74,6 +74,26 @@
 #define remap_4k_pfn(vma, addr, pfn, prot)	\
	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
 
+/*
+ * With 4K page size the real_pte machinery is all nops.
+ */
+#define __real_pte(e, p, o)	((real_pte_t){(e)})
+#define __rpte_to_pte(r)	((r).pte)
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)	\
+	do {							\
+		index = 0;					\
+		shift = mmu_psize_defs[psize].shift;		\
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * We expect this to be called only for user addresses or kernel virtual
+ * addresses other than the linear mapping.
+ */
+#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
+
 /*
  * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
  * a matter of returning the PTE bits that need to be modified. On 64K PTE,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 519b1743a0f4..f8ba72573cae 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -330,32 +330,6 @@ static inline unsigned long pud_leaf_size(pud_t pud)
 
 #ifndef __ASSEMBLY__
 
-/*
- * This is the default implementation of various PTE accessors, it's
- * used in all cases except Book3S with 64K pages where we have a
- * concept of sub-pages
- */
-#ifndef __real_pte
-
-#define __real_pte(e, p, o)	((real_pte_t){(e)})
-#define __rpte_to_pte(r)	((r).pte)
-#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
-
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)	\
-	do {							\
-		index = 0;					\
-		shift = mmu_psize_defs[psize].shift;		\
-
-#define pte_iterate_hashed_end() } while(0)
-
-/*
- * We expect this to be called only for user addresses or kernel virtual
- * addresses other than the linear mapping.
- */
-#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
-
-#endif /* __real_pte */
-
 static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
				       pte_t *ptep, unsigned long clr,
				       unsigned long set, int huge)

From 197116e2dec8d23888ce76044fe673480afceff0 Mon Sep 17 00:00:00 2001
From: Zhang Zekun
Date: Thu, 22 Aug 2024 16:54:29 +0800
Subject: [PATCH 04/26] powerpc/powermac/pfunc_base: Use helper function
 for_each_child_of_node()

for_each_child_of_node() can be used to iterate over the children of a
device_node, so we don't need to do it manually. No functional change
with this conversion.

Signed-off-by: Zhang Zekun
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822085430.25753-2-zhangzekun11@huawei.com
---
 arch/powerpc/platforms/powermac/pfunc_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 085e0ad20eba..8253de737373 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -313,7 +313,7 @@ static void __init uninorth_install_pfunc(void)
	/*
	 * Install handlers for the hwclock child if any
	 */
-	for (np = NULL; (np = of_get_next_child(uninorth_node, np)) != NULL;)
+	for_each_child_of_node(uninorth_node, np)
		if (of_node_name_eq(np, "hw-clock")) {
			unin_hwclock = np;
			break;

From 46f4bbb8aac2b876355cdefdacd1971b65f8b631 Mon Sep 17 00:00:00 2001
From: Zhang Zekun
Date: Thu, 22 Aug 2024 16:54:30 +0800
Subject: [PATCH 05/26] powerpc/pseries/dlpar: Use helper function
 for_each_child_of_node()

for_each_child_of_node() can be used to iterate over the children of a
device_node, so we don't need to use a while loop. No functional change
with this conversion.

Signed-off-by: Zhang Zekun
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822085430.25753-3-zhangzekun11@huawei.com
---
 arch/powerpc/platforms/pseries/dlpar.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 47f8eabd1bee..ee47ed21b99d 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -250,11 +250,8 @@ int dlpar_detach_node(struct device_node *dn)
	struct device_node *child;
	int rc;
 
-	child = of_get_next_child(dn, NULL);
-	while (child) {
+	for_each_child_of_node(dn, child)
		dlpar_detach_node(child);
-		child = of_get_next_child(dn, child);
-	}
 
	rc = of_detach_node(dn);
	if (rc)

From dace02a9ee1921adee05bf1807a78f92ee2dea2b Mon Sep 17 00:00:00 2001
From: Gaosheng Cui
Date: Thu, 22 Aug 2024 21:06:06 +0800
Subject: [PATCH 06/26] powerpc: Remove obsolete declaration for _get_SP

The implementation of _get_SP() was removed in commit f4db196717c6
("[POWERPC] Remove _get_SP"); remove the now obsolete declaration.

Signed-off-by: Gaosheng Cui
Reviewed-by: Christophe Leroy
[mpe: Update change log to refer to correct commit per Christophe]
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822130609.786431-2-cuigaosheng1@huawei.com
---
 arch/powerpc/kernel/process.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3b506d4c55f3..e7b70c2cc001 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -72,8 +72,6 @@
 #define TM_DEBUG(x...) do { } while(0)
 #endif
 
-extern unsigned long _get_SP(void);
-
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 /*
  * Are we running in "Suspend disabled" mode? If so we have to block any

From 6745c5bb2e0fe513918ce2136108a2efb92bdea1 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui
Date: Thu, 22 Aug 2024 21:06:07 +0800
Subject: [PATCH 07/26] powerpc/maple: Remove obsolete declaration for
 maple_calibrate_decr()

maple_calibrate_decr() has been removed since commit 10f7e7c15e6c
("[PATCH] ppc64: consolidate calibrate_decr implementations"), so its
declaration is now useless; remove it.

Signed-off-by: Gaosheng Cui
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822130609.786431-3-cuigaosheng1@huawei.com
---
 arch/powerpc/platforms/maple/maple.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h
index 4f358b55c341..8ddbaa4ebd0b 100644
--- a/arch/powerpc/platforms/maple/maple.h
+++ b/arch/powerpc/platforms/maple/maple.h
@@ -7,7 +7,6 @@
 extern int maple_set_rtc_time(struct rtc_time *tm);
 extern void maple_get_rtc_time(struct rtc_time *tm);
 extern time64_t maple_get_boot_time(void);
-extern void maple_calibrate_decr(void);
 extern void maple_pci_init(void);
 extern void maple_pci_irq_fixup(struct pci_dev *dev);
 extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel);

From fe16a749731e86d580acf8d43b0298dfe6d1503d Mon Sep 17 00:00:00 2001
From: Gaosheng Cui
Date: Thu, 22 Aug 2024 21:06:08 +0800
Subject: [PATCH 08/26] powerpc/pasemi: Remove obsolete declaration for
 pas_pci_irq_fixup()

pas_pci_irq_fixup() has been removed since commit 771f7404a9de
("pasemi_mac: Move the IRQ mapping from the PCI layer to the driver"),
so its declaration is now useless; remove it.

Signed-off-by: Gaosheng Cui
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822130609.786431-4-cuigaosheng1@huawei.com
---
 arch/powerpc/platforms/pasemi/pasemi.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
index 018c30665e1b..6f6743b8e48d 100644
--- a/arch/powerpc/platforms/pasemi/pasemi.h
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -5,7 +5,6 @@
 extern time64_t pas_get_boot_time(void);
 extern void pas_pci_init(void);
 struct pci_dev;
-extern void pas_pci_irq_fixup(struct pci_dev *dev);
 extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
 
 void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset);

From 600d6a7e630e970624911624eb15986245b18668 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui
Date: Thu, 22 Aug 2024 21:06:09 +0800
Subject: [PATCH 09/26] powerpc: Remove obsolete declarations for use_cop and
 drop_cop

use_cop() and drop_cop() have been removed since commit 6ff4d3e96652
("powerpc: Remove old unused icswx based coprocessor support"), so
their declarations are now useless; remove them.

Signed-off-by: Gaosheng Cui
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822130609.786431-5-cuigaosheng1@huawei.com
---
 arch/powerpc/include/asm/mmu_context.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 37bffa0f7918..99707456c2cd 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -116,9 +116,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 }
 #endif
 
-extern int use_cop(unsigned long acop, struct mm_struct *mm);
-extern void drop_cop(unsigned long acop, struct mm_struct *mm);
-
 #ifdef CONFIG_PPC_BOOK3S_64
 static inline void inc_mm_active_cpus(struct mm_struct *mm)
 {

From 10c8ac13395a087c90ba6acd11f793588ba5609e Mon Sep 17 00:00:00 2001
From: Gaosheng Cui
Date: Thu, 22 Aug 2024 21:00:43 +0800
Subject: [PATCH 10/26] powerpc/powernv/pci: Remove obsolete declaration for
 pnv_pci_init_ioda_hub

pnv_pci_init_ioda_hub() has been removed since commit 5ac129cdb50b
("powerpc/powernv/pci: Remove ioda1 support"), so its declaration is
now useless; remove it.

Signed-off-by: Gaosheng Cui
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20240822130043.783756-1-cuigaosheng1@huawei.com
---
 arch/powerpc/platforms/powernv/pci.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 957f2b47a3c0..93fba1f8661f 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -274,7 +274,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
		      int where, int size, u32 val);
 extern struct iommu_table *pnv_pci_table_alloc(int nid);
 
-extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);

From f9f2bff64c2f0dbee57be3d8c2741357ad3d05e6 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:45 +0200
Subject: [PATCH 11/26] powerpc/8xx: Fix initial memory mapping

Commit cf209951fa7f ("powerpc/8xx: Map linear memory with huge pages")
introduced an initial mapping of kernel TEXT using PAGE_KERNEL_TEXT,
but the pages that contain kernel TEXT may also contain kernel RODATA,
and depending on the selected debug options PAGE_KERNEL_TEXT may be
either RWX or ROX. RODATA must be writable during init because it also
contains ro_after_init data.

So use PAGE_KERNEL_X instead to be sure it is RWX.
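As an illustration of why PAGE_KERNEL_TEXT is not always RWX, its
definition follows the debug options roughly like this (paraphrased
sketch of the idea, not the exact kernel definitions):

	/* Illustration only: protection depends on debug options */
	#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_DEBUG_PAGEALLOC)
	#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX	/* read-only + exec */
	#else
	#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X	/* read-write + exec */
	#endif

With a definition like that, mapping a chunk that also covers RODATA
and ro_after_init data using PAGE_KERNEL_TEXT can make it read-only too
early, whereas PAGE_KERNEL_X is guaranteed RWX during init.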
Fixes: cf209951fa7f ("powerpc/8xx: Map linear memory with huge pages")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/dac7a828d8497c4548c91840575a706657baa4f1.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/nohash/8xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 388bba0ab3e7..15d918dce27d 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -150,11 +150,11 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 
	mmu_mapin_immr();
 
-	mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
+	mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_X, true);
	if (debug_pagealloc_enabled_or_kfence()) {
		top = boundary;
	} else {
-		mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true);
+		mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_X, true);
		mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true);
	}

From 65a82e117ffeeab0baf6f871a1cab11a28ace183 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:46 +0200
Subject: [PATCH 12/26] powerpc/8xx: Fix kernel vs user address comparison

Since commit 9132a2e82adc ("powerpc/8xx: Define a MODULE area below
kernel text"), module exec space is below PAGE_OFFSET, so not only
space above PAGE_OFFSET but also space above TASK_SIZE needs to be seen
as kernel space.

Until now the problem went undetected because by default TASK_SIZE is
0x80000000, which means the address space is determined by just
checking the upper address bit. But when TASK_SIZE is over 0x80000000,
PAGE_OFFSET is used for the comparison, leading to module addresses
being treated as part of user space.

Fix it by using TASK_SIZE instead of PAGE_OFFSET for the address
comparison.

Fixes: 9132a2e82adc ("powerpc/8xx: Define a MODULE area below kernel text")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/3f574c9845ff0a023b46cb4f38d2c45aecd769bd.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_8xx.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ac74321b1192..c955a8196d55 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -41,12 +41,12 @@
 #include "head_32.h"
 
 .macro compare_to_kernel_boundary scratch, addr
-#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+#if CONFIG_TASK_SIZE <= 0x80000000 && MODULES_VADDR >= 0x80000000
 /* By simply checking Address >= 0x80000000, we know if its a kernel address */
	not.	\scratch, \addr
 #else
	rlwinm	\scratch, \addr, 16, 0xfff8
-	cmpli	cr0, \scratch, PAGE_OFFSET@h
+	cmpli	cr0, \scratch, TASK_SIZE@h
 #endif
 .endm
 
@@ -404,7 +404,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
	mfspr	r10, SPRN_SRR0
	mtspr	SPRN_MD_EPN, r10
	rlwinm	r11, r10, 16, 0xfff8
-	cmpli	cr1, r11, PAGE_OFFSET@h
+	cmpli	cr1, r11, TASK_SIZE@h
	mfspr	r11, SPRN_M_TWB	/* Get level 1 table */
	blt+	cr1, 3f

From 985db026c34dfc45213649023d5505822a5dcd78 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:47 +0200
Subject: [PATCH 13/26] powerpc/8xx: Copy kernel PGD entries into all PGDIRs

In order to avoid having to select PGDIR at each TLB miss based on
fault address, copy kernel PGD entries into all PGDIRs in pgd_alloc().

At first it will be used for ITLB misses for kernel TEXT, then for
execmem, then for kernel DATA.
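In other words (illustrative pseudo-code, not the actual handler):

	/* before: every TLB miss had to select the right PGDIR */
	pgd = (ea >= TASK_SIZE) ? swapper_pg_dir : task_pgdir;

	/* after: kernel entries are replicated, one lookup fits all */
	pgd = task_pgdir;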
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/c6d2bf5af2ea909071a85bdca8b1f5dc2df134a8.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/nohash/pgalloc.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index d06efac6d7aa..4ef780b291bc 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -19,8 +19,14 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+	pgd_t *pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
			pgtable_gfp_flags(mm, GFP_KERNEL));
+
+#ifdef CONFIG_PPC_8xx
+	memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD,
+	       (MAX_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+#endif
+	return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)

From 1a736d98c84acd38e40fff69528ce7aaa55dd22d Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:48 +0200
Subject: [PATCH 14/26] Revert "powerpc/8xx: Always pin kernel text TLB"

This reverts commit bccc58986a2f98e3af349c85c5f49aac7fb19ef2.

When STRICT_KERNEL_RWX is selected, EXEC memory must stop where RW
memory starts. When pinning iTLBs it means an 8M alignment for the
start of RW data. That may be acceptable on boards with a lot of
memory, but one of my supported boards only has 32 Mbytes, and this
forced alignment leads to a waste of almost 4 Mbytes, which is more
than 10% of the total memory.

So revert commit bccc58986a2f ("powerpc/8xx: Always pin kernel text
TLB"), but don't restore the previous behaviour in the ITLB miss
handler, as kernel PGD entries are now copied into each process PGDIR.
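As a rough illustration (example numbers only): with about 4.5 Mbytes
of text and rodata pinned by 8 Mbyte TLB entries, RW data cannot start
before the next 8 Mbyte boundary, so about 3.5 Mbytes of the first
8 Mbytes are unusable; on a 32 Mbyte board that is more than 10% of
the RAM.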
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/01b6780b860c8043b51a1ba9d83acfc6f2dde910.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_8xx.S     | 8 ++++++++
 arch/powerpc/mm/nohash/8xx.c       | 3 ++-
 arch/powerpc/platforms/8xx/Kconfig | 7 +++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c955a8196d55..66ee0a31d99d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -587,6 +587,10 @@ start_here:
	lis	r0, (MD_TWAM | MD_RSV4I)@h
	mtspr	SPRN_MD_CTR, r0
 #endif
+#ifndef CONFIG_PIN_TLB_TEXT
+	li	r0, 0
+	mtspr	SPRN_MI_CTR, r0
+#endif
 #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
	lis	r0, MD_TWAM@h
	mtspr	SPRN_MD_CTR, r0
@@ -683,6 +687,7 @@ SYM_FUNC_START_LOCAL(initial_mmu)
	blr
 SYM_FUNC_END(initial_mmu)
 
+#ifdef CONFIG_PIN_TLB
 _GLOBAL(mmu_pin_tlb)
	lis	r9, (1f - PAGE_OFFSET)@h
	ori	r9, r9, (1f - PAGE_OFFSET)@l
@@ -704,6 +709,7 @@ _GLOBAL(mmu_pin_tlb)
	mtspr	SPRN_MD_CTR, r6
	tlbia
 
+#ifdef CONFIG_PIN_TLB_TEXT
	LOAD_REG_IMMEDIATE(r5, 28 << 8)
	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
@@ -724,6 +730,7 @@ _GLOBAL(mmu_pin_tlb)
	bdnzt	lt, 2b
	lis	r0, MI_RSV4I@h
	mtspr	SPRN_MI_CTR, r0
+#endif
 
	LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
 #ifdef CONFIG_PIN_TLB_DATA
@@ -783,3 +790,4 @@ _GLOBAL(mmu_pin_tlb)
	mtspr	SPRN_SRR1, r10
	mtspr	SPRN_SRR0, r11
	rfi
+#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 15d918dce27d..4c2f9d716993 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -177,7 +177,8 @@ int mmu_mark_initmem_nx(void)
	if (!debug_pagealloc_enabled_or_kfence())
		err = mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
 
-	mmu_pin_tlb(block_mapped_ram, false);
+	if (IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+		mmu_pin_tlb(block_mapped_ram, false);
 
	return err;
 }
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index a14d9d8997a4..8623aebfac48 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -195,6 +195,13 @@ config PIN_TLB_IMMR
	  CONFIG_PIN_TLB_DATA is also selected, it will reduce
	  CONFIG_PIN_TLB_DATA to 24 Mbytes.
 
+config PIN_TLB_TEXT
+	bool "Pinned TLB for TEXT"
+	depends on PIN_TLB
+	default y
+	help
+	  This pins kernel text with 8M pages.
+
 endmenu
 
 endmenu

From bcf77a70c4ffc9b01044229de87f5b6f9c1f7913 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:49 +0200
Subject: [PATCH 15/26] powerpc/8xx: Allow setting DATA alignment even with
 STRICT_KERNEL_RWX

It is now possible to not pin kernel text with an 8 Mbyte TLB entry, so
the alignment for STRICT_KERNEL_RWX can be relaxed.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/d0d8b05012b392dd166cfd911f14ba2741ce7e1e.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7b09b064a8a..3c202785a146 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -853,8 +853,8 @@ config DATA_SHIFT_BOOL
	bool "Set custom data alignment"
	depends on ADVANCED_OPTIONS
	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
-	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) || \
-		   PPC_85xx
+	depends on (PPC_8xx && !PIN_TLB_DATA && (!STRICT_KERNEL_RWX || !PIN_TLB_TEXT)) || \
+		   PPC_BOOK3S_32 || PPC_85xx
	help
	  This option allows you to set the kernel data alignment. When
	  RAM is mapped by blocks, the alignment needs to fit the size and
@@ -870,9 +870,9 @@ config DATA_SHIFT
	range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx
	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
	default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
-	default 23 if STRICT_KERNEL_RWX && PPC_8xx
-	default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
-	default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+	default 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && \
+		      (PIN_TLB_DATA || PIN_TLB_TEXT)
+	default 19 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
	default 24 if STRICT_KERNEL_RWX && PPC_85xx
	default PAGE_SHIFT
	help

From c5eec4df25c34f4bee8c757ed157f5d96eaba554 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:50 +0200
Subject: [PATCH 16/26] powerpc/8xx: Reduce default size of module/execmem
 area

8xx boards don't have much memory; the two I know of have 32 Mbytes and
128 Mbytes respectively, so there is no point in reserving 256 Mbytes
of address space for module text. Reduce it to 32 Mbytes for 8xx;
that's more than enough. Nevertheless, make it a configurable value so
that it can be customised if needed.

Also add a build-time verification for overlap of module execmem space
with the user PMD.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/8db23b61e33a0d1913d814f94bfe71ba7ac78b0f.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig                         | 18 ++++++++++++++++++
 arch/powerpc/include/asm/nohash/32/mmu-8xx.h |  3 ++-
 arch/powerpc/mm/nohash/8xx.c                 |  2 ++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3c202785a146..f050a37aa857 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1271,6 +1271,24 @@ config TASK_SIZE
	default "0x80000000" if PPC_8xx
	default "0xb0000000" if PPC_BOOK3S_32
	default "0xc0000000"
+
+config MODULES_SIZE_BOOL
+	bool "Set custom size for modules/execmem area"
+	depends on EXECMEM && ADVANCED_OPTIONS
+	depends on PPC_8xx
+	help
+	  This option allows you to set the size of kernel virtual address
+	  space dedicated for modules/execmem.
+	  For the time being it is only for 8xx.
+
+	  Say N here unless you know what you are doing.
+
+config MODULES_SIZE
+	int "Size of modules/execmem area (In Mbytes)" if MODULES_SIZE_BOOL
+	range 1 256 if EXECMEM
+	default 32 if EXECMEM && PPC_8xx
+	default 0
+
 endmenu
 
 if PPC64
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index a756a1e59c54..2986f9ba40b8 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -170,8 +170,9 @@
 
 #define mmu_linear_psize	MMU_PAGE_8M
 
-#define MODULES_VADDR	(PAGE_OFFSET - SZ_256M)
 #define MODULES_END	PAGE_OFFSET
+#define MODULES_SIZE	(CONFIG_MODULES_SIZE * SZ_1M)
+#define MODULES_VADDR	(MODULES_END - MODULES_SIZE)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 4c2f9d716993..8b54f12d1889 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -207,6 +207,8 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
	/* 8xx can only access 32MB at the moment */
	memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
+
+	BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE) < TASK_SIZE);
 }
 
 int pud_clear_huge(pud_t *pud)

From 16a71c045186a11c1c743934e330de78162b86dd Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:51 +0200
Subject: [PATCH 17/26] powerpc/8xx: Preallocate execmem page tables

Preallocate execmem page tables before creating new PGDs so that all
PGD entries related to execmem can be copied in pgd_alloc().

On 8xx there are 32 Mbytes for execmem by default, so this will use
32 kbytes (32 Mbytes spans eight 4 Mbyte PGDIR slots, each needing one
4 kbyte page table).

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/a7180cc1ba59dec4502af39b4e9f3ff91c57280d.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/mem.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d325217ab201..7a5af64f165d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -412,6 +412,18 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range);
 #ifdef CONFIG_EXECMEM
 static struct execmem_info execmem_info __ro_after_init;
 
+#ifdef CONFIG_PPC_8xx
+static void prealloc_execmem_pgtable(void)
+{
+	unsigned long va;
+
+	for (va = ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE); va < MODULES_END; va += PGDIR_SIZE)
+		pte_alloc_kernel(pmd_off_k(va), va);
+}
+#else
+static void prealloc_execmem_pgtable(void) { }
+#endif
+
 struct execmem_info __init *execmem_arch_setup(void)
 {
	pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
@@ -443,6 +455,8 @@ struct execmem_info __init *execmem_arch_setup(void)
	end = VMALLOC_END;
 #endif
 
+	prealloc_execmem_pgtable();
+
	execmem_info = (struct execmem_info){
		.ranges = {
			[EXECMEM_DEFAULT] = {

From 33c527522f394f63cc589a6f7af990b2232444c8 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:52 +0200
Subject: [PATCH 18/26] powerpc/8xx: Unconditionally use task PGDIR in ITLB
 misses

Now that module exec page tables are preallocated, the instruction TLB
miss handler can use the task PGDIR unconditionally.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/774fd766a8b9bcb9173b5e677d5dad0df2d3970f.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_8xx.S | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 66ee0a31d99d..f9a05648a522 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,18 +199,7 @@ instruction_counter:
	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
	INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
	mtspr	SPRN_MD_EPN, r10
-#ifdef CONFIG_EXECMEM
-	mfcr	r11
-	compare_to_kernel_boundary r10, r10
-#endif
	mfspr	r10, SPRN_M_TWB	/* Get level 1 table */
-#ifdef CONFIG_EXECMEM
-	blt+	3f
-	rlwinm	r10, r10, 0, 20, 31
-	oris	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
-3:
-	mtcr	r11
-#endif
	lwz	r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10)	/* Get level 1 entry */
	mtspr	SPRN_MD_TWC, r11
	mfspr	r10, SPRN_MD_TWC

From ac9f97ff8b324905d457f2694490c63b9deccbc6 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:53 +0200
Subject: [PATCH 19/26] powerpc/8xx: Unconditionally use task PGDIR in DTLB
 misses

At the moment, DATA TLB miss handlers use the task PGDIR for user
addresses and swapper_pg_dir for kernel addresses.

Now that the kernel part of swapper_pg_dir is copied into the task
PGDIR at PGD allocation, it is possible to avoid the above logic and
always use the task PGDIR.

But new kernel PGD entries can still be created after init, in which
case those PGD entries may miss in the task PGDIR. This can be handled
in the DATA TLB error handler. However, it needs to be done in real
mode because the missing entry might be related to the stack.

So implement the copy of the missing PGD entry in the prolog of the
DATA TLB ERROR handler, just after the fixup of DAR.

Note that this is feasible because 8xx doesn't implement vmap or
ioremap with 8 Mbyte pages but only 512 kbyte pages, which are handled
at PTE level.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/7a76a923d2a111f1d843d8b20b4df0c65d2f4a7b.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_8xx.S | 57 ++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index f9a05648a522..811a7130505c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -40,16 +40,6 @@
 
 #include "head_32.h"
 
-.macro compare_to_kernel_boundary scratch, addr
-#if CONFIG_TASK_SIZE <= 0x80000000 && MODULES_VADDR >= 0x80000000
-/* By simply checking Address >= 0x80000000, we know if its a kernel address */
-	not.	\scratch, \addr
-#else
-	rlwinm	\scratch, \addr, 16, 0xfff8
-	cmpli	cr0, \scratch, TASK_SIZE@h
-#endif
-.endm
-
 #define PAGE_SHIFT_512K		19
 #define PAGE_SHIFT_8M		23
 
@@ -237,19 +227,12 @@ instruction_counter:
	START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
	mtspr	SPRN_SPRG_SCRATCH2, r10
	mtspr	SPRN_M_TW, r11
-	mfcr	r11
 
	/* If we are faulting a kernel address, we have to use the
	 * kernel page tables.
	 */
	mfspr	r10, SPRN_MD_EPN
-	compare_to_kernel_boundary r10, r10
	mfspr	r10, SPRN_M_TWB	/* Get level 1 table */
-	blt+	3f
-	rlwinm	r10, r10, 0, 20, 31
-	oris	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
-3:
-	mtcr	r11
	lwz	r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10)	/* Get level 1 entry */
	mtspr	SPRN_MD_TWC, r11
	mfspr	r10, SPRN_MD_TWC
@@ -321,15 +304,19 @@ instruction_counter:
	cmpwi	cr1, r11, RPN_PATTERN
	beq-	cr1, FixupDAR	/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
+	mfspr	r11, SPRN_DSISR
+	rlwinm	r11, r11, 0, DSISR_NOHPTE
+	cmpwi	cr1, r11, 0
+	beq+	cr1, .Ldtlbie
+	mfspr	r11, SPRN_DAR
+	tlbie	r11
+	rlwinm	r11, r11, 16, 0xffff
+	cmplwi	cr1, r11, TASK_SIZE@h
+	bge-	cr1, FixupPGD
+.Ldtlbie:
	EXCEPTION_PROLOG_1
	/* 0x300 is DataAccess exception, needed by bad_page_fault() */
	EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
-	lwz	r4, _DAR(r11)
-	lwz	r5, _DSISR(r11)
-	andis.	r10,r5,DSISR_NOHPTE@h
-	beq+	.Ldtlbie
-	tlbie	r4
-.Ldtlbie:
	prepare_transfer_to_handler
	bl	do_page_fault
	b	interrupt_return
@@ -383,6 +370,30 @@ DARFixed:/* Return from dcbx instruction bug workaround */
 
	__HEAD
	. = 0x2000
+FixupPGD:
+	mtspr	SPRN_M_TW, r10
+	mfspr	r10, SPRN_DAR
+	mtspr	SPRN_MD_EPN, r10
+	mfspr	r11, SPRN_M_TWB	/* Get level 1 table */
+	lwz	r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11)	/* Get the level 1 entry */
+	cmpwi	cr1, r10, 0
+	bne	cr1, 1f
+
+	rlwinm	r10, r11, 0, 20, 31
+	oris	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
+	lwz	r10, (swapper_pg_dir - PAGE_OFFSET)@l(r10)	/* Get the level 1 entry */
+	cmpwi	cr1, r10, 0
+	beq	cr1, 1f
+	stw	r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11)	/* Set the level 1 entry */
+	mfspr	r10, SPRN_M_TW
+	mtcr	r10
+	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH1
+	rfi
+1:
+	mfspr	r10, SPRN_M_TW
+	b	.Ldtlbie
+
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
  * by decoding the registers used by the dcbx instruction and adding them.
  * DAR is set to the calculated address.

From 2f2b9a3adc66e978a1248ffb38df8477e8e97c57 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:54 +0200
Subject: [PATCH 20/26] powerpc/32s: Reduce default size of module/execmem
 area

book3s/32 platforms usually have more memory than 8xx, but it is still
not worth reserving a full segment (256 Mbytes) for module text.
64 Mbytes should be more than enough.

Also fix TASK_SIZE when EXECMEM is not selected, and add a build-time
verification for overlap of module execmem space with user segments.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/c1f6a4e47f177d919561c6e97d31af5564923cf6.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig                         | 7 ++++---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 3 ++-
 arch/powerpc/mm/book3s32/mmu.c               | 2 ++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f050a37aa857..b9f11c262582 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1269,23 +1269,24 @@ config TASK_SIZE_BOOL
 config TASK_SIZE
	hex "Size of user task space" if TASK_SIZE_BOOL
	default "0x80000000" if PPC_8xx
-	default "0xb0000000" if PPC_BOOK3S_32
+	default "0xb0000000" if PPC_BOOK3S_32 && EXECMEM
	default "0xc0000000"
 
 config MODULES_SIZE_BOOL
	bool "Set custom size for modules/execmem area"
	depends on EXECMEM && ADVANCED_OPTIONS
-	depends on PPC_8xx
	help
	  This option allows you to set the size of kernel virtual address
	  space dedicated for modules/execmem.
-	  For the time being it is only for 8xx.
+	  For the time being it is only for 8xx and book3s/32. Other
+	  platform share it with vmalloc space.
 
	  Say N here unless you know what you are doing.
 
 config MODULES_SIZE
	int "Size of modules/execmem area (In Mbytes)" if MODULES_SIZE_BOOL
	range 1 256 if EXECMEM
+	default 64 if EXECMEM && PPC_BOOK3S_32
	default 32 if EXECMEM && PPC_8xx
	default 0
 
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 52971ee30717..42c3af90d1f0 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -196,7 +196,8 @@ void unmap_kernel_page(unsigned long va);
 #endif
 
 #define MODULES_END	ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
-#define MODULES_VADDR	(MODULES_END - SZ_256M)
+#define MODULES_SIZE	(CONFIG_MODULES_SIZE * SZ_1M)
+#define MODULES_VADDR	(MODULES_END - MODULES_SIZE)
 
 #ifndef __ASSEMBLY__
 #include 
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 625fe7d08e06..2db167f4233f 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -223,6 +223,8 @@ int mmu_mark_initmem_nx(void)
 
	update_bats();
 
+	BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, SZ_256M) < TASK_SIZE);
+
	for (i = TASK_SIZE >> 28; i < 16; i++) {
		/* Do not set NX on VM space for modules */
		if (is_module_segment(i << 28))

From 82ef440f9a38a1fd7f4854397633a35af33840a5 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:55 +0200
Subject: [PATCH 21/26] powerpc/603: Copy kernel PGD entries into all PGDIRs
 and preallocate execmem page tables

For the same reason as 8xx, copy kernel PGD entries into all PGDIRs in
pgd_alloc() and preallocate execmem page tables before creating new
PGDs, so that all PGD entries related to execmem are copied by
pgd_alloc().

This will help shorten the fast path in TLB miss handlers.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/1a0d1feee07c4cf955f6a43a704c203e5c90fa53.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/nohash/pgalloc.h | 2 +-
 arch/powerpc/mm/mem.c                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 4ef780b291bc..bb5f3e8ea912 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -22,7 +22,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
	pgd_t *pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
			pgtable_gfp_flags(mm, GFP_KERNEL));
 
-#ifdef CONFIG_PPC_8xx
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_BOOK3S_603)
	memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD,
	       (MAX_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 #endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 7a5af64f165d..a0c5a0d7b249 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -412,7 +412,7 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range);
 #ifdef CONFIG_EXECMEM
 static struct execmem_info execmem_info __ro_after_init;
 
-#ifdef CONFIG_PPC_8xx
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_BOOK3S_603)
 static void prealloc_execmem_pgtable(void)
 {
	unsigned long va;

From 31c0e137ec609f36877ea39cd343ef2476d080aa Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:56 +0200
Subject: [PATCH 22/26] powerpc/603: Switch r0 and r3 in TLB miss handlers

In preparation for the next patch, which will perform additional
calculations to replace the comparison, switch the use of r0 and r3,
as r0 has limitations in some instructions like 'addi/subi'.

Also remove the outdated comments about the meaning of each register:
the registers are used for many things, and it would be difficult to
accurately describe everything done with a given register. The function
is now small enough to get a global view without much description.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/566af5e87685b1a85d3182549c0d520ce2d8877a.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_book3s_32.S | 94 +++++++++++-----------------
 1 file changed, 38 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 57196883a00e..7995506e7fbd 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -411,39 +411,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
  */
	. = INTERRUPT_INST_TLB_MISS_603
 InstructionTLBMiss:
-/*
- * r0:	userspace flag (later scratch)
- * r1:	linux style pte ( later becomes ppc hardware pte )
- * r2:	ptr to linux-style pte
- * r3:	fault address
- */
	/* Get PTE (linux-style) and check access */
-	mfspr	r3,SPRN_IMISS
+	mfspr	r0,SPRN_IMISS
 #ifdef CONFIG_EXECMEM
	lis	r1, TASK_SIZE@h		/* check if kernel address */
-	cmplw	0,r1,r3
+	cmplw	0,r1,r0
 #endif
	mfspr	r2, SPRN_SDR1
	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
	rlwinm	r2, r2, 28, 0xfffff000
 #ifdef CONFIG_EXECMEM
-	li	r0, 3
+	li	r3, 3
	bgt-	112f
	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
-	li	r0, 0
+	li	r3, 0
	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 #endif
-112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+112:	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
	lwz	r2,0(r2)		/* get pmd entry */
	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
	beq-	InstructionAddressInvalid	/* return if no mapping */
-	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
	lwz	r2,0(r2)		/* get linux-style pte */
	andc.	r1,r1,r2		/* check access & ~permission */
	bne-	InstructionAddressInvalid /* return if access not permitted */
	/* Convert linux-style PTE to low word of PPC-style PTE */
 #ifdef CONFIG_EXECMEM
-	rlwimi	r2, r0, 0, 31, 31	/* userspace ? -> PP lsb */
+	rlwimi	r2, r3, 0, 31, 31	/* userspace ? -> PP lsb */
 #endif
	ori	r1, r1, 0xe06		/* clear out reserved bits */
	andc	r1, r2, r1		/* PP = user? 1 : 0 */
@@ -451,7 +445,7 @@ BEGIN_FTR_SECTION
	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
	mtspr	SPRN_RPA,r1
-	tlbli	r3
+	tlbli	r0
	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
	mtcrf	0x80,r3
	rfi
@@ -480,35 +474,29 @@ InstructionAddressInvalid:
  */
	. = INTERRUPT_DATA_LOAD_TLB_MISS_603
 DataLoadTLBMiss:
-/*
- * r0:	userspace flag (later scratch)
- * r1:	linux style pte ( later becomes ppc hardware pte )
- * r2:	ptr to linux-style pte
- * r3:	fault address
- */
	/* Get PTE (linux-style) and check access */
-	mfspr	r3,SPRN_DMISS
+	mfspr	r0,SPRN_DMISS
	lis	r1, TASK_SIZE@h		/* check if kernel address */
-	cmplw	0,r1,r3
+	cmplw	0,r1,r0
	mfspr	r2, SPRN_SDR1
	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ
	rlwinm	r2, r2, 28, 0xfffff000
-	li	r0, 3
+	li	r3, 3
	bgt-	112f
	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
-	li	r0, 0
+	li	r3, 0
	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
-112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+112:	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
	lwz	r2,0(r2)		/* get pmd entry */
	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
	beq-	DataAddressInvalid	/* return if no mapping */
-	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
	lwz	r2,0(r2)		/* get linux-style pte */
	andc.	r1,r1,r2		/* check access & ~permission */
	bne-	DataAddressInvalid	/* return if access not permitted */
	/* Convert linux-style PTE to low word of PPC-style PTE */
	rlwinm	r1,r2,32-9,30,30	/* _PAGE_WRITE -> PP msb */
-	rlwimi	r2,r0,0,30,31		/* userspace ? -> PP */
+	rlwimi	r2,r3,0,30,31		/* userspace ? -> PP */
	rlwimi	r1,r2,32-3,24,24	/* _PAGE_WRITE -> _PAGE_DIRTY */
	xori	r1,r1,_PAGE_DIRTY	/* clear dirty when not rw */
	ori	r1,r1,0xe04		/* clear out reserved bits */
@@ -518,23 +506,23 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
	mtspr	SPRN_RPA,r1
 BEGIN_MMU_FTR_SECTION
-	li	r0,1
+	li	r3,1
	mfspr	r1,SPRN_SPRG_603_LRU
-	rlwinm	r2,r3,20,27,31		/* Get Address bits 15:19 */
-	slw	r0,r0,r2
-	xor	r1,r0,r1
-	srw	r0,r1,r2
+	rlwinm	r2,r0,20,27,31		/* Get Address bits 15:19 */
+	slw	r3,r3,r2
+	xor	r1,r3,r1
+	srw	r3,r1,r2
	mtspr	SPRN_SPRG_603_LRU,r1
	mfspr	r2,SPRN_SRR1
-	rlwimi	r2,r0,31-14,14,14
+	rlwimi	r2,r3,31-14,14,14
	mtspr	SPRN_SRR1,r2
	mtcrf	0x80,r2
-	tlbld	r3
+	tlbld	r0
	rfi
 MMU_FTR_SECTION_ELSE
	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
	mtcrf	0x80,r2
-	tlbld	r3
+	tlbld	r0
	rfi
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 DataAddressInvalid:
	mfspr	r3,SPRN_SRR1
	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
@@ -560,34 +548,28 @@ DataAddressInvalid:
  */
	. = INTERRUPT_DATA_STORE_TLB_MISS_603
 DataStoreTLBMiss:
-/*
- * r0:	userspace flag (later scratch)
- * r1:	linux style pte ( later becomes ppc hardware pte )
- * r2:	ptr to linux-style pte
- * r3:	fault address
- */
	/* Get PTE (linux-style) and check access */
-	mfspr	r3,SPRN_DMISS
+	mfspr	r0,SPRN_DMISS
	lis	r1, TASK_SIZE@h		/* check if kernel address */
-	cmplw	0,r1,r3
+	cmplw	0,r1,r0
	mfspr	r2, SPRN_SDR1
	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
	rlwinm	r2, r2, 28, 0xfffff000
-	li	r0, 3
+	li	r3, 3
	bgt-	112f
	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
-	li	r0, 0
+	li	r3, 0
	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
-112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+112:	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
	lwz	r2,0(r2)		/* get pmd entry */
	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
	beq-	DataAddressInvalid	/* return if no mapping */
-	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
	lwz	r2,0(r2)		/* get linux-style pte */
	andc.	r1,r1,r2		/* check access & ~permission */
	bne-	DataAddressInvalid	/* return if access not permitted */
	/* Convert linux-style PTE to low word of PPC-style PTE */
-	rlwimi	r2,r0,0,31,31		/* userspace ? -> PP lsb */
+	rlwimi	r2,r3,0,31,31		/* userspace ? -> PP lsb */
	li	r1,0xe06		/* clear out reserved bits & PP msb */
	andc	r1,r2,r1		/* PP = user? 1: 0 */
 BEGIN_FTR_SECTION
@@ -597,23 +579,23 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
	mtcrf	0x80,r2
 BEGIN_MMU_FTR_SECTION
-	li	r0,1
+	li	r3,1
	mfspr	r1,SPRN_SPRG_603_LRU
-	rlwinm	r2,r3,20,27,31		/* Get Address bits 15:19 */
-	slw	r0,r0,r2
-	xor	r1,r0,r1
-	srw	r0,r1,r2
+	rlwinm	r2,r0,20,27,31		/* Get Address bits 15:19 */
+	slw	r3,r3,r2
+	xor	r1,r3,r1
+	srw	r3,r1,r2
	mtspr	SPRN_SPRG_603_LRU,r1
	mfspr	r2,SPRN_SRR1
-	rlwimi	r2,r0,31-14,14,14
+	rlwimi	r2,r3,31-14,14,14
	mtspr	SPRN_SRR1,r2
	mtcrf	0x80,r2
-	tlbld	r3
+	tlbld	r0
	rfi
 MMU_FTR_SECTION_ELSE
	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
	mtcrf	0x80,r2
-	tlbld	r3
+	tlbld	r0
	rfi
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)

From 3f57d90c231d3329aaed7079dd05b5a2f7692a58 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:57 +0200
Subject: [PATCH 23/26] powerpc/603: Unconditionally use task PGDIR in ITLB
 misses

Now that module exec page tables are preallocated, the instruction TLB
miss handler can use the task PGDIR unconditionally.

Also revise the identification of user vs kernel addresses by doing a
calculation instead of a comparison: get the segment number and
subtract the number of the first kernel segment. The result is
non-negative for kernel addresses and negative for user addresses,
which means that the upper 2 bits are 0 for kernel and 3 for user.
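Worked example (illustrative values only): assume TASK_SIZE is
0xb0000000, so the first kernel segment is 0xb and the new sequence is
effectively:

	rlwinm	r3, r0, 4, 0xf	/* r3 = top nibble = segment number */
	subi	r3, r3, 0xb	/* subtract first kernel segment    */

For a kernel address such as 0xc1000000: 0xc - 0xb = 1, so the sign bit
is 0. For a user address such as 0x10000000: 0x1 - 0xb = -10, so the
result is negative and its two most significant bits are both 1 (the
value 3 mentioned above). The 'rlwimi r2, r3, 1, 31, 31' that follows
rotates the sign bit into the PP lsb, selecting user or kernel
protection.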
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/9a3242162ad2faab8019c698e501b326a126ee9e.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_book3s_32.S | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 7995506e7fbd..156304c00ece 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -413,22 +413,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
 InstructionTLBMiss:
	/* Get PTE (linux-style) and check access */
	mfspr	r0,SPRN_IMISS
-#ifdef CONFIG_EXECMEM
-	lis	r1, TASK_SIZE@h		/* check if kernel address */
-	cmplw	0,r1,r0
-#endif
	mfspr	r2, SPRN_SDR1
	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
	rlwinm	r2, r2, 28, 0xfffff000
+	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
 #ifdef CONFIG_EXECMEM
-	li	r3, 3
-	bgt-	112f
-	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
-	li	r3, 0
-	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
+	rlwinm	r3, r0, 4, 0xf
+	subi	r3, r3, (TASK_SIZE >> 28) & 0xf
 #endif
-112:	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
-	lwz	r2,0(r2)		/* get pmd entry */
	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
	beq-	InstructionAddressInvalid	/* return if no mapping */
	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
@@ -437,7 +430,7 @@ InstructionTLBMiss:
	bne-	InstructionAddressInvalid /* return if access not permitted */
	/* Convert linux-style PTE to low word of PPC-style PTE */
 #ifdef CONFIG_EXECMEM
-	rlwimi	r2, r3, 0, 31, 31	/* userspace ? -> PP lsb */
+	rlwimi	r2, r3, 1, 31, 31	/* userspace ? -> PP lsb */
 #endif
	ori	r1, r1, 0xe06		/* clear out reserved bits */
	andc	r1, r2, r1		/* PP = user? 1 : 0 */

From 062e825a336017c0334c7497690826c95aa1a84f Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 19:23:58 +0200
Subject: [PATCH 24/26] powerpc/603: Unconditionally use task PGDIR in DTLB
 misses

At the moment, DATA TLB miss handlers use the task PGDIR for user
addresses and swapper_pg_dir for kernel addresses.

Now that the kernel part of swapper_pg_dir is copied into the task
PGDIR at PGD allocation, it is possible to avoid the above logic and
always use the task PGDIR.

But new kernel PGD entries can still be created after init, in which
case those PGD entries may miss in the task PGDIR. This can be handled
in the DATA TLB error handler. However, it needs to be done in real
mode because the missing entry might be related to the stack.

So implement the copy of the missing PGD entry in the DATA TLB miss
handler, just after detection of an invalid PGD entry.

Also replace the comparison by the same calculation as in the previous
patch to know if an address belongs to a kernel or user segment.

Note that as mentioned in platforms/Kconfig.cputype, SMP is not
supported on 603 processors, so there is no risk of the PGD entry being
populated during the fault.
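The logic, as C-like pseudo-code (illustration only; the real
implementation is the assembly added below at label 2:):

	/* on an apparently invalid PGD entry, try the kernel PGD */
	if (task_pgd[idx] == 0) {
		entry = swapper_pg_dir[idx];	/* read kernel PGDIR   */
		if (entry == 0)
			goto data_address_invalid;	/* really unmapped */
		task_pgd[idx] = entry;		/* copy it and retry   */
	}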
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/a2ba8eeb1c845eeb9e46b6fe3a5e9f841df9a033.1724173828.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_book3s_32.S | 65 ++++++++++++++++------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 156304c00ece..cb2bca76be53 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -469,27 +469,22 @@ InstructionAddressInvalid:
 DataLoadTLBMiss:
	/* Get PTE (linux-style) and check access */
	mfspr	r0,SPRN_DMISS
-	lis	r1, TASK_SIZE@h		/* check if kernel address */
-	cmplw	0,r1,r0
	mfspr	r2, SPRN_SDR1
-	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ
-	rlwinm	r2, r2, 28, 0xfffff000
-	li	r3, 3
-	bgt-	112f
-	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
-	li	r3, 0
-	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
-112:	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
-	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm	r1, r2, 28, 0xfffff000
+	rlwimi	r1,r0,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r1)		/* get pmd entry */
+	rlwinm	r3, r0, 4, 0xf
	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
-	beq-	DataAddressInvalid	/* return if no mapping */
-	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
+	subi	r3, r3, (TASK_SIZE >> 28) & 0xf
+	beq-	2f			/* bail if no mapping */
+1:	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
	lwz	r2,0(r2)		/* get linux-style pte */
+	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ
	andc.	r1,r1,r2		/* check access & ~permission */
	bne-	DataAddressInvalid	/* return if access not permitted */
	/* Convert linux-style PTE to low word of PPC-style PTE */
	rlwinm	r1,r2,32-9,30,30	/* _PAGE_WRITE -> PP msb */
-	rlwimi	r2,r3,0,30,31		/* userspace ? -> PP */
+	rlwimi	r2,r3,2,30,31		/* userspace ? -> PP */
	rlwimi	r1,r2,32-3,24,24	/* _PAGE_WRITE -> _PAGE_DIRTY */
	xori	r1,r1,_PAGE_DIRTY	/* clear dirty when not rw */
	ori	r1,r1,0xe04		/* clear out reserved bits */
@@ -518,6 +513,16 @@ MMU_FTR_SECTION_ELSE
	tlbld	r0
	rfi
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
+2:	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha
+	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
+	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	cmpwi	cr0,r2,0
+	beq-	DataAddressInvalid	/* return if no mapping */
+	stw	r2,0(r1)
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	b	1b
 DataAddressInvalid:
	mfspr	r3,SPRN_SRR1
	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
@@ -543,26 +548,22 @@ DataAddressInvalid:
 DataStoreTLBMiss:
	/* Get PTE (linux-style) and check access */
	mfspr	r0,SPRN_DMISS
-	lis	r1, TASK_SIZE@h		/* check if kernel address */
-	cmplw	0,r1,r0
	mfspr	r2, SPRN_SDR1
-	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
-	rlwinm	r2, r2, 28, 0xfffff000
-	li	r3, 3
-	bgt-	112f
-	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
-	li	r3, 0
-	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
-112:	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
-	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm	r1, r2, 28, 0xfffff000
+	rlwimi	r1,r0,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r1)		/* get pmd entry */
+	rlwinm	r3, r0, 4, 0xf
	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
-	beq-	DataAddressInvalid	/* return if no mapping */
+	subi	r3, r3, (TASK_SIZE >> 28) & 0xf
+	beq-	2f			/* bail if no mapping */
+1:	rlwimi	r2,r0,22,20,29		/* insert next 10 bits of address */
	lwz	r2,0(r2)		/* get linux-style pte */
+	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
	andc.	r1,r1,r2		/* check access & ~permission */
	bne-	DataAddressInvalid	/* return if access not permitted */
	/* Convert linux-style PTE to low word of PPC-style PTE */
-	rlwimi	r2,r3,0,31,31		/* userspace ? -> PP lsb */
+	rlwimi	r2,r3,1,31,31		/* userspace ? -> PP lsb */
	li	r1,0xe06		/* clear out reserved bits & PP msb */
	andc	r1,r2,r1		/* PP = user? 1: 0 */
 BEGIN_FTR_SECTION
@@ -592,6 +593,16 @@ MMU_FTR_SECTION_ELSE
	rfi
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 
+2:	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha
+	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
+	rlwimi	r2,r0,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	cmpwi	cr0,r2,0
+	beq-	DataAddressInvalid	/* return if no mapping */
+	stw	r2,0(r1)
+	rlwinm	r2,r2,0,0,19		/* extract address of pte page */
+	b	1b
+
 #ifndef CONFIG_ALTIVEC
 #define altivec_assist_exception	unknown_exception
 #endif

From dca5b1d69aea36ab559d9ca13729370007c60df1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 20 Aug 2024 14:26:54 +0200
Subject: [PATCH 25/26] powerpc/32: Implement validation of emergency stack

VMAP stack added an emergency stack on powerpc/32 for use when there is
a stack overflow, but failed to add stack validation for that emergency
stack. That validation is required when showing the stack. Implement
it.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/2439d50b019f758db4a6d7b238b06441ab109799.1724156805.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/thread_info.h |  4 ++++
 arch/powerpc/kernel/process.c          | 20 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 15c5691dd218..6ebca2996f18 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -226,6 +226,10 @@ static inline int arch_within_stack_frames(const void * const stack,
	return BAD_STACK;
 }
 
+#ifdef CONFIG_PPC32
+extern void *emergency_ctx[];
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e7b70c2cc001..ff61a3e7984c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2175,10 +2175,10 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
	return 0;
 }
 
+#ifdef CONFIG_PPC64
 static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
					unsigned long nbytes)
 {
-#ifdef CONFIG_PPC64
	unsigned long stack_page;
	unsigned long cpu = task_cpu(p);
 
@@ -2206,10 +2206,26 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
		return 1;
 # endif
-#endif
 
	return 0;
 }
+#else
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+					unsigned long nbytes)
+{
+	unsigned long stack_page;
+	unsigned long cpu = task_cpu(p);
+
+	if (!IS_ENABLED(CONFIG_VMAP_STACK))
+		return 0;
+
+	stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE;
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	return 0;
+}
+#endif
 
 /*
  * validate the stack frame of a particular minimum size, used for when we are

From 65948b0e716a47382731889ee6bbb18642b8b003 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 22 Aug 2024 10:00:29 +0200
Subject: [PATCH 26/26] powerpc/vdso: Unconditionally use CFUNC macro

During the merge of commit 4e991e3c16a3 ("powerpc: add CFUNC assembly
label annotation"), a fallback version of the CFUNC macro was added at
the last minute, so it can be used unconditionally.

Fixes: 4e991e3c16a3 ("powerpc: add CFUNC assembly label annotation")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://msgid.link/0fa863f2f69b2ca4094ae066fcf1430fb31110c9.1724313540.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/vdso/gettimeofday.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
index 48fc6658053a..894cb939cd2b 100644
--- a/arch/powerpc/kernel/vdso/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -38,11 +38,7 @@
	.else
	addi	r4, r5, VDSO_DATA_OFFSET
	.endif
-#ifdef __powerpc64__
	bl	CFUNC(DOTSYM(\funct))
-#else
-	bl	\funct
-#endif
	PPC_LL	r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
 #ifdef __powerpc64__
	PPC_LL	r2, PPC_MIN_STKFRM + STK_GOT(r1)