Automatic merge of 'next' into merge (2024-06-17 15:38)
mpe committed Jun 17, 2024
2 parents 708e383 + 11e6e6d commit e2b06d7
Showing 7 changed files with 103 additions and 50 deletions.
2 changes: 2 additions & 0 deletions arch/powerpc/include/asm/iommu.h
@@ -31,6 +31,8 @@
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
#define DMA64_PROPNAME "linux,dma64-ddr-window-info"

#define MIN_DDW_VPMEM_DMA_WINDOW SZ_2G

/* Boot time flags */
extern int iommu_is_off;
extern int iommu_force_on;
28 changes: 0 additions & 28 deletions arch/powerpc/include/asm/plpar_wrappers.h
@@ -18,16 +18,6 @@ static inline long poll_pending(void)
return plpar_hcall_norets(H_POLL_PENDING);
}

static inline u8 get_cede_latency_hint(void)
{
return get_lppaca()->cede_latency_hint;
}

static inline void set_cede_latency_hint(u8 latency_hint)
{
get_lppaca()->cede_latency_hint = latency_hint;
}

static inline long cede_processor(void)
{
/*
@@ -37,24 +27,6 @@ static inline long cede_processor(void)
return plpar_hcall_norets_notrace(H_CEDE);
}

static inline long extended_cede_processor(unsigned long latency_hint)
{
long rc;
u8 old_latency_hint = get_cede_latency_hint();

set_cede_latency_hint(latency_hint);

rc = cede_processor();

/* Ensure that H_CEDE returns with IRQs on */
if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & MSR_EE)))
__hard_irq_enable();

set_cede_latency_hint(old_latency_hint);

return rc;
}

static inline long vpa_call(unsigned long flags, unsigned long cpu,
unsigned long vpa)
{
4 changes: 1 addition & 3 deletions arch/powerpc/mm/drmem.c
@@ -491,10 +491,8 @@ static int __init drmem_init(void)
const __be32 *prop;

dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (!dn) {
pr_info("No dynamic reconfiguration memory found\n");
if (!dn)
return 0;
}

if (init_drmem_lmb_size(dn)) {
of_node_put(dn);
14 changes: 13 additions & 1 deletion arch/powerpc/mm/numa.c
@@ -896,7 +896,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,

static int __init parse_numa_properties(void)
{
struct device_node *memory;
struct device_node *memory, *pci;
int default_nid = 0;
unsigned long i;
const __be32 *associativity;
@@ -1010,6 +1010,18 @@ static int __init parse_numa_properties(void)
goto new_range;
}

for_each_node_by_name(pci, "pci") {
int nid = NUMA_NO_NODE;

associativity = of_get_associativity(pci);
if (associativity) {
nid = associativity_to_nid(associativity);
initialize_form1_numa_distance(associativity);
}
if (likely(nid >= 0) && !node_online(nid))
node_set_online(nid);
}

/*
* Now do the same thing for each MEMBLOCK listed in the
* ibm,dynamic-memory property in the
69 changes: 52 additions & 17 deletions arch/powerpc/platforms/pseries/iommu.c
@@ -1304,14 +1304,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
struct ddw_query_response query;
struct ddw_create_response create;
int page_shift;
u64 win_addr;
u64 win_addr, dynamic_offset = 0;
const char *win_name;
struct device_node *dn;
u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct dma_win *window;
struct property *win64;
struct failed_ddw_pdn *fpdn;
bool default_win_removed = false, direct_mapping = false;
bool dynamic_mapping = false;
bool pmem_present;
struct pci_dn *pci = PCI_DN(pdn);
struct property *default_win = NULL;
@@ -1407,7 +1408,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
goto out_failed;
}


/*
* The "ibm,pmemory" can appear anywhere in the address space.
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
@@ -1432,14 +1432,42 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
1ULL << page_shift);

len = order_base_2(query.largest_available_block << page_shift);
win_name = DMA64_PROPNAME;

dynamic_mapping = true;
} else {
direct_mapping = !default_win_removed ||
(len == MAX_PHYSMEM_BITS) ||
(!pmem_present && (len == max_ram_len));
win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;

/* DDW is big enough to direct map RAM. If there is vPMEM, check
* if enough space is left in DDW where we can dynamically
* allocate TCEs for vPMEM. For now, this Hybrid sharing of DDW
* is only for SR-IOV devices.
*/
if (default_win_removed && pmem_present && !direct_mapping) {
/* DDW is big enough to be split */
if ((query.largest_available_block << page_shift) >=
MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) {
direct_mapping = true;

/* offset of the Dynamic part of DDW */
dynamic_offset = 1ULL << max_ram_len;
}

/* DDW will at least have dynamic allocation */
dynamic_mapping = true;

/* create max size DDW possible */
len = order_base_2(query.largest_available_block
<< page_shift);
}
}

/* Even if the DDW is split into both direct mapped RAM and dynamically
* mapped vPMEM, the DDW property in OF will be marked as Direct.
*/
win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;

ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
if (ret != 0)
goto out_failed;
@@ -1467,9 +1495,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
if (!window)
goto out_del_prop;

if (direct_mapping) {
window->direct = true;
window->direct = direct_mapping;

if (direct_mapping) {
/* DDW maps the whole partition, so enable direct DMA mapping */
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
win64->value, tce_setrange_multi_pSeriesLP_walk);
@@ -1481,12 +1509,13 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
clean_dma_window(pdn, win64->value);
goto out_del_list;
}
} else {
}

if (dynamic_mapping) {
struct iommu_table *newtbl;
int i;
unsigned long start = 0, end = 0;

window->direct = false;
u64 dynamic_addr, dynamic_len;

for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
@@ -1506,8 +1535,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
goto out_del_list;
}

iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
/* If the DDW is split between directly mapped RAM and dynamically
* mapped TCEs, offset into the DDW where the dynamic part
* begins.
*/
dynamic_addr = win_addr + dynamic_offset;
dynamic_len = (1UL << len) - dynamic_offset;
iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn,
dynamic_addr, dynamic_len, page_shift, NULL,
&iommu_table_lpar_multi_ops);
iommu_init_table(newtbl, pci->phb->node, start, end);

pci->table_group->tables[1] = newtbl;
@@ -1559,13 +1595,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
out_unlock:
mutex_unlock(&dma_win_init_mutex);

/*
* If we have persistent memory and the window size is only as big
* as RAM, then we failed to create a window to cover persistent
* memory and need to set the DMA limit.
/* If we have persistent memory and the window size is not big enough
* to directly map both RAM and vPMEM, then we need to set the DMA limit.
*/
if (pmem_present && direct_mapping && len == max_ram_len)
dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
if (pmem_present && direct_mapping && len != MAX_PHYSMEM_BITS)
dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset +
(1ULL << max_ram_len);

return direct_mapping;
}
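As a rough, standalone illustration of the window-split arithmetic introduced in enable_ddw() above: the sample sizes below are assumptions for the example, not values from the commit, and MIN_DDW_VPMEM_DMA_WINDOW simply mirrors the SZ_2G constant added to iommu.h. A minimal sketch, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

#define SZ_2G				(2ULL << 30)
#define MIN_DDW_VPMEM_DMA_WINDOW	SZ_2G	/* mirrors the new iommu.h constant */

int main(void)
{
	/* Example inputs, assumed for illustration only */
	uint64_t largest_available_block = 1ULL << 24;	/* window size in TCEs */
	unsigned int page_shift = 16;			/* 64K TCE pages */
	unsigned int max_ram_len = 35;			/* RAM fits below 2^35 bytes */

	uint64_t window_bytes = largest_available_block << page_shift;

	/* Split only if the window covers RAM plus a minimum vPMEM window */
	if (window_bytes >= MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) {
		/* Direct-map RAM at the bottom, leave the rest for dynamic TCEs */
		uint64_t dynamic_offset = 1ULL << max_ram_len;
		uint64_t dynamic_len = window_bytes - dynamic_offset;

		printf("direct part : [0x0, 0x%llx)\n",
		       (unsigned long long)dynamic_offset);
		printf("dynamic part: [0x%llx, 0x%llx)\n",
		       (unsigned long long)dynamic_offset,
		       (unsigned long long)(dynamic_offset + dynamic_len));
	} else {
		printf("window too small to split: dynamic mapping only\n");
	}
	return 0;
}

With these example inputs the 1 TiB window is split into a 32 GiB direct-mapped region for RAM and the remainder for dynamically mapped vPMEM TCEs, matching the dynamic_offset/dynamic_len roles in the hunk above.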
14 changes: 14 additions & 0 deletions arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -11,6 +11,7 @@

#include <linux/pci.h>
#include <linux/export.h>
#include <linux/node.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
@@ -21,9 +22,22 @@
struct pci_controller *init_phb_dynamic(struct device_node *dn)
{
struct pci_controller *phb;
int nid;

pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn);

nid = of_node_to_nid(dn);
if (likely((nid) >= 0)) {
if (!node_online(nid)) {
if (__register_one_node(nid)) {
pr_err("PCI: Failed to register node %d\n", nid);
} else {
update_numa_distance(dn);
node_set_online(nid);
}
}
}

phb = pcibios_alloc_controller(dn);
if (!phb)
return NULL;
22 changes: 21 additions & 1 deletion arch/powerpc/platforms/pseries/vas.c
@@ -38,7 +38,27 @@ static long hcall_return_busy_check(long rc)
{
/* Check if we are stalled for some time */
if (H_IS_LONG_BUSY(rc)) {
msleep(get_longbusy_msecs(rc));
unsigned int ms;
/*
* Allocate, Modify and Deallocate HCALLs return
* H_LONG_BUSY_ORDER_1_MSEC or H_LONG_BUSY_ORDER_10_MSEC
* for the long delay. So the sleep time should always
* be either 1 or 10 msecs, but in case the HCALL
* returns a long delay > 10 msecs, clamp the sleep
* time to 10 msecs.
*/
ms = clamp(get_longbusy_msecs(rc), 1, 10);

/*
* msleep() will often sleep at least 20 msecs even
* though the hypervisor suggests that the OS reissue
* HCALLs after 1 or 10 msecs. Also the delay hint from
* the HCALL is just a suggestion. So it is OK to pause for
* less time than the hinted delay. Use usleep_range()
* to ensure we don't sleep much longer than actually
* needed.
*/
usleep_range(ms * (USEC_PER_MSEC / 10), ms * USEC_PER_MSEC);
rc = H_BUSY;
} else if (rc == H_BUSY) {
cond_resched();
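A small standalone sketch of the sleep-bound computation added to hcall_return_busy_check() above: the 160 ms hint and the stub helper are assumptions for the example; the real code uses the hypervisor's get_longbusy_msecs() hint and calls usleep_range() in the kernel.

#include <stdio.h>

#define USEC_PER_MSEC 1000

/* Stand-in for get_longbusy_msecs(); the real helper decodes the H_LONG_BUSY rc. */
static int get_longbusy_msecs_stub(long rc)
{
	(void)rc;
	return 160;	/* pretend the hypervisor hinted a 160 ms delay */
}

static int clamp_int(int val, int lo, int hi)
{
	return val < lo ? lo : (val > hi ? hi : val);
}

int main(void)
{
	/* Clamp the hint to 1..10 ms, as the commit does */
	int ms = clamp_int(get_longbusy_msecs_stub(0), 1, 10);

	/* The kernel then sleeps within these bounds (in usecs) via usleep_range() */
	printf("usleep_range(%d, %d)\n",
	       ms * (USEC_PER_MSEC / 10), ms * USEC_PER_MSEC);
	return 0;
}

The lower bound is a tenth of the hinted delay, which the comment in the hunk notes is acceptable because the hypervisor's hint is only a suggestion, while the upper bound avoids msleep()'s tendency to sleep 20 ms or more.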
