]> Git Repo - J-linux.git/commitdiff
cxl/region: Fix region creation for greater than x2 switches
authorHuaisheng Ye <[email protected]>
Mon, 9 Dec 2024 23:33:02 +0000 (15:33 -0800)
committerDave Jiang <[email protected]>
Tue, 10 Dec 2024 21:50:34 +0000 (14:50 -0700)
The cxl_port_setup_targets() algorithm fails to identify valid target list
ordering in the presence of 4-way and above switches resulting in
'cxl create-region' failures of the form:

  $ cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0
  cxl region: create_region: region0: failed to set target7 to mem0
  cxl region: cmd_create_region: created 0 regions

  [kernel debug message]
  check_last_peer:1213: cxl region0: pci0000:0c:port1: cannot host mem6:decoder7.0 at 2
  bus_remove_device:574: bus: 'cxl': remove device region0

QEMU can create this failing topology:

                       ACPI0017:00 [root0]
                           |
                         HB_0 [port1]
                        /             \
                     RP_0             RP_1
                      |                 |
                USP [port2]           USP [port3]
            /    /    \    \        /   /    \    \
          DSP   DSP   DSP   DSP   DSP  DSP   DSP  DSP
           |     |     |     |     |    |     |    |
          mem4  mem6  mem2  mem7  mem1 mem3  mem5  mem0
 Pos:      0     2     4     6     1    3     5    7

 HB: Host Bridge
 RP: Root Port
 USP: Upstream Port
 DSP: Downstream Port

...with the following command steps:

$ qemu-system-x86_64 -machine q35,cxl=on,accel=tcg  \
        -smp cpus=8 \
        -m 8G \
        -hda /home/work/vm-images/centos-stream8-02.qcow2 \
        -object memory-backend-ram,size=4G,id=m0 \
        -object memory-backend-ram,size=4G,id=m1 \
        -object memory-backend-ram,size=2G,id=cxl-mem0 \
        -object memory-backend-ram,size=2G,id=cxl-mem1 \
        -object memory-backend-ram,size=2G,id=cxl-mem2 \
        -object memory-backend-ram,size=2G,id=cxl-mem3 \
        -object memory-backend-ram,size=2G,id=cxl-mem4 \
        -object memory-backend-ram,size=2G,id=cxl-mem5 \
        -object memory-backend-ram,size=2G,id=cxl-mem6 \
        -object memory-backend-ram,size=2G,id=cxl-mem7 \
        -numa node,memdev=m0,cpus=0-3,nodeid=0 \
        -numa node,memdev=m1,cpus=4-7,nodeid=1 \
        -netdev user,id=net0,hostfwd=tcp::2222-:22 \
        -device virtio-net-pci,netdev=net0 \
        -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
        -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
        -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
        -device cxl-upstream,bus=root_port0,id=us0 \
        -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
        -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-vmem0 \
        -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
        -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-vmem1 \
        -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
        -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-vmem2 \
        -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
        -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-vmem3 \
        -device cxl-upstream,bus=root_port1,id=us1 \
        -device cxl-downstream,port=4,bus=us1,id=swport4,chassis=0,slot=8 \
        -device cxl-type3,bus=swport4,volatile-memdev=cxl-mem4,id=cxl-vmem4 \
        -device cxl-downstream,port=5,bus=us1,id=swport5,chassis=0,slot=9 \
        -device cxl-type3,bus=swport5,volatile-memdev=cxl-mem5,id=cxl-vmem5 \
        -device cxl-downstream,port=6,bus=us1,id=swport6,chassis=0,slot=10 \
        -device cxl-type3,bus=swport6,volatile-memdev=cxl-mem6,id=cxl-vmem6 \
        -device cxl-downstream,port=7,bus=us1,id=swport7,chassis=0,slot=11 \
        -device cxl-type3,bus=swport7,volatile-memdev=cxl-mem7,id=cxl-vmem7 \
        -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=32G &

In Guest OS:
$ cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0

Fix the method to calculate @distance by iterativeley multiplying the
number of targets per switch port. This also follows the algorithm
recommended here [1].

Fixes: 27b3f8d13830 ("cxl/region: Program target lists")
Link: http://lore.kernel.org/[email protected]
Signed-off-by: Huaisheng Ye <[email protected]>
Tested-by: Li Zhijian <[email protected]>
[djbw: add a comment explaining 'distance']
Signed-off-by: Dan Williams <[email protected]>
Link: https://patch.msgid.link/173378716722.1270362.9546805175813426729.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dave Jiang <[email protected]>
drivers/cxl/core/region.c

index d778996507984a759bbe84e7acac3774e0c7af98..b98b1ccffd1ca1b3e205c44f47a00750b01cbb99 100644 (file)
@@ -1295,6 +1295,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
        struct cxl_region_params *p = &cxlr->params;
        struct cxl_decoder *cxld = cxl_rr->decoder;
        struct cxl_switch_decoder *cxlsd;
+       struct cxl_port *iter = port;
        u16 eig, peig;
        u8 eiw, peiw;
 
@@ -1311,16 +1312,26 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 
        cxlsd = to_cxl_switch_decoder(&cxld->dev);
        if (cxl_rr->nr_targets_set) {
-               int i, distance;
+               int i, distance = 1;
+               struct cxl_region_ref *cxl_rr_iter;
 
                /*
-                * Passthrough decoders impose no distance requirements between
-                * peers
+                * The "distance" between peer downstream ports represents which
+                * endpoint positions in the region interleave a given port can
+                * host.
+                *
+                * For example, at the root of a hierarchy the distance is
+                * always 1 as every index targets a different host-bridge. At
+                * each subsequent switch level those ports map every Nth region
+                * position where N is the width of the switch == distance.
                 */
-               if (cxl_rr->nr_targets == 1)
-                       distance = 0;
-               else
-                       distance = p->nr_targets / cxl_rr->nr_targets;
+               do {
+                       cxl_rr_iter = cxl_rr_load(iter, cxlr);
+                       distance *= cxl_rr_iter->nr_targets;
+                       iter = to_cxl_port(iter->dev.parent);
+               } while (!is_cxl_root(iter));
+               distance *= cxlrd->cxlsd.cxld.interleave_ways;
+
                for (i = 0; i < cxl_rr->nr_targets_set; i++)
                        if (ep->dport == cxlsd->target[i]) {
                                rc = check_last_peer(cxled, ep, cxl_rr,
This page took 0.056784 seconds and 4 git commands to generate.