diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index b003bed339fddd7e44a5feadf2ad5baa12e076fe..26739383c015634723f5a4640bf945406885a192 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -236,18 +236,30 @@ with respect to allocation: user can request. "bandwidth_gran": - The granularity in which the memory bandwidth + The approximate granularity in which the memory bandwidth percentage is allocated. The allocated b/w percentage is rounded off to the next - control step available on the hardware. The - available bandwidth control steps are: - min_bandwidth + N * bandwidth_gran. + control step available on the hardware. The available + steps are at least as small as this value. "delay_linear": Indicates if the delay scale is linear or non-linear. This field is purely informational only. +"max_lim": + Read-only. On ARM MPAM systems where MBA exposes MBW_MAX, this + file contains a single decimal integer: the + ``MPAMF_MBW_IDR.MAX_LIM`` field [1:0] (values ``0``–``3``) as probed for the MBA resource. + The file appears only when the platform supports MBA MBW_MAX and + the MAX_LIM value is available; otherwise it is not listed. + + The Arm MPAM architecture defines the meaning of each MAX_LIM + encoding. In this kernel, when ``max_lim`` reads ``0``, the + driver treats the ``HARDLIM`` bit of ``MPAMCFG_MBW_MAX`` as + read/write and an optional ``MB_HLIM`` line may appear in + ``schemata``. When ``max_lim`` is nonzero, ``MB_HLIM`` is omitted. + "thread_throttle_mode": Indicator on Intel systems of how tasks running on threads of a physical core are throttled in cases where they @@ -964,6 +976,27 @@ Memory bandwidth domain is L3 cache. MB:=bw_MiBps0;=bw_MiBps1;... +MBW maximum hard limit (ARM MPAM) +--------------------------------- +On some ARM systems, resctrl memory bandwidth allocation uses MPAM +maximum bandwidth (MBW_MAX). When ``max_lim`` reads ``0`` (see ``max_lim`` +under the ``MB`` allocation ``info`` directory), an additional schemata +line selects the ``HARDLIM`` bit for ``MPAMCFG_MBW_MAX`` independently of +the numeric limit on the ``MB`` line. + +The line uses the same cache/domain indices as ``MB``. Each value must +be ``0`` or ``1``: ``0`` clears HARDLIM (soft-limit behaviour for the +max), ``1`` sets HARDLIM (hard limit). When ``max_lim`` is nonzero or +``MB_HLIM`` is not supported for the platform, the line is omitted from +``schemata``. + +Format: +:: + + MB_HLIM:=0|1;=0|1;... + +The corresponding ``schema_format`` entry under ``info`` is ``mb_hlim``. + Slow Memory Bandwidth Allocation (SMBA) --------------------------------------- AMD hardware supports Slow Memory Bandwidth Allocation (SMBA). diff --git a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM new file mode 100644 index 0000000000000000000000000000000000000000..45957b7b4ea21fbd49e5e57512cad210afbdd776 --- /dev/null +++ b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM @@ -0,0 +1 @@ +CONFIG_ARM64_MPAM=y diff --git a/anolis/configs/L0-MANDATORY/x86/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS similarity index 100% rename from anolis/configs/L0-MANDATORY/x86/CONFIG_RESCTRL_FS rename to anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS diff --git a/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS new file mode 100644 index 0000000000000000000000000000000000000000..2147c5f9ea80a3481004517c46838b71eb591a6f --- /dev/null +++ b/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS @@ -0,0 +1 @@ +# CONFIG_RESCTRL_FS is not set diff --git a/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS new file mode 100644 index 0000000000000000000000000000000000000000..2147c5f9ea80a3481004517c46838b71eb591a6f --- /dev/null +++ b/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS @@ -0,0 +1 @@ +# CONFIG_RESCTRL_FS is not set diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM new file mode 100644 index 0000000000000000000000000000000000000000..e93cbd36cedc1b687c9c2d09124388fc11798580 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM @@ -0,0 +1 @@ +CONFIG_ACPI_MPAM=y diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM deleted file mode 100644 index c34b76e4f406cc005d35eff15e8f8e54419f45f5..0000000000000000000000000000000000000000 --- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_ARM64_MPAM is not set diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_DRIVER b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_DRIVER new file mode 100644 index 0000000000000000000000000000000000000000..9e4b322241381be98ac7d5dd00664de3608e794d --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_DRIVER @@ -0,0 +1 @@ +CONFIG_ARM64_MPAM_DRIVER=y diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_DRIVER_DEBUG b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_DRIVER_DEBUG new file mode 100644 index 0000000000000000000000000000000000000000..76eca7c2ff09daf747a09fadbc9978f4801cf884 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_DRIVER_DEBUG @@ -0,0 +1 @@ +# CONFIG_ARM64_MPAM_DRIVER_DEBUG is not set diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_RESCTRL_FS b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_RESCTRL_FS new file mode 100644 index 0000000000000000000000000000000000000000..c91ce4ffbafa2132c6344290bf9796321a6930ab --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_MPAM_RESCTRL_FS @@ -0,0 +1 @@ +CONFIG_ARM64_MPAM_RESCTRL_FS=y diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_IOMMU b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_IOMMU new file mode 100644 index 0000000000000000000000000000000000000000..425eb50c311e1c38e85a3dfc43d03aa48eb559ce --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_IOMMU @@ -0,0 +1 @@ +CONFIG_RESCTRL_IOMMU=y diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID new file mode 100644 index 0000000000000000000000000000000000000000..8cddb03cb13512f3d86acc0333c8bd8e45932152 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID @@ -0,0 +1 @@ +CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID=y diff --git a/anolis/configs/L2-OPTIONAL/x86/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL similarity index 100% rename from anolis/configs/L2-OPTIONAL/x86/CONFIG_ARCH_HAS_CPU_RESCTRL rename to anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL diff --git a/anolis/configs/L2-OPTIONAL/x86/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL similarity index 100% rename from anolis/configs/L2-OPTIONAL/x86/CONFIG_PROC_CPU_RESCTRL rename to anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL diff --git a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL new file mode 100644 index 0000000000000000000000000000000000000000..dd3c6353e127c4229b2241ccd4e97277eb3f5cf1 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL @@ -0,0 +1 @@ +# CONFIG_ARCH_HAS_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL new file mode 100644 index 0000000000000000000000000000000000000000..b4dd102b0a4e8c58211a42e75fc8a3660ebb1e76 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL @@ -0,0 +1 @@ +# CONFIG_PROC_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL new file mode 100644 index 0000000000000000000000000000000000000000..dd3c6353e127c4229b2241ccd4e97277eb3f5cf1 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL @@ -0,0 +1 @@ +# CONFIG_ARCH_HAS_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL new file mode 100644 index 0000000000000000000000000000000000000000..b4dd102b0a4e8c58211a42e75fc8a3660ebb1e76 --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL @@ -0,0 +1 @@ +# CONFIG_PROC_CPU_RESCTRL is not set diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 575f8408a9e7c69fae090c1dfe9b5e753c7cb6ae..40a74a06173458b535e163d1c4b7850970bf829a 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -191,6 +191,11 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx) { } +static inline bool resctrl_arch_mon_can_overflow(void) +{ + return true; +} + void resctrl_cpu_detect(struct cpuinfo_x86 *c); #else diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7b7171a212c7ea766cba7ad0960cd211e60e2149..02cb44561ab6f214f6fa93b97d61c8e2b3f37ca8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -108,7 +108,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "MB", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), - .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, [RDT_RESOURCE_SMBA] = @@ -117,7 +116,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "SMBA", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), - .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, [RDT_RESOURCE_PERF_PKG] = @@ -392,22 +390,23 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; + r->schema_fmt = RESCTRL_SCHEMA_PERCENT; r->membw.max_bw = MAX_MBA_BW; - r->membw.arch_needs_linear = true; + r->mba.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { - r->membw.delay_linear = true; + r->mba.delay_linear = true; r->membw.min_bw = MAX_MBA_BW - max_delay; r->membw.bw_gran = MAX_MBA_BW - max_delay; } else { if (!rdt_get_mb_table(r)) return false; - r->membw.arch_needs_linear = false; + r->mba.arch_needs_linear = false; } if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) - r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; + r->mba.throttle_mode = THREAD_THROTTLE_PER_THREAD; else - r->membw.throttle_mode = THREAD_THROTTLE_MAX; + r->mba.throttle_mode = THREAD_THROTTLE_MAX; #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_HWDRC r->hwdrc_capable = hwdrc_detect_intel(); @@ -432,17 +431,18 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); hw_res->num_closid = edx + 1; + r->schema_fmt = RESCTRL_SCHEMA__AMD_MBA; r->membw.max_bw = 1 << eax; /* AMD does not use delay */ - r->membw.delay_linear = false; - r->membw.arch_needs_linear = false; + r->mba.delay_linear = false; + r->mba.arch_needs_linear = false; /* * AMD does not use memory delay throttle model to control * the allocation like Intel does. */ - r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; + r->mba.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = 0; r->membw.bw_gran = 1; @@ -511,7 +511,7 @@ static void mba_wrmsr_amd(struct msr_param *m) */ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { - if (r->membw.delay_linear) + if (r->mba.delay_linear) return MAX_MBA_BW - bw; pr_warn_once("Non Linear delay-bw map not supported but queried\n"); @@ -564,7 +564,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) * For Memory Allocation: Set b/w requested to 100% */ for (i = 0; i < hw_res->num_closid; i++, dc++) - *dc = resctrl_get_default_ctrl(r); + *dc = resctrl_get_resource_default_ctrl(r); } static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b20e705606b8fbf10f99de11c1a993516442d07a..91ce05256a004cbe6c951ce8b37af1b9baac6a1b 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -16,9 +16,15 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "internal.h" +u32 resctrl_arch_round_bw(u32 val, const struct resctrl_schema *s) +{ + return roundup(val, (unsigned long)s->membw.bw_gran); +} + int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 88502646844052f6d2617c101e4959310225787e..8a017f11110285197ef9800001209ab0e88e9a12 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -253,7 +253,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) hw_dom = resctrl_to_arch_ctrl_dom(d); for (i = 0; i < hw_res->num_closid; i++) - hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r); + hw_dom->ctrl_val[i] = resctrl_get_resource_default_ctrl(r); msr_param.dom = d; smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c index 84963a20c3e78854bbca4c5196d7395028443c9b..0bd9ff1fd978169a791511239cb2594af2f4576c 100644 --- a/drivers/acpi/arm64/mpam.c +++ b/drivers/acpi/arm64/mpam.c @@ -95,17 +95,51 @@ static void acpi_mpam_parse_irqs(struct platform_device *pdev, res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "error"); } -static int acpi_mpam_parse_resource(struct mpam_msc *msc, +#define UUID_MPAM_INTERCONNECT_TABLE "fe2bd645-033b-49e6-9479-2e0b8b21d1cd" + +struct acpi_mpam_interconnect_descriptor_table { + u8 type_uuid[16]; + u32 num_descriptors; +}; + +struct acpi_mpam_interconnect_descriptor { + u32 source_id; + u32 destination_id; + u8 link_type; + u8 reserved[3]; +}; + +static int acpi_mpam_parse_resource(struct acpi_mpam_msc_node *tbl_msc, + struct mpam_msc *msc, struct acpi_mpam_resource_node *res) { + struct acpi_mpam_interconnect_descriptor_table *tbl_int_tbl; + struct acpi_mpam_interconnect_descriptor *tbl_int; + guid_t int_tbl_uuid, spec_uuid; int level, nid; u32 cache_id; + off_t offset; + /* + * Class IDs are somewhat arbitrary, but need to be co-ordinated. + * 0-N are caches, + * 64, 65: Interconnect, but ideally these would appear between the + * classes the controls are adjacent to. + * 128: SMMU, + * 192-192+level: Memory Side Caches, nothing checks that N is a + * small number. + * 255: Memory Controllers + * + * ACPI devices would need a class id allocated based on the _HID. + * + * Classes that the mpam driver can't currently plumb into resctrl + * are registered as UNKNOWN. + */ switch (res->locator_type) { case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: cache_id = res->locator.cache_locator.cache_reference; level = find_acpi_cache_level_from_id(cache_id); - if (level <= 0) { + if (level <= 0 || level >= 64) { pr_err_once("Bad level (%d) for cache with id %u\n", level, cache_id); return -EINVAL; } @@ -120,6 +154,57 @@ static int acpi_mpam_parse_resource(struct mpam_msc *msc, } return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY, MPAM_CLASS_ID_DEFAULT, nid); + case ACPI_MPAM_LOCATION_TYPE_SMMU: + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_UNKNOWN, + 128, res->locator.smmu_locator.smmu_interface); + case ACPI_MPAM_LOCATION_TYPE_MEMORY_CACHE: + cache_id = res->locator.mem_cache_locator.reference; + level = res->locator.mem_cache_locator.level; + if (192 + level >= 255) { + pr_err_once("Bad level (%u) for memory side cache with reference %u\n", + level, cache_id); + return -EINVAL; + } + + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE, + 192 + level, cache_id); + + case ACPI_MPAM_LOCATION_TYPE_INTERCONNECT: + /* Find the descriptor table, and check it lands in the parent msc */ + offset = res->locator.interconnect_ifc_locator.inter_connect_desc_tbl_off; + if (offset >= tbl_msc->length) { + pr_err_once("Bad offset (%lu) for interconnect descriptor on msc %u\n", + offset, tbl_msc->identifier); + return -EINVAL; + } + tbl_int_tbl = ACPI_ADD_PTR(struct acpi_mpam_interconnect_descriptor_table, + tbl_msc, offset); + guid_parse(UUID_MPAM_INTERCONNECT_TABLE, &spec_uuid); + import_guid(&int_tbl_uuid, tbl_int_tbl->type_uuid); + if (!guid_equal(&spec_uuid, &int_tbl_uuid)) { + pr_err_once("Bad UUID for interconnect descriptor on msc %u\n", + tbl_msc->identifier); + return -EINVAL; + } + + offset += sizeof(*tbl_int_tbl); + offset += tbl_int_tbl->num_descriptors * sizeof(*tbl_int); + if (offset >= tbl_msc->length) { + pr_err_once("Bad num_descriptors (%u) for interconnect descriptor on msc %u\n", + tbl_int_tbl->num_descriptors, tbl_msc->identifier); + return -EINVAL; + } + + tbl_int = ACPI_ADD_PTR(struct acpi_mpam_interconnect_descriptor, + tbl_int_tbl, sizeof(*tbl_int_tbl)); + cache_id = tbl_int->source_id; + + /* Unknown link type? */ + if (tbl_int->link_type != 0 && tbl_int->link_type == 1) + return 0; + + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_UNKNOWN, + 64 + tbl_int->link_type, cache_id); default: /* These get discovered later and are treated as unknown */ return 0; @@ -150,7 +235,7 @@ int acpi_mpam_parse_resources(struct mpam_msc *msc, return -EINVAL; } - err = acpi_mpam_parse_resource(msc, resource); + err = acpi_mpam_parse_resource(tbl_msc, msc, resource); if (err) return err; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e8d7dbe495f0302191c3c7ec4c6d7564062f6236..f6c80ec5f2611bb79c513cf6b1616def3936292e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -4369,6 +4370,96 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } +static int arm_smmu_group_set_mpam(struct iommu_group *group, u16 partid, + u8 pmg) +{ + int i; + u32 sid; + unsigned long flags; + struct arm_smmu_ste *step; + struct iommu_domain *domain; + struct arm_smmu_device *smmu; + struct arm_smmu_master *master; + struct arm_smmu_cmdq_batch cmds; + struct arm_smmu_domain *smmu_domain; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_CFGI_STE, + .cfgi = { + .leaf = true, + }, + }; + struct arm_smmu_master_domain *master_domain; + + domain = iommu_get_domain_for_group(group); + smmu_domain = to_smmu_domain(domain); + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_MPAM)) + return -EIO; + smmu = smmu_domain->smmu; + + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + master = master_domain->master; + + for (i = 0; i < master->num_streams; i++) { + sid = master->streams[i].id; + step = arm_smmu_get_step_for_sid(smmu, sid); + + /* These need locking if the VMSPtr is ever used */ + step->data[4] = FIELD_PREP(STRTAB_STE_4_PARTID, partid); + step->data[5] = FIELD_PREP(STRTAB_STE_5_PMG, pmg); + + cmd.cfgi.sid = sid; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + } + + master->partid = partid; + master->pmg = pmg; + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + arm_smmu_cmdq_batch_submit(smmu, &cmds); + + return 0; +} + +static int arm_smmu_group_get_mpam(struct iommu_group *group, u16 *partid, + u8 *pmg) +{ + int err = -EINVAL; + unsigned long flags; + struct iommu_domain *domain; + struct arm_smmu_master *master; + struct arm_smmu_domain *smmu_domain; + struct arm_smmu_master_domain *master_domain; + + domain = iommu_get_domain_for_group(group); + smmu_domain = to_smmu_domain(domain); + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_MPAM)) + return -EIO; + + if (!partid && !pmg) + return 0; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + master = master_domain->master; + if (master) { + if (partid) + *partid = master->partid; + if (pmg) + *pmg = master->pmg; + err = 0; + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + return err; +} + static const struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, @@ -4382,6 +4473,8 @@ static const struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, + .get_group_qos_params = arm_smmu_group_get_mpam, + .set_group_qos_params = arm_smmu_group_set_mpam, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .get_viommu_size = arm_smmu_get_viommu_size, @@ -5009,6 +5102,29 @@ static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) hw_features, fw_features); } +static void arm_smmu_mpam_register_smmu(struct arm_smmu_device *smmu) +{ + u16 partid_max; + u8 pmg_max; + u32 reg; + + if (!IS_ENABLED(CONFIG_ARM64_MPAM)) + return; + + if (!(smmu->features & ARM_SMMU_FEAT_MPAM)) + return; + + reg = readl_relaxed(smmu->base + ARM_SMMU_MPAMIDR); + if (!reg) + return; + + partid_max = FIELD_GET(SMMU_MPAMIDR_PARTID_MAX, reg); + pmg_max = FIELD_GET(SMMU_MPAMIDR_PMG_MAX, reg); + + if (mpam_register_requestor(partid_max, pmg_max)) + smmu->features &= ~ARM_SMMU_FEAT_MPAM; +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -5156,6 +5272,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; if (FIELD_GET(IDR3_FWB, reg)) smmu->features |= ARM_SMMU_FEAT_S2FWB; + if (FIELD_GET(IDR3_MPAM, reg)) + smmu->features |= ARM_SMMU_FEAT_MPAM; if (FIELD_GET(IDR3_BBM, reg) == 2) smmu->features |= ARM_SMMU_FEAT_BBML2; @@ -5221,6 +5339,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) if (arm_smmu_sva_supported(smmu)) smmu->features |= ARM_SMMU_FEAT_SVA; + arm_smmu_mpam_register_smmu(smmu); + dev_info(smmu->dev, "oas %lu-bit (features 0x%08x)\n", smmu->oas, smmu->features); return 0; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index ef42df4753ec4d1aef55c4909c3b67579ce9911c..85d47843943a271b9a720725bce1413954247334 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -58,6 +58,7 @@ struct arm_vsmmu; #define IDR1_SIDSIZE GENMASK(5, 0) #define ARM_SMMU_IDR3 0xc +#define IDR3_MPAM (1 << 7) #define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) #define IDR3_BBM GENMASK(12, 11) @@ -169,6 +170,10 @@ struct arm_vsmmu; #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc +#define ARM_SMMU_MPAMIDR 0x130 +#define SMMU_MPAMIDR_PARTID_MAX GENMASK(15, 0) +#define SMMU_MPAMIDR_PMG_MAX GENMASK(23, 16) + #define ARM_SMMU_REG_SZ 0xe00 /* Common MSI config fields */ @@ -270,6 +275,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_MEV (1UL << 19) #define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) +#define STRTAB_STE_1_S1MPAM (1UL << 26) #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) #define STRTAB_STE_1_EATS_ABT 0UL @@ -300,6 +306,10 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) +#define STRTAB_STE_4_PARTID GENMASK_ULL(31, 16) + +#define STRTAB_STE_5_PMG GENMASK_ULL(7, 0) + /* These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED */ #define STRTAB_STE_0_NESTING_ALLOWED \ cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \ @@ -854,6 +864,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HD (1 << 22) #define ARM_SMMU_FEAT_S2FWB (1 << 23) #define ARM_SMMU_FEAT_BBML2 (1 << 24) +#define ARM_SMMU_FEAT_MPAM (1 << 25) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -945,6 +956,8 @@ struct arm_smmu_master { bool stall_enabled; unsigned int ssid_bits; unsigned int iopf_refcount; + u16 partid; + u8 pmg; }; /* SMMU private data for an IOMMU domain */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 61c12ba782066adcb597f1188b848ff734c9fc48..95e354a9874948a25dea063566d5c7375d70cb62 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1110,6 +1110,45 @@ struct iommu_group *iommu_group_alloc(void) } EXPORT_SYMBOL_GPL(iommu_group_alloc); +struct iommu_group *iommu_group_get_from_kobj(struct kobject *group_kobj) +{ + struct iommu_group *group; + + if (!iommu_group_kset || !group_kobj) + return NULL; + + group = container_of(group_kobj, struct iommu_group, kobj); + + kobject_get(group->devices_kobj); + kobject_put(&group->kobj); + + return group; +} + +struct iommu_group *iommu_group_get_by_id(int id) +{ + struct kobject *group_kobj; + const char *name; + + if (!iommu_group_kset) + return NULL; + + name = kasprintf(GFP_KERNEL, "%d", id); + if (!name) + return NULL; + + group_kobj = kset_find_obj(iommu_group_kset, name); + kfree(name); + + return iommu_group_get_from_kobj(group_kobj); +} +EXPORT_SYMBOL_GPL(iommu_group_get_by_id); + +struct kset *iommu_get_group_kset(void) +{ + return kset_get(iommu_group_kset); +} + /** * iommu_group_get_iommudata - retrieve iommu_data registered for a group * @group: the group @@ -2124,6 +2163,12 @@ void iommu_domain_free(struct iommu_domain *domain) } EXPORT_SYMBOL_GPL(iommu_domain_free); +struct iommu_domain *iommu_get_domain_for_group(struct iommu_group *group) +{ + return group->domain; +} +EXPORT_SYMBOL_GPL(iommu_get_domain_for_group); + /* * Put the group's domain back to the appropriate core-owned domain - either the * standard kernel-mode DMA configuration or an all-DMA-blocked domain. @@ -4135,3 +4180,79 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return ret; } #endif /* CONFIG_IRQ_MSI_IOMMU */ + +/* + * iommu_group_set_qos_params() - Set the QoS parameters for a group + * @group: the iommu group. + * @partition: the partition label all traffic from the group should use. + * @perf_mon_grp: the performance label all traffic from the group should use. + * + * Return: 0 on success, or an error. + */ +int iommu_group_set_qos_params(struct iommu_group *group, + u16 partition, u8 perf_mon_grp) +{ + const struct iommu_ops *ops; + struct group_device *device; + int ret; + + mutex_lock(&group->mutex); + device = list_first_entry_or_null(&group->devices, typeof(*device), + list); + if (!device) { + ret = -ENODEV; + goto out_unlock; + } + + ops = dev_iommu_ops(device->dev); + if (!ops->set_group_qos_params) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = ops->set_group_qos_params(group, partition, perf_mon_grp); + +out_unlock: + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_group_set_qos_params, "IOMMUFD_INTERNAL"); + +/* + * iommu_group_get_qos_params() - Get the QoS parameters for a group + * @group: the iommu group. + * @partition: the partition label all traffic from the group uses. + * @perf_mon_grp: the performance label all traffic from the group uses. + * + * Return: 0 on success, or an error. + */ +int iommu_group_get_qos_params(struct iommu_group *group, + u16 *partition, u8 *perf_mon_grp) +{ + const struct iommu_ops *ops; + struct group_device *device; + int ret; + + mutex_lock(&group->mutex); + device = list_first_entry_or_null(&group->devices, typeof(*device), + list); + if (!device) { + ret = -ENODEV; + goto out_unlock; + } + + ops = dev_iommu_ops(device->dev); + if (!ops->get_group_qos_params) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = ops->get_group_qos_params(group, partition, perf_mon_grp); + +out_unlock: + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_group_get_qos_params, "IOMMUFD_INTERNAL"); diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig index 672abea3b03ccbeb4532832dd18e5cb80e90ff5b..30f455dba5aa02b1e4295f5af6568f60277ccc13 100644 --- a/drivers/resctrl/Kconfig +++ b/drivers/resctrl/Kconfig @@ -29,3 +29,4 @@ config ARM64_MPAM_RESCTRL_FS default y if ARM64_MPAM_DRIVER && RESCTRL_FS select RESCTRL_RMID_DEPENDS_ON_CLOSID select RESCTRL_ASSIGN_FIXED + select RESCTRL_IOMMU if ARM_SMMU_V3 diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 41b14344b16f2d361e67c261a7243c40b7773ed9..d2773ee5d44858916b07e82baabf8ce805b31f2f 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,12 +21,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include "mpam_internal.h" @@ -65,6 +68,8 @@ static DEFINE_MUTEX(mpam_cpuhp_state_lock); u16 mpam_partid_max; u8 mpam_pmg_max; static bool partid_max_init, partid_max_published; +static u16 mpam_cmdline_partid_max; +static bool mpam_cmdline_partid_max_overridden; static DEFINE_SPINLOCK(partid_max_lock); /* @@ -83,6 +88,8 @@ static DECLARE_WORK(mpam_broken_work, &mpam_disable); /* When mpam is disabled, the printed reason to aid debugging */ static char *mpam_disable_reason; +static struct dentry *mpam_debugfs; + /* * Whether resctrl has been setup. Used by cpuhp in preference to * mpam_is_enabled(). The disable call after an error interrupt makes @@ -164,11 +171,17 @@ static void mpam_free_garbage(void) /* * Once mpam is enabled, new requestors cannot further reduce the available * partid. Assert that the size is fixed, and new requestors will be turned - * away. + * away. This is needed when walking over structures sized by PARTID. + * + * During mpam_disable() these structures are not fixed, but the MSC state + * is still reset using whatever sizes have been discovered so far. As only + * PARTID 0 will be used after mpam_disable(), any race would be benign. + * Skip the check if a mpam_disable_reason has been set. */ static void mpam_assert_partid_sizes_fixed(void) { - WARN_ON_ONCE(!partid_max_published); + if (!mpam_disable_reason) + WARN_ON_ONCE(!partid_max_published); } static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) @@ -300,6 +313,9 @@ int mpam_register_requestor(u16 partid_max, u8 pmg_max) return -EBUSY; } + if (mpam_cmdline_partid_max_overridden) + mpam_partid_max = min(mpam_cmdline_partid_max, mpam_partid_max); + return 0; } EXPORT_SYMBOL(mpam_register_requestor); @@ -333,6 +349,8 @@ static void mpam_class_destroy(struct mpam_class *class) { lockdep_assert_held(&mpam_list_lock); + debugfs_remove_recursive(class->debugfs); + class->debugfs = NULL; list_del_rcu(&class->classes_list); add_to_garbage(class); } @@ -353,7 +371,7 @@ mpam_class_find(u8 level_idx, enum mpam_class_types type) } static struct mpam_component * -mpam_component_alloc(struct mpam_class *class, int id) +mpam_component_alloc(struct mpam_class *class, u32 id) { struct mpam_component *comp; @@ -385,6 +403,8 @@ static void mpam_component_destroy(struct mpam_component *comp) __destroy_component_cfg(comp); + debugfs_remove_recursive(comp->debugfs); + comp->debugfs = NULL; list_del_rcu(&comp->class_list); add_to_garbage(comp); @@ -393,7 +413,7 @@ static void mpam_component_destroy(struct mpam_component *comp) } static struct mpam_component * -mpam_component_find(struct mpam_class *class, int id) +mpam_component_find(struct mpam_class *class, u32 id) { struct mpam_component *comp; @@ -435,6 +455,8 @@ static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc) lockdep_assert_held(&mpam_list_lock); + debugfs_remove_recursive(vmsc->debugfs); + vmsc->debugfs = NULL; list_del_rcu(&vmsc->comp_list); add_to_garbage(vmsc); @@ -505,6 +527,11 @@ static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, case MPAM_CLASS_MEMORY: get_cpumask_from_node_id(comp->comp_id, affinity); /* affinity may be empty for CPU-less memory nodes */ + if (cpumask_empty(affinity)) { + dev_warn_once(&msc->pdev->dev, "CPU-less numa node"); + cpumask_copy(affinity, cpu_possible_mask); + } else if (class->level > 3) + cpumask_copy(affinity, cpu_possible_mask); break; case MPAM_CLASS_UNKNOWN: return 0; @@ -517,7 +544,7 @@ static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, - int component_id) + u32 component_id) { int err; struct mpam_vmsc *vmsc; @@ -594,6 +621,8 @@ static void mpam_ris_destroy(struct mpam_msc_ris *ris) cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity); cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity); clear_bit(ris->ris_idx, &msc->ris_idxs); + debugfs_remove_recursive(ris->debugfs); + ris->debugfs = NULL; list_del_rcu(&ris->msc_list); list_del_rcu(&ris->vmsc_list); add_to_garbage(ris); @@ -603,10 +632,13 @@ static void mpam_ris_destroy(struct mpam_msc_ris *ris) } int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, - enum mpam_class_types type, u8 class_id, int component_id) + enum mpam_class_types type, u8 class_id, u32 component_id) { int err; + if (mpam_force_unknown_msc_test(msc)) + type = MPAM_CLASS_UNKNOWN; + mutex_lock(&mpam_list_lock); err = mpam_ris_create_locked(msc, ris_idx, type, class_id, component_id); @@ -731,16 +763,16 @@ static void mpam_enable_quirks(struct mpam_msc *msc) static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) { u32 now; - u64 mon_sel; + u32 mon_sel; bool can_set, can_clear; struct mpam_msc *msc = ris->vmsc->msc; - if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) return false; mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) | FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); - _mpam_write_monsel_reg(msc, mon_reg, mon_sel); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY); now = _mpam_read_monsel_reg(msc, mon_reg); @@ -749,7 +781,7 @@ static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) _mpam_write_monsel_reg(msc, mon_reg, 0); now = _mpam_read_monsel_reg(msc, mon_reg); can_clear = !(now & MSMON___NRDY); - mpam_mon_sel_unlock(msc); + mpam_mon_sel_inner_unlock(msc); return (!can_set || !can_clear); } @@ -757,6 +789,19 @@ static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) #define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \ _mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg) +/* Align mpam_feat_mbw_max_hardlim_rw with MPAMF_MBW_IDR.MAX_LIM and mbw_max. */ +static void mpam_props_sync_mbw_max_hardlim_rw(struct mpam_props *props) +{ + if (!mpam_has_feature(mpam_feat_mbw_max, props)) { + mpam_clear_feature(mpam_feat_mbw_max_hardlim_rw, props); + return; + } + if (props->mbw_max_lim == 0) + mpam_set_feature(mpam_feat_mbw_max_hardlim_rw, props); + else + mpam_clear_feature(mpam_feat_mbw_max_hardlim_rw, props); +} + static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) { int err; @@ -770,32 +815,32 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) /* Cache Capacity Partitioning */ if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) { - u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR); + ris->ccap_idr = mpam_read_partsel_reg(msc, CCAP_IDR); - props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features); + props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ris->ccap_idr); if (props->cmax_wd && - FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ccap_features)) + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ris->ccap_idr)) mpam_set_feature(mpam_feat_cmax_softlim, props); if (props->cmax_wd && - !FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ccap_features)) + !FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ris->ccap_idr)) mpam_set_feature(mpam_feat_cmax_cmax, props); if (props->cmax_wd && - FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ccap_features)) + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ris->ccap_idr)) mpam_set_feature(mpam_feat_cmax_cmin, props); - props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ccap_features); + props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ris->ccap_idr); if (props->cassoc_wd && - FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ccap_features)) + FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ris->ccap_idr)) mpam_set_feature(mpam_feat_cmax_cassoc, props); } /* Cache Portion partitioning */ if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { - u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); + ris->cpor_idr = mpam_read_partsel_reg(msc, CPOR_IDR); - props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features); + props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, ris->cpor_idr); if (props->cpbm_wd) mpam_set_feature(mpam_feat_cpor_part, props); } @@ -804,6 +849,8 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) { u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR); + props->mbw_max_lim = 0; + /* portion bitmap resolution */ props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features); if (props->mbw_pbm_bits && @@ -818,14 +865,18 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) */ props->bwa_wd = min(props->bwa_wd, 16); - if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) { mpam_set_feature(mpam_feat_mbw_max, props); + props->mbw_max_lim = FIELD_GET(MPAMF_MBW_IDR_MAX_LIM, mbw_features); + } if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features)) mpam_set_feature(mpam_feat_mbw_min, props); if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features)) mpam_set_feature(mpam_feat_mbw_prop, props); + + mpam_props_sync_mbw_max_hardlim_rw(props); } /* Priority partitioning */ @@ -873,7 +924,9 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) mpam_set_feature(mpam_feat_msmon_csu_xcl, props); /* Is NRDY hardware managed? */ + mpam_mon_sel_outer_lock(msc); hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); + mpam_mon_sel_outer_unlock(msc); if (hw_managed) mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props); } @@ -907,7 +960,9 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) } /* Is NRDY hardware managed? */ + mpam_mon_sel_outer_lock(msc); hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); + mpam_mon_sel_outer_unlock(msc); if (hw_managed) mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); @@ -1201,7 +1256,7 @@ static void __ris_msmon_read(void *arg) struct mpam_msc *msc = m->ris->vmsc->msc; u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; - if (!mpam_mon_sel_lock(msc)) { + if (!mpam_mon_sel_inner_lock(msc)) { m->err = -EIO; return; } @@ -1301,10 +1356,12 @@ static void __ris_msmon_read(void *arg) default: m->err = -EINVAL; } - mpam_mon_sel_unlock(msc); + mpam_mon_sel_inner_unlock(msc); - if (nrdy) + if (nrdy) { + msc->nrdy_retry_count++; m->err = -EBUSY; + } if (m->err) return; @@ -1323,6 +1380,7 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) struct mpam_msc *msc = vmsc->msc; struct mpam_msc_ris *ris; + mpam_mon_sel_outer_lock(msc); list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, srcu_read_lock_held(&mpam_srcu)) { arg->ris = ris; @@ -1341,6 +1399,7 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) if (err) any_err = err; } + mpam_mon_sel_outer_unlock(msc); } return any_err; @@ -1423,18 +1482,20 @@ void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) continue; msc = vmsc->msc; + mpam_mon_sel_outer_lock(msc); list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, srcu_read_lock_held(&mpam_srcu)) { if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) continue; - if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) continue; ris->mbwu_state[ctx->mon].correction = 0; ris->mbwu_state[ctx->mon].reset_on_next_read = true; - mpam_mon_sel_unlock(msc); + mpam_mon_sel_inner_unlock(msc); } + mpam_mon_sel_outer_unlock(msc); } } @@ -1589,20 +1650,41 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, } if (mpam_has_feature(mpam_feat_mbw_max, rprops)) { - if (mpam_has_feature(mpam_feat_mbw_max, cfg)) - mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); - else + if (mpam_has_feature(mpam_feat_mbw_max, cfg) || + mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, cfg)) { + u32 mbw_val = cfg->mbw_max; + + if (mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, cfg) && + cfg->mbw_max_hardlim) + mbw_val |= MPAMCFG_MBW_MAX_HARDLIM; + mpam_write_partsel_reg(msc, MBW_MAX, mbw_val); + } else { mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + } } if (mpam_has_feature(mpam_feat_mbw_prop, rprops)) mpam_write_partsel_reg(msc, MBW_PROP, 0); - if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) - mpam_write_partsel_reg(msc, CMAX, cmax); + if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) { + if (mpam_has_feature(mpam_feat_cmax_cmax, cfg)) { + u32 cmax_val = cfg->cmax; - if (mpam_has_feature(mpam_feat_cmax_cmin, rprops)) - mpam_write_partsel_reg(msc, CMIN, 0); + if (cfg->cmax_softlim) + cmax_val |= MPAMCFG_CMAX_SOFTLIM; + mpam_write_partsel_reg(msc, CMAX, cmax_val); + } else { + mpam_write_partsel_reg(msc, CMAX, cmax); + } + } + + if (mpam_has_feature(mpam_feat_cmax_cmin, rprops)) { + if (mpam_has_feature(mpam_feat_cmax_cmin, cfg)) { + mpam_write_partsel_reg(msc, CMIN, cfg->cmin); + } else { + mpam_write_partsel_reg(msc, CMIN, 0); + } + } if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops)) mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC); @@ -1635,8 +1717,11 @@ static int mpam_restore_mbwu_state(void *_ris) u64 val; struct mon_read mwbu_arg; struct mpam_msc_ris *ris = _ris; + struct mpam_msc *msc = ris->vmsc->msc; struct mpam_class *class = ris->vmsc->comp->class; + mpam_mon_sel_outer_lock(msc); + for (i = 0; i < ris->props.num_mbwu_mon; i++) { if (ris->mbwu_state[i].enabled) { mwbu_arg.ris = ris; @@ -1648,10 +1733,12 @@ static int mpam_restore_mbwu_state(void *_ris) } } + mpam_mon_sel_outer_unlock(msc); + return 0; } -/* Call with MSC cfg_lock held */ +/* Call with MSC lock and outer mon_sel lock held */ static int mpam_save_mbwu_state(void *arg) { int i; @@ -1666,7 +1753,7 @@ static int mpam_save_mbwu_state(void *arg) mbwu_state = &ris->mbwu_state[i]; cfg = &mbwu_state->cfg; - if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) return -EIO; mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) | @@ -1691,7 +1778,7 @@ static int mpam_save_mbwu_state(void *arg) cfg->partid = FIELD_GET(MSMON_CFG_x_FLT_PARTID, cur_flt); mbwu_state->correction += val; mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl); - mpam_mon_sel_unlock(msc); + mpam_mon_sel_inner_unlock(msc); } return 0; @@ -1886,6 +1973,7 @@ static int mpam_cpu_offline(unsigned int cpu) struct mpam_msc_ris *ris; mutex_lock(&msc->cfg_lock); + mpam_mon_sel_outer_lock(msc); list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { mpam_touch_msc(msc, &mpam_reset_ris, ris); @@ -1899,6 +1987,7 @@ static int mpam_cpu_offline(unsigned int cpu) if (mpam_is_enabled()) mpam_touch_msc(msc, &mpam_save_mbwu_state, ris); } + mpam_mon_sel_outer_unlock(msc); mutex_unlock(&msc->cfg_lock); } } @@ -1999,6 +2088,9 @@ static void mpam_msc_destroy(struct mpam_msc *msc) list_del_rcu(&msc->all_msc_list); platform_set_drvdata(pdev, NULL); + debugfs_remove_recursive(msc->debugfs); + msc->debugfs = NULL; + add_to_garbage(msc); } @@ -2017,6 +2109,7 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) { int err; u32 tmp; + char name[20]; struct mpam_msc *msc; struct resource *msc_res; struct device *dev = &pdev->dev; @@ -2084,6 +2177,10 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) list_add_rcu(&msc->all_msc_list, &mpam_all_msc); platform_set_drvdata(pdev, msc); + snprintf(name, sizeof(name), "msc.%u", msc->id); + msc->debugfs = debugfs_create_dir(name, mpam_debugfs); + debugfs_create_x32("max_nrdy_usec", 0400, msc->debugfs, &msc->nrdy_usec); + return msc; } @@ -2196,12 +2293,31 @@ static void __props_mismatch(struct mpam_props *parent, if (alias && !mpam_has_bwa_wd_feature(parent) && mpam_has_bwa_wd_feature(child)) { parent->bwa_wd = child->bwa_wd; + parent->mbw_max_lim = child->mbw_max_lim; + if (mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, child)) + mpam_set_feature(mpam_feat_mbw_max_hardlim_rw, parent); + else + mpam_clear_feature(mpam_feat_mbw_max_hardlim_rw, parent); } else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature, bwa_wd, alias)) { pr_debug("took the min bwa_wd\n"); parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd); } + if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_max, alias)) { + parent->mbw_max_lim = child->mbw_max_lim; + if (mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, child)) + mpam_set_feature(mpam_feat_mbw_max_hardlim_rw, parent); + else + mpam_clear_feature(mpam_feat_mbw_max_hardlim_rw, parent); + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_max, + mbw_max_lim, alias)) { + pr_debug("%s mbw_max_lim mismatch, clearing mbw_max\n", __func__); + mpam_clear_feature(mpam_feat_mbw_max, parent); + parent->mbw_max_lim = 0; + mpam_props_sync_mbw_max_hardlim_rw(parent); + } + if (alias && !mpam_has_cmax_wd_feature(parent) && mpam_has_cmax_wd_feature(child)) { parent->cmax_wd = child->cmax_wd; } else if (MISMATCHED_HELPER(parent, child, mpam_has_cmax_wd_feature, @@ -2469,6 +2585,12 @@ static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) msc->id, mpam_errcode_names[errcode], partid, pmg, ris); + /* No action is required for the MPAM programming errors */ + if ((errcode != MPAM_ERRCODE_REQ_PARTID_RANGE) && + (errcode != MPAM_ERRCODE_REQ_PMG_RANGE)) { + return IRQ_HANDLED; + } + /* Disable this interrupt. */ mpam_disable_msc_ecr(msc); @@ -2585,15 +2707,20 @@ static void __destroy_component_cfg(struct mpam_component *comp) lockdep_assert_held(&mpam_list_lock); + if (!comp->cfg) + return; + add_to_garbage(comp->cfg); list_for_each_entry(vmsc, &comp->vmsc, comp_list) { msc = vmsc->msc; - if (mpam_mon_sel_lock(msc)) { + mpam_mon_sel_outer_lock(msc); + if (mpam_mon_sel_inner_lock(msc)) { list_for_each_entry(ris, &vmsc->ris, vmsc_list) add_to_garbage(ris->mbwu_state); - mpam_mon_sel_unlock(msc); + mpam_mon_sel_inner_unlock(msc); } + mpam_mon_sel_outer_unlock(msc); } } @@ -2640,6 +2767,7 @@ static int __allocate_component_cfg(struct mpam_component *comp) mpam_reset_component_cfg(comp); list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + int err = 0; struct mpam_msc *msc; struct mpam_msc_ris *ris; struct msmon_mbwu_state *mbwu_state; @@ -2648,6 +2776,7 @@ static int __allocate_component_cfg(struct mpam_component *comp) continue; msc = vmsc->msc; + mpam_mon_sel_outer_lock(msc); list_for_each_entry(ris, &vmsc->ris, vmsc_list) { if (!ris->props.num_mbwu_mon) continue; @@ -2655,17 +2784,22 @@ static int __allocate_component_cfg(struct mpam_component *comp) mbwu_state = kzalloc_objs(*ris->mbwu_state, ris->props.num_mbwu_mon); if (!mbwu_state) { - __destroy_component_cfg(comp); - return -ENOMEM; + err = -ENOMEM; + break; } init_garbage(&mbwu_state[0].garbage); - if (mpam_mon_sel_lock(msc)) { + if (mpam_mon_sel_inner_lock(msc)) { ris->mbwu_state = mbwu_state; - mpam_mon_sel_unlock(msc); + mpam_mon_sel_inner_unlock(msc); } } + mpam_mon_sel_outer_unlock(msc); + if (err) { + __destroy_component_cfg(comp); + return err; + } } return 0; @@ -2689,6 +2823,130 @@ static int mpam_allocate_config(void) return 0; } +static void mpam_debugfs_setup_ris(struct mpam_msc_ris *ris) +{ + char name[40]; + struct dentry *d; + struct mpam_props *rprops = &ris->props; + + snprintf(name, sizeof(name), "ris.%u", ris->ris_idx); + d = debugfs_create_dir(name, ris->vmsc->msc->debugfs); + debugfs_create_x64("mpamf_idr", 0400, d, &ris->idr); + debugfs_create_x32("mpamf_cpor_idr", 0400, d, &ris->cpor_idr); + debugfs_create_x32("mpamf_ccap_idr", 0400, d, &ris->ccap_idr); + debugfs_create_ulong("features", 0400, d, &rprops->features[0]); + debugfs_create_x16("cpbm_wd", 0400, d, &rprops->cpbm_wd); + debugfs_create_x16("mbw_pbm_bits", 0400, d, &rprops->mbw_pbm_bits); + debugfs_create_x16("num_csu_mon", 0400, d, &rprops->num_csu_mon); + debugfs_create_x16("num_mbwu_mon", 0400, d, &rprops->num_mbwu_mon); + debugfs_create_cpumask("affinity", 0400, d, &ris->affinity); + ris->debugfs = d; +} + +static void mpam_debugfs_setup_vmsc(struct mpam_component *comp, + struct mpam_vmsc *vmsc) +{ + u8 ris_idx; + char name[40]; + char path[40]; + struct dentry *d; + struct mpam_msc_ris *ris; + int msc_id = vmsc->msc->id; + + snprintf(name, sizeof(name), "vmsc.%u", msc_id); + d = debugfs_create_dir(name, comp->debugfs); + debugfs_create_ulong("features", 0400, d, &vmsc->props.features[0]); + vmsc->debugfs = d; + + list_for_each_entry_rcu(ris, &vmsc->ris, vmsc_list) { + ris_idx = ris->ris_idx; + + snprintf(name, sizeof(name), "msc.%u_ris.%u", msc_id, + ris_idx); + snprintf(path, sizeof(path), "../../../msc.%u/ris.%u", + msc_id, ris_idx); + debugfs_create_symlink(name, d, path); + } +} + +static void mpam_debugfs_setup_comp(struct mpam_class *class, + struct mpam_component *comp) +{ + char name[40]; + struct dentry *d; + struct mpam_vmsc *vmsc; + + snprintf(name, sizeof(name), "comp.%u", comp->comp_id); + d = debugfs_create_dir(name, class->debugfs); + comp->debugfs = d; + + list_for_each_entry_rcu(vmsc, &comp->vmsc, comp_list) + mpam_debugfs_setup_vmsc(comp, vmsc); +} + +static void mpam_debugfs_setup(void) +{ + char name[40]; + struct dentry *d; + struct mpam_msc *msc; + struct mpam_class *class; + struct mpam_msc_ris *ris; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(msc, &mpam_all_msc, all_msc_list) { + d = msc->debugfs; + debugfs_create_u32("fw_id", 0400, d, &msc->pdev->id); + debugfs_create_x32("iface", 0400, d, &msc->iface); + debugfs_create_x32("mpamf_iidr", 0400, d, &msc->iidr); + debugfs_create_x64("nrdy_retry_count", 0400, d, &msc->nrdy_retry_count); + list_for_each_entry(ris, &msc->ris, msc_list) + mpam_debugfs_setup_ris(ris); + } + + list_for_each_entry_rcu(class, &mpam_classes, classes_list) { + snprintf(name, sizeof(name), "class.%u", class->level); + d = debugfs_create_dir(name, mpam_debugfs); + debugfs_create_ulong("features", 0400, d, &class->props.features[0]); + debugfs_create_x32("nrdy_usec", 0400, d, &class->nrdy_usec); + debugfs_create_x16("quirks", 0400, d, &class->quirks); + debugfs_create_x8("level", 0400, d, &class->level); + debugfs_create_cpumask("affinity", 0400, d, &class->affinity); + class->debugfs = d; + + list_for_each_entry_rcu(comp, &class->components, class_list) + mpam_debugfs_setup_comp(class, comp); + } +} + +static int mpam_force_disable_show(struct seq_file *s, void *data) +{ + seq_puts(s, "Write 1 to this file to trigger an MPAM error.\n"); + return 0; +} + +static ssize_t mpam_force_disable_write(struct file *file, + const char __user *userbuf, size_t count, + loff_t *ppos) +{ + u32 user_val; + int err; + + err = kstrtou32_from_user(userbuf, count, 10, &user_val); + if (err) + return err; + + if (user_val == 1) { + mpam_disable_reason = "debugfs trigger"; + mpam_disable(NULL); + } + + return count; +} + +DEFINE_SHOW_STORE_ATTRIBUTE(mpam_force_disable); + static void mpam_enable_once(void) { int err; @@ -2722,10 +2980,15 @@ static void mpam_enable_once(void) pr_err("Failed to allocate configuration arrays.\n"); break; } + + mpam_debugfs_setup(); } while (0); mutex_unlock(&mpam_list_lock); cpus_read_unlock(); + debugfs_create_file("force_disable", 0600, mpam_debugfs, NULL, + &mpam_force_disable_fops); + if (!err) { err = mpam_resctrl_setup(); if (err) @@ -2887,8 +3150,11 @@ static bool mpam_update_config(struct mpam_config *cfg, bool has_changes = false; maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, has_changes); + maybe_update_config(cfg, mpam_feat_cmax_cmax, newcfg, cmax, has_changes); maybe_update_config(cfg, mpam_feat_mbw_part, newcfg, mbw_pbm, has_changes); maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_max_hardlim_rw, newcfg, + mbw_max_hardlim, has_changes); return has_changes; } @@ -2943,12 +3209,105 @@ static int __init mpam_msc_driver_init(void) return -EINVAL; } + mpam_debugfs = debugfs_create_dir("mpam", NULL); + return platform_driver_register(&mpam_msc_driver); } /* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ subsys_initcall(mpam_msc_driver_init); +static int mpam_cmdline_partid_max_set(const char *arg, + const struct kernel_param *kp) +{ + int ret; + + spin_lock(&partid_max_lock); + ret = kstrtou16(arg, 10, &mpam_cmdline_partid_max); + if (!ret) + mpam_cmdline_partid_max_overridden = true; + spin_unlock(&partid_max_lock); + + return 0; +} +static int mpam_cmdline_partid_max_get(char *buffer, + const struct kernel_param *kp) +{ + u16 val = 0xffff; + + spin_lock(&partid_max_lock); + if (mpam_cmdline_partid_max_overridden) + val = mpam_cmdline_partid_max; + spin_unlock(&partid_max_lock); + + return sprintf(buffer, "%u\n", val); +} +static const struct kernel_param_ops mpam_cmdline_partid_max_ops = { + .set = mpam_cmdline_partid_max_set, + .get = mpam_cmdline_partid_max_get, +}; +module_param_cb(partid_max, &mpam_cmdline_partid_max_ops, NULL, 0644); +MODULE_PARM_DESC(partid_max, "Override for reducing the number of PARTID."); + +static DEFINE_XARRAY(mpam_force_unkown_msc); + +static void mpam_force_unknown_msc_add(u32 msc_id, gfp_t gfp) +{ + xa_store(&mpam_force_unkown_msc, msc_id, xa_mk_value(msc_id), gfp); +} + +bool mpam_force_unknown_msc_test(struct mpam_msc *msc) +{ + return !!xa_load(&mpam_force_unkown_msc, msc->pdev->id); +} + +static int mpam_force_unknown_msc_set(const char *_str, + const struct kernel_param *kp) +{ + int err; + u32 val; + char *tok, *iter; + char *str __free(kfree) = kstrdup(_str, GFP_KERNEL); + + iter = str; + do { + tok = strsep(&iter, ","); + err = kstrtou32(tok, 10, &val); + if (err) { + pr_err("Failed to parse commandline: %d\n", err); + break; + } + mpam_force_unknown_msc_add(val, GFP_KERNEL); + } while (iter); + + return 0; +} +static int mpam_force_unknown_msc_get(char *buffer, + const struct kernel_param *kp) +{ + unsigned long index, count = 0; + int result = 0; + void *entry; + + xa_for_each(&mpam_force_unkown_msc, index, entry) { + if (count) + result += sprintf(buffer + result, ","); + + result += sprintf(buffer + result, "%lu", index); + count += 1; + } + + result += sprintf(buffer + result, "\n"); + + return result; +} +static const struct kernel_param_ops mpam_force_unknown_msc_ops = { + .set = mpam_force_unknown_msc_set, + .get = mpam_force_unknown_msc_get, +}; +subsys_param_cb(force_unknown_msc, &mpam_force_unknown_msc_ops, NULL, 0644); +MODULE_PARM_DESC(force_unknown_msc, "Disabling a set of probed MSC."); + #ifdef CONFIG_MPAM_KUNIT_TEST #include "test_mpam_devices.c" #endif diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 1914aefdcba9efbed62b01d05ab88e47fd49c421..b463771bb0d5805634cbde24bce106a46ae6763b 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,7 @@ struct mpam_msc { /* Not modified after mpam_is_enabled() becomes true */ enum mpam_msc_iface iface; u32 nrdy_usec; + u64 nrdy_retry_count; cpumask_t accessibility; bool has_extd_esr; @@ -116,50 +118,82 @@ struct mpam_msc { /* * mon_sel_lock protects access to the MSC hardware registers that are * affected by MPAMCFG_MON_SEL, and the mbwu_state. - * Access to mon_sel is needed from both process and interrupt contexts, - * but is complicated by firmware-backed platforms that can't make any - * access unless they can sleep. - * Always use the mpam_mon_sel_lock() helpers. - * Accesses to mon_sel need to be able to fail if they occur in the wrong - * context. + * Both the 'inner' and 'outer' must be taken. + * For real MMIO MSC, the outer lock is unnecessary - but keeps the + * code common with: + * Firmware backed MSC need to sleep when accessing the MSC, which + * means some code-paths will always fail. For these MSC the outer + * lock is providing the protection, and the inner lock fails to + * be taken if the task is unable to sleep. + * * If needed, take msc->probe_lock first. */ - raw_spinlock_t _mon_sel_lock; - unsigned long _mon_sel_flags; + struct mutex outer_mon_sel_lock; + bool outer_lock_held; + raw_spinlock_t inner_mon_sel_lock; + unsigned long inner_mon_sel_flags; void __iomem *mapped_hwpage; size_t mapped_hwpage_sz; + struct dentry *debugfs; + /* Values only used on some platforms for quirks */ u32 t241_id; struct mpam_garbage garbage; }; -/* Returning false here means accesses to mon_sel must fail and report an error. */ -static inline bool __must_check mpam_mon_sel_lock(struct mpam_msc *msc) +static inline bool __must_check mpam_mon_sel_inner_lock(struct mpam_msc *msc) +{ + /* + * The outer lock may be taken by a CPU that then issues an IPI to run + * a helper that takes the inner lock. lockdep can't help us here. + */ + WARN_ON_ONCE(!READ_ONCE(msc->outer_lock_held)); + + if (msc->iface == MPAM_IFACE_MMIO) { + raw_spin_lock_irqsave(&msc->inner_mon_sel_lock, msc->inner_mon_sel_flags); + return true; + } + + /* Accesses must fail if we are not pre-emptible */ + return !!preemptible(); +} + +static inline void mpam_mon_sel_inner_unlock(struct mpam_msc *msc) { - /* Locking will require updating to support a firmware backed interface */ - if (WARN_ON_ONCE(msc->iface != MPAM_IFACE_MMIO)) - return false; + WARN_ON_ONCE(!READ_ONCE(msc->outer_lock_held)); + + if (msc->iface == MPAM_IFACE_MMIO) + raw_spin_unlock_irqrestore(&msc->inner_mon_sel_lock, msc->inner_mon_sel_flags); +} - raw_spin_lock_irqsave(&msc->_mon_sel_lock, msc->_mon_sel_flags); - return true; +static inline void mpam_mon_sel_outer_lock(struct mpam_msc *msc) +{ + mutex_lock(&msc->outer_mon_sel_lock); + msc->outer_lock_held = true; } -static inline void mpam_mon_sel_unlock(struct mpam_msc *msc) +static inline void mpam_mon_sel_outer_unlock(struct mpam_msc *msc) { - raw_spin_unlock_irqrestore(&msc->_mon_sel_lock, msc->_mon_sel_flags); + msc->outer_lock_held = false; + mutex_unlock(&msc->outer_mon_sel_lock); } static inline void mpam_mon_sel_lock_held(struct mpam_msc *msc) { - lockdep_assert_held_once(&msc->_mon_sel_lock); + WARN_ON_ONCE(!READ_ONCE(msc->outer_lock_held)); + if (msc->iface == MPAM_IFACE_MMIO) + lockdep_assert_held_once(&msc->inner_mon_sel_lock); + else + lockdep_assert_preemption_enabled(); } static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) { - raw_spin_lock_init(&msc->_mon_sel_lock); + raw_spin_lock_init(&msc->inner_mon_sel_lock); + mutex_init(&msc->outer_mon_sel_lock); } /* Bits for mpam features bitmaps */ @@ -172,6 +206,7 @@ enum mpam_device_features { mpam_feat_mbw_part, mpam_feat_mbw_min, mpam_feat_mbw_max, + mpam_feat_mbw_max_hardlim_rw, mpam_feat_mbw_prop, mpam_feat_intpri_part, mpam_feat_intpri_part_0_low, @@ -205,6 +240,11 @@ struct mpam_props { u16 dspri_wd; u16 num_csu_mon; u16 num_mbwu_mon; + /* + * MPAMF_MBW_IDR.MAX_LIM [1:0] when mpam_feat_mbw_max; else 0. + * 0 = soft+hard, 1 = soft only, 2 = hard only, 3 = reserved. + */ + u8 mbw_max_lim; /* * Kunit tests use memset() to set up feature combinations that should be @@ -310,6 +350,7 @@ struct mpam_class { struct ida ida_csu_mon; struct ida ida_mbwu_mon; + struct dentry *debugfs; struct mpam_garbage garbage; }; @@ -320,6 +361,11 @@ struct mpam_config { u32 cpbm; u32 mbw_pbm; u16 mbw_max; + bool mbw_max_hardlim; + u16 cmax; + u16 cmin; + + bool cmax_softlim; struct mpam_garbage garbage; }; @@ -344,6 +390,7 @@ struct mpam_component { /* parent: */ struct mpam_class *class; + struct dentry *debugfs; struct mpam_garbage garbage; }; @@ -362,12 +409,15 @@ struct mpam_vmsc { /* parent: */ struct mpam_component *comp; + struct dentry *debugfs; struct mpam_garbage garbage; }; struct mpam_msc_ris { u8 ris_idx; u64 idr; + u32 cpor_idr; + u32 ccap_idr; struct mpam_props props; bool in_reset_state; @@ -385,6 +435,7 @@ struct mpam_msc_ris { /* msmon mbwu configuration is preserved over reset */ struct msmon_mbwu_state *mbwu_state; + struct dentry *debugfs; struct mpam_garbage garbage; }; @@ -411,7 +462,20 @@ struct mpam_resctrl_res { struct mpam_resctrl_mon { struct mpam_class *class; - /* per-class data that resctrl needs will live here */ + /* + * Array of allocated MBWU monitors, indexed by (closid, rmid). + * When ABMC is not in use, this array directly maps (closid, rmid) + * to the allocated monitor. Otherwise this array is sparse, and + * un-assigned (closid, rmid) are -1. + */ + int *mbwu_idx_to_mon; + + /* + * Array of assigned MBWU monitors, indexed by idx argument. + * When ABMC is not in use, this array can be NULL. Otherwise + * it maps idx to the allocated monitor. + */ + int *assigned_counters; }; static inline int mpam_alloc_csu_mon(struct mpam_class *class) @@ -471,6 +535,8 @@ void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); +bool mpam_force_unknown_msc_test(struct mpam_msc *msc); + #ifdef CONFIG_RESCTRL_FS int mpam_resctrl_setup(void); void mpam_resctrl_exit(void); @@ -571,6 +637,7 @@ static inline void mpam_resctrl_teardown_class(struct mpam_class *class) { } /* MPAMF_MBW_IDR - MPAM features memory bandwidth partitioning ID register */ #define MPAMF_MBW_IDR_BWA_WD GENMASK(5, 0) +#define MPAMF_MBW_IDR_MAX_LIM GENMASK(9, 8) #define MPAMF_MBW_IDR_HAS_MIN BIT(10) #define MPAMF_MBW_IDR_HAS_MAX BIT(11) #define MPAMF_MBW_IDR_HAS_PBM BIT(12) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 226ff6f532fab42c9e0412127ca9e3acdfd53fee..40e0de181db948f4d75f3ae1d0d83ccd0ae06e1e 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,7 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; * to those supported by MPAM. * Class pointer may be NULL. */ -#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID +#define MPAM_MAX_EVENT QOS_L3_MBM_LOCAL_EVENT_ID static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1]; #define for_each_mpam_resctrl_mon(mon, eventid) \ @@ -75,6 +76,29 @@ static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); */ static bool resctrl_enabled; +/* + * L3 local/total may come from different classes - what is the number of MBWU + * 'on L3'? + */ +static unsigned int l3_num_allocated_mbwu = ~0; + +/* Whether this num_mbw_mon could result in a free_running system */ +static int __mpam_monitors_free_running(u16 num_mbwu_mon) +{ + if (num_mbwu_mon >= resctrl_arch_system_num_rmid_idx()) + return resctrl_arch_system_num_rmid_idx(); + return 0; +} + +/* + * If l3_num_allocated_mbwu is forced below PARTID * PMG, then the counters + * are not free running, and ABMC's user-interface must be used to assign them. + */ +static bool mpam_resctrl_abmc_enabled(void) +{ + return l3_num_allocated_mbwu < resctrl_arch_system_num_rmid_idx(); +} + bool resctrl_arch_alloc_capable(void) { struct mpam_resctrl_res *res; @@ -114,40 +138,6 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domai { } -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, - u32 closid, u32 rmid, enum resctrl_event_id eventid) -{ -} - -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, - u32 closid, u32 rmid, int cntr_id, - enum resctrl_event_id eventid) -{ -} - -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, - enum resctrl_event_id evtid, u32 rmid, u32 closid, - u32 cntr_id, bool assign) -{ -} - -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, - u32 unused, u32 rmid, int cntr_id, - enum resctrl_event_id eventid, u64 *val) -{ - return -EOPNOTSUPP; -} - -bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) -{ - return false; -} - -int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) -{ - return -EINVAL; -} - int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) { return -EOPNOTSUPP; @@ -185,6 +175,36 @@ static void resctrl_reset_task_closids(void) read_unlock(&tasklist_lock); } +static struct mpam_resctrl_res *mpam_resctrl_res_from_mon(struct mpam_resctrl_mon *mon) +{ + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + if (!mon->class) + return NULL; + + for_each_mpam_resctrl_control(res, rid) { + if (res->class == mon->class) + return res; + } + return NULL; +} + +static void mpam_resctrl_monitor_sync_abmc_vals(struct rdt_resource *r) +{ + r->mon.num_mbm_cntrs = l3_num_allocated_mbwu; + if (cdp_enabled) + r->mon.num_mbm_cntrs /= 2; + + if (r->mon.num_mbm_cntrs) { + r->mon.mbm_cntr_assignable = mpam_resctrl_abmc_enabled(); + r->mon.mbm_assign_on_mkdir = mpam_resctrl_abmc_enabled(); + } else { + r->mon.mbm_cntr_assignable = false; + r->mon.mbm_assign_on_mkdir = false; + } +} + int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) { u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID; @@ -244,6 +264,7 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current)); resctrl_reset_task_closids(); + mpam_resctrl_monitor_sync_abmc_vals(l3); for_each_possible_cpu(cpu) mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0); @@ -362,6 +383,58 @@ bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid) return (tsk_closid == closid) && (tsk_rmid == rmid); } +int resctrl_arch_set_iommu_closid_rmid(struct iommu_group *group, u32 closid, + u32 rmid) +{ + u16 partid; + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU)) + return 0; + + if (cdp_enabled) + partid = closid << 1; + else + partid = closid; + + return iommu_group_set_qos_params(group, partid, rmid); +} + +bool resctrl_arch_match_iommu_closid(struct iommu_group *group, u32 closid) +{ + u16 partid; + int err = iommu_group_get_qos_params(group, &partid, NULL); + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU)) + return false; + + if (err) + return false; + + if (cdp_enabled) + partid >>= 1; + + return (partid == closid); +} + +bool resctrl_arch_match_iommu_closid_rmid(struct iommu_group *group, + u32 closid, u32 rmid) +{ + u8 pmg; + u16 partid; + int err = iommu_group_get_qos_params(group, &partid, &pmg); + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU)) + return false; + + if (err) + return false; + + if (cdp_enabled) + partid >>= 1; + + return (partid == closid) && (rmid == pmg); +} + struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) { if (l >= RDT_NUM_RESOURCES) @@ -441,12 +514,70 @@ void resctrl_arch_mon_ctx_free(struct rdt_resource *r, resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx); } -static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, - enum mpam_device_features mon_type, - int mon_idx, - enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) +/* + * The rmid realloc threshold should be for the smallest cache exposed to + * resctrl. + */ +static int update_rmid_limits(struct mpam_class *class) +{ + u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); + struct mpam_props *cprops = &class->props; + struct cacheinfo *ci; + + lockdep_assert_cpus_held(); + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return 0; + + /* + * Assume cache levels are the same size for all CPUs... + * The check just requires any online CPU and it can't go offline as we + * hold the cpu lock. + */ + ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level); + if (!ci || ci->size == 0) { + pr_debug("Could not read cache size for class %u\n", + class->level); + return -EINVAL; + } + + if (!resctrl_rmid_realloc_limit || + ci->size < resctrl_rmid_realloc_limit) { + resctrl_rmid_realloc_limit = ci->size; + resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg; + } + + return 0; +} + +static bool __resctrl_arch_mon_can_overflow(enum resctrl_event_id eventid) { - struct mon_cfg cfg; + struct mpam_props *cprops; + struct mpam_class *class = mpam_resctrl_counters[eventid].class; + + if (!class) + return false; + + /* No need to worry about a 63 bit counter overflowing */ + cprops = &class->props; + return !mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops); +} + +bool resctrl_arch_mon_can_overflow(void) +{ + if (__resctrl_arch_mon_can_overflow(QOS_L3_MBM_LOCAL_EVENT_ID)) + return true; + + return __resctrl_arch_mon_can_overflow(QOS_L3_MBM_TOTAL_EVENT_ID); +} + +static int +__read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, + enum mpam_device_features mon_type, + int mon_idx, + enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) +{ + struct mon_cfg cfg = { }; if (!mpam_is_enabled()) return -EINVAL; @@ -454,18 +585,29 @@ static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_c /* Shift closid to account for CDP */ closid = resctrl_get_config_index(closid, cdp_type); + if (mon_idx == USE_PRE_ALLOCATED) { + int mbwu_idx = resctrl_arch_rmid_idx_encode(closid, rmid); + mon_idx = mon->mbwu_idx_to_mon[mbwu_idx]; + if (mon_idx == -1) { + if (mpam_resctrl_abmc_enabled()) { + /* Report Unassigned */ + return -ENOENT; + } + /* Report Unavailable */ + return -EINVAL; + } + } + + cfg.mon = mon_idx; + cfg.match_pmg = true; + cfg.partid = closid; + cfg.pmg = rmid; + if (irqs_disabled()) { /* Check if we can access this domain without an IPI */ return -EIO; } - cfg = (struct mon_cfg) { - .mon = mon_idx, - .match_pmg = true, - .partid = closid, - .pmg = rmid, - }; - return mpam_msmon_read(mon_comp, &cfg, mon_type, val); } @@ -474,29 +616,27 @@ static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component int mon_idx, u32 closid, u32 rmid, u64 *val) { if (cdp_enabled) { - u64 code_val = 0, data_val = 0; + u64 cdp_val = 0; int err; err = __read_mon(mon, mon_comp, mon_type, mon_idx, - CDP_CODE, closid, rmid, &code_val); + CDP_CODE, closid, rmid, &cdp_val); if (err) return err; err = __read_mon(mon, mon_comp, mon_type, mon_idx, - CDP_DATA, closid, rmid, &data_val); - if (err) - return err; - - *val += code_val + data_val; - return 0; + CDP_DATA, closid, rmid, &cdp_val); + if (!err) + *val += cdp_val; + return err; } return __read_mon(mon, mon_comp, mon_type, mon_idx, CDP_NONE, closid, rmid, val); } -/* MBWU when not in ABMC mode (not supported), and CSU counters. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, +/* MBWU when not in ABMC mode, and CSU counters. */ +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 closid, u32 rmid, enum resctrl_event_id eventid, void *arch_priv, u64 *val, void *arch_mon_ctx) { @@ -508,58 +648,124 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, resctrl_arch_rmid_read_context_check(); - if (!mpam_is_enabled()) - return -EINVAL; - if (eventid >= QOS_NUM_EVENTS || !mon->class) return -EINVAL; l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr); mon_comp = l3_dom->mon_comp[eventid]; - if (eventid != QOS_L3_OCCUP_EVENT_ID) + switch (eventid) { + case QOS_L3_OCCUP_EVENT_ID: + mon_type = mpam_feat_msmon_csu; + break; + case QOS_L3_MBM_LOCAL_EVENT_ID: + case QOS_L3_MBM_TOTAL_EVENT_ID: + mon_type = mpam_feat_msmon_mbwu; + break; + default: return -EINVAL; - - mon_type = mpam_feat_msmon_csu; + } return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx, closid, rmid, val); } -/* - * The rmid realloc threshold should be for the smallest cache exposed to - * resctrl. - */ -static int update_rmid_limits(struct mpam_class *class) +/* MBWU counters when in ABMC mode */ +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + u32 closid, u32 rmid, int mon_idx, + enum resctrl_event_id eventid, u64 *val) { - u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); - struct mpam_props *cprops = &class->props; - struct cacheinfo *ci; - - lockdep_assert_cpus_held(); + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; + struct mpam_resctrl_dom *l3_dom; + struct mpam_component *mon_comp; - if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) - return 0; + if (!mpam_is_enabled()) + return -EINVAL; - /* - * Assume cache levels are the same size for all CPUs... - * The check just requires any online CPU and it can't go offline as we - * hold the cpu lock. - */ - ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level); - if (!ci || ci->size == 0) { - pr_debug("Could not read cache size for class %u\n", - class->level); + if (eventid == QOS_L3_OCCUP_EVENT_ID || !mon->class) return -EINVAL; + + l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + mon_comp = l3_dom->mon_comp[eventid]; + + return read_mon_cdp_safe(mon, mon_comp, mpam_feat_msmon_mbwu, mon_idx, + closid, rmid, val); +} + +static void __reset_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, + int mon_idx, + enum resctrl_conf_type cdp_type, u32 closid, u32 rmid) +{ + struct mon_cfg cfg = { }; + + if (!mpam_is_enabled()) + return; + + /* Shift closid to account for CDP */ + closid = resctrl_get_config_index(closid, cdp_type); + + if (mon_idx == USE_PRE_ALLOCATED) { + int mbwu_idx = resctrl_arch_rmid_idx_encode(closid, rmid); + mon_idx = mon->mbwu_idx_to_mon[mbwu_idx]; } - if (!resctrl_rmid_realloc_limit || - ci->size < resctrl_rmid_realloc_limit) { - resctrl_rmid_realloc_limit = ci->size; - resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg; + if (mon_idx == -1) + return; + cfg.mon = mon_idx; + mpam_msmon_reset_mbwu(mon_comp, &cfg); +} + +static void reset_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, + int mon_idx, u32 closid, u32 rmid) +{ + if (cdp_enabled) { + __reset_mon(mon, mon_comp, mon_idx, CDP_CODE, closid, rmid); + __reset_mon(mon, mon_comp, mon_idx, CDP_DATA, closid, rmid); + } else { + __reset_mon(mon, mon_comp, mon_idx, CDP_NONE, closid, rmid); } +} - return 0; +/* Called via IPI. Call with read_cpus_lock() held. */ +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + u32 closid, u32 rmid, enum resctrl_event_id eventid) +{ + struct mpam_resctrl_dom *l3_dom; + struct mpam_component *mon_comp; + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; + + if (!mpam_is_enabled()) + return; + + /* Only MBWU counters are relevant, and for supported event types. */ + if (eventid == QOS_L3_OCCUP_EVENT_ID || !mon->class) + return; + + l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + mon_comp = l3_dom->mon_comp[eventid]; + + reset_mon_cdp_safe(mon, mon_comp, USE_PRE_ALLOCATED, closid, rmid); +} + +/* Reset an assigned counter */ +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; + struct mpam_resctrl_dom *l3_dom; + struct mpam_component *mon_comp; + + if (!mpam_is_enabled()) + return; + + if (eventid == QOS_L3_OCCUP_EVENT_ID || !mon->class) + return; + + l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + mon_comp = l3_dom->mon_comp[eventid]; + + reset_mon_cdp_safe(mon, mon_comp, USE_PRE_ALLOCATED, closid, rmid); } static bool cache_has_usable_cpor(struct mpam_class *class) @@ -573,6 +779,13 @@ static bool cache_has_usable_cpor(struct mpam_class *class) return class->props.cpbm_wd <= 32; } +static bool cache_has_usable_cmax(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + return mpam_has_feature(mpam_feat_cmax_cmax, cprops); +} + static bool mba_class_use_mbw_max(struct mpam_props *cprops) { return (mpam_has_feature(mpam_feat_mbw_max, cprops) && @@ -606,6 +819,31 @@ static bool cache_has_usable_csu(struct mpam_class *class) return true; } +static bool class_has_usable_mbwu(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) + return false; + + /* + * resctrl expects the bandwidth counters to be free running, + * which means we need as many monitors as resctrl has + * control/monitor groups. + */ + if (__mpam_monitors_free_running(cprops->num_mbwu_mon)) { + pr_debug("monitors usable in free-running mode\n"); + return true; + } + + if (cprops->num_mbwu_mon) { + pr_debug("monitors usable via ABMC assignment\n"); + return true; + } + + return false; +} + /* * Calculate the worst-case percentage change from each implemented step * in the control. @@ -638,14 +876,14 @@ static u32 get_mba_granularity(struct mpam_props *cprops) * * Find the nearest percentage value to the upper bound of the selected band: */ -static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) +static u32 fract16_to_percent(u16 fract, u8 wd) { - u32 val = mbw_max; + u32 val = fract; - val >>= 16 - cprops->bwa_wd; + val >>= 16 - wd; val += 1; val *= MAX_MBA_BW; - val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd); + val = DIV_ROUND_CLOSEST(val, 1 << wd); return val; } @@ -660,18 +898,33 @@ static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) * percentages) and over-commit (where the total of the converted * allocations is greater than expected). */ -static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) +static u16 percent_to_fract16(u8 pc, u8 wd) { u32 val = pc; - val <<= cprops->bwa_wd; + val <<= wd; val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); val = max(val, 1) - 1; - val <<= 16 - cprops->bwa_wd; + val <<= 16 - wd; return val; } +static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) +{ + return fract16_to_percent(mbw_max, cprops->bwa_wd); +} + +static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) +{ + return percent_to_fract16(pc, cprops->bwa_wd); +} + +static u16 percent_to_cmax(u8 pc, struct mpam_props *cprops) +{ + return percent_to_fract16(pc, cprops->cmax_wd); +} + static u32 get_mba_min(struct mpam_props *cprops) { if (!mba_class_use_mbw_max(cprops)) { @@ -719,10 +972,11 @@ static bool topology_matches_l3(struct mpam_class *victim) { int cpu, err; struct mpam_component *victim_iter; + bool matched_once = false; + cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; lockdep_assert_cpus_held(); - cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) return false; @@ -736,8 +990,11 @@ static bool topology_matches_l3(struct mpam_class *victim) } cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask); - if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) + if (cpu >= nr_cpu_ids) { + if (matched_once) + continue; return false; + } cpumask_clear(tmp_cpumask); err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); @@ -757,6 +1014,7 @@ static bool topology_matches_l3(struct mpam_class *victim) return false; } + matched_once = true; } return true; @@ -829,6 +1087,7 @@ static bool traffic_matches_l3(struct mpam_class *class) /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ static void mpam_resctrl_pick_caches(void) { + bool has_cpor, has_cmax; struct mpam_class *class; struct mpam_resctrl_res *res; @@ -847,7 +1106,9 @@ static void mpam_resctrl_pick_caches(void) continue; } - if (!cache_has_usable_cpor(class)) { + has_cpor = cache_has_usable_cpor(class); + has_cmax = cache_has_usable_cmax(class); + if (!has_cpor && !has_cmax) { pr_debug("class %u cache misses CPOR\n", class->level); continue; } @@ -858,12 +1119,22 @@ static void mpam_resctrl_pick_caches(void) cpumask_pr_args(cpu_possible_mask)); continue; } - - if (class->level == 2) - res = &mpam_resctrl_controls[RDT_RESOURCE_L2]; - else - res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; - res->class = class; + if (has_cpor) { + pr_debug("pick_caches: Class has CPOR\n"); + if (class->level == 2) + res = &mpam_resctrl_controls[RDT_RESOURCE_L2]; + else + res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + res->class = class; + } + if (has_cmax) { + pr_debug("pick_caches: Class has CMAX\n"); + if (class->level == 2) + res = &mpam_resctrl_controls[RDT_RESOURCE_L2_MAX]; + else + res = &mpam_resctrl_controls[RDT_RESOURCE_L3_MAX]; + res->class = class; + } } } @@ -895,13 +1166,15 @@ static void mpam_resctrl_pick_mba(void) continue; } - if (!topology_matches_l3(class)) { + if ((class->level == 3) && !topology_matches_l3(class)) { pr_debug("class %u topology doesn't match L3\n", class->level); continue; } - if (!traffic_matches_l3(class)) { + /* Check memory at egress from L3 for MSC with L3 */ + if (!cpumask_equal(&class->affinity, cpu_possible_mask) && + !traffic_matches_l3(class)) { pr_debug("class %u traffic doesn't match L3 egress\n", class->level); continue; @@ -922,13 +1195,70 @@ static void mpam_resctrl_pick_mba(void) candidate_class->level); res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; res->class = candidate_class; + if (mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, + &candidate_class->props)) { + struct mpam_resctrl_res *mbh = + &mpam_resctrl_controls[RDT_RESOURCE_MB_HLIM]; + + mbh->class = candidate_class; + } + } +} + +static void __free_mbwu_mon(struct mpam_class *class, int *array, + u16 num_mbwu_mon) +{ + for (int i = 0; i < num_mbwu_mon; i++) { + if (array[i] < 0) + continue; + + mpam_free_mbwu_mon(class, array[i]); + array[i] = ~0; + } +} + +static int __alloc_mbwu_mon(struct mpam_class *class, int *array, + u16 num_mbwu_mon) +{ + for (int i = 0; i < num_mbwu_mon; i++) { + int mbwu_mon = mpam_alloc_mbwu_mon(class); + + if (mbwu_mon < 0) { + __free_mbwu_mon(class, array, num_mbwu_mon); + return mbwu_mon; + } + array[i] = mbwu_mon; } + + l3_num_allocated_mbwu = min(l3_num_allocated_mbwu, num_mbwu_mon); + + return 0; +} + +static int *__alloc_mbwu_array(struct mpam_class *class, u16 num_mbwu_mon) +{ + int err; + size_t array_size = num_mbwu_mon * sizeof(int); + int *array __free(kfree) = kmalloc(array_size, GFP_KERNEL); + + if (!array) + return ERR_PTR(-ENOMEM); + + memset(array, -1, array_size); + + err = __alloc_mbwu_mon(class, array, num_mbwu_mon); + if (err) + return ERR_PTR(err); + return_ptr(array); } static void counter_update_class(enum resctrl_event_id evt_id, struct mpam_class *class) { - struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class; + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evt_id]; + struct mpam_class *existing_class = mon->class; + u16 num_mbwu_mon = class->props.num_mbwu_mon; + int *existing_array = mon->mbwu_idx_to_mon; if (existing_class) { if (class->level == 3) { @@ -943,12 +1273,45 @@ static void counter_update_class(enum resctrl_event_id evt_id, } } - mpam_resctrl_counters[evt_id].class = class; + pr_debug("Updating event %u to use class %u\n", evt_id, class->level); + mon->class = class; + + if (evt_id == QOS_L3_OCCUP_EVENT_ID) + return; + + /* Might not need all the monitors */ + num_mbwu_mon = __mpam_monitors_free_running(num_mbwu_mon); + if (!num_mbwu_mon) { + pr_debug("Not pre-allocating free-running counters\n"); + return; + } + + /* + * This is the pre-allocated free-running monitors path. It always + * allocates one monitor per PARTID * PMG. + */ + WARN_ON_ONCE(num_mbwu_mon != resctrl_arch_system_num_rmid_idx()); + + mon->mbwu_idx_to_mon = __alloc_mbwu_array(class, num_mbwu_mon); + if (IS_ERR(mon->mbwu_idx_to_mon)) { + pr_debug("Failed to allocate MBWU array\n"); + mon->class = existing_class; + mon->mbwu_idx_to_mon = existing_array; + return; + } + + if (existing_array) { + pr_debug("Releasing previous class %u's monitors\n", + existing_class->level); + __free_mbwu_mon(existing_class, existing_array, num_mbwu_mon); + kfree(existing_array); + } } static void mpam_resctrl_pick_counters(void) { struct mpam_class *class; + bool has_mbwu; lockdep_assert_cpus_held(); @@ -983,7 +1346,107 @@ static void mpam_resctrl_pick_counters(void) break; } } + + has_mbwu = class_has_usable_mbwu(class); + if (has_mbwu && + ((class->type == MPAM_CLASS_MEMORY) || + (topology_matches_l3(class) && + traffic_matches_l3(class)))) { + pr_debug("class %u has usable MBWU, and matches L3 topology", class->level); + + /* + * MBWU counters may be 'local' or 'total' depending on + * where they are in the topology. Counters on caches + * are assumed to be local. If it's on the memory + * controller, its assumed to be global. + * TODO: check mbm_local matches NUMA boundaries... + */ + switch (class->type) { + case MPAM_CLASS_CACHE: + counter_update_class(QOS_L3_MBM_LOCAL_EVENT_ID, + class); + break; + case MPAM_CLASS_MEMORY: + counter_update_class(QOS_L3_MBM_TOTAL_EVENT_ID, + class); + break; + default: + break; + } + } + } + + /* Allocation of MBWU monitors assumes that the class is unique... */ + if (mpam_resctrl_counters[QOS_L3_MBM_LOCAL_EVENT_ID].class) + WARN_ON_ONCE(mpam_resctrl_counters[QOS_L3_MBM_LOCAL_EVENT_ID].class == + mpam_resctrl_counters[QOS_L3_MBM_TOTAL_EVENT_ID].class); +} + +static void __config_cntr(struct mpam_resctrl_mon *mon, u32 cntr_id, + enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, + bool assign) +{ + u32 mbwu_idx, mon_idx = resctrl_get_config_index(cntr_id, cdp_type); + + closid = resctrl_get_config_index(closid, cdp_type); + mbwu_idx = resctrl_arch_rmid_idx_encode(closid, rmid); + WARN_ON_ONCE(mon_idx > l3_num_allocated_mbwu); + + if (assign) + mon->mbwu_idx_to_mon[mbwu_idx] = mon->assigned_counters[mon_idx]; + else + mon->mbwu_idx_to_mon[mbwu_idx] = -1; +} + +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + + if (!mon->mbwu_idx_to_mon || !mon->assigned_counters) { + pr_debug("monitor arrays not allocated\n"); + return; + } + + if (cdp_enabled) { + __config_cntr(mon, cntr_id, CDP_CODE, closid, rmid, assign); + __config_cntr(mon, cntr_id, CDP_DATA, closid, rmid, assign); + } else { + __config_cntr(mon, cntr_id, CDP_NONE, closid, rmid, assign); + } + + resctrl_arch_reset_rmid(r, d, closid, rmid, evtid); +} + +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) +{ + struct mpam_resctrl_res *res; + enum resctrl_event_id evt; + + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + + /* OCCUP shares the L3 class but has no MBWU assigned_counters. */ + for (evt = QOS_L3_MBM_TOTAL_EVENT_ID; evt <= QOS_L3_MBM_LOCAL_EVENT_ID; + evt++) { + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evt]; + + if (!mon->assigned_counters) + continue; + if (mpam_resctrl_res_from_mon(mon) == res) + return true; } + + return false; +} + +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) +{ + lockdep_assert_cpus_held(); + + WARN_ON_ONCE(1); + + return 0; } static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) @@ -1018,19 +1481,48 @@ static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) * we have configured the SMMU and GIC not to do this 'all the * bits' is the correct answer here. */ - r->cache.shareable_bits = resctrl_get_default_ctrl(r); + r->cache.shareable_bits = resctrl_get_resource_default_ctrl(r); r->alloc_capable = true; break; + case RDT_RESOURCE_L2_MAX: + case RDT_RESOURCE_L3_MAX: + r->alloc_capable = true; + r->schema_fmt = RESCTRL_SCHEMA_PERCENT; + r->membw.min_bw = max(100 / (1 << cprops->cmax_wd), 1); + r->membw.bw_gran = max(100 / (1 << cprops->cmax_wd), 1); + r->membw.max_bw = 100; + + if (r->rid == RDT_RESOURCE_L2_MAX) { + r->name = "L2_MAX"; + r->ctrl_scope = RESCTRL_L2_CACHE; + } else { + r->name = "L3_MAX"; + r->ctrl_scope = RESCTRL_L3_CACHE; + } + + break; + case RDT_RESOURCE_MB_HLIM: + if (!mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, cprops)) + break; + r->alloc_capable = true; + r->schema_fmt = RESCTRL_SCHEMA_MB_HLIM; + r->ctrl_scope = RESCTRL_L3_CACHE; + r->name = "MB_HLIM"; + break; case RDT_RESOURCE_MBA: - r->schema_fmt = RESCTRL_SCHEMA_RANGE; + r->schema_fmt = RESCTRL_SCHEMA_PERCENT; r->ctrl_scope = RESCTRL_L3_CACHE; - - r->membw.delay_linear = true; - r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; + r->mba.delay_linear = true; + r->mba.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = get_mba_min(cprops); r->membw.max_bw = MAX_MBA_BW; r->membw.bw_gran = get_mba_granularity(cprops); + if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { + r->membw.mb_max_lim = cprops->mbw_max_lim; + r->membw.arch_has_mb_max_lim = true; + } + r->name = "MB"; r->alloc_capable = true; break; @@ -1048,6 +1540,9 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) if (class->type == MPAM_CLASS_CACHE) return comp->comp_id; + if ((class->type == MPAM_CLASS_MEMORY) && (class->level > 3)) + return comp->comp_id; + if (topology_matches_l3(class)) { /* Use the corresponding L3 component ID as the domain ID */ int id = get_cpu_cacheinfo_id(cpu, 3); @@ -1063,11 +1558,53 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) return comp->comp_id; } +/* + * This must run after all event counters have been picked so that any free + * running counters have already been allocated. + */ +static int mpam_resctrl_monitor_init_abmc(struct mpam_resctrl_mon *mon) +{ + struct mpam_resctrl_res *res = mpam_resctrl_res_from_mon(mon); + size_t array_size = resctrl_arch_system_num_rmid_idx() * sizeof(int); + int *rmid_array __free(kfree) = kmalloc(array_size, GFP_KERNEL); + struct rdt_resource *r = &res->resctrl_res; + struct mpam_class *class = mon->class; + u16 num_mbwu_mon; + + if (mon->mbwu_idx_to_mon) { + pr_debug("monitors free running\n"); + return 0; + } + + if (!rmid_array) { + pr_debug("Failed to allocate RMID array\n"); + return -ENOMEM; + } + memset(rmid_array, -1, array_size); + + num_mbwu_mon = class->props.num_mbwu_mon; + mon->assigned_counters = __alloc_mbwu_array(mon->class, num_mbwu_mon); + if (IS_ERR(mon->assigned_counters)) + return PTR_ERR(mon->assigned_counters); + mon->mbwu_idx_to_mon = no_free_ptr(rmid_array); + + mpam_resctrl_monitor_sync_abmc_vals(r); + + return 0; +} + static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, enum resctrl_event_id type) { - struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; - struct rdt_resource *l3 = &res->resctrl_res; + struct mpam_resctrl_res *res; + struct rdt_resource *r; + + if ((mon->class->type == MPAM_CLASS_MEMORY) && (mon->class->level > 3)) + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; + else + res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + + r = &res->resctrl_res; lockdep_assert_cpus_held(); @@ -1094,8 +1631,12 @@ static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, * monitoring class. * Setting name is necessary on monitor only platforms. */ - l3->name = "L3"; - l3->mon_scope = RESCTRL_L3_CACHE; + if ((mon->class->type == MPAM_CLASS_MEMORY) && (mon->class->level > 3)) { + r->name = "MB"; + } else { + r->name = "L3"; + } + r->mon_scope = RESCTRL_L3_CACHE; /* * num-rmid is the upper bound for the number of monitoring groups that @@ -1105,14 +1646,49 @@ static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, * this does mean userspace needs to know the architecture to correctly * interpret this value. */ - l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); + r->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); if (resctrl_enable_mon_event(type, false, 0, NULL)) - l3->mon_capable = true; + r->mon_capable = true; + + switch (type) { + case QOS_L3_MBM_LOCAL_EVENT_ID: + case QOS_L3_MBM_TOTAL_EVENT_ID: + return mpam_resctrl_monitor_init_abmc(mon); + default: + return 0; + } return 0; } +/* MB_HLIM schemata read: 0/1 per domain for current closid. */ +static u32 mpam_read_mbw_max_hardlim(struct rdt_resource *r, struct rdt_ctrl_domain *dom, + u32 closid, enum resctrl_conf_type type) +{ + struct mpam_resctrl_dom *m_dom; + struct mpam_config *cfg; + u32 partid; + + if (!mpam_is_enabled() || r->rid != RDT_RESOURCE_MB_HLIM) + return 0; + + partid = resctrl_get_config_index(closid, type); + if (partid >= resctrl_arch_get_num_closid(r)) + return 0; + + m_dom = container_of(dom, struct mpam_resctrl_dom, resctrl_ctrl_dom); + if (!m_dom->ctrl_comp || !m_dom->ctrl_comp->cfg) + return 0; + + cfg = &m_dom->ctrl_comp->cfg[partid]; + if (!mpam_has_feature(mpam_feat_mbw_max, cfg) && + !mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, cfg)) + return 0; + + return cfg->mbw_max_hardlim ? 1 : 0; +} + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { @@ -1126,7 +1702,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, lockdep_assert_cpus_held(); if (!mpam_is_enabled()) - return resctrl_get_default_ctrl(r); + return resctrl_get_resource_default_ctrl(r); res = container_of(r, struct mpam_resctrl_res, resctrl_res); dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); @@ -1148,6 +1724,12 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, case RDT_RESOURCE_L3: configured_by = mpam_feat_cpor_part; break; + case RDT_RESOURCE_L2_MAX: + case RDT_RESOURCE_L3_MAX: + configured_by = mpam_feat_cmax_cmax; + break; + case RDT_RESOURCE_MB_HLIM: + return mpam_read_mbw_max_hardlim(r, d, closid, type); case RDT_RESOURCE_MBA: if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { configured_by = mpam_feat_mbw_max; @@ -1155,20 +1737,22 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, } fallthrough; default: - return resctrl_get_default_ctrl(r); + return resctrl_get_resource_default_ctrl(r); } if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) || !mpam_has_feature(configured_by, cfg)) - return resctrl_get_default_ctrl(r); + return resctrl_get_resource_default_ctrl(r); switch (configured_by) { case mpam_feat_cpor_part: return cfg->cpbm; + case mpam_feat_cmax_cmax: + return fract16_to_percent(cfg->cmax, cprops->cmax_wd); case mpam_feat_mbw_max: return mbw_max_to_percent(cfg->mbw_max, cprops); default: - return resctrl_get_default_ctrl(r); + return resctrl_get_resource_default_ctrl(r); } } @@ -1217,12 +1801,26 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, cfg.cpbm = cfg_val; mpam_set_feature(mpam_feat_cpor_part, &cfg); break; + case RDT_RESOURCE_L2_MAX: + case RDT_RESOURCE_L3_MAX: + cfg.cmax = percent_to_cmax(cfg_val, cprops); + mpam_set_feature(mpam_feat_cmax_cmax, &cfg); + break; case RDT_RESOURCE_MBA: if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops); mpam_set_feature(mpam_feat_mbw_max, &cfg); break; } + return -EINVAL; + case RDT_RESOURCE_MB_HLIM: + if (mpam_has_feature(mpam_feat_mbw_max_hardlim_rw, cprops) && + mpam_has_feature(mpam_feat_mbw_max, cprops)) { + cfg.mbw_max_hardlim = cfg_val != 0; + mpam_set_feature(mpam_feat_mbw_max_hardlim_rw, &cfg); + mpam_set_feature(mpam_feat_mbw_max, &cfg); + break; + } fallthrough; default: return -EINVAL; @@ -1361,41 +1959,26 @@ static struct mpam_component *find_component(struct mpam_class *class, int cpu) } static struct mpam_resctrl_dom * -mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) +mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res, + struct mpam_component *comp) { int err; struct mpam_resctrl_dom *dom; struct rdt_l3_mon_domain *mon_d; struct rdt_ctrl_domain *ctrl_d; - struct mpam_class *class = res->class; - struct mpam_component *comp_iter, *ctrl_comp; struct rdt_resource *r = &res->resctrl_res; lockdep_assert_held(&domain_list_lock); - ctrl_comp = NULL; - guard(srcu)(&mpam_srcu); - list_for_each_entry_srcu(comp_iter, &class->components, class_list, - srcu_read_lock_held(&mpam_srcu)) { - if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { - ctrl_comp = comp_iter; - break; - } - } - - /* class has no component for this CPU */ - if (WARN_ON_ONCE(!ctrl_comp)) - return ERR_PTR(-EINVAL); - dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); if (!dom) return ERR_PTR(-ENOMEM); - if (r->alloc_capable) { - dom->ctrl_comp = ctrl_comp; + if (resctrl_arch_alloc_capable()) { + dom->ctrl_comp = comp; ctrl_d = &dom->resctrl_ctrl_dom; - mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr); + mpam_resctrl_domain_hdr_init(cpu, comp, r->rid, &ctrl_d->hdr); ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN; err = resctrl_online_ctrl_domain(r, ctrl_d); if (err) @@ -1406,7 +1989,7 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) pr_debug("Skipped control domain online - no controls\n"); } - if (r->mon_capable) { + if (resctrl_arch_mon_capable()) { struct mpam_component *any_mon_comp = NULL; struct mpam_resctrl_mon *mon; enum resctrl_event_id eventid; @@ -1425,7 +2008,7 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) if (!mon->class) continue; // dummy resource - mon_comp = find_component(mon->class, cpu); + mon_comp = comp ? comp: find_component(mon->class, cpu); dom->mon_comp[eventid] = mon_comp; if (mon_comp) any_mon_comp = mon_comp; @@ -1451,7 +2034,7 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) return dom; offline_ctrl_domain: - if (r->alloc_capable) { + if (resctrl_arch_alloc_capable()) { mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); resctrl_offline_ctrl_domain(r, ctrl_d); } @@ -1469,7 +2052,8 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id * for anything that is not a cache. */ -static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) +static struct mpam_resctrl_dom * +mpam_resctrl_get_mon_domain_from_cpu(int cpu, struct mpam_component *comp) { int cache_id; struct mpam_resctrl_dom *dom; @@ -1483,7 +2067,9 @@ static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) if (cache_id < 0) return NULL; - list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) { + list_for_each_entry(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) { + if (comp && (dom->ctrl_comp != comp)) + continue; if (dom->resctrl_mon_dom.hdr.id == cache_id) return dom; } @@ -1492,7 +2078,8 @@ static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) } static struct mpam_resctrl_dom * -mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) +mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res, + struct mpam_component *comp) { struct mpam_resctrl_dom *dom; struct rdt_resource *r = &res->resctrl_res; @@ -1500,6 +2087,8 @@ mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) lockdep_assert_cpus_held(); list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) { + if (comp && (dom->ctrl_comp != comp)) + continue; if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity)) return dom; } @@ -1508,38 +2097,44 @@ mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) return NULL; /* Search the mon domain list too - needed on monitor only platforms. */ - return mpam_resctrl_get_mon_domain_from_cpu(cpu); + return mpam_resctrl_get_mon_domain_from_cpu(cpu, comp); } int mpam_resctrl_online_cpu(unsigned int cpu) { + struct rdt_l3_mon_domain *mon_d; + struct rdt_ctrl_domain *ctrl_d; struct mpam_resctrl_res *res; enum resctrl_res_level rid; + struct mpam_component *comp; guard(mutex)(&domain_list_lock); for_each_mpam_resctrl_control(res, rid) { struct mpam_resctrl_dom *dom; - struct rdt_resource *r = &res->resctrl_res; if (!res->class) continue; // dummy_resource; + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &res->class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &comp->affinity)) + continue; - dom = mpam_resctrl_get_domain_from_cpu(cpu, res); - if (!dom) { - dom = mpam_resctrl_alloc_domain(cpu, res); - if (IS_ERR(dom)) - return PTR_ERR(dom); - } else { - if (r->alloc_capable) { - struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom; - - mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); - } - if (r->mon_capable) { - struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom; - - mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr); + dom = mpam_resctrl_get_domain_from_cpu(cpu, res, comp); + if (!dom) { + dom = mpam_resctrl_alloc_domain(cpu, res, comp); + } else { + if (resctrl_arch_alloc_capable()) { + ctrl_d = &dom->resctrl_ctrl_dom; + mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); + } + if (resctrl_arch_mon_capable()) { + mon_d = &dom->resctrl_mon_dom; + mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr); + } } + if (IS_ERR(dom)) + return PTR_ERR(dom); } } @@ -1550,6 +2145,7 @@ int mpam_resctrl_online_cpu(unsigned int cpu) void mpam_resctrl_offline_cpu(unsigned int cpu) { + struct mpam_component *comp; struct mpam_resctrl_res *res; enum resctrl_res_level rid; @@ -1561,35 +2157,38 @@ void mpam_resctrl_offline_cpu(unsigned int cpu) struct rdt_l3_mon_domain *mon_d; struct rdt_ctrl_domain *ctrl_d; bool ctrl_dom_empty, mon_dom_empty; - struct rdt_resource *r = &res->resctrl_res; if (!res->class) continue; // dummy resource - dom = mpam_resctrl_get_domain_from_cpu(cpu, res); - if (WARN_ON_ONCE(!dom)) - continue; + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &res->class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &comp->affinity)) + continue; + dom = mpam_resctrl_get_domain_from_cpu(cpu, res, comp); + if (WARN_ON_ONCE(!dom)) + continue; - if (r->alloc_capable) { - ctrl_d = &dom->resctrl_ctrl_dom; - ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); - if (ctrl_dom_empty) - resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); - } else { ctrl_dom_empty = true; - } + if (resctrl_arch_alloc_capable()) { + ctrl_d = &dom->resctrl_ctrl_dom; + ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); + if (ctrl_dom_empty) + resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); + } - if (r->mon_capable) { - mon_d = &dom->resctrl_mon_dom; - mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr); - if (mon_dom_empty) - resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr); - } else { mon_dom_empty = true; - } + if (resctrl_arch_mon_capable()) { + mon_d = &dom->resctrl_mon_dom; + mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr); + if (mon_dom_empty) + resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr); + } - if (ctrl_dom_empty && mon_dom_empty) - kfree(dom); + if (ctrl_dom_empty && mon_dom_empty) + kfree(dom); + } } } diff --git a/drivers/resctrl/test_mpam_resctrl.c b/drivers/resctrl/test_mpam_resctrl.c index b93d6ad87e43f4f3e091a162eefa3728f78e0a5c..2dd28336b3d1dc326ca3cf00d723147af590aa40 100644 --- a/drivers/resctrl/test_mpam_resctrl.c +++ b/drivers/resctrl/test_mpam_resctrl.c @@ -133,7 +133,7 @@ static void test_get_mba_granularity(struct kunit *test) KUNIT_EXPECT_EQ(test, ret, 1); /* DIV_ROUND_UP(100, 1 << 16)% = 1% */ } -static void test_mbw_max_to_percent(struct kunit *test) +static void test_fract16_to_percent(struct kunit *test) { const struct percent_value_case *param = test->param_value; struct percent_value_test_info res; @@ -296,14 +296,76 @@ static void test_percent_to_max_rounding(struct kunit *test) KUNIT_EXPECT_LE(test, 4 * num_rounded_up, 3 * total); } +static void test_num_assignable_counters(struct kunit *test) +{ + unsigned int orig_l3_num_allocated_mbwu = l3_num_allocated_mbwu; + u32 orig_mpam_partid_max = mpam_partid_max; + u32 orig_mpam_pmg_max = mpam_pmg_max; + bool orig_cdp_enabled = cdp_enabled; + struct rdt_resource fake_l3; + + /* Force there to be some PARTID/PMG */ + mpam_partid_max = 3; + mpam_pmg_max = 1; + + cdp_enabled = false; + + /* ABMC off, CDP off */ + l3_num_allocated_mbwu = resctrl_arch_system_num_rmid_idx(); + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, resctrl_arch_system_num_rmid_idx()); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* ABMC on, CDP off */ + l3_num_allocated_mbwu = 4; + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, 4); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_assign_on_mkdir); + + cdp_enabled = true; + + /* ABMC off, CDP on */ + l3_num_allocated_mbwu = resctrl_arch_system_num_rmid_idx(); + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + + /* (value not consumed by resctrl) */ + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, resctrl_arch_system_num_rmid_idx() / 2); + + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* ABMC on, CDP on */ + l3_num_allocated_mbwu = 4; + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, 2); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* ABMC 'on', CDP on - but not enough counters */ + l3_num_allocated_mbwu = 1; + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, 0); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* Restore global variables that were messed with */ + l3_num_allocated_mbwu = orig_l3_num_allocated_mbwu; + mpam_partid_max = orig_mpam_partid_max; + mpam_pmg_max = orig_mpam_pmg_max; + cdp_enabled = orig_cdp_enabled; +} + static struct kunit_case mpam_resctrl_test_cases[] = { KUNIT_CASE(test_get_mba_granularity), - KUNIT_CASE_PARAM(test_mbw_max_to_percent, test_percent_value_gen_params), + KUNIT_CASE_PARAM(test_fract16_to_percent, test_percent_value_gen_params), KUNIT_CASE_PARAM(test_percent_to_mbw_max, test_percent_value_gen_params), KUNIT_CASE_PARAM(test_mbw_max_to_percent_limits, test_all_bwa_wd_gen_params), KUNIT_CASE(test_percent_to_max_rounding), KUNIT_CASE_PARAM(test_percent_max_roundtrip_stability, test_all_bwa_wd_gen_params), + KUNIT_CASE(test_num_assignable_counters), {} }; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index edd6aafbfbaaf3234d6894acc1307f5fcf12a65e..6e8c15016ece2fc2167396db3f49058937b67101 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1143,6 +1143,70 @@ void debugfs_create_str(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_str); +static ssize_t debugfs_read_file_cpumask(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct dentry *dentry = F_DENTRY(file); + struct cpumask *cpumask; + char *kernel_buf; + ssize_t ret; + int len; + + ret = debugfs_file_get(dentry); + if (unlikely(ret)) + return ret; + + /* How long is a piece of string? */ + kernel_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!kernel_buf) { + debugfs_file_put(dentry); + return -ENOMEM; + } + + cpumask = (struct cpumask *)file->private_data; + len = scnprintf(kernel_buf, PAGE_SIZE, + "%*pb\n", cpumask_pr_args(cpumask)); + debugfs_file_put(dentry); + if (len + 1 >= PAGE_SIZE) { + kfree(kernel_buf); + return -EIO; + } + + ret = simple_read_from_buffer(user_buf, count, ppos, kernel_buf, len); + kfree(kernel_buf); + + return ret; +} + +static const struct file_operations fops_cpumask_ro = { + .read = debugfs_read_file_cpumask, + .open = simple_open, + .llseek = default_llseek, +}; + +/** + * debugfs_create_cpumask - create a read-only debugfs file that is used to read a cpumask + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read from. + * + * This function creates a file in debugfs with the given name that + * contains the value of the variable @value. + */ +void debugfs_create_cpumask(const char *name, umode_t mode, + struct dentry *parent, struct cpumask *value) +{ + /* Only read-only is supported */ + WARN_ON_ONCE(mode & S_IWUGO); + + debugfs_create_mode_unsafe(name, mode, parent, value, &fops_cpumask_ro, + &fops_cpumask_ro, &fops_cpumask_ro); +} + static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { diff --git a/fs/resctrl/Kconfig b/fs/resctrl/Kconfig index 21671301bd8a490a397981ddd7a95b2ccdbce375..145d837c190a317982881508bfd87ef383d1cb57 100644 --- a/fs/resctrl/Kconfig +++ b/fs/resctrl/Kconfig @@ -37,3 +37,9 @@ config RESCTRL_RMID_DEPENDS_ON_CLOSID Enabled by the architecture when the RMID values depend on the CLOSID. This causes the CLOSID allocator to search for CLOSID with clean RMID. + +config RESCTRL_IOMMU + bool + help + Enabled by the architecture when some IOMMU are able to be configured + with CLOSID/RMID. diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 9a7dfc48cb2e2972cda8058a5650704b6ac0604e..138321730f73b90fef2fce972bf8a580e759cc52 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -40,19 +40,11 @@ typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, * hardware. The allocated bandwidth percentage is rounded to the next * control step available on the hardware. */ -static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) +static bool bw_validate(char *buf, u32 *data, struct resctrl_schema *s) { int ret; u32 bw; - /* - * Only linear delay values is supported for current Intel SKUs. - */ - if (!r->membw.delay_linear && r->membw.arch_needs_linear) { - rdt_last_cmd_puts("No support for non-linear MB domains\n"); - return false; - } - ret = kstrtou32(buf, 10, &bw); if (ret) { rdt_last_cmd_printf("Invalid MB value %s\n", buf); @@ -60,18 +52,18 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) } /* Nothing else to do if software controller is enabled. */ - if (is_mba_sc(r)) { + if (is_mba_sc(s->res)) { *data = bw; return true; } - if (bw < r->membw.min_bw || bw > r->membw.max_bw) { + if (bw < s->membw.min_bw || bw > s->membw.max_bw) { rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", - bw, r->membw.min_bw, r->membw.max_bw); + bw, s->membw.min_bw, s->membw.max_bw); return false; } - *data = roundup(bw, (unsigned long)r->membw.bw_gran); + *data = resctrl_arch_round_bw(bw, s); return true; } @@ -83,13 +75,7 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, u32 closid = data->closid; u32 bw_val; - cfg = &d->staged_config[s->conf_type]; - if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); - return -EINVAL; - } - - if (!bw_validate(data->buf, &bw_val, r)) + if (!bw_validate(data->buf, &bw_val, s)) return -EINVAL; if (is_mba_sc(r)) { @@ -97,12 +83,40 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, return 0; } + cfg = &d->staged_config[s->conf_type]; cfg->new_ctrl = bw_val; cfg->have_new_ctrl = true; return 0; } +static bool hlim_validate(char *buf, u32 *data) +{ + int ret = kstrtou32(buf, 10, data); + + if (ret || (*data != 0 && *data != 1)) { + rdt_last_cmd_printf("Invalid MB_HLIM value %s (expect 0 or 1)\n", buf); + return false; + } + return true; +} + +static int parse_mb_hlim(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) +{ + struct resctrl_staged_config *cfg; + u32 v; + + if (!hlim_validate(data->buf, &v)) + return -EINVAL; + + cfg = &d->staged_config[s->conf_type]; + cfg->new_ctrl = v; + cfg->have_new_ctrl = true; + + return 0; +} + /* * Check whether a cache bit mask is valid. * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID: @@ -164,12 +178,6 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, u32 closid = data->closid; u32 cbm_val; - cfg = &d->staged_config[s->conf_type]; - if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); - return -EINVAL; - } - /* * Cannot set up more than one pseudo-locked region in a cache * hierarchy. @@ -206,6 +214,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, } } + cfg = &d->staged_config[s->conf_type]; cfg->new_ctrl = cbm_val; cfg->have_new_ctrl = true; @@ -233,24 +242,39 @@ static int parse_line(char *line, struct resctrl_schema *s, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - switch (r->schema_fmt) { + switch (s->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: parse_ctrlval = &parse_cbm; break; - case RESCTRL_SCHEMA_RANGE: + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: parse_ctrlval = &parse_bw; break; + case RESCTRL_SCHEMA_MB_HLIM: + parse_ctrlval = &parse_mb_hlim; + break; } if (WARN_ON_ONCE(!parse_ctrlval)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && - (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { + (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA || + r->rid == RDT_RESOURCE_MB_HLIM)) { rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); return -EINVAL; } + /* + * Only linear delay values is supported for current Intel SKUs. + */ + if (r->rid == RDT_RESOURCE_MBA && + !r->mba.delay_linear && r->mba.arch_needs_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); + return -EINVAL; + } + next: if (!line || line[0] == '\0') return 0; @@ -263,13 +287,18 @@ static int parse_line(char *line, struct resctrl_schema *s, dom = strim(dom); list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (d->hdr.id == dom_id) { + cfg = &d->staged_config[t]; + if (cfg->have_new_ctrl) { + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); + return -EINVAL; + } + data.buf = dom; data.closid = rdtgrp->closid; data.mode = rdtgrp->mode; if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - cfg = &d->staged_config[t]; /* * In pseudo-locking setup mode and just * parsed a valid CBM that should be diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 24e328a5b18b2e1ac389d4ee1c8c85058ec63ff2..75dc5c7cdae753b0af6fb9a8a8bf596c06f07b0f 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -43,6 +43,7 @@ struct rdt_fs_context { bool enable_mba_mbps; bool enable_debug; bool enable_hwdrc; + bool enable_abi_playground; }; static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) @@ -252,6 +253,7 @@ struct rdtgroup { #define RFTYPE_TOP BIT(6) +/* files that are specific to a type of resource, e.g. throttle_mode */ #define RFTYPE_RES_CACHE BIT(8) #define RFTYPE_RES_MB BIT(9) @@ -262,6 +264,11 @@ struct rdtgroup { #define RFTYPE_RES_PERF_PKG BIT(12) +/* files that are specific to a type of control, e.g. percent_min */ +#define RFTYPE_SCHEMA_BITMAP BIT(13) +#define RFTYPE_SCHEMA_PERCENT BIT(14) +#define RFTYPE_SCHEMA_MBPS BIT(15) + #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) @@ -315,6 +322,8 @@ struct mbm_state { u32 prev_bw; }; +DECLARE_STATIC_KEY_FALSE(resctrl_abi_playground); + extern struct mutex rdtgroup_mutex; static inline const char *rdt_kn_name(const struct kernfs_node *kn) @@ -378,9 +387,9 @@ int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); -int resctrl_l3_mon_resource_init(void); +int resctrl_mon_init(void); -void resctrl_l3_mon_resource_exit(void); +void resctrl_mon_exit(void); void mon_event_count(void *info); @@ -467,6 +476,11 @@ ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); u32 resctrl_io_alloc_closid(struct rdt_resource *r); +int mbm_MB_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + +ssize_t mbm_MB_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 9fd901c78dc66484c37236135cf0730b627905ab..b664fb16b4c68d75a5586b1f2ad13e1a1085a3f6 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -98,12 +98,17 @@ unsigned int resctrl_rmid_realloc_limit; * * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code * must accept an attempt to read every index. + * + * Returns NULL if the rmid_ptrs[] array is not allocated. */ static inline struct rmid_entry *__rmid_entry(u32 idx) { struct rmid_entry *entry; u32 closid, rmid; + if (!rmid_ptrs) + return NULL; + entry = &rmid_ptrs[idx]; resctrl_arch_rmid_idx_decode(idx, &closid, &rmid); @@ -113,6 +118,20 @@ static inline struct rmid_entry *__rmid_entry(u32 idx) return entry; } +static bool __has_closid_num_dirty_rmid_array(void) +{ + lockdep_assert_held(&rdtgroup_mutex); + + if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + return false; + + /* + * Avoid a race with dom_data_exit() freeing the array under + * rdtgroup_mutex. + */ + return closid_num_dirty_rmid; +} + static void limbo_release_entry(struct rmid_entry *entry) { lockdep_assert_held(&rdtgroup_mutex); @@ -120,7 +139,7 @@ static void limbo_release_entry(struct rmid_entry *entry) rmid_limbo_count--; list_add_tail(&entry->list, &rmid_free_lru); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + if (__has_closid_num_dirty_rmid_array()) closid_num_dirty_rmid[entry->closid]--; } @@ -161,6 +180,8 @@ void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free) break; entry = __rmid_entry(idx); + if (!entry) + break; if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid, QOS_L3_OCCUP_EVENT_ID, arch_priv, &val, arch_mon_ctx)) { @@ -244,7 +265,7 @@ int resctrl_find_cleanest_closid(void) lockdep_assert_held(&rdtgroup_mutex); - if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + if (!__has_closid_num_dirty_rmid_array()) return -EIO; for (i = 0; i < closids_supported(); i++) { @@ -317,7 +338,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) } rmid_limbo_count++; - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + if (__has_closid_num_dirty_rmid_array()) closid_num_dirty_rmid[entry->closid]++; } @@ -339,6 +360,8 @@ void free_rmid(u32 closid, u32 rmid) return; entry = __rmid_entry(idx); + if (!entry) + return; if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) add_rmid_to_limbo(entry); @@ -427,7 +450,7 @@ static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) struct mbm_state *m; u64 tval = 0; - if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) { + if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, rr->r->rid)) { rr->err = -EIO; return -EINVAL; } @@ -524,6 +547,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { switch (rr->r->rid) { case RDT_RESOURCE_L3: + case RDT_RESOURCE_MBA: WARN_ON_ONCE(rr->evt->any_cpu); if (rr->hdr) return __l3_mon_event_count(rdtgrp, rr); @@ -569,7 +593,7 @@ static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) struct rdt_l3_mon_domain *d; struct mbm_state *m; - if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, rr->r->rid)) return; d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr); m = get_mbm_state(d, closid, rmid, rr->evt->evtid); @@ -895,8 +919,10 @@ void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long del /* * When a domain comes online there is no guarantee the filesystem is * mounted. If not, there is no need to catch counter overflow. + * Some architecture may have ~64bit counters, and can ignore overflow. */ - if (!resctrl_mounted || !resctrl_arch_mon_capable()) + if (!resctrl_mounted || !resctrl_arch_mon_capable() || + !resctrl_arch_mon_can_overflow()) return; cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->mbm_work_cpu = cpu; @@ -945,6 +971,7 @@ int setup_rmid_lru_list(void) idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID); entry = __rmid_entry(idx); + WARN_ON_ONCE(!entry); list_del(&entry->list); return 0; @@ -976,7 +1003,7 @@ void free_rmid_lru_list(void) */ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { MON_EVENT(QOS_L3_OCCUP_EVENT_ID, "llc_occupancy", RDT_RESOURCE_L3, false), - MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID, "mbm_total_bytes", RDT_RESOURCE_L3, false), + MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID, "mbm_total_bytes", RDT_RESOURCE_MBA, false), MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID, "mbm_local_bytes", RDT_RESOURCE_L3, false), MON_EVENT(PMT_EVENT_ENERGY, "core_energy", RDT_RESOURCE_PERF_PKG, true), MON_EVENT(PMT_EVENT_ACTIVITY, "activity", RDT_RESOURCE_PERF_PKG, true), @@ -1605,9 +1632,9 @@ int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, return ret; } -int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +static int mbm_assignments_show(struct kernfs_open_file *of, struct seq_file *s, + void *v, struct rdt_resource *r) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_l3_mon_domain *d; struct rdtgroup *rdtgrp; struct mon_evt *mevt; @@ -1653,6 +1680,18 @@ int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, voi return ret; } +int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +{ + return mbm_assignments_show(of, s, v, + resctrl_arch_get_resource(RDT_RESOURCE_L3)); +} + +int mbm_MB_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +{ + return mbm_assignments_show(of, s, v, + resctrl_arch_get_resource(RDT_RESOURCE_MBA)); +} + /* * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching * event name. @@ -1747,10 +1786,10 @@ static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup return -EINVAL; } -ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) +static ssize_t mbm_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off, + struct rdt_resource *r) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdtgroup *rdtgrp; char *token, *event; int ret = 0; @@ -1792,6 +1831,20 @@ ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, return ret ?: nbytes; } +ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + return mbm_assignments_write(of, buf, nbytes, off, + resctrl_arch_get_resource(RDT_RESOURCE_L3)); +} + +ssize_t mbm_MB_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + return mbm_assignments_write(of, buf, nbytes, off, + resctrl_arch_get_resource(RDT_RESOURCE_MBA)); +} + static int closid_num_dirty_rmid_alloc(struct rdt_resource *r) { if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { @@ -1832,7 +1885,7 @@ static void closid_num_dirty_rmid_free(void) } /** - * resctrl_l3_mon_resource_init() - Initialise global monitoring structures. + * resctrl_mon_resource_init() - Initialise global monitoring structures. * * Allocate and initialise global monitor resources that do not belong to a * specific domain. i.e. the closid_num_dirty_rmid[] used to find the CLOSID @@ -1844,27 +1897,21 @@ static void closid_num_dirty_rmid_free(void) * * Return: 0 for success, or -ENOMEM. */ -int resctrl_l3_mon_resource_init(void) +static void resctrl_mon_resource_init(struct rdt_resource *r) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - int ret; - - if (!r->mon_capable) - return 0; + unsigned long fflags; - ret = closid_num_dirty_rmid_alloc(r); - if (ret) - return ret; + fflags = (r->rid == RDT_RESOURCE_MBA) ? RFTYPE_RES_MB :RFTYPE_RES_CACHE; if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_total_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); } if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_local_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); } if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) @@ -1882,19 +1929,43 @@ int resctrl_l3_mon_resource_init(void) NON_TEMP_WRITE_TO_LOCAL_MEM); r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); resctrl_file_fflags_init("available_mbm_cntrs", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG); resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO | - RFTYPE_RES_CACHE); - resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE); + fflags); + if (r->rid == RDT_RESOURCE_MBA) + resctrl_file_fflags_init("mbm_MB_assignments", RFTYPE_MON_BASE); + else + resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE); + resctrl_file_fflags_init("mbm_assign_mode", RFTYPE_MON_INFO | + fflags); } +} + +int resctrl_mon_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + int ret; + + if (!r->mon_capable) + return 0; + + ret = closid_num_dirty_rmid_alloc(r); + if (ret) + return ret; + + resctrl_mon_resource_init(r); + + r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + if (r) + resctrl_mon_resource_init(r); return 0; } -void resctrl_l3_mon_resource_exit(void) +void resctrl_mon_exit(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index ec2e19a2c6fbd6f234bbae76ebef3ae9aec7cb65..b71aa1e5b6550fe8b33214377aed3f1c5adda9f8 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,9 @@ enum resctrl_event_id mba_mbps_default_event; static bool resctrl_debug; +/* Enable wacky behaviour that is not supported upstream. */ +DEFINE_STATIC_KEY_FALSE(resctrl_abi_playground); + void rdt_last_cmd_clear(void) { lockdep_assert_held(&rdtgroup_mutex); @@ -766,10 +770,65 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, return ret; } +static int rdtgroup_move_iommu(int iommu_group_id, struct rdtgroup *rdtgrp, + struct kernfs_open_file *of) +{ + const struct cred *cred = current_cred(); + struct iommu_group *iommu_group; + int err; + + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + rdt_last_cmd_printf("No permission to move iommu_group %d\n", + iommu_group_id); + return -EPERM; + } + + iommu_group = iommu_group_get_by_id(iommu_group_id); + if (!iommu_group) { + rdt_last_cmd_printf("No matching iommu_group %d\n", + iommu_group_id); + return -ESRCH; + } + + if (rdtgrp->type == RDTMON_GROUP && + !resctrl_arch_match_iommu_closid(iommu_group, + rdtgrp->mon.parent->closid)) { + rdt_last_cmd_puts("Can't move iommu_group to different control group\n"); + err = -EINVAL; + } else { + err = resctrl_arch_set_iommu_closid_rmid(iommu_group, + rdtgrp->closid, + rdtgrp->mon.rmid); + } + + iommu_group_put(iommu_group); + + return err; +} + +static bool string_is_iommu_group(char *buf, int *val) +{ + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU) || + !static_branch_unlikely(&resctrl_abi_playground)) + return false; + + if (strlen(buf) <= strlen("iommu_group:")) + return false; + + if (strncmp(buf, "iommu_group:", strlen("iommu_group:"))) + return false; + + buf += strlen("iommu_group:"); + + return !kstrtoint(buf, 0, val); +} + static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct rdtgroup *rdtgrp; + int iommu_group_id; + bool is_iommu; char *pid_str; int ret = 0; pid_t pid; @@ -791,7 +850,13 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, while (buf && buf[0] != '\0' && buf[0] != '\n') { pid_str = strim(strsep(&buf, ",")); - if (kstrtoint(pid_str, 0, &pid)) { + is_iommu = string_is_iommu_group(pid_str, &iommu_group_id); + if (is_iommu) { + ret = rdtgroup_move_iommu(iommu_group_id, rdtgrp, of); + if (ret) + break; + continue; + } else if (kstrtoint(pid_str, 0, &pid)) { rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); ret = -EINVAL; break; @@ -816,6 +881,44 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, return ret ?: nbytes; } +static bool iommu_matches_rdtgroup(struct iommu_group *group, struct rdtgroup *r) +{ + if (r->type == RDTCTRL_GROUP) + return resctrl_arch_match_iommu_closid(group, r->closid); + + return resctrl_arch_match_iommu_closid_rmid(group, r->closid, + r->mon.rmid); +} + +static void show_rdt_iommu(struct rdtgroup *r, struct seq_file *s) +{ + struct kset *iommu_groups; + struct iommu_group *group; + struct kobject *group_kobj = NULL; + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU) || + !static_branch_unlikely(&resctrl_abi_playground)) + return; + + iommu_groups = iommu_get_group_kset(); + + while ((group_kobj = kset_get_next_obj(iommu_groups, group_kobj))) { + /* iommu_group_get_from_kobj() wants to drop a reference */ + kobject_get(group_kobj); + + group = iommu_group_get_from_kobj(group_kobj); + if (!group) + continue; + + if (iommu_matches_rdtgroup(group, r)) + seq_printf(s, "iommu_group:%s\n", group_kobj->name); + + iommu_group_put(group); + } + + kset_put(iommu_groups); +} + static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) { struct task_struct *p, *t; @@ -830,6 +933,8 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) } } rcu_read_unlock(); + + show_rdt_iommu(r, s); } static int rdtgroup_tasks_show(struct kernfs_open_file *of, @@ -1004,9 +1109,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); - struct rdt_resource *r = s->res; - seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r)); + seq_printf(seq, "%x\n", resctrl_get_schema_default_ctrl(s)); return 0; } @@ -1147,9 +1251,8 @@ static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); - struct rdt_resource *r = s->res; - seq_printf(seq, "%u\n", r->membw.min_bw); + seq_printf(seq, "%u\n", s->membw.min_bw); return 0; } @@ -1185,9 +1288,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); - struct rdt_resource *r = s->res; - seq_printf(seq, "%u\n", r->membw.bw_gran); + seq_printf(seq, "%u\n", s->membw.bw_gran); return 0; } @@ -1197,7 +1299,18 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of, struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; - seq_printf(seq, "%u\n", r->membw.delay_linear); + seq_printf(seq, "%u\n", r->mba.delay_linear); + return 0; +} + +static int rdt_mb_max_lim_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r = s->res; + + seq_printf(seq, "%d\n", r->membw.mb_max_lim); + return 0; } @@ -1215,7 +1328,7 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; - switch (r->membw.throttle_mode) { + switch (r->mba.throttle_mode) { case THREAD_THROTTLE_PER_THREAD: seq_puts(seq, "per-thread\n"); return 0; @@ -1412,7 +1525,8 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) + if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA || + r->rid == RDT_RESOURCE_MB_HLIM) continue; has_cache = true; list_for_each_entry(d, &r->ctrl_domains, hdr.list) { @@ -1529,7 +1643,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct cacheinfo *ci; int num_b; - if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE)) + if (WARN_ON_ONCE(r->schema_fmt != RESCTRL_SCHEMA_BITMAP)) return size; num_b = bitmap_weight(&cbm, r->cache.cbm_len); @@ -1552,7 +1666,7 @@ bool is_mba_sc(struct rdt_resource *r) if (r->rid != RDT_RESOURCE_MBA) return false; - return r->membw.mba_sc; + return r->mba.mba_sc; } /* @@ -1616,11 +1730,11 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, ctrl = resctrl_arch_get_config(r, d, closid, type); - if (r->rid == RDT_RESOURCE_MBA || - r->rid == RDT_RESOURCE_SMBA) - size = ctrl; - else + + if (schema->schema_fmt == RESCTRL_SCHEMA_BITMAP) size = rdtgroup_cbm_to_size(r, d, ctrl); + else + size = ctrl; } seq_printf(s, "%d=%u", d->hdr.id, size); sep = true; @@ -1690,6 +1804,33 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, return 0; } +static int resctrl_schema_format_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + + switch (s->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + seq_puts(seq, "bitmap\n"); + break; + case RESCTRL_SCHEMA_PERCENT: + seq_puts(seq, "percentage\n"); + break; + case RESCTRL_SCHEMA_MBPS: + seq_puts(seq, "mbps\n"); + break; + /* The way these schema behave isn't discoverable from resctrl */ + case RESCTRL_SCHEMA__AMD_MBA: + seq_puts(seq, "platform\n"); + break; + case RESCTRL_SCHEMA_MB_HLIM: + seq_puts(seq, "0/1\n"); + break; + } + + return 0; +} + static void mbm_config_write_domain(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 evtid, u32 val) { @@ -1931,6 +2072,13 @@ static struct rftype res_common_files[] = { .kf_ops = &rdtgroup_kf_single_ops, .seq_show = resctrl_num_mbm_cntrs_show, }, + { + .name = "bitmap_mask", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_default_ctrl_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_BITMAP, + }, { .name = "min_cbm_bits", .mode = 0444, @@ -1938,6 +2086,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_min_cbm_bits_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, + { + .name = "bitmaps_min_bits", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_cbm_bits_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_BITMAP, + }, { .name = "shareable_bits", .mode = 0444, @@ -1959,6 +2114,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_min_bw_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, + { + .name = "percent_min", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_bw_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_PERCENT, + }, { .name = "bandwidth_gran", .mode = 0444, @@ -1966,6 +2128,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_bw_gran_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, + { + .name = "percent_gran", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_bw_gran_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_PERCENT, + }, { .name = "delay_linear", .mode = 0444, @@ -1973,6 +2142,12 @@ static struct rftype res_common_files[] = { .seq_show = rdt_delay_linear_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, + { + .name = "max_lim", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_mb_max_lim_show, + }, /* * Platform specific which (if any) capabilities are provided by * thread_throttle_mode. Defer "fflags" initialization to platform @@ -2034,6 +2209,13 @@ static struct rftype res_common_files[] = { .seq_show = mbm_L3_assignments_show, .write = mbm_L3_assignments_write, }, + { + .name = "mbm_MB_assignments", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_MB_assignments_show, + .write = mbm_MB_assignments_write, + }, { .name = "mbm_assign_mode", .mode = 0644, @@ -2118,6 +2300,14 @@ static struct rftype res_common_files[] = { .seq_show = rdtgroup_closid_show, .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, }, + { + .name = "schema_format", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_schema_format_show, + .fflags = RFTYPE_CTRL_INFO, + }, + }; static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) @@ -2174,13 +2364,13 @@ static void thread_throttle_mode_init(void) r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); if (r_mba->alloc_capable && - r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) - throttle_mode = r_mba->membw.throttle_mode; + r_mba->mba.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_mba->mba.throttle_mode; r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA); if (r_smba->alloc_capable && - r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) - throttle_mode = r_smba->membw.throttle_mode; + r_smba->mba.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_smba->mba.throttle_mode; if (throttle_mode == THREAD_THROTTLE_UNDEFINED) return; @@ -2206,6 +2396,17 @@ static void io_alloc_init(void) } } +/* The resctrl file "max_lim" is added using MB resource if visible. */ +static void mb_max_lim_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + + if (!r->membw.arch_has_mb_max_lim) + return; + + resctrl_file_fflags_init("max_lim", RFTYPE_CTRL_INFO | RFTYPE_RES_MB); +} + void resctrl_file_fflags_init(const char *config, unsigned long fflags) { struct rftype *rft; @@ -2400,9 +2601,40 @@ static unsigned long fflags_from_resource(struct rdt_resource *r) return RFTYPE_RES_MB; case RDT_RESOURCE_PERF_PKG: return RFTYPE_RES_PERF_PKG; + case RDT_RESOURCE_MB_HLIM: + return 0; } - return WARN_ON_ONCE(1); + return 0; +} + +static u32 fflags_from_schema(struct resctrl_schema *s) +{ + struct rdt_resource *r = s->res; + u32 fflags = 0; + + /* Some resources are configured purely from their rid */ + fflags |= fflags_from_resource(r); + if (fflags) + return fflags; + + switch (s->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + fflags |= RFTYPE_SCHEMA_BITMAP; + break; + case RESCTRL_SCHEMA_PERCENT: + fflags |= RFTYPE_SCHEMA_PERCENT; + break; + case RESCTRL_SCHEMA_MBPS: + fflags |= RFTYPE_SCHEMA_MBPS; + break; + case RESCTRL_SCHEMA__AMD_MBA: + case RESCTRL_SCHEMA_MB_HLIM: + /* No standard files are exposed */ + break; + } + + return fflags; } static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) @@ -2425,7 +2657,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO; + fflags = fflags_from_schema(s) | RFTYPE_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; @@ -2482,7 +2714,7 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, static inline bool is_mba_linear(void) { - return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; + return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->mba.delay_linear; } static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d) @@ -2540,7 +2772,7 @@ static int set_mba_sc(bool mba_sc) if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) return -EINVAL; - r->membw.mba_sc = mba_sc; + r->mba.mba_sc = mba_sc; rdtgroup_default.mba_mbps_event = mba_mbps_default_event; @@ -2790,11 +3022,29 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type if (cl > max_name_width) max_name_width = cl; - switch (r->schema_fmt) { + s->schema_fmt = r->schema_fmt; + s->membw = r->membw; + + /* + * When mba_sc() is enabled the format used by user space is different + * to that expected by hardware. The conversion is done by + * update_mba_bw(). + */ + if (is_mba_sc(r)) { + s->schema_fmt = RESCTRL_SCHEMA_MBPS; + s->membw.min_bw = 0; + s->membw.max_bw = MBA_MAX_MBPS; + s->membw.bw_gran = 1; + } + + switch (s->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: s->fmt_str = "%d=%x"; break; - case RESCTRL_SCHEMA_RANGE: + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: + case RESCTRL_SCHEMA_MB_HLIM: s->fmt_str = "%d=%u"; break; } @@ -2843,6 +3093,42 @@ static void schemata_list_destroy(void) } } +static void hack_file_mode(const char *name, u16 mode) +{ + struct rftype *rfts, *rft; + int len; + + mutex_lock(&rdtgroup_mutex); + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + for (rft = rfts; rft < rfts + len; rft++) { + if (!strcmp(rft->name, name)) + rft->mode = mode; + } + + mutex_unlock(&rdtgroup_mutex); +} + +static void enable_abi_playground(void) +{ + static_key_enable(&resctrl_abi_playground.key); + + /* Make the tasks file read only */ + if (IS_ENABLED(CONFIG_CGROUP_RESCTRL)) + hack_file_mode("tasks", 0444); +} + +static void disable_abi_playground(void) +{ + static_key_disable(&resctrl_abi_playground.key); + + /* Make the tasks file read/write only */ + if (IS_ENABLED(CONFIG_CGROUP_RESCTRL)) + hack_file_mode("tasks", 0644); +} + static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); @@ -2853,6 +3139,9 @@ static int rdt_get_tree(struct fs_context *fc) DO_ONCE_SLEEPABLE(resctrl_arch_pre_mount); + if (ctx->enable_abi_playground) + enable_abi_playground(); + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /* @@ -2960,6 +3249,10 @@ static int rdt_get_tree(struct fs_context *fc) rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); + + if (ret && ctx->enable_abi_playground) + disable_abi_playground(); + return ret; } @@ -2969,6 +3262,7 @@ enum rdt_param { Opt_mba_mbps, Opt_hwdrc, Opt_debug, + Opt_not_abi_playground, nr__rdt_params }; @@ -2978,6 +3272,13 @@ static const struct fs_parameter_spec rdt_fs_parameters[] = { fsparam_flag("mba_MBps", Opt_mba_mbps), fsparam_flag("hwdrc", Opt_hwdrc), fsparam_flag("debug", Opt_debug), + + /* + * Some of MPAM's out of tree code exposes things through resctrl + * that need much more discussion before they are considered for + * mainline. Add a mount option that can be used to hide these crimes. + */ + fsparam_flag("this_is_not_abi", Opt_not_abi_playground), {} }; @@ -3014,6 +3315,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_debug: ctx->enable_debug = true; return 0; + case Opt_not_abi_playground: + ctx->enable_abi_playground = true; + return 0; } return -EINVAL; @@ -3260,6 +3564,9 @@ static void rdt_kill_sb(struct super_block *sb) kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); + + if (static_branch_unlikely(&resctrl_abi_playground)) + disable_abi_playground(); } static struct file_system_type rdt_fs_type = { @@ -3701,7 +4008,20 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) } cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = resctrl_get_default_ctrl(r); + cfg->new_ctrl = resctrl_get_resource_default_ctrl(r); + cfg->have_new_ctrl = true; + } +} + +/* Initialize MB_HLIM resource with default hardlim off (0). */ +static void rdtgroup_init_mb_hlim(struct resctrl_schema *s) +{ + struct resctrl_staged_config *cfg; + struct rdt_ctrl_domain *d; + + list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) { + cfg = &d->staged_config[s->conf_type]; + cfg->new_ctrl = 0; cfg->have_new_ctrl = true; } } @@ -3722,6 +4042,8 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) rdtgroup_init_mba(r, rdtgrp->closid); if (is_mba_sc(r)) continue; + } else if (r->rid == RDT_RESOURCE_MB_HLIM) { + rdtgroup_init_mb_hlim(s); } else { ret = rdtgroup_init_cat(s, rdtgrp->closid); if (ret < 0) @@ -4315,6 +4637,9 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_debug) seq_puts(seq, ",debug"); + if (static_branch_unlikely(&resctrl_abi_playground)) + seq_puts(seq, ",this_is_not_abi"); + return 0; } @@ -4397,10 +4722,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *h if (resctrl_mounted && resctrl_arch_mon_capable()) rmdir_mondata_subdir_allrdtgrp(r, hdr); - if (r->rid != RDT_RESOURCE_L3) - goto out_unlock; - - if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid)) goto out_unlock; d = container_of(hdr, struct rdt_l3_mon_domain, hdr); @@ -4506,10 +4828,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr mutex_lock(&rdtgroup_mutex); - if (r->rid != RDT_RESOURCE_L3) - goto mkdir; - - if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid)) goto out_unlock; d = container_of(hdr, struct rdt_l3_mon_domain, hdr); @@ -4526,7 +4845,6 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); -mkdir: err = 0; /* * If the filesystem is not mounted then only the default resource group @@ -4632,13 +4950,15 @@ int resctrl_init(void) io_alloc_init(); - ret = resctrl_l3_mon_resource_init(); + mb_max_lim_init(); + + ret = resctrl_mon_init(); if (ret) return ret; ret = sysfs_create_mount_point(fs_kobj, "resctrl"); if (ret) { - resctrl_l3_mon_resource_exit(); + resctrl_mon_exit(); return ret; } @@ -4673,7 +4993,7 @@ int resctrl_init(void) cleanup_mountpoint: sysfs_remove_mount_point(fs_kobj, "resctrl"); - resctrl_l3_mon_resource_exit(); + resctrl_mon_exit(); return ret; } @@ -4736,6 +5056,6 @@ void resctrl_exit(void) * it can be used to umount resctrl. */ - resctrl_l3_mon_resource_exit(); + resctrl_mon_exit(); free_rmid_lru_list(); } diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index f92a36187a5272c65124742528ccf4bb9bf81e1b..d3983af5c679438e060b9a692cddb5b0781e96d3 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -5,6 +5,7 @@ #define __LINUX_ARM_MPAM_H #include +#include #include #include @@ -40,11 +41,11 @@ static inline int acpi_mpam_count_msc(void) { return -EINVAL; } #ifdef CONFIG_ARM64_MPAM_DRIVER int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, - enum mpam_class_types type, u8 class_id, int component_id); + enum mpam_class_types type, u8 class_id, u32 component_id); #else static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, - int component_id) + u32 component_id) { return -EINVAL; } @@ -52,6 +53,7 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, bool resctrl_arch_alloc_capable(void); bool resctrl_arch_mon_capable(void); +bool resctrl_arch_mon_can_overflow(void); void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); @@ -76,6 +78,17 @@ static inline void resctrl_arch_disable_mon(void) { } static inline void resctrl_arch_enable_alloc(void) { } static inline void resctrl_arch_disable_alloc(void) { } +struct resctrl_schema; +static inline u32 resctrl_arch_round_bw(u32 val, + const struct resctrl_schema *s __always_unused) +{ + /* + * Do nothing: for MPAM, resctrl_arch_update_one() has the necessary + * context to round the incoming value correctly. + */ + return val; +} + static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) { return val; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4177c47382826932a88649eb6454379402b89dda..591d4b7267d8b3bc1eeda58ffc616d6b99906946 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -202,6 +202,8 @@ void debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value); void debugfs_create_str(const char *name, umode_t mode, struct dentry *parent, char **value); +void debugfs_create_cpumask(const char *name, umode_t mode, + struct dentry *parent, struct cpumask *value); struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -407,6 +409,10 @@ static inline void debugfs_create_str(const char *name, umode_t mode, char **value) { } +static inline void debugfs_create_cpumask(const char *name, umode_t mode, + struct dentry *parent, struct cpumask *value) +{ } + static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1af1c457b8a6ad2473e2b03c316a180622fceb6b..d871b0c3156813a84a686dbf4145e781a07e9b35 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -715,6 +715,12 @@ struct iommu_ops { struct iommu_domain *parent_domain, const struct iommu_user_data *user_data); + /* Per group IOMMU features */ + int (*get_group_qos_params)(struct iommu_group *group, u16 *partition, + u8 *perf_mon_grp); + int (*set_group_qos_params)(struct iommu_group *group, u16 partition, + u8 perf_mon_grp); + const struct iommu_domain_ops *default_domain_ops; struct module *owner; struct iommu_domain *identity_domain; @@ -925,12 +931,15 @@ static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) { return iommu_paging_domain_alloc_flags(dev, 0); } +struct iommu_group *iommu_group_get_from_kobj(struct kobject *group_kobj); +extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); +extern struct iommu_domain *iommu_get_domain_for_group(struct iommu_group *group); struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -982,6 +991,7 @@ extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); extern int iommu_group_id(struct iommu_group *group); +struct kset *iommu_get_group_kset(void); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); int iommu_set_pgtable_quirks(struct iommu_domain *domain, @@ -1210,6 +1220,10 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); ioasid_t iommu_alloc_global_pasid(struct device *dev); void iommu_free_global_pasid(ioasid_t pasid); +int iommu_group_set_qos_params(struct iommu_group *group, + u16 partition, u8 perf_mon_grp); +int iommu_group_get_qos_params(struct iommu_group *group, + u16 *partition, u8 *perf_mon_grp); /* PCI device reset functions */ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev); @@ -1241,6 +1255,16 @@ static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) return ERR_PTR(-ENODEV); } +static inline struct iommu_group *iommu_group_get_from_kobj(struct kobject *group_kobj) +{ + return NULL; +} + +static inline struct iommu_group *iommu_group_get_by_id(int id) +{ + return NULL; +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } @@ -1399,6 +1423,11 @@ static inline int iommu_group_id(struct iommu_group *group) return -ENODEV; } +static inline struct kset *iommu_get_group_kset(void) +{ + return NULL; +} + static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks) { @@ -1537,6 +1566,17 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) } static inline void iommu_free_global_pasid(ioasid_t pasid) {} +static inline int iommu_group_set_qos_params(struct iommu_group *group, + u16 partition, u8 perf_mon_grp) +{ + return -ENODEV; +} + +static inline int iommu_group_get_qos_params(struct iommu_group *group, + u16 *partition, u8 *perf_mon_grp) +{ + return -ENODEV; +} static inline int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) { diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 6f4f63d05643ba04a4f16f25f26d41d1c6cee7fa..3692b279cb22a2c54df5128d7819831a0b5159fd 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -216,6 +216,8 @@ static inline const struct kobj_type *get_ktype(const struct kobject *kobj) struct kobject *kset_find_obj(struct kset *, const char *); +struct kobject *kset_get_next_obj(struct kset *kset, struct kobject *prev); + /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; /* The global /sys/kernel/mm/ kobject for people to chain off of */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b6cea5e2caf7c651d2613859e65a8042cd116de3..9ef861e795f805a3c86b91b6c92375136a5d71f5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -3,6 +3,7 @@ #define _RESCTRL_H #include +#include #include #include #include @@ -54,6 +55,9 @@ enum resctrl_res_level { RDT_RESOURCE_MBA, RDT_RESOURCE_SMBA, RDT_RESOURCE_PERF_PKG, + RDT_RESOURCE_L3_MAX, + RDT_RESOURCE_L2_MAX, + RDT_RESOURCE_MB_HLIM, /* Must be the last */ RDT_NUM_RESOURCES, @@ -137,7 +141,7 @@ enum resctrl_domain_type { */ struct rdt_domain_hdr { struct list_head list; - int id; + u32 id; enum resctrl_domain_type type; enum resctrl_res_level rid; struct cpumask cpu_mask; @@ -247,24 +251,34 @@ enum membw_throttle_mode { * @min_bw: Minimum memory bandwidth percentage user can request * @max_bw: Maximum memory bandwidth value, used as the reset value * @bw_gran: Granularity at which the memory bandwidth is allocated - * @delay_linear: True if memory B/W delay is in linear scale - * @arch_needs_linear: True if we can't configure non-linear resources - * @throttle_mode: Bandwidth throttling mode when threads request - * different memory bandwidths - * @mba_sc: True if MBA software controller(mba_sc) is enabled * @hwdrc: True if memory bandwidth HWDRC is enabled - * @mb_map: Mapping of memory B/W percentage to memory B/W delay + * @mb_max_lim: MPAM MAX_LIM encoding (MPAMF_MBW_IDR); invalid elsewhere + * @arch_has_mb_max_lim:True if mb_max_lim is supported */ struct resctrl_membw { u32 min_bw; u32 max_bw; u32 bw_gran; - u32 delay_linear; - bool arch_needs_linear; - enum membw_throttle_mode throttle_mode; - bool mba_sc; bool hwdrc; + u8 mb_max_lim; + bool arch_has_mb_max_lim; +}; + +/** + * struct resctrl_mba - Resource properties that are specific to the MBA resource + * @mba_sc: True if MBA software controller(mba_sc) is enabled + * @mb_map: Mapping of memory B/W percentage to memory B/W delay + * @delay_linear: True if control is in linear scale + * @arch_needs_linear: True if we can't configure non-linear resources + * @throttle_mode: Mode when threads request different control values + */ +struct resctrl_mba { + bool mba_sc; u32 *mb_map; + bool delay_linear; + bool arch_needs_linear; + enum membw_throttle_mode throttle_mode; + }; struct resctrl_schema; @@ -279,11 +293,18 @@ enum resctrl_scope { /** * enum resctrl_schema_fmt - The format user-space provides for a schema. * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex. - * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number. + * @RESCTRL_SCHEMA_PERCENT: The schema is a percentage. + * @RESCTRL_SCHEMA_MBPS: The schema ia a MBps value. + * @RESCTRL_SCHEMA__AMD_MBA: The schema value is MBA for AMD platforms. + * @RESCTRL_SCHEMA_MB_HLIM: Per-domain MBW max hard limit (0/1), ARM MPAM only + * when MPAMF_MBW_IDR.MAX_LIM is 0b00 (HARDLIM RW). */ enum resctrl_schema_fmt { RESCTRL_SCHEMA_BITMAP, - RESCTRL_SCHEMA_RANGE, + RESCTRL_SCHEMA_PERCENT, + RESCTRL_SCHEMA_MBPS, + RESCTRL_SCHEMA__AMD_MBA, + RESCTRL_SCHEMA_MB_HLIM, }; /** @@ -316,8 +337,12 @@ struct resctrl_mon { * @mon: Monitoring related data. * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource + * @mba: Properties of the MBA resource * @name: Name to use in "schemata" file. - * @schema_fmt: Which format string and parser is used for this schema. + * @schema_fmt: Which format control parameters should be in for this resource. + * @evt_list: List of monitoring events + * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth + * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource * @hwdrc_capable: Is the hardware Dynamic Resource Controller available * on this resource. @@ -331,6 +356,7 @@ struct rdt_resource { struct resctrl_cache cache; struct resctrl_membw membw; struct resctrl_mon mon; + struct resctrl_mba mba; struct list_head ctrl_domains; struct list_head mon_domains; char *name; @@ -352,9 +378,12 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); * @list: Member of resctrl_schema_all. * @name: The name to use in the "schemata" file. * @fmt_str: Format string to show domain value. + * @schema_fmt: Which format string and parser is used for this schema. * @conf_type: Whether this schema is specific to code/data. * @res: The resource structure exported by the architecture to describe * the hardware that is configured by this schema. + * @membw The properties of the schema which may be different to the format + * that was specified by the resource, * @num_closid: The number of closid that can be used with this schema. When * features like CDP are enabled, this will be lower than the * hardware supports for the resource. @@ -363,8 +392,10 @@ struct resctrl_schema { struct list_head list; char name[8]; const char *fmt_str; + enum resctrl_schema_fmt schema_fmt; enum resctrl_conf_type conf_type; struct rdt_resource *res; + struct resctrl_membw membw; u32 num_closid; }; @@ -398,17 +429,42 @@ struct resctrl_mon_config_info { void resctrl_arch_sync_cpu_closid_rmid(void *info); /** - * resctrl_get_default_ctrl() - Return the default control value for this - * resource. - * @r: The resource whose default control type is queried. + * resctrl_get_resource_default_ctrl() - Return the default control value for + * this resource. + * @r: The resource whose default control value is queried. */ -static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r) +static inline u32 resctrl_get_resource_default_ctrl(struct rdt_resource *r) { switch (r->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: return BIT_MASK(r->cache.cbm_len) - 1; - case RESCTRL_SCHEMA_RANGE: + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: return r->membw.max_bw; + case RESCTRL_SCHEMA_MB_HLIM: + return 0; + } + + return WARN_ON_ONCE(1); +} + +/** + * resctrl_get_schema_default_ctrl() - Return the default control value for + * this schema. + * @s: The schema whose default control value is queried. + */ +static inline u32 resctrl_get_schema_default_ctrl(struct resctrl_schema *s) +{ + switch (s->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + return resctrl_get_resource_default_ctrl(s->res); + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: + return s->membw.max_bw; + case RESCTRL_SCHEMA_MB_HLIM: + return 0; } return WARN_ON_ONCE(1); @@ -505,6 +561,8 @@ bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r); */ int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable); +u32 resctrl_arch_round_bw(u32 val, const struct resctrl_schema *s); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. @@ -710,6 +768,7 @@ extern unsigned int resctrl_rmid_realloc_limit; int resctrl_init(void); void resctrl_exit(void); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK u64 resctrl_arch_get_prefetch_disable_bits(void); int resctrl_arch_pseudo_lock_fn(void *_plr); @@ -723,4 +782,30 @@ static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; } static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } #endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ + +/* When supported, the architecture must implement these */ +#ifdef CONFIG_RESCTRL_IOMMU +int resctrl_arch_set_iommu_closid_rmid(struct iommu_group *group, u32 closid, + u32 rmid); +bool resctrl_arch_match_iommu_closid(struct iommu_group *group, u32 closid); +bool resctrl_arch_match_iommu_closid_rmid(struct iommu_group *group, u32 closid, + u32 rmid); +#else +static inline int resctrl_arch_set_iommu_closid_rmid(struct iommu_group *group, + u32 closid, u32 rmid) +{ + return -EOPNOTSUPP; +} +static inline bool resctrl_arch_match_iommu_closid(struct iommu_group *group, + u32 closid) +{ + return false; +} +static inline bool +resctrl_arch_match_iommu_closid_rmid(struct iommu_group *group, + u32 closid, u32 rmid) +{ + return false; +} +#endif /* CONFIG_RESCTRL_IOMMU */ #endif /* _RESCTRL_H */ diff --git a/lib/kobject.c b/lib/kobject.c index 9c9ff0f5175fb4a6576f83518144e4ba69ea4c2f..518d95cce9755a9ac0ab328f13811de9013b6c9c 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -920,6 +920,27 @@ struct kobject *kset_find_obj(struct kset *kset, const char *name) } EXPORT_SYMBOL_GPL(kset_find_obj); +struct kobject *kset_get_next_obj(struct kset *kset, struct kobject *prev) +{ + struct kobject *k; + + spin_lock(&kset->list_lock); + + if (!prev) + k = list_first_entry_or_null(&kset->list, typeof(*k), entry); + else + k = list_next_entry(prev, entry); + + if (list_entry_is_head(k, &kset->list, entry)) + k = NULL; + + kobject_get(k); + spin_unlock(&kset->list_lock); + kobject_put(prev); + + return k; +} + static void kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj);