Skip to content

Commit a710047

Browse files
committed
crypto: iaa - Optimize rebalance_wq_table()
JIRA: https://issues.redhat.com/browse/RHEL-95629 Upstream Status: merged into the linux.git commit 714ca27 Author: Yury Norov <yury.norov@gmail.com> Date: Thu May 8 15:59:50 2025 -0400 crypto: iaa - Optimize rebalance_wq_table() The function opencodes for_each_cpu() by using a plain for-loop. The loop calls cpumask_weight() inside the conditional section. Because cpumask_weight() is O(N), the overall complexity of the function is O(node * node_cpus^2). Also, cpumask_nth() internally calls hweight(), which, if not hardware accelerated, is slower than cpumask_next() in for_each_cpu(). If switched to the dedicated for_each_cpu(), the rebalance_wq_table() can drop calling cpumask_weight(), together with some housekeeping code. This makes the overall complexity O(node * node_cpus), or simply speaking O(nr_cpu_ids). While there, fix opencoded for_each_possible_cpu() too. Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: Vladis Dronov <vdronov@redhat.com>
1 parent 5a15c87 commit a710047

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

drivers/crypto/intel/iaa/iaa_crypto_main.c

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -896,7 +896,7 @@ static int wq_table_add_wqs(int iaa, int cpu)
896896
static void rebalance_wq_table(void)
897897
{
898898
const struct cpumask *node_cpus;
899-
int node, cpu, iaa = -1;
899+
int node_cpu, node, cpu, iaa = 0;
900900

901901
if (nr_iaa == 0)
902902
return;
@@ -907,36 +907,29 @@ static void rebalance_wq_table(void)
907907
clear_wq_table();
908908

909909
if (nr_iaa == 1) {
910-
for (cpu = 0; cpu < nr_cpus; cpu++) {
911-
if (WARN_ON(wq_table_add_wqs(0, cpu))) {
912-
pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
913-
return;
914-
}
910+
for_each_possible_cpu(cpu) {
911+
if (WARN_ON(wq_table_add_wqs(0, cpu)))
912+
goto err;
915913
}
916914

917915
return;
918916
}
919917

920918
for_each_node_with_cpus(node) {
919+
cpu = 0;
921920
node_cpus = cpumask_of_node(node);
922921

923-
for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
924-
int node_cpu = cpumask_nth(cpu, node_cpus);
925-
926-
if (WARN_ON(node_cpu >= nr_cpu_ids)) {
927-
pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
928-
return;
929-
}
930-
931-
if ((cpu % cpus_per_iaa) == 0)
932-
iaa++;
933-
934-
if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
935-
pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
936-
return;
937-
}
922+
for_each_cpu(node_cpu, node_cpus) {
923+
iaa = cpu / cpus_per_iaa;
924+
if (WARN_ON(wq_table_add_wqs(iaa, node_cpu)))
925+
goto err;
926+
cpu++;
938927
}
939928
}
929+
930+
return;
931+
err:
932+
pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
940933
}
941934

942935
static inline int check_completion(struct device *dev,

0 commit comments

Comments
 (0)