From 46663cb12756e361785512f963a20760c91019ea Mon Sep 17 00:00:00 2001
From: Li RongQing
Date: Thu, 13 Nov 2025 17:53:17 +0800
Subject: [PATCH 1/4] RDMA/core: Prevent soft lockup during large user
 memory region cleanup

maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IDA3T6
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=d056bc45b62b59

----------------------------------------------------------------------

When a process exits with numerous large, pinned memory regions
consisting of 4KB pages, cleaning up a memory region through
__ib_umem_release() may cause soft lockups. This is because
unpin_user_page_range_dirty_lock() is called in a tight loop to unpin
and release pages without ever yielding the CPU.

watchdog: BUG: soft lockup - CPU#44 stuck for 26s! [python3:73464]
Kernel panic - not syncing: softlockup: hung tasks
CPU: 44 PID: 73464 Comm: python3 Tainted: G OEL
 asm_sysvec_apic_timer_interrupt+0x1b/0x20
RIP: 0010:free_unref_page+0xff/0x190
 ? free_unref_page+0xe3/0x190
 __put_page+0x77/0xe0
 put_compound_head+0xed/0x100
 unpin_user_page_range_dirty_lock+0xb2/0x180
 __ib_umem_release+0x57/0xb0 [ib_core]
 ib_umem_release+0x3f/0xd0 [ib_core]
 mlx5_ib_dereg_mr+0x2e9/0x440 [mlx5_ib]
 ib_dereg_mr_user+0x43/0xb0 [ib_core]
 uverbs_free_mr+0x15/0x20 [ib_uverbs]
 destroy_hw_idr_uobject+0x21/0x60 [ib_uverbs]
 uverbs_destroy_uobject+0x38/0x1b0 [ib_uverbs]
 __uverbs_cleanup_ufile+0xd1/0x150 [ib_uverbs]
 uverbs_destroy_ufile_hw+0x3f/0x100 [ib_uverbs]
 ib_uverbs_close+0x1f/0xb0 [ib_uverbs]
 __fput+0x9c/0x280
 ____fput+0xe/0x20
 task_work_run+0x6a/0xb0
 do_exit+0x217/0x3c0
 do_group_exit+0x3b/0xb0
 get_signal+0x150/0x900
 arch_do_signal_or_restart+0xde/0x100
 exit_to_user_mode_loop+0xc4/0x160
 exit_to_user_mode_prepare+0xa0/0xb0
 syscall_exit_to_user_mode+0x27/0x50
 do_syscall_64+0x63/0xb0

Fix the soft lockup by adding a cond_resched() call within
__ib_umem_release(). Since SG entries are typically grouped in 2MB
chunks on x86_64, the added cond_resched() should have minimal
performance impact.
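For illustration only, the general kernel idiom being applied here is a
voluntary yield inside a potentially long release loop; a minimal
sketch with a hypothetical helper (not the actual ib_core code):

  #include <linux/mm.h>
  #include <linux/sched.h>

  /*
   * Release a large batch of pinned pages without tripping the
   * soft-lockup watchdog: cond_resched() gives other tasks a chance
   * to run between iterations.
   */
  static void release_pinned_pages(struct page **pages,
                                   unsigned long npages)
  {
          unsigned long i;

          for (i = 0; i < npages; i++) {
                  unpin_user_page(pages[i]);      /* drop one pin */
                  cond_resched();                 /* yield if needed */
          }
  }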
Signed-off-by: Li RongQing
Link: https://patch.msgid.link/20251113095317.2628-1-lirongqing@baidu.com
Acked-by: Junxian Huang
Signed-off-by: Leon Romanovsky
Signed-off-by: Donghua Huang
---
 drivers/infiniband/core/umem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 2ed282cd2940..b6af70a7023a 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -58,9 +58,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
 			DMA_BIDIRECTIONAL);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->sg_nents, i)
+	for_each_sg(umem->sg_head.sgl, sg, umem->sg_nents, i) {
 		unpin_user_page_range_dirty_lock(sg_page(sg),
 			DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
+		cond_resched();
+	}
 
 	sg_free_table(&umem->sg_head);
 }
-- 
Gitee

From 7b9480b4f1e89d1089048e20619a289aca7be846 Mon Sep 17 00:00:00 2001
From: Li RongQing
Date: Wed, 26 Nov 2025 10:51:47 +0800
Subject: [PATCH 2/4] RDMA/core: Reduce cond_resched() frequency in
 __ib_umem_release

maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IDA3T6
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=f37e2868792335

----------------------------------------------------------------------

The current implementation calls cond_resched() for every SG entry in
__ib_umem_release(), which adds needless overhead.

Introduce RESCHED_LOOP_CNT_THRESHOLD (0x1000) to limit how often
cond_resched() is called: the function now yields the CPU only once
every 4096 iterations, and skips the yield on the very first iteration
so that the common case of releasing many small umems incurs no extra
scheduling overhead.

Signed-off-by: Li RongQing
Link: https://patch.msgid.link/20251126025147.2627-1-lirongqing@baidu.com
Signed-off-by: Leon Romanovsky
Signed-off-by: Donghua Huang
---
 drivers/infiniband/core/umem.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b6af70a7023a..aba848f6c47c 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -48,6 +48,8 @@
 #include "ib_peer_mem.h"
 #endif
 
+#define RESCHED_LOOP_CNT_THRESHOLD 0x1000
+
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
 	bool make_dirty = umem->writable && dirty;
@@ -61,7 +63,9 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	for_each_sg(umem->sg_head.sgl, sg, umem->sg_nents, i) {
 		unpin_user_page_range_dirty_lock(sg_page(sg),
 			DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
-		cond_resched();
+
+		if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD))
+			cond_resched();
 	}
 
 	sg_free_table(&umem->sg_head);
-- 
Gitee

From a0ea24c79334dfa21ca8bbcc3fc507712a547fac Mon Sep 17 00:00:00 2001
From: wenglianfa
Date: Mon, 1 Dec 2025 17:16:31 +0800
Subject: [PATCH 3/4] RDMA/hns: Fix memory issues in roce reserve PD/CQ

driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IDA3T6

----------------------------------------------------------------------

The reserved PD/CQ variables are allocated with kzalloc() but freed
with kvfree(), which does not comply with the allocation API usage
guidelines. Fix it by freeing them with kfree() instead.
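For illustration, the pairing rule being enforced, sketched with a
hypothetical structure (not the hns driver code): memory from
kzalloc()/kmalloc() is returned with kfree(), while kvfree() is meant
for kvmalloc()-family allocations.

  #include <linux/slab.h>

  struct example_ctx {
          int id;
  };

  static struct example_ctx *example_ctx_alloc(void)
  {
          /* slab allocation: pair with kfree() */
          return kzalloc(sizeof(struct example_ctx), GFP_KERNEL);
  }

  static void example_ctx_free(struct example_ctx *ctx)
  {
          kfree(ctx);     /* matches kzalloc() above */
  }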
Signed-off-by: wenglianfa
Signed-off-by: Donghua Huang
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index fcd953e284d1..6bc16347b378 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3039,7 +3039,7 @@ static void free_mr_uninit_pd(struct hns_roce_dev *hr_dev)
 		return;
 
 	hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
-	kvfree(free_mr->rsv_pd);
+	kfree(free_mr->rsv_pd);
 	free_mr->rsv_pd = NULL;
 }
 
@@ -3089,7 +3089,7 @@ static void free_mr_uninit_cq(struct hns_roce_dev *hr_dev)
 		return;
 
 	hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
-	kvfree(free_mr->rsv_cq);
+	kfree(free_mr->rsv_cq);
 	free_mr->rsv_cq = NULL;
 }
 
-- 
Gitee

From 4a87f101cf58adbd7d0a16f273ef46c9e6a277a0 Mon Sep 17 00:00:00 2001
From: wenglianfa
Date: Mon, 1 Dec 2025 17:02:08 +0800
Subject: [PATCH 4/4] RDMA/hns: Fix a memory issue in roce bonding

driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IDA3T6

----------------------------------------------------------------------

After this fix, the die_info structure is allocated with kvzalloc() and
always freed with kvfree(), complying with the allocation API usage
guidelines.

Signed-off-by: wenglianfa
Signed-off-by: Donghua Huang
---
 drivers/infiniband/hw/hns/hns_roce_bond.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c
index e41a11cc7c3c..19ef7b8cf77d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_bond.c
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.c
@@ -589,13 +589,13 @@ static struct hns_roce_die_info *alloc_die_info(int bus_num)
 	struct hns_roce_die_info *die_info;
 	int ret;
 
-	die_info = kzalloc(sizeof(struct hns_roce_die_info), GFP_KERNEL);
+	die_info = kvzalloc(sizeof(struct hns_roce_die_info), GFP_KERNEL);
 	if (!die_info)
 		return NULL;
 
 	ret = xa_err(xa_store(&roce_bond_xa, bus_num, die_info, GFP_KERNEL));
 	if (ret) {
-		kfree(die_info);
+		kvfree(die_info);
 		return NULL;
 	}
 
-- 
Gitee
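For reference, the allocate/store/unwind pattern that the last patch
adjusts, sketched with a hypothetical entry type and xarray (not the
hns bonding code):

  #include <linux/slab.h>
  #include <linux/xarray.h>

  static DEFINE_XARRAY(example_xa);

  struct example_entry {
          int bus_num;
  };

  static struct example_entry *example_entry_alloc(int bus_num)
  {
          struct example_entry *entry;
          int ret;

          /*
           * kvzalloc() may fall back to vmalloc(), so this memory must
           * be released with kvfree() on every path, including the
           * error unwind below.
           */
          entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
          if (!entry)
                  return NULL;

          entry->bus_num = bus_num;

          ret = xa_err(xa_store(&example_xa, bus_num, entry, GFP_KERNEL));
          if (ret) {
                  kvfree(entry);  /* matches kvzalloc() */
                  return NULL;
          }

          return entry;
  }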