Skip to content

Commit b14472d

Browse files
committed
kernfs: Fix UAF in polling when open file is released
commit 3c9ba27 upstream.

JIRA: https://issues.redhat.com/browse/RHEL-122088
Conflicts: Dropped llseek bits, as commit 0fedefd (kernfs: sysfs: support custom llseek method for sysfs entries) is not part of RHEL-9
CVE: CVE-2025-39881

A use-after-free (UAF) vulnerability was identified in the PSI (Pressure Stall Information) monitoring mechanism:

    BUG: KASAN: slab-use-after-free in psi_trigger_poll+0x3c/0x140
    Read of size 8 at addr ffff3de3d50bd308 by task systemd/1

    psi_trigger_poll+0x3c/0x140
    cgroup_pressure_poll+0x70/0xa0
    cgroup_file_poll+0x8c/0x100
    kernfs_fop_poll+0x11c/0x1c0
    ep_item_poll.isra.0+0x188/0x2c0

    Allocated by task 1:
    cgroup_file_open+0x88/0x388
    kernfs_fop_open+0x73c/0xaf0
    do_dentry_open+0x5fc/0x1200
    vfs_open+0xa0/0x3f0
    do_open+0x7e8/0xd08
    path_openat+0x2fc/0x6b0
    do_filp_open+0x174/0x368

    Freed by task 8462:
    cgroup_file_release+0x130/0x1f8
    kernfs_drain_open_files+0x17c/0x440
    kernfs_drain+0x2dc/0x360
    kernfs_show+0x1b8/0x288
    cgroup_file_show+0x150/0x268
    cgroup_pressure_write+0x1dc/0x340
    cgroup_file_write+0x274/0x548

Reproduction Steps:
1. Open test/cpu.pressure and establish epoll monitoring
2. Disable monitoring: echo 0 > test/cgroup.pressure
3. Re-enable monitoring: echo 1 > test/cgroup.pressure

The race condition occurs because:
1. When cgroup.pressure is disabled (echo 0 > cgroup.pressure), it:
   - Releases PSI triggers via cgroup_file_release()
   - Frees of->priv through kernfs_drain_open_files()
2. While epoll still holds reference to the file and continues polling
3. Re-enabling (echo 1 > cgroup.pressure) accesses freed of->priv

    epolling                        disable/enable cgroup.pressure
    fd=open(cpu.pressure)
    while(1)
    ...
    epoll_wait
    kernfs_fop_poll
    kernfs_get_active = true        echo 0 > cgroup.pressure
    ...                             cgroup_file_show
                                    kernfs_show
                                    // inactive kn
                                    kernfs_drain_open_files
                                    cft->release(of);
                                    kfree(ctx);
                                    ...
    kernfs_get_active = false
                                    echo 1 > cgroup.pressure
                                    kernfs_show
                                    kernfs_activate_one(kn);
    kernfs_fop_poll
    kernfs_get_active = true
    cgroup_file_poll
    psi_trigger_poll
    // UAF
    ...
    end: close(fd)

To address this issue, introduce kernfs_get_active_of() for kernfs open files to obtain active references. This function will fail if the open file has been released. Replace kernfs_get_active() with kernfs_get_active_of() to prevent further operations on released file descriptors.

Fixes: 34f26a1 ("sched/psi: Per-cgroup PSI accounting disable/re-enable interface")
Cc: stable <stable@kernel.org>
Reported-by: Zhang Zhaotian <zhangzhaotian@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20250822070715.1565236-2-chenridong@huaweicloud.com
[ Drop llseek bits ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 854baafc00c433cccbe0ab4231b77aeb9b637b77)
Signed-off-by: Pavel Reichl <preichl@redhat.com>
1 parent 7576bc3 commit b14472d

File tree

1 file changed

+36
-18
lines changed

1 file changed

+36
-18
lines changed

fs/kernfs/file.c

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
7070
!list_empty(&of->list));
7171
}
7272

73+
/* Get active reference to kernfs node for an open file */
74+
static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
75+
{
76+
/* Skip if file was already released */
77+
if (unlikely(of->released))
78+
return NULL;
79+
80+
if (!kernfs_get_active(of->kn))
81+
return NULL;
82+
83+
return of;
84+
}
85+
86+
static void kernfs_put_active_of(struct kernfs_open_file *of)
87+
{
88+
return kernfs_put_active(of->kn);
89+
}
90+
7391
/**
7492
* kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
7593
*
@@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
139157

140158
if (ops->seq_stop)
141159
ops->seq_stop(sf, v);
142-
kernfs_put_active(of->kn);
160+
kernfs_put_active_of(of);
143161
}
144162

145163
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
@@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
152170
* the ops aren't called concurrently for the same open file.
153171
*/
154172
mutex_lock(&of->mutex);
155-
if (!kernfs_get_active(of->kn))
173+
if (!kernfs_get_active_of(of))
156174
return ERR_PTR(-ENODEV);
157175

158176
ops = kernfs_ops(of->kn);
@@ -243,7 +261,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
243261
* the ops aren't called concurrently for the same open file.
244262
*/
245263
mutex_lock(&of->mutex);
246-
if (!kernfs_get_active(of->kn)) {
264+
if (!kernfs_get_active_of(of)) {
247265
len = -ENODEV;
248266
mutex_unlock(&of->mutex);
249267
goto out_free;
@@ -257,7 +275,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
257275
else
258276
len = -EINVAL;
259277

260-
kernfs_put_active(of->kn);
278+
kernfs_put_active_of(of);
261279
mutex_unlock(&of->mutex);
262280

263281
if (len < 0)
@@ -328,7 +346,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
328346
* the ops aren't called concurrently for the same open file.
329347
*/
330348
mutex_lock(&of->mutex);
331-
if (!kernfs_get_active(of->kn)) {
349+
if (!kernfs_get_active_of(of)) {
332350
mutex_unlock(&of->mutex);
333351
len = -ENODEV;
334352
goto out_free;
@@ -340,7 +358,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
340358
else
341359
len = -EINVAL;
342360

343-
kernfs_put_active(of->kn);
361+
kernfs_put_active_of(of);
344362
mutex_unlock(&of->mutex);
345363

346364
if (len > 0)
@@ -362,13 +380,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
362380
if (!of->vm_ops)
363381
return;
364382

365-
if (!kernfs_get_active(of->kn))
383+
if (!kernfs_get_active_of(of))
366384
return;
367385

368386
if (of->vm_ops->open)
369387
of->vm_ops->open(vma);
370388

371-
kernfs_put_active(of->kn);
389+
kernfs_put_active_of(of);
372390
}
373391

374392
static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
@@ -380,14 +398,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
380398
if (!of->vm_ops)
381399
return VM_FAULT_SIGBUS;
382400

383-
if (!kernfs_get_active(of->kn))
401+
if (!kernfs_get_active_of(of))
384402
return VM_FAULT_SIGBUS;
385403

386404
ret = VM_FAULT_SIGBUS;
387405
if (of->vm_ops->fault)
388406
ret = of->vm_ops->fault(vmf);
389407

390-
kernfs_put_active(of->kn);
408+
kernfs_put_active_of(of);
391409
return ret;
392410
}
393411

@@ -400,7 +418,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
400418
if (!of->vm_ops)
401419
return VM_FAULT_SIGBUS;
402420

403-
if (!kernfs_get_active(of->kn))
421+
if (!kernfs_get_active_of(of))
404422
return VM_FAULT_SIGBUS;
405423

406424
ret = 0;
@@ -409,7 +427,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
409427
else
410428
file_update_time(file);
411429

412-
kernfs_put_active(of->kn);
430+
kernfs_put_active_of(of);
413431
return ret;
414432
}
415433

@@ -423,14 +441,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
423441
if (!of->vm_ops)
424442
return -EINVAL;
425443

426-
if (!kernfs_get_active(of->kn))
444+
if (!kernfs_get_active_of(of))
427445
return -EINVAL;
428446

429447
ret = -EINVAL;
430448
if (of->vm_ops->access)
431449
ret = of->vm_ops->access(vma, addr, buf, len, write);
432450

433-
kernfs_put_active(of->kn);
451+
kernfs_put_active_of(of);
434452
return ret;
435453
}
436454

@@ -460,7 +478,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
460478
mutex_lock(&of->mutex);
461479

462480
rc = -ENODEV;
463-
if (!kernfs_get_active(of->kn))
481+
if (!kernfs_get_active_of(of))
464482
goto out_unlock;
465483

466484
ops = kernfs_ops(of->kn);
@@ -493,7 +511,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
493511
of->vm_ops = vma->vm_ops;
494512
vma->vm_ops = &kernfs_vm_ops;
495513
out_put:
496-
kernfs_put_active(of->kn);
514+
kernfs_put_active_of(of);
497515
out_unlock:
498516
mutex_unlock(&of->mutex);
499517

@@ -847,15 +865,15 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
847865
struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
848866
__poll_t ret;
849867

850-
if (!kernfs_get_active(kn))
868+
if (!kernfs_get_active_of(of))
851869
return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
852870

853871
if (kn->attr.ops->poll)
854872
ret = kn->attr.ops->poll(of, wait);
855873
else
856874
ret = kernfs_generic_poll(of, wait);
857875

858-
kernfs_put_active(kn);
876+
kernfs_put_active_of(of);
859877
return ret;
860878
}
861879

0 commit comments

Comments
 (0)