]>
Commit | Line | Data |
---|---|---|
3dcf60bc | 1 | // SPDX-License-Identifier: GPL-2.0 |
22e2c507 JA |
2 | /* |
3 | * fs/ioprio.c | |
4 | * | |
0fe23479 | 5 | * Copyright (C) 2004 Jens Axboe <[email protected]> |
22e2c507 JA |
6 | * |
7 | * Helper functions for setting/querying io priorities of processes. The | |
8 | * system calls closely mimic getpriority/setpriority, see the man page for | |
9 | * those. The prio argument is a composite of prio class and prio data, where | |
10 | * the data argument has meaning within that class. The standard scheduling | |
11 | * classes have 8 distinct prio levels, with 0 being the highest prio and 7 | |
12 | * being the lowest. | |
13 | * | |
14 | * IOW, setting BE scheduling class with prio 2 is done ala: | |
15 | * | |
16 | * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; | |
17 | * | |
18 | * ioprio_set(PRIO_PROCESS, pid, prio); | |
19 | * | |
898bd37a | 20 | * See also Documentation/block/ioprio.rst |
22e2c507 JA |
21 | * |
22 | */ | |
5a0e3ad6 | 23 | #include <linux/gfp.h> |
22e2c507 JA |
24 | #include <linux/kernel.h> |
25 | #include <linux/ioprio.h> | |
5b825c3a | 26 | #include <linux/cred.h> |
22e2c507 | 27 | #include <linux/blkdev.h> |
16f7e0fe | 28 | #include <linux/capability.h> |
9abdc4cd | 29 | #include <linux/syscalls.h> |
03e68060 | 30 | #include <linux/security.h> |
b488893a | 31 | #include <linux/pid_namespace.h> |
22e2c507 | 32 | |
aa434577 | 33 | int ioprio_check_cap(int ioprio) |
22e2c507 JA |
34 | { |
35 | int class = IOPRIO_PRIO_CLASS(ioprio); | |
eca20409 | 36 | int level = IOPRIO_PRIO_LEVEL(ioprio); |
22e2c507 JA |
37 | |
38 | switch (class) { | |
39 | case IOPRIO_CLASS_RT: | |
94c4b4fd AD |
40 | /* |
41 | * Originally this only checked for CAP_SYS_ADMIN, | |
42 | * which was implicitly allowed for pid 0 by security | |
43 | * modules such as SELinux. Make sure we check | |
44 | * CAP_SYS_ADMIN first to avoid a denial/avc for | |
45 | * possibly missing CAP_SYS_NICE permission. | |
46 | */ | |
47 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) | |
22e2c507 | 48 | return -EPERM; |
df561f66 | 49 | fallthrough; |
e29387eb | 50 | /* rt has prio field too */ |
22e2c507 | 51 | case IOPRIO_CLASS_BE: |
eca20409 | 52 | if (level >= IOPRIO_NR_LEVELS) |
22e2c507 | 53 | return -EINVAL; |
22e2c507 JA |
54 | break; |
55 | case IOPRIO_CLASS_IDLE: | |
56 | break; | |
8ec680e4 | 57 | case IOPRIO_CLASS_NONE: |
eca20409 | 58 | if (level) |
8ec680e4 JA |
59 | return -EINVAL; |
60 | break; | |
01584c1e | 61 | case IOPRIO_CLASS_INVALID: |
22e2c507 JA |
62 | default: |
63 | return -EINVAL; | |
64 | } | |
65 | ||
aa434577 AM |
66 | return 0; |
67 | } | |
68 | ||
69 | SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) | |
70 | { | |
71 | struct task_struct *p, *g; | |
72 | struct user_struct *user; | |
73 | struct pid *pgrp; | |
74 | kuid_t uid; | |
75 | int ret; | |
76 | ||
77 | ret = ioprio_check_cap(ioprio); | |
78 | if (ret) | |
79 | return ret; | |
80 | ||
22e2c507 | 81 | ret = -ESRCH; |
d69b78ba | 82 | rcu_read_lock(); |
22e2c507 JA |
83 | switch (which) { |
84 | case IOPRIO_WHO_PROCESS: | |
85 | if (!who) | |
86 | p = current; | |
87 | else | |
228ebcbe | 88 | p = find_task_by_vpid(who); |
22e2c507 JA |
89 | if (p) |
90 | ret = set_task_ioprio(p, ioprio); | |
91 | break; | |
92 | case IOPRIO_WHO_PGRP: | |
93 | if (!who) | |
41487c65 EB |
94 | pgrp = task_pgrp(current); |
95 | else | |
b488893a | 96 | pgrp = find_vpid(who); |
40c7fd3f PZ |
97 | |
98 | read_lock(&tasklist_lock); | |
2d70b68d | 99 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
22e2c507 | 100 | ret = set_task_ioprio(p, ioprio); |
40c7fd3f PZ |
101 | if (ret) { |
102 | read_unlock(&tasklist_lock); | |
103 | goto out; | |
104 | } | |
2d70b68d | 105 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
40c7fd3f PZ |
106 | read_unlock(&tasklist_lock); |
107 | ||
22e2c507 JA |
108 | break; |
109 | case IOPRIO_WHO_USER: | |
7b44ab97 EB |
110 | uid = make_kuid(current_user_ns(), who); |
111 | if (!uid_valid(uid)) | |
112 | break; | |
22e2c507 | 113 | if (!who) |
86a264ab | 114 | user = current_user(); |
22e2c507 | 115 | else |
7b44ab97 | 116 | user = find_user(uid); |
22e2c507 JA |
117 | |
118 | if (!user) | |
119 | break; | |
120 | ||
612dafab | 121 | for_each_process_thread(g, p) { |
8639b461 BS |
122 | if (!uid_eq(task_uid(p), uid) || |
123 | !task_pid_vnr(p)) | |
22e2c507 JA |
124 | continue; |
125 | ret = set_task_ioprio(p, ioprio); | |
126 | if (ret) | |
78bd4d48 | 127 | goto free_uid; |
612dafab | 128 | } |
78bd4d48 | 129 | free_uid: |
22e2c507 JA |
130 | if (who) |
131 | free_uid(user); | |
132 | break; | |
133 | default: | |
134 | ret = -EINVAL; | |
135 | } | |
136 | ||
40c7fd3f | 137 | out: |
d69b78ba | 138 | rcu_read_unlock(); |
22e2c507 JA |
139 | return ret; |
140 | } | |
141 | ||
893e5d32 JK |
142 | /* |
143 | * If the task has set an I/O priority, use that. Otherwise, return | |
144 | * the default I/O priority. | |
145 | * | |
146 | * Expected to be called for current task or with task_lock() held to keep | |
147 | * io_context stable. | |
148 | */ | |
149 | int __get_task_ioprio(struct task_struct *p) | |
150 | { | |
151 | struct io_context *ioc = p->io_context; | |
152 | int prio; | |
153 | ||
154 | if (p != current) | |
155 | lockdep_assert_held(&p->alloc_lock); | |
156 | if (ioc) | |
157 | prio = ioc->ioprio; | |
158 | else | |
159 | prio = IOPRIO_DEFAULT; | |
160 | ||
161 | if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) | |
162 | prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), | |
163 | task_nice_ioprio(p)); | |
164 | return prio; | |
165 | } | |
166 | EXPORT_SYMBOL_GPL(__get_task_ioprio); | |
167 | ||
a1836a42 DQ |
/*
 * Return the I/O priority of @p as computed by __get_task_ioprio(), or
 * the non-zero result of security_task_getioprio() if that check does not
 * return 0. The task lock is held around the lookup.
 */
static int get_task_ioprio(struct task_struct *p)
{
	int prio = security_task_getioprio(p);

	if (prio)
		return prio;

	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);

	return prio;
}
181 | ||
182 | /* | |
183 | * Return raw IO priority value as set by userspace. We use this for | |
184 | * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and | |
185 | * also so that userspace can distinguish unset IO priority (which just gets | |
186 | * overriden based on task's nice value) from IO priority set to some value. | |
187 | */ | |
188 | static int get_task_raw_ioprio(struct task_struct *p) | |
189 | { | |
190 | int ret; | |
191 | ||
192 | ret = security_task_getioprio(p); | |
193 | if (ret) | |
194 | goto out; | |
8ba86821 | 195 | task_lock(p); |
fd0928df JA |
196 | if (p->io_context) |
197 | ret = p->io_context->ioprio; | |
4b838d9e JK |
198 | else |
199 | ret = IOPRIO_DEFAULT; | |
8ba86821 | 200 | task_unlock(p); |
a1836a42 DQ |
201 | out: |
202 | return ret; | |
203 | } | |
204 | ||
/*
 * Pick the better of two composite I/O priority values, which here means
 * the numerically smaller one.
 */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	if (bprio < aprio)
		return bprio;

	return aprio;
}
209 | ||
938bb9f5 | 210 | SYSCALL_DEFINE2(ioprio_get, int, which, int, who) |
22e2c507 JA |
211 | { |
212 | struct task_struct *g, *p; | |
213 | struct user_struct *user; | |
41487c65 | 214 | struct pid *pgrp; |
7b44ab97 | 215 | kuid_t uid; |
22e2c507 | 216 | int ret = -ESRCH; |
a1836a42 | 217 | int tmpio; |
22e2c507 | 218 | |
d69b78ba | 219 | rcu_read_lock(); |
22e2c507 JA |
220 | switch (which) { |
221 | case IOPRIO_WHO_PROCESS: | |
222 | if (!who) | |
223 | p = current; | |
224 | else | |
228ebcbe | 225 | p = find_task_by_vpid(who); |
22e2c507 | 226 | if (p) |
4b838d9e | 227 | ret = get_task_raw_ioprio(p); |
22e2c507 JA |
228 | break; |
229 | case IOPRIO_WHO_PGRP: | |
230 | if (!who) | |
41487c65 EB |
231 | pgrp = task_pgrp(current); |
232 | else | |
b488893a | 233 | pgrp = find_vpid(who); |
e6a59aac | 234 | read_lock(&tasklist_lock); |
2d70b68d | 235 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
a1836a42 DQ |
236 | tmpio = get_task_ioprio(p); |
237 | if (tmpio < 0) | |
238 | continue; | |
22e2c507 | 239 | if (ret == -ESRCH) |
a1836a42 | 240 | ret = tmpio; |
22e2c507 | 241 | else |
a1836a42 | 242 | ret = ioprio_best(ret, tmpio); |
2d70b68d | 243 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
e6a59aac DB |
244 | read_unlock(&tasklist_lock); |
245 | ||
22e2c507 JA |
246 | break; |
247 | case IOPRIO_WHO_USER: | |
7b44ab97 | 248 | uid = make_kuid(current_user_ns(), who); |
22e2c507 | 249 | if (!who) |
86a264ab | 250 | user = current_user(); |
22e2c507 | 251 | else |
7b44ab97 | 252 | user = find_user(uid); |
22e2c507 JA |
253 | |
254 | if (!user) | |
255 | break; | |
256 | ||
612dafab | 257 | for_each_process_thread(g, p) { |
8639b461 BS |
258 | if (!uid_eq(task_uid(p), user->uid) || |
259 | !task_pid_vnr(p)) | |
22e2c507 | 260 | continue; |
a1836a42 DQ |
261 | tmpio = get_task_ioprio(p); |
262 | if (tmpio < 0) | |
263 | continue; | |
22e2c507 | 264 | if (ret == -ESRCH) |
a1836a42 | 265 | ret = tmpio; |
22e2c507 | 266 | else |
a1836a42 | 267 | ret = ioprio_best(ret, tmpio); |
612dafab | 268 | } |
22e2c507 JA |
269 | |
270 | if (who) | |
271 | free_uid(user); | |
272 | break; | |
273 | default: | |
274 | ret = -EINVAL; | |
275 | } | |
276 | ||
d69b78ba | 277 | rcu_read_unlock(); |
22e2c507 JA |
278 | return ret; |
279 | } |