Branch data Line data Source code
1 : : #ifndef _LINUX_CGROUP_H
2 : : #define _LINUX_CGROUP_H
3 : : /*
4 : : * cgroup interface
5 : : *
6 : : * Copyright (C) 2003 BULL SA
7 : : * Copyright (C) 2004-2006 Silicon Graphics, Inc.
8 : : *
9 : : */
10 : :
11 : : #include <linux/sched.h>
12 : : #include <linux/cpumask.h>
13 : : #include <linux/nodemask.h>
14 : : #include <linux/rcupdate.h>
15 : : #include <linux/rculist.h>
16 : : #include <linux/cgroupstats.h>
17 : : #include <linux/prio_heap.h>
18 : : #include <linux/rwsem.h>
19 : : #include <linux/idr.h>
20 : : #include <linux/workqueue.h>
21 : : #include <linux/xattr.h>
22 : : #include <linux/fs.h>
23 : : #include <linux/percpu-refcount.h>
24 : :
25 : : #ifdef CONFIG_CGROUPS
26 : :
27 : : struct cgroupfs_root;
28 : : struct cgroup_subsys;
29 : : struct inode;
30 : : struct cgroup;
31 : : struct css_id;
32 : : struct eventfd_ctx;
33 : :
34 : : extern int cgroup_init_early(void);
35 : : extern int cgroup_init(void);
36 : : extern void cgroup_fork(struct task_struct *p);
37 : : extern void cgroup_post_fork(struct task_struct *p);
38 : : extern void cgroup_exit(struct task_struct *p, int run_callbacks);
39 : : extern int cgroupstats_build(struct cgroupstats *stats,
40 : : struct dentry *dentry);
41 : : extern int cgroup_load_subsys(struct cgroup_subsys *ss);
42 : : extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
43 : :
44 : : extern int proc_cgroup_show(struct seq_file *, void *);
45 : :
46 : : /*
47 : : * Define the enumeration of all cgroup subsystems.
48 : : *
49 : : * We define ids for builtin subsystems and then modular ones.
50 : : */
51 : : #define SUBSYS(_x) _x ## _subsys_id,
52 : : enum cgroup_subsys_id {
53 : : #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
54 : : #include <linux/cgroup_subsys.h>
55 : : #undef IS_SUBSYS_ENABLED
56 : : CGROUP_BUILTIN_SUBSYS_COUNT,
57 : :
58 : : __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1,
59 : :
60 : : #define IS_SUBSYS_ENABLED(option) IS_MODULE(option)
61 : : #include <linux/cgroup_subsys.h>
62 : : #undef IS_SUBSYS_ENABLED
63 : : CGROUP_SUBSYS_COUNT,
64 : : };
65 : : #undef SUBSYS
66 : :
67 : : /* Per-subsystem/per-cgroup state maintained by the system. */
68 : : struct cgroup_subsys_state {
69 : : /* the cgroup that this css is attached to */
70 : : struct cgroup *cgroup;
71 : :
72 : : /* the cgroup subsystem that this css is attached to */
73 : : struct cgroup_subsys *ss;
74 : :
75 : : /* reference count - access via css_[try]get() and css_put() */
76 : : struct percpu_ref refcnt;
77 : :
78 : : /* the parent css */
79 : : struct cgroup_subsys_state *parent;
80 : :
81 : : unsigned long flags;
82 : : /* ID for this css, if possible */
83 : : struct css_id __rcu *id;
84 : :
85 : : /* percpu_ref killing and RCU release */
86 : : struct rcu_head rcu_head;
87 : : struct work_struct destroy_work;
88 : : };
89 : :
90 : : /* bits in struct cgroup_subsys_state flags field */
91 : : enum {
92 : : CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */
93 : : CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */
94 : : };
95 : :
96 : : /**
97 : : * css_get - obtain a reference on the specified css
98 : : * @css: target css
99 : : *
100 : : * The caller must already have a reference.
101 : : */
102 : : static inline void css_get(struct cgroup_subsys_state *css)
103 : : {
104 : : /* We don't need to reference count the root state */
105 [ # # ][ # # ]: 0 : if (!(css->flags & CSS_ROOT))
106 : : percpu_ref_get(&css->refcnt);
107 : : }
108 : :
109 : : /**
110 : : * css_tryget - try to obtain a reference on the specified css
111 : : * @css: target css
112 : : *
113 : : * Obtain a reference on @css if it's alive. The caller naturally needs to
114 : : * ensure that @css is accessible but doesn't have to be holding a
115 : : * reference on it - IOW, RCU protected access is good enough for this
116 : : * function. Returns %true if a reference count was successfully obtained;
117 : : * %false otherwise.
118 : : */
119 : : static inline bool css_tryget(struct cgroup_subsys_state *css)
120 : : {
121 [ # # ][ # # ]: 0 : if (css->flags & CSS_ROOT)
122 : : return true;
123 : : return percpu_ref_tryget(&css->refcnt);
124 : : }
125 : :
126 : : /**
127 : : * css_put - put a css reference
128 : : * @css: target css
129 : : *
130 : : * Put a reference obtained via css_get() and css_tryget().
131 : : */
132 : : static inline void css_put(struct cgroup_subsys_state *css)
133 : : {
134 [ # # ][ # # ]: 0 : if (!(css->flags & CSS_ROOT))
[ # # # # ]
[ # # ][ # # ]
135 : 0 : percpu_ref_put(&css->refcnt);
136 : : }
137 : :
138 : : /* bits in struct cgroup flags field */
139 : : enum {
140 : : /* Control Group is dead */
141 : : CGRP_DEAD,
142 : : /*
143 : : * Control Group has previously had a child cgroup or a task,
144 : : * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
145 : : */
146 : : CGRP_RELEASABLE,
147 : : /* Control Group requires release notifications to userspace */
148 : : CGRP_NOTIFY_ON_RELEASE,
149 : : /*
150 : : * Clone the parent's configuration when creating a new child
151 : : * cpuset cgroup. For historical reasons, this option can be
152 : : * specified at mount time and thus is implemented here.
153 : : */
154 : : CGRP_CPUSET_CLONE_CHILDREN,
155 : : /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
156 : : CGRP_SANE_BEHAVIOR,
157 : : };
158 : :
159 : : struct cgroup_name {
160 : : struct rcu_head rcu_head;
161 : : char name[];
162 : : };
163 : :
164 : : struct cgroup {
165 : : unsigned long flags; /* "unsigned long" so bitops work */
166 : :
167 : : /*
168 : : * idr allocated in-hierarchy ID.
169 : : *
170 : : * The ID of the root cgroup is always 0, and a new cgroup
 171                 :            :  * will be assigned with the smallest available ID.
172 : : */
173 : : int id;
174 : :
175 : : /* the number of attached css's */
176 : : int nr_css;
177 : :
178 : : /*
179 : : * We link our 'sibling' struct into our parent's 'children'.
180 : : * Our children link their 'sibling' into our 'children'.
181 : : */
182 : : struct list_head sibling; /* my parent's children */
183 : : struct list_head children; /* my children */
184 : : struct list_head files; /* my files */
185 : :
186 : : struct cgroup *parent; /* my parent */
187 : : struct dentry *dentry; /* cgroup fs entry, RCU protected */
188 : :
189 : : /*
190 : : * Monotonically increasing unique serial number which defines a
191 : : * uniform order among all cgroups. It's guaranteed that all
192 : : * ->children lists are in the ascending order of ->serial_nr.
193 : : * It's used to allow interrupting and resuming iterations.
194 : : */
195 : : u64 serial_nr;
196 : :
197 : : /*
198 : : * This is a copy of dentry->d_name, and it's needed because
199 : : * we can't use dentry->d_name in cgroup_path().
200 : : *
201 : : * You must acquire rcu_read_lock() to access cgrp->name, and
202 : : * the only place that can change it is rename(), which is
203 : : * protected by parent dir's i_mutex.
204 : : *
205 : : * Normally you should use cgroup_name() wrapper rather than
206 : : * access it directly.
207 : : */
208 : : struct cgroup_name __rcu *name;
209 : :
210 : : /* Private pointers for each registered subsystem */
211 : : struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
212 : :
213 : : struct cgroupfs_root *root;
214 : :
215 : : /*
216 : : * List of cgrp_cset_links pointing at css_sets with tasks in this
217 : : * cgroup. Protected by css_set_lock.
218 : : */
219 : : struct list_head cset_links;
220 : :
221 : : /*
222 : : * Linked list running through all cgroups that can
223 : : * potentially be reaped by the release agent. Protected by
224 : : * release_list_lock
225 : : */
226 : : struct list_head release_list;
227 : :
228 : : /*
229 : : * list of pidlists, up to two for each namespace (one for procs, one
230 : : * for tasks); created on demand.
231 : : */
232 : : struct list_head pidlists;
233 : : struct mutex pidlist_mutex;
234 : :
235 : : /* dummy css with NULL ->ss, points back to this cgroup */
236 : : struct cgroup_subsys_state dummy_css;
237 : :
238 : : /* For css percpu_ref killing and RCU-protected deletion */
239 : : struct rcu_head rcu_head;
240 : : struct work_struct destroy_work;
241 : :
242 : : /* List of events which userspace want to receive */
243 : : struct list_head event_list;
244 : : spinlock_t event_list_lock;
245 : :
246 : : /* directory xattrs */
247 : : struct simple_xattrs xattrs;
248 : : };
249 : :
250 : : #define MAX_CGROUP_ROOT_NAMELEN 64
251 : :
252 : : /* cgroupfs_root->flags */
253 : : enum {
254 : : /*
255 : : * Unfortunately, cgroup core and various controllers are riddled
256 : : * with idiosyncrasies and pointless options. The following flag,
257 : : * when set, will force sane behavior - some options are forced on,
258 : : * others are disallowed, and some controllers will change their
259 : : * hierarchical or other behaviors.
260 : : *
261 : : * The set of behaviors affected by this flag are still being
262 : : * determined and developed and the mount option for this flag is
263 : : * prefixed with __DEVEL__. The prefix will be dropped once we
264 : : * reach the point where all behaviors are compatible with the
265 : : * planned unified hierarchy, which will automatically turn on this
266 : : * flag.
267 : : *
 268                 :            :  * The following are the behaviors currently affected by this flag.
269 : : *
270 : : * - Mount options "noprefix" and "clone_children" are disallowed.
271 : : * Also, cgroupfs file cgroup.clone_children is not created.
272 : : *
273 : : * - When mounting an existing superblock, mount options should
274 : : * match.
275 : : *
276 : : * - Remount is disallowed.
277 : : *
278 : : * - rename(2) is disallowed.
279 : : *
280 : : * - "tasks" is removed. Everything should be at process
281 : : * granularity. Use "cgroup.procs" instead.
282 : : *
283 : : * - "release_agent" and "notify_on_release" are removed.
284 : : * Replacement notification mechanism will be implemented.
285 : : *
286 : : * - cpuset: tasks will be kept in empty cpusets when hotplug happens
287 : : * and take masks of ancestors with non-empty cpus/mems, instead of
288 : : * being moved to an ancestor.
289 : : *
290 : : * - cpuset: a task can be moved into an empty cpuset, and again it
291 : : * takes masks of ancestors.
292 : : *
293 : : * - memcg: use_hierarchy is on by default and the cgroup file for
294 : : * the flag is not created.
295 : : *
296 : : * - blkcg: blk-throttle becomes properly hierarchical.
297 : : */
298 : : CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
299 : :
300 : : CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */
301 : : CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */
302 : :
303 : : /* mount options live below bit 16 */
304 : : CGRP_ROOT_OPTION_MASK = (1 << 16) - 1,
305 : :
306 : : CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */
307 : : };
308 : :
309 : : /*
310 : : * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
311 : : * associated with a superblock to form an active hierarchy. This is
312 : : * internal to cgroup core. Don't access directly from controllers.
313 : : */
314 : : struct cgroupfs_root {
315 : : struct super_block *sb;
316 : :
317 : : /* The bitmask of subsystems attached to this hierarchy */
318 : : unsigned long subsys_mask;
319 : :
320 : : /* Unique id for this hierarchy. */
321 : : int hierarchy_id;
322 : :
323 : : /* A list running through the attached subsystems */
324 : : struct list_head subsys_list;
325 : :
326 : : /* The root cgroup for this hierarchy */
327 : : struct cgroup top_cgroup;
328 : :
329 : : /* Tracks how many cgroups are currently defined in hierarchy.*/
330 : : int number_of_cgroups;
331 : :
332 : : /* A list running through the active hierarchies */
333 : : struct list_head root_list;
334 : :
335 : : /* Hierarchy-specific flags */
336 : : unsigned long flags;
337 : :
338 : : /* IDs for cgroups in this hierarchy */
339 : : struct idr cgroup_idr;
340 : :
341 : : /* The path to use for release notifications. */
342 : : char release_agent_path[PATH_MAX];
343 : :
344 : : /* The name for this hierarchy - may be empty */
345 : : char name[MAX_CGROUP_ROOT_NAMELEN];
346 : : };
347 : :
348 : : /*
349 : : * A css_set is a structure holding pointers to a set of
350 : : * cgroup_subsys_state objects. This saves space in the task struct
351 : : * object and speeds up fork()/exit(), since a single inc/dec and a
352 : : * list_add()/del() can bump the reference count on the entire cgroup
353 : : * set for a task.
354 : : */
355 : :
356 : : struct css_set {
357 : :
358 : : /* Reference count */
359 : : atomic_t refcount;
360 : :
361 : : /*
362 : : * List running through all cgroup groups in the same hash
363 : : * slot. Protected by css_set_lock
364 : : */
365 : : struct hlist_node hlist;
366 : :
367 : : /*
368 : : * List running through all tasks using this cgroup
369 : : * group. Protected by css_set_lock
370 : : */
371 : : struct list_head tasks;
372 : :
373 : : /*
374 : : * List of cgrp_cset_links pointing at cgroups referenced from this
375 : : * css_set. Protected by css_set_lock.
376 : : */
377 : : struct list_head cgrp_links;
378 : :
379 : : /*
380 : : * Set of subsystem states, one for each subsystem. This array
381 : : * is immutable after creation apart from the init_css_set
382 : : * during subsystem registration (at boot time) and modular subsystem
383 : : * loading/unloading.
384 : : */
385 : : struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
386 : :
387 : : /* For RCU-protected deletion */
388 : : struct rcu_head rcu_head;
389 : : };
390 : :
391 : : /*
392 : : * cgroup_map_cb is an abstract callback API for reporting map-valued
393 : : * control files
394 : : */
395 : :
396 : : struct cgroup_map_cb {
397 : : int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
398 : : void *state;
399 : : };
400 : :
401 : : /*
402 : : * struct cftype: handler definitions for cgroup control files
403 : : *
404 : : * When reading/writing to a file:
405 : : * - the cgroup to use is file->f_dentry->d_parent->d_fsdata
406 : : * - the 'cftype' of the file is file->f_dentry->d_fsdata
407 : : */
408 : :
409 : : /* cftype->flags */
410 : : enum {
411 : : CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */
412 : : CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */
413 : : CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */
414 : : CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
415 : : };
416 : :
417 : : #define MAX_CFTYPE_NAME 64
418 : :
419 : : struct cftype {
420 : : /*
421 : : * By convention, the name should begin with the name of the
422 : : * subsystem, followed by a period. Zero length string indicates
423 : : * end of cftype array.
424 : : */
425 : : char name[MAX_CFTYPE_NAME];
426 : : int private;
427 : : /*
428 : : * If not 0, file mode is set to this value, otherwise it will
429 : : * be figured out automatically
430 : : */
431 : : umode_t mode;
432 : :
433 : : /*
434 : : * If non-zero, defines the maximum length of string that can
435 : : * be passed to write_string; defaults to 64
436 : : */
437 : : size_t max_write_len;
438 : :
439 : : /* CFTYPE_* flags */
440 : : unsigned int flags;
441 : :
442 : : /*
443 : : * The subsys this file belongs to. Initialized automatically
444 : : * during registration. NULL for cgroup core files.
445 : : */
446 : : struct cgroup_subsys *ss;
447 : :
448 : : int (*open)(struct inode *inode, struct file *file);
449 : : ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
450 : : struct file *file,
451 : : char __user *buf, size_t nbytes, loff_t *ppos);
452 : : /*
453 : : * read_u64() is a shortcut for the common case of returning a
454 : : * single integer. Use it in place of read()
455 : : */
456 : : u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
457 : : /*
458 : : * read_s64() is a signed version of read_u64()
459 : : */
460 : : s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
461 : : /*
462 : : * read_map() is used for defining a map of key/value
463 : : * pairs. It should call cb->fill(cb, key, value) for each
464 : : * entry. The key/value pairs (and their ordering) should not
465 : : * change between reboots.
466 : : */
467 : : int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
468 : : struct cgroup_map_cb *cb);
469 : : /*
470 : : * read_seq_string() is used for outputting a simple sequence
471 : : * using seqfile.
472 : : */
473 : : int (*read_seq_string)(struct cgroup_subsys_state *css,
474 : : struct cftype *cft, struct seq_file *m);
475 : :
476 : : ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
477 : : struct file *file,
478 : : const char __user *buf, size_t nbytes, loff_t *ppos);
479 : :
480 : : /*
481 : : * write_u64() is a shortcut for the common case of accepting
482 : : * a single integer (as parsed by simple_strtoull) from
483 : : * userspace. Use in place of write(); return 0 or error.
484 : : */
485 : : int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
486 : : u64 val);
487 : : /*
488 : : * write_s64() is a signed version of write_u64()
489 : : */
490 : : int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
491 : : s64 val);
492 : :
493 : : /*
494 : : * write_string() is passed a nul-terminated kernelspace
495 : : * buffer of maximum length determined by max_write_len.
496 : : * Returns 0 or -ve error code.
497 : : */
498 : : int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
499 : : const char *buffer);
500 : : /*
501 : : * trigger() callback can be used to get some kick from the
502 : : * userspace, when the actual string written is not important
503 : : * at all. The private field can be used to determine the
504 : : * kick type for multiplexing.
505 : : */
506 : : int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
507 : :
508 : : int (*release)(struct inode *inode, struct file *file);
509 : :
510 : : /*
511 : : * register_event() callback will be used to add new userspace
512 : : * waiter for changes related to the cftype. Implement it if
513 : : * you want to provide this functionality. Use eventfd_signal()
514 : : * on eventfd to send notification to userspace.
515 : : */
516 : : int (*register_event)(struct cgroup_subsys_state *css,
517 : : struct cftype *cft, struct eventfd_ctx *eventfd,
518 : : const char *args);
519 : : /*
520 : : * unregister_event() callback will be called when userspace
521 : : * closes the eventfd or on cgroup removing.
 522                 :            :  * This callback must be implemented, if you want to provide
523 : : * notification functionality.
524 : : */
525 : : void (*unregister_event)(struct cgroup_subsys_state *css,
526 : : struct cftype *cft,
527 : : struct eventfd_ctx *eventfd);
528 : : };
529 : :
530 : : /*
531 : : * cftype_sets describe cftypes belonging to a subsystem and are chained at
532 : : * cgroup_subsys->cftsets. Each cftset points to an array of cftypes
533 : : * terminated by zero length name.
534 : : */
535 : : struct cftype_set {
536 : : struct list_head node; /* chained at subsys->cftsets */
537 : : struct cftype *cfts;
538 : : };
539 : :
540 : : /*
541 : : * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
542 : : * function can be called as long as @cgrp is accessible.
543 : : */
544 : : static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
545 : : {
546 : 28 : return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
547 : : }
548 : :
549 : : /* Caller should hold rcu_read_lock() */
550 : : static inline const char *cgroup_name(const struct cgroup *cgrp)
551 : : {
552 : : return rcu_dereference(cgrp->name)->name;
553 : : }
554 : :
555 : : int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
556 : : int cgroup_rm_cftypes(struct cftype *cfts);
557 : :
558 : : bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
559 : :
560 : : int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
561 : : int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
562 : :
563 : : int cgroup_task_count(const struct cgroup *cgrp);
564 : :
565 : : /*
566 : : * Control Group taskset, used to pass around set of tasks to cgroup_subsys
567 : : * methods.
568 : : */
569 : : struct cgroup_taskset;
570 : : struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
571 : : struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
572 : : struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
573 : : int subsys_id);
574 : : int cgroup_taskset_size(struct cgroup_taskset *tset);
575 : :
576 : : /**
577 : : * cgroup_taskset_for_each - iterate cgroup_taskset
578 : : * @task: the loop cursor
579 : : * @skip_css: skip if task's css matches this, %NULL to iterate through all
580 : : * @tset: taskset to iterate
581 : : */
582 : : #define cgroup_taskset_for_each(task, skip_css, tset) \
583 : : for ((task) = cgroup_taskset_first((tset)); (task); \
584 : : (task) = cgroup_taskset_next((tset))) \
585 : : if (!(skip_css) || \
586 : : cgroup_taskset_cur_css((tset), \
587 : : (skip_css)->ss->subsys_id) != (skip_css))
588 : :
589 : : /*
590 : : * Control Group subsystem type.
591 : : * See Documentation/cgroups/cgroups.txt for details
592 : : */
593 : :
594 : : struct cgroup_subsys {
595 : : struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
596 : : int (*css_online)(struct cgroup_subsys_state *css);
597 : : void (*css_offline)(struct cgroup_subsys_state *css);
598 : : void (*css_free)(struct cgroup_subsys_state *css);
599 : :
600 : : int (*can_attach)(struct cgroup_subsys_state *css,
601 : : struct cgroup_taskset *tset);
602 : : void (*cancel_attach)(struct cgroup_subsys_state *css,
603 : : struct cgroup_taskset *tset);
604 : : void (*attach)(struct cgroup_subsys_state *css,
605 : : struct cgroup_taskset *tset);
606 : : void (*fork)(struct task_struct *task);
607 : : void (*exit)(struct cgroup_subsys_state *css,
608 : : struct cgroup_subsys_state *old_css,
609 : : struct task_struct *task);
610 : : void (*bind)(struct cgroup_subsys_state *root_css);
611 : :
612 : : int subsys_id;
613 : : int disabled;
614 : : int early_init;
615 : :
616 : : /*
617 : : * If %false, this subsystem is properly hierarchical -
618 : : * configuration, resource accounting and restriction on a parent
619 : : * cgroup cover those of its children. If %true, hierarchy support
620 : : * is broken in some ways - some subsystems ignore hierarchy
621 : : * completely while others are only implemented half-way.
622 : : *
623 : : * It's now disallowed to create nested cgroups if the subsystem is
624 : : * broken and cgroup core will emit a warning message on such
625 : : * cases. Eventually, all subsystems will be made properly
626 : : * hierarchical and this will go away.
627 : : */
628 : : bool broken_hierarchy;
629 : : bool warned_broken_hierarchy;
630 : :
631 : : #define MAX_CGROUP_TYPE_NAMELEN 32
632 : : const char *name;
633 : :
634 : : /*
635 : : * Link to parent, and list entry in parent's children.
636 : : * Protected by cgroup_lock()
637 : : */
638 : : struct cgroupfs_root *root;
639 : : struct list_head sibling;
640 : :
641 : : /* list of cftype_sets */
642 : : struct list_head cftsets;
643 : :
644 : : /* base cftypes, automatically [de]registered with subsys itself */
645 : : struct cftype *base_cftypes;
646 : : struct cftype_set base_cftset;
647 : :
648 : : /* should be defined only by modular subsystems */
649 : : struct module *module;
650 : : };
651 : :
652 : : #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
653 : : #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
654 : : #include <linux/cgroup_subsys.h>
655 : : #undef IS_SUBSYS_ENABLED
656 : : #undef SUBSYS
657 : :
658 : : /**
659 : : * css_parent - find the parent css
660 : : * @css: the target cgroup_subsys_state
661 : : *
662 : : * Return the parent css of @css. This function is guaranteed to return
663 : : * non-NULL parent as long as @css isn't the root.
664 : : */
665 : : static inline
666 : : struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
667 : : {
668 : : return css->parent;
669 : : }
670 : :
671 : : /**
672 : : * task_css_set_check - obtain a task's css_set with extra access conditions
673 : : * @task: the task to obtain css_set for
674 : : * @__c: extra condition expression to be passed to rcu_dereference_check()
675 : : *
676 : : * A task's css_set is RCU protected, initialized and exited while holding
677 : : * task_lock(), and can only be modified while holding both cgroup_mutex
678 : : * and task_lock() while the task is alive. This macro verifies that the
679 : : * caller is inside proper critical section and returns @task's css_set.
680 : : *
681 : : * The caller can also specify additional allowed conditions via @__c, such
682 : : * as locks used during the cgroup_subsys::attach() methods.
683 : : */
684 : : #ifdef CONFIG_PROVE_RCU
685 : : extern struct mutex cgroup_mutex;
686 : : #define task_css_set_check(task, __c) \
687 : : rcu_dereference_check((task)->cgroups, \
688 : : lockdep_is_held(&(task)->alloc_lock) || \
689 : : lockdep_is_held(&cgroup_mutex) || (__c))
690 : : #else
691 : : #define task_css_set_check(task, __c) \
692 : : rcu_dereference((task)->cgroups)
693 : : #endif
694 : :
695 : : /**
696 : : * task_css_check - obtain css for (task, subsys) w/ extra access conds
697 : : * @task: the target task
698 : : * @subsys_id: the target subsystem ID
699 : : * @__c: extra condition expression to be passed to rcu_dereference_check()
700 : : *
701 : : * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The
702 : : * synchronization rules are the same as task_css_set_check().
703 : : */
704 : : #define task_css_check(task, subsys_id, __c) \
705 : : task_css_set_check((task), (__c))->subsys[(subsys_id)]
706 : :
707 : : /**
708 : : * task_css_set - obtain a task's css_set
709 : : * @task: the task to obtain css_set for
710 : : *
711 : : * See task_css_set_check().
712 : : */
713 : : static inline struct css_set *task_css_set(struct task_struct *task)
714 : : {
715 : 2260734 : return task_css_set_check(task, false);
716 : : }
717 : :
718 : : /**
719 : : * task_css - obtain css for (task, subsys)
720 : : * @task: the target task
721 : : * @subsys_id: the target subsystem ID
722 : : *
723 : : * See task_css_check().
724 : : */
725 : : static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
726 : : int subsys_id)
727 : : {
728 : : return task_css_check(task, subsys_id, false);
729 : : }
730 : :
731 : : static inline struct cgroup *task_cgroup(struct task_struct *task,
732 : : int subsys_id)
733 : : {
734 : : return task_css(task, subsys_id)->cgroup;
735 : : }
736 : :
737 : : struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
738 : : struct cgroup_subsys_state *parent);
739 : :
740 : : struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
741 : :
742 : : /**
743 : : * css_for_each_child - iterate through children of a css
744 : : * @pos: the css * to use as the loop cursor
745 : : * @parent: css whose children to walk
746 : : *
747 : : * Walk @parent's children. Must be called under rcu_read_lock(). A child
748 : : * css which hasn't finished ->css_online() or already has finished
749 : : * ->css_offline() may show up during traversal and it's each subsystem's
750 : : * responsibility to verify that each @pos is alive.
751 : : *
752 : : * If a subsystem synchronizes against the parent in its ->css_online() and
753 : : * before starting iterating, a css which finished ->css_online() is
754 : : * guaranteed to be visible in the future iterations.
755 : : *
756 : : * It is allowed to temporarily drop RCU read lock during iteration. The
757 : : * caller is responsible for ensuring that @pos remains accessible until
758 : : * the start of the next iteration by, for example, bumping the css refcnt.
759 : : */
760 : : #define css_for_each_child(pos, parent) \
761 : : for ((pos) = css_next_child(NULL, (parent)); (pos); \
762 : : (pos) = css_next_child((pos), (parent)))
763 : :
764 : : struct cgroup_subsys_state *
765 : : css_next_descendant_pre(struct cgroup_subsys_state *pos,
766 : : struct cgroup_subsys_state *css);
767 : :
768 : : struct cgroup_subsys_state *
769 : : css_rightmost_descendant(struct cgroup_subsys_state *pos);
770 : :
771 : : /**
772 : : * css_for_each_descendant_pre - pre-order walk of a css's descendants
773 : : * @pos: the css * to use as the loop cursor
774 : : * @root: css whose descendants to walk
775 : : *
776 : : * Walk @root's descendants. @root is included in the iteration and the
777 : : * first node to be visited. Must be called under rcu_read_lock(). A
778 : : * descendant css which hasn't finished ->css_online() or already has
779 : : * finished ->css_offline() may show up during traversal and it's each
780 : : * subsystem's responsibility to verify that each @pos is alive.
781 : : *
782 : : * If a subsystem synchronizes against the parent in its ->css_online() and
783 : : * before starting iterating, and synchronizes against @pos on each
784 : : * iteration, any descendant css which finished ->css_online() is
785 : : * guaranteed to be visible in the future iterations.
786 : : *
787 : : * In other words, the following guarantees that a descendant can't escape
788 : : * state updates of its ancestors.
789 : : *
790 : : * my_online(@css)
791 : : * {
792 : : * Lock @css's parent and @css;
793 : : * Inherit state from the parent;
794 : : * Unlock both.
795 : : * }
796 : : *
797 : : * my_update_state(@css)
798 : : * {
799 : : * css_for_each_descendant_pre(@pos, @css) {
800 : : * Lock @pos;
801 : : * if (@pos == @css)
802 : : * Update @css's state;
803 : : * else
804 : : * Verify @pos is alive and inherit state from its parent;
805 : : * Unlock @pos;
806 : : * }
807 : : * }
808 : : *
809 : : * As long as the inheriting step, including checking the parent state, is
810 : : * enclosed inside @pos locking, double-locking the parent isn't necessary
811 : : * while inheriting. The state update to the parent is guaranteed to be
812 : : * visible by walking order and, as long as inheriting operations to the
813 : : * same @pos are atomic to each other, multiple updates racing each other
 814                 :            :  * still result in the correct state. It's guaranteed that at least one
815 : : * inheritance happens for any css after the latest update to its parent.
816 : : *
817 : : * If checking parent's state requires locking the parent, each inheriting
818 : : * iteration should lock and unlock both @pos->parent and @pos.
819 : : *
820 : : * Alternatively, a subsystem may choose to use a single global lock to
821 : : * synchronize ->css_online() and ->css_offline() against tree-walking
822 : : * operations.
823 : : *
824 : : * It is allowed to temporarily drop RCU read lock during iteration. The
825 : : * caller is responsible for ensuring that @pos remains accessible until
826 : : * the start of the next iteration by, for example, bumping the css refcnt.
827 : : */
828 : : #define css_for_each_descendant_pre(pos, css) \
829 : : for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
830 : : (pos) = css_next_descendant_pre((pos), (css)))
831 : :
832 : : struct cgroup_subsys_state *
833 : : css_next_descendant_post(struct cgroup_subsys_state *pos,
834 : : struct cgroup_subsys_state *css);
835 : :
836 : : /**
837 : : * css_for_each_descendant_post - post-order walk of a css's descendants
838 : : * @pos: the css * to use as the loop cursor
839 : : * @css: css whose descendants to walk
840 : : *
841 : : * Similar to css_for_each_descendant_pre() but performs post-order
 842                 :            :  * traversal instead.  @css is included in the iteration and the last
843 : : * node to be visited. Note that the walk visibility guarantee described
844 : : * in pre-order walk doesn't apply the same to post-order walks.
845 : : */
846 : : #define css_for_each_descendant_post(pos, css) \
847 : : for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
848 : : (pos) = css_next_descendant_post((pos), (css)))
849 : :
850 : : /* A css_task_iter should be treated as an opaque object */
851 : : struct css_task_iter {
852 : : struct cgroup_subsys_state *origin_css;
853 : : struct list_head *cset_link;
854 : : struct list_head *task;
855 : : };
856 : :
857 : : void css_task_iter_start(struct cgroup_subsys_state *css,
858 : : struct css_task_iter *it);
859 : : struct task_struct *css_task_iter_next(struct css_task_iter *it);
860 : : void css_task_iter_end(struct css_task_iter *it);
861 : :
862 : : int css_scan_tasks(struct cgroup_subsys_state *css,
863 : : bool (*test)(struct task_struct *, void *),
864 : : void (*process)(struct task_struct *, void *),
865 : : void *data, struct ptr_heap *heap);
866 : :
867 : : int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
868 : : int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
869 : :
870 : : struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
871 : : struct cgroup_subsys *ss);
872 : :
873 : : #else /* !CONFIG_CGROUPS */
874 : :
875 : : static inline int cgroup_init_early(void) { return 0; }
876 : : static inline int cgroup_init(void) { return 0; }
877 : : static inline void cgroup_fork(struct task_struct *p) {}
878 : : static inline void cgroup_post_fork(struct task_struct *p) {}
879 : : static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
880 : :
881 : : static inline int cgroupstats_build(struct cgroupstats *stats,
882 : : struct dentry *dentry)
883 : : {
884 : : return -EINVAL;
885 : : }
886 : :
887 : : /* No cgroups - nothing to do */
888 : : static inline int cgroup_attach_task_all(struct task_struct *from,
889 : : struct task_struct *t)
890 : : {
891 : : return 0;
892 : : }
893 : :
894 : : #endif /* !CONFIG_CGROUPS */
895 : :
896 : : #endif /* _LINUX_CGROUP_H */
|