Branch data Line data Source code
1 : : #ifndef _LINUX_CGROUP_H
2 : : #define _LINUX_CGROUP_H
3 : : /*
4 : : * cgroup interface
5 : : *
6 : : * Copyright (C) 2003 BULL SA
7 : : * Copyright (C) 2004-2006 Silicon Graphics, Inc.
8 : : *
9 : : */
10 : :
11 : : #include <linux/sched.h>
12 : : #include <linux/cpumask.h>
13 : : #include <linux/nodemask.h>
14 : : #include <linux/rcupdate.h>
15 : : #include <linux/rculist.h>
16 : : #include <linux/cgroupstats.h>
17 : : #include <linux/prio_heap.h>
18 : : #include <linux/rwsem.h>
19 : : #include <linux/idr.h>
20 : : #include <linux/workqueue.h>
21 : : #include <linux/xattr.h>
22 : : #include <linux/fs.h>
23 : : #include <linux/percpu-refcount.h>
24 : : #include <linux/seq_file.h>
25 : :
26 : : #ifdef CONFIG_CGROUPS
27 : :
28 : : struct cgroupfs_root;
29 : : struct cgroup_subsys;
30 : : struct inode;
31 : : struct cgroup;
32 : :
33 : : extern int cgroup_init_early(void);
34 : : extern int cgroup_init(void);
35 : : extern void cgroup_fork(struct task_struct *p);
36 : : extern void cgroup_post_fork(struct task_struct *p);
37 : : extern void cgroup_exit(struct task_struct *p, int run_callbacks);
38 : : extern int cgroupstats_build(struct cgroupstats *stats,
39 : : struct dentry *dentry);
40 : : extern int cgroup_load_subsys(struct cgroup_subsys *ss);
41 : : extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
42 : :
43 : : extern int proc_cgroup_show(struct seq_file *, void *);
44 : :
45 : : /*
46 : : * Define the enumeration of all cgroup subsystems.
47 : : *
48 : : * We define ids for builtin subsystems and then modular ones.
49 : : */
50 : : #define SUBSYS(_x) _x ## _subsys_id,
51 : : enum cgroup_subsys_id {
52 : : #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
53 : : #include <linux/cgroup_subsys.h>
54 : : #undef IS_SUBSYS_ENABLED
55 : : CGROUP_BUILTIN_SUBSYS_COUNT, /* follows the builtin pass of the include, i.e. the number of builtin ids */
56 : :
57 : : __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1, /* rewinds the enum so the next enumerator equals CGROUP_BUILTIN_SUBSYS_COUNT */
58 : :
59 : : #define IS_SUBSYS_ENABLED(option) IS_MODULE(option)
60 : : #include <linux/cgroup_subsys.h>
61 : : #undef IS_SUBSYS_ENABLED
62 : : CGROUP_SUBSYS_COUNT, /* builtin + modular ids; sizes the subsys[] arrays in struct cgroup and struct css_set */
63 : : };
64 : : #undef SUBSYS
65 : :
66 : : /* Per-subsystem/per-cgroup state maintained by the system. */
67 : : struct cgroup_subsys_state {
68 : : /* the cgroup that this css is attached to */
69 : : struct cgroup *cgroup;
70 : :
71 : : /* the cgroup subsystem that this css is attached to */
72 : : struct cgroup_subsys *ss;
73 : :
74 : : /* reference count - access via css_[try]get() and css_put(); those helpers leave it untouched when CSS_ROOT is set */
75 : : struct percpu_ref refcnt;
76 : :
77 : : /* the parent css, as returned by css_parent() */
78 : : struct cgroup_subsys_state *parent;
79 : :
80 : : unsigned long flags; /* CSS_* bit masks: CSS_ROOT, CSS_ONLINE */
81 : :
82 : : /* percpu_ref killing and RCU release */
83 : : struct rcu_head rcu_head;
84 : : struct work_struct destroy_work;
85 : : };
86 : :
87 : : /* bit masks (not bit numbers) for the struct cgroup_subsys_state flags field; tested with '&' */
88 : : enum {
89 : : CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem; css_get()/css_tryget()/css_put() skip refcounting it */
90 : : CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */
91 : : };
92 : :
93 : : /**
94 : : * css_get - obtain a reference on the specified css
95 : : * @css: target css
96 : : *
97 : : * The caller must already have a reference. This is a no-op when CSS_ROOT is set in @css->flags.
98 : : */
99 : : static inline void css_get(struct cgroup_subsys_state *css)
100 : : {
101 : : /* We don't need to reference count the root state */
102 : : if (!(css->flags & CSS_ROOT))
103 : : percpu_ref_get(&css->refcnt);
104 : : }
105 : :
106 : : /**
107 : : * css_tryget - try to obtain a reference on the specified css
108 : : * @css: target css
109 : : *
110 : : * Obtain a reference on @css if it's alive. The caller naturally needs to
111 : : * ensure that @css is accessible but doesn't have to be holding a
112 : : * reference on it - IOW, RCU protected access is good enough for this
113 : : * function. Returns %true if a reference count was successfully obtained;
114 : : * %false otherwise. Always returns %true, without touching the refcount, when CSS_ROOT is set.
115 : : */
116 : : static inline bool css_tryget(struct cgroup_subsys_state *css)
117 : : {
118 [ # # ]: 0 : if (css->flags & CSS_ROOT)
119 : : return true; /* root css is not reference counted, mirroring css_get() and css_put() */
120 : : return percpu_ref_tryget(&css->refcnt);
121 : : }
122 : :
123 : : /**
124 : : * css_put - put a css reference
125 : : * @css: target css
126 : : *
127 : : * Put a reference obtained via css_get() or css_tryget(). This is a no-op when CSS_ROOT is set in @css->flags.
128 : : */
129 : : static inline void css_put(struct cgroup_subsys_state *css)
130 : : {
131 [ # # ][ # # ]: 0 : if (!(css->flags & CSS_ROOT))
132 : 0 : percpu_ref_put(&css->refcnt);
133 : : }
134 : :
135 : : /* bits in struct cgroup flags field; the enumerators have no explicit values, so they are bit numbers 0..4 rather than masks */
136 : : enum {
137 : : /* Control Group is dead */
138 : : CGRP_DEAD,
139 : : /*
140 : : * Control Group has previously had a child cgroup or a task,
141 : : * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
142 : : */
143 : : CGRP_RELEASABLE,
144 : : /* Control Group requires release notifications to userspace */
145 : : CGRP_NOTIFY_ON_RELEASE,
146 : : /*
147 : : * Clone the parent's configuration when creating a new child
148 : : * cpuset cgroup. For historical reasons, this option can be
149 : : * specified at mount time and thus is implemented here.
150 : : */
151 : : CGRP_CPUSET_CLONE_CHILDREN,
152 : : /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
153 : : CGRP_SANE_BEHAVIOR,
154 : : };
155 : :
156 : : struct cgroup_name { /* name storage pointed to by the __rcu cgroup->name pointer; read through cgroup_name() */
157 : : struct rcu_head rcu_head; /* NOTE(review): presumably for RCU-deferred freeing of a replaced name - confirm in cgroup.c */
158 : : char name[]; /* flexible array member holding the name characters */
159 : : };
160 : :
161 : : struct cgroup {
162 : : unsigned long flags; /* "unsigned long" so bitops work; holds the CGRP_DEAD..CGRP_SANE_BEHAVIOR bits */
163 : :
164 : : /*
165 : : * idr allocated in-hierarchy ID.
166 : : *
167 : : * The ID of the root cgroup is always 0, and a new cgroup
168 : : * will be assigned the smallest available ID.
169 : : *
170 : : * Allocating/Removing ID must be protected by cgroup_mutex.
171 : : */
172 : : int id;
173 : :
174 : : /* the number of attached css's */
175 : : int nr_css;
176 : :
177 : : /*
178 : : * We link our 'sibling' struct into our parent's 'children'.
179 : : * Our children link their 'sibling' into our 'children'.
180 : : */
181 : : struct list_head sibling; /* my parent's children */
182 : : struct list_head children; /* my children */
183 : : struct list_head files; /* my files */
184 : :
185 : : struct cgroup *parent; /* my parent */
186 : : struct dentry *dentry; /* cgroup fs entry, RCU protected */
187 : :
188 : : /*
189 : : * Monotonically increasing unique serial number which defines a
190 : : * uniform order among all cgroups. It's guaranteed that all
191 : : * ->children lists are in the ascending order of ->serial_nr.
192 : : * It's used to allow interrupting and resuming iterations.
193 : : */
194 : : u64 serial_nr;
195 : :
196 : : /*
197 : : * This is a copy of dentry->d_name, and it's needed because
198 : : * we can't use dentry->d_name in cgroup_path().
199 : : *
200 : : * You must acquire rcu_read_lock() to access cgrp->name, and
201 : : * the only place that can change it is rename(), which is
202 : : * protected by parent dir's i_mutex.
203 : : *
204 : : * Normally you should use cgroup_name() wrapper rather than
205 : : * access it directly.
206 : : */
207 : : struct cgroup_name __rcu *name;
208 : :
209 : : /* Private pointers for each registered subsystem; one slot per enum cgroup_subsys_id value */
210 : : struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
211 : :
212 : : struct cgroupfs_root *root; /* hierarchy root; its ->flags are what cgroup_sane_behavior() tests */
213 : :
214 : : /*
215 : : * List of cgrp_cset_links pointing at css_sets with tasks in this
216 : : * cgroup. Protected by css_set_lock.
217 : : */
218 : : struct list_head cset_links;
219 : :
220 : : /*
221 : : * Linked list running through all cgroups that can
222 : : * potentially be reaped by the release agent. Protected by
223 : : * release_list_lock.
224 : : */
225 : : struct list_head release_list;
226 : :
227 : : /*
228 : : * list of pidlists, up to two for each namespace (one for procs, one
229 : : * for tasks); created on demand.
230 : : */
231 : : struct list_head pidlists;
232 : : struct mutex pidlist_mutex;
233 : :
234 : : /* dummy css with NULL ->ss, points back to this cgroup */
235 : : struct cgroup_subsys_state dummy_css;
236 : :
237 : : /* For css percpu_ref killing and RCU-protected deletion */
238 : : struct rcu_head rcu_head;
239 : : struct work_struct destroy_work;
240 : :
241 : : /* directory xattrs */
242 : : struct simple_xattrs xattrs;
243 : : };
244 : :
245 : : #define MAX_CGROUP_ROOT_NAMELEN 64
246 : :
247 : : /* cgroupfs_root->flags */
248 : : enum {
249 : : /*
250 : : * Unfortunately, cgroup core and various controllers are riddled
251 : : * with idiosyncrasies and pointless options. The following flag,
252 : : * when set, will force sane behavior - some options are forced on,
253 : : * others are disallowed, and some controllers will change their
254 : : * hierarchical or other behaviors.
255 : : *
256 : : * The set of behaviors affected by this flag is still being
257 : : * determined and developed and the mount option for this flag is
258 : : * prefixed with __DEVEL__. The prefix will be dropped once we
259 : : * reach the point where all behaviors are compatible with the
260 : : * planned unified hierarchy, which will automatically turn on this
261 : : * flag.
262 : : *
263 : : * The following are the behaviors currently affected by this flag.
264 : : *
265 : : * - Mount options "noprefix" and "clone_children" are disallowed.
266 : : * Also, cgroupfs file cgroup.clone_children is not created.
267 : : *
268 : : * - When mounting an existing superblock, mount options should
269 : : * match.
270 : : *
271 : : * - Remount is disallowed.
272 : : *
273 : : * - rename(2) is disallowed.
274 : : *
275 : : * - "tasks" is removed. Everything should be at process
276 : : * granularity. Use "cgroup.procs" instead.
277 : : *
278 : : * - "cgroup.procs" is not sorted. pids will be unique unless they
279 : : * got recycled in between reads.
280 : : *
281 : : * - "release_agent" and "notify_on_release" are removed.
282 : : * Replacement notification mechanism will be implemented.
283 : : *
284 : : * - cpuset: tasks will be kept in empty cpusets when hotplug happens
285 : : * and take masks of ancestors with non-empty cpus/mems, instead of
286 : : * being moved to an ancestor.
287 : : *
288 : : * - cpuset: a task can be moved into an empty cpuset, and again it
289 : : * takes masks of ancestors.
290 : : *
291 : : * - memcg: use_hierarchy is on by default and the cgroup file for
292 : : * the flag is not created.
293 : : *
294 : : * - blkcg: blk-throttle becomes properly hierarchical.
295 : : */
296 : : CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
297 : :
298 : : CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */
299 : : CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */
300 : :
301 : : /* mount options live below bit 16 */
302 : : CGRP_ROOT_OPTION_MASK = (1 << 16) - 1,
303 : :
304 : : CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */
305 : : };
306 : :
307 : : /*
308 : : * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
309 : : * associated with a superblock to form an active hierarchy. This is
310 : : * internal to cgroup core. Don't access directly from controllers.
311 : : */
312 : : struct cgroupfs_root {
313 : : struct super_block *sb;
314 : :
315 : : /* The bitmask of subsystems attached to this hierarchy */
316 : : unsigned long subsys_mask;
317 : :
318 : : /* Unique id for this hierarchy. */
319 : : int hierarchy_id;
320 : :
321 : : /* The root cgroup for this hierarchy */
322 : : struct cgroup top_cgroup;
323 : :
324 : : /* Tracks how many cgroups are currently defined in hierarchy.*/
325 : : int number_of_cgroups;
326 : :
327 : : /* A list running through the active hierarchies */
328 : : struct list_head root_list;
329 : :
330 : : /* Hierarchy-specific flags: CGRP_ROOT_* masks, tested by cgroup_sane_behavior() */
331 : : unsigned long flags;
332 : :
333 : : /* IDs for cgroups in this hierarchy */
334 : : struct idr cgroup_idr;
335 : :
336 : : /* The path to use for release notifications. */
337 : : char release_agent_path[PATH_MAX];
338 : :
339 : : /* The name for this hierarchy - may be empty */
340 : : char name[MAX_CGROUP_ROOT_NAMELEN];
341 : : };
342 : :
343 : : /*
344 : : * A css_set is a structure holding pointers to a set of
345 : : * cgroup_subsys_state objects. This saves space in the task struct
346 : : * object and speeds up fork()/exit(), since a single inc/dec and a
347 : : * list_add()/del() can bump the reference count on the entire cgroup
348 : : * set for a task.
349 : : */
350 : :
351 : : struct css_set {
352 : :
353 : : /* Reference count */
354 : : atomic_t refcount;
355 : :
356 : : /*
357 : : * List running through all css_sets in the same hash
358 : : * slot. Protected by css_set_lock
359 : : */
360 : : struct hlist_node hlist;
361 : :
362 : : /*
363 : : * List running through all tasks using this
364 : : * css_set. Protected by css_set_lock
365 : : */
366 : : struct list_head tasks;
367 : :
368 : : /*
369 : : * List of cgrp_cset_links pointing at cgroups referenced from this
370 : : * css_set. Protected by css_set_lock.
371 : : */
372 : : struct list_head cgrp_links;
373 : :
374 : : /*
375 : : * Set of subsystem states, one for each subsystem. This array
376 : : * is immutable after creation apart from the init_css_set
377 : : * during subsystem registration (at boot time) and modular subsystem
378 : : * loading/unloading. task_css_check() indexes it by subsystem id.
379 : : */
380 : : struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
381 : :
382 : : /* For RCU-protected deletion */
383 : : struct rcu_head rcu_head;
384 : : };
385 : :
386 : : /*
387 : : * struct cftype: handler definitions for cgroup control files
388 : : *
389 : : * When reading/writing to a file:
390 : : * - the cgroup to use is file->f_dentry->d_parent->d_fsdata
391 : : * - the 'cftype' of the file is file->f_dentry->d_fsdata
392 : : */
393 : :
394 : : /* cftype->flags (bit masks) */
395 : : enum {
396 : : CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */
397 : : CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */
398 : : CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior (see CGRP_ROOT_SANE_BEHAVIOR) */
399 : : CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
400 : : };
401 : :
402 : : #define MAX_CFTYPE_NAME 64
403 : :
404 : : struct cftype {
405 : : /*
406 : : * By convention, the name should begin with the name of the
407 : : * subsystem, followed by a period. Zero length string indicates
408 : : * end of cftype array.
409 : : */
410 : : char name[MAX_CFTYPE_NAME];
411 : : int private; /* handler-defined value; see the trigger() comment below for one use */
412 : : /*
413 : : * If not 0, file mode is set to this value, otherwise it will
414 : : * be figured out automatically
415 : : */
416 : : umode_t mode;
417 : :
418 : : /*
419 : : * If non-zero, defines the maximum length of string that can
420 : : * be passed to write_string; defaults to 64
421 : : */
422 : : size_t max_write_len;
423 : :
424 : : /* CFTYPE_* flags */
425 : : unsigned int flags;
426 : :
427 : : /*
428 : : * The subsys this file belongs to. Initialized automatically
429 : : * during registration. NULL for cgroup core files.
430 : : */
431 : : struct cgroup_subsys *ss;
432 : :
433 : : /*
434 : : * read_u64() is a shortcut for the common case of returning a
435 : : * single integer. Use it in place of read() (NOTE(review): no read() member is declared in this struct - wording may be stale)
436 : : */
437 : : u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
438 : : /*
439 : : * read_s64() is a signed version of read_u64()
440 : : */
441 : : s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
442 : :
443 : : /* generic seq_file read interface */
444 : : int (*seq_show)(struct seq_file *sf, void *v);
445 : :
446 : : /* optional ops, implement all or none */
447 : : void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
448 : : void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
449 : : void (*seq_stop)(struct seq_file *sf, void *v);
450 : :
451 : : /*
452 : : * write_u64() is a shortcut for the common case of accepting
453 : : * a single integer (as parsed by simple_strtoull) from
454 : : * userspace. Use in place of write(); return 0 or error. (NOTE(review): no write() member is declared in this struct - wording may be stale)
455 : : */
456 : : int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
457 : : u64 val);
458 : : /*
459 : : * write_s64() is a signed version of write_u64()
460 : : */
461 : : int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
462 : : s64 val);
463 : :
464 : : /*
465 : : * write_string() is passed a nul-terminated kernelspace
466 : : * buffer of maximum length determined by max_write_len.
467 : : * Returns 0 or -ve error code.
468 : : */
469 : : int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
470 : : const char *buffer);
471 : : /*
472 : : * trigger() callback can be used to get some kick from the
473 : : * userspace, when the actual string written is not important
474 : : * at all. The private field can be used to determine the
475 : : * kick type for multiplexing.
476 : : */
477 : : int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
478 : : };
479 : :
480 : : /*
481 : : * cftype_sets describe cftypes belonging to a subsystem and are chained at
482 : : * cgroup_subsys->cftsets. Each cftset points to an array of cftypes
483 : : * terminated by zero length name.
484 : : */
485 : : struct cftype_set {
486 : : struct list_head node; /* chained at subsys->cftsets */
487 : : struct cftype *cfts; /* array of cftypes, terminated by an entry with a zero length name */
488 : : };
489 : :
490 : : /*
491 : : * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't
492 : : * access directly.
493 : : */
494 : : struct cfent {
495 : : struct list_head node;
496 : : struct dentry *dentry;
497 : : struct cftype *type; /* returned by seq_cft() */
498 : : struct cgroup_subsys_state *css; /* returned by seq_css() */
499 : :
500 : : /* file xattrs */
501 : : struct simple_xattrs xattrs;
502 : : };
503 : :
504 : : /* seq_file->private points to the following, only ->priv is public */
505 : : struct cgroup_open_file {
506 : : struct cfent *cfe; /* file entry; seq_css() and seq_cft() read ->css and ->type through it */
507 : : void *priv; /* the only public member, per the comment above */
508 : : };
509 : :
510 : : /*
511 : : * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
512 : : * function can be called as long as @cgrp is accessible. Returns true iff CGRP_ROOT_SANE_BEHAVIOR is set in @cgrp->root->flags.
513 : : */
514 : : static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
515 : : {
516 : 132 : return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
517 : : }
518 : :
519 : : /* Return the name string of @cgrp. Caller should hold rcu_read_lock(): ->name is fetched with rcu_dereference() */
520 : : static inline const char *cgroup_name(const struct cgroup *cgrp)
521 : : {
522 : : return rcu_dereference(cgrp->name)->name;
523 : : }
524 : :
525 : : static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
526 : : {
527 : : struct cgroup_open_file *of = seq->private; /* seq->private points to a struct cgroup_open_file */
528 : 197 : return of->cfe->css; /* css recorded in the file's cfent */
529 : : }
530 : :
531 : : static inline struct cftype *seq_cft(struct seq_file *seq)
532 : : {
533 : : struct cgroup_open_file *of = seq->private; /* seq->private points to a struct cgroup_open_file */
534 : 212 : return of->cfe->type; /* cftype recorded in the file's cfent */
535 : : }
536 : :
537 : : int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
538 : : int cgroup_rm_cftypes(struct cftype *cfts);
539 : :
540 : : bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
541 : :
542 : : int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
543 : : int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
544 : :
545 : : int cgroup_task_count(const struct cgroup *cgrp);
546 : :
547 : : /*
548 : : * Control Group taskset, used to pass around set of tasks to cgroup_subsys
549 : : * methods.
550 : : */
551 : : struct cgroup_taskset;
552 : : struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
553 : : struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
554 : : struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
555 : : int subsys_id);
556 : : int cgroup_taskset_size(struct cgroup_taskset *tset);
557 : :
558 : : /**
559 : : * cgroup_taskset_for_each - iterate cgroup_taskset
560 : : * @task: the loop cursor
561 : : * @skip_css: skip if task's css matches this, %NULL to iterate through all (evaluated more than once per iteration)
562 : : * @tset: taskset to iterate
563 : : * Expands to a for statement followed by an unbraced if, so an else written right after the loop body pairs with the macro's if. */
564 : : #define cgroup_taskset_for_each(task, skip_css, tset) \
565 : : for ((task) = cgroup_taskset_first((tset)); (task); \
566 : : (task) = cgroup_taskset_next((tset))) \
567 : : if (!(skip_css) || \
568 : : cgroup_taskset_cur_css((tset), \
569 : : (skip_css)->ss->subsys_id) != (skip_css))
570 : :
571 : : /*
572 : : * Control Group subsystem type.
573 : : * See Documentation/cgroups/cgroups.txt for details
574 : : */
575 : :
576 : : struct cgroup_subsys {
577 : : struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
578 : : int (*css_online)(struct cgroup_subsys_state *css);
579 : : void (*css_offline)(struct cgroup_subsys_state *css);
580 : : void (*css_free)(struct cgroup_subsys_state *css);
581 : :
582 : : int (*allow_attach)(struct cgroup_subsys_state *css,
583 : : struct cgroup_taskset *tset); /* NOTE(review): same signature as can_attach; how the two differ is not documented in this header */
584 : : int (*can_attach)(struct cgroup_subsys_state *css,
585 : : struct cgroup_taskset *tset);
586 : : void (*cancel_attach)(struct cgroup_subsys_state *css,
587 : : struct cgroup_taskset *tset);
588 : : void (*attach)(struct cgroup_subsys_state *css,
589 : : struct cgroup_taskset *tset);
590 : : void (*fork)(struct task_struct *task);
591 : : void (*exit)(struct cgroup_subsys_state *css,
592 : : struct cgroup_subsys_state *old_css,
593 : : struct task_struct *task);
594 : : void (*bind)(struct cgroup_subsys_state *root_css);
595 : :
596 : : int subsys_id; /* presumably this subsystem's enum cgroup_subsys_id value; cgroup_taskset_for_each() passes it to cgroup_taskset_cur_css() */
597 : : int disabled;
598 : : int early_init;
599 : :
600 : : /*
601 : : * If %false, this subsystem is properly hierarchical -
602 : : * configuration, resource accounting and restriction on a parent
603 : : * cgroup cover those of its children. If %true, hierarchy support
604 : : * is broken in some ways - some subsystems ignore hierarchy
605 : : * completely while others are only implemented half-way.
606 : : *
607 : : * It's now disallowed to create nested cgroups if the subsystem is
608 : : * broken and cgroup core will emit a warning message on such
609 : : * cases. Eventually, all subsystems will be made properly
610 : : * hierarchical and this will go away.
611 : : */
612 : : bool broken_hierarchy;
613 : : bool warned_broken_hierarchy; /* presumably records that the warning described above was already emitted - confirm in cgroup.c */
614 : :
615 : : #define MAX_CGROUP_TYPE_NAMELEN 32
616 : : const char *name;
617 : :
618 : : /* link to parent, protected by cgroup_lock() */
619 : : struct cgroupfs_root *root;
620 : :
621 : : /* list of cftype_sets (struct cftype_set, linked through ->node) */
622 : : struct list_head cftsets;
623 : :
624 : : /* base cftypes, automatically [de]registered with subsys itself */
625 : : struct cftype *base_cftypes;
626 : : struct cftype_set base_cftset;
627 : :
628 : : /* should be defined only by modular subsystems */
629 : : struct module *module;
630 : : };
631 : :
632 : : #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
633 : : #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
634 : : #include <linux/cgroup_subsys.h>
635 : : #undef IS_SUBSYS_ENABLED
636 : : #undef SUBSYS
637 : :
638 : : /**
639 : : * css_parent - find the parent css
640 : : * @css: the target cgroup_subsys_state
641 : : *
642 : : * Return the parent css of @css. This function is guaranteed to return
643 : : * non-NULL parent as long as @css isn't the root. It only reads @css->parent; no reference is taken.
644 : : */
645 : : static inline
646 : : struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
647 : : {
648 : : return css->parent;
649 : : }
650 : :
651 : : /**
652 : : * task_css_set_check - obtain a task's css_set with extra access conditions
653 : : * @task: the task to obtain css_set for
654 : : * @__c: extra condition expression to be passed to rcu_dereference_check()
655 : : *
656 : : * A task's css_set is RCU protected, initialized and exited while holding
657 : : * task_lock(), and can only be modified while holding both cgroup_mutex
658 : : * and task_lock() while the task is alive. This macro verifies that the
659 : : * caller is inside proper critical section and returns @task's css_set.
660 : : *
661 : : * The caller can also specify additional allowed conditions via @__c, such
662 : : * as locks used during the cgroup_subsys::attach() methods. Without CONFIG_PROVE_RCU the macro is a plain rcu_dereference() and @__c is not evaluated.
663 : : */
664 : : #ifdef CONFIG_PROVE_RCU
665 : : extern struct mutex cgroup_mutex;
666 : : #define task_css_set_check(task, __c) \
667 : : rcu_dereference_check((task)->cgroups, \
668 : : lockdep_is_held(&(task)->alloc_lock) || \
669 : : lockdep_is_held(&cgroup_mutex) || (__c))
670 : : #else
671 : : #define task_css_set_check(task, __c) \
672 : : rcu_dereference((task)->cgroups)
673 : : #endif
674 : :
675 : : /**
676 : : * task_css_check - obtain css for (task, subsys) w/ extra access conds
677 : : * @task: the target task
678 : : * @subsys_id: the target subsystem ID
679 : : * @__c: extra condition expression to be passed to rcu_dereference_check()
680 : : *
681 : : * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The
682 : : * synchronization rules are the same as task_css_set_check(). Expands to the ->subsys[@subsys_id] element of that css_set.
683 : : */
684 : : #define task_css_check(task, subsys_id, __c) \
685 : : task_css_set_check((task), (__c))->subsys[(subsys_id)]
686 : :
687 : : /**
688 : : * task_css_set - obtain a task's css_set
689 : : * @task: the task to obtain css_set for
690 : : *
691 : : * Same as task_css_set_check() with no extra access condition (@__c is false).
692 : : */
693 : : static inline struct css_set *task_css_set(struct task_struct *task)
694 : : {
695 : 2221480 : return task_css_set_check(task, false);
696 : : }
697 : :
698 : : /**
699 : : * task_css - obtain css for (task, subsys)
700 : : * @task: the target task
701 : : * @subsys_id: the target subsystem ID
702 : : *
703 : : * Same as task_css_check() with no extra access condition (@__c is false).
704 : : */
705 : : static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
706 : : int subsys_id)
707 : : {
708 : : return task_css_check(task, subsys_id, false);
709 : : }
710 : :
711 : : static inline struct cgroup *task_cgroup(struct task_struct *task,
712 : : int subsys_id)
713 : : {
714 : : return task_css(task, subsys_id)->cgroup; /* cgroup that @task's css for @subsys_id is attached to; goes through task_css(), so its access rules apply */
715 : : }
716 : :
717 : : struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
718 : : struct cgroup_subsys_state *parent);
719 : :
720 : : struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
721 : :
722 : : /**
723 : : * css_for_each_child - iterate through children of a css
724 : : * @pos: the css * to use as the loop cursor
725 : : * @parent: css whose children to walk
726 : : *
727 : : * Walk @parent's children. Must be called under rcu_read_lock(). A child
728 : : * css which hasn't finished ->css_online() or already has finished
729 : : * ->css_offline() may show up during traversal and it's each subsystem's
730 : : * responsibility to verify that each @pos is alive.
731 : : *
732 : : * If a subsystem synchronizes against the parent in its ->css_online() and
733 : : * before starting iterating, a css which finished ->css_online() is
734 : : * guaranteed to be visible in the future iterations.
735 : : *
736 : : * It is allowed to temporarily drop RCU read lock during iteration. The
737 : : * caller is responsible for ensuring that @pos remains accessible until
738 : : * the start of the next iteration by, for example, bumping the css refcnt.
739 : : */
740 : : #define css_for_each_child(pos, parent) \
741 : : for ((pos) = css_next_child(NULL, (parent)); (pos); \
742 : : (pos) = css_next_child((pos), (parent)))
743 : :
744 : : struct cgroup_subsys_state *
745 : : css_next_descendant_pre(struct cgroup_subsys_state *pos,
746 : : struct cgroup_subsys_state *css);
747 : :
748 : : struct cgroup_subsys_state *
749 : : css_rightmost_descendant(struct cgroup_subsys_state *pos);
750 : :
751 : : /**
752 : : * css_for_each_descendant_pre - pre-order walk of a css's descendants
753 : : * @pos: the css * to use as the loop cursor
754 : : * @css: css whose descendants to walk
755 : : *
756 : : * Walk @css's descendants. @css is included in the iteration and the
757 : : * first node to be visited. Must be called under rcu_read_lock(). A
758 : : * descendant css which hasn't finished ->css_online() or already has
759 : : * finished ->css_offline() may show up during traversal and it's each
760 : : * subsystem's responsibility to verify that each @pos is alive.
761 : : *
762 : : * If a subsystem synchronizes against the parent in its ->css_online() and
763 : : * before starting iterating, and synchronizes against @pos on each
764 : : * iteration, any descendant css which finished ->css_online() is
765 : : * guaranteed to be visible in the future iterations.
766 : : *
767 : : * In other words, the following guarantees that a descendant can't escape
768 : : * state updates of its ancestors.
769 : : *
770 : : * my_online(@css)
771 : : * {
772 : : * Lock @css's parent and @css;
773 : : * Inherit state from the parent;
774 : : * Unlock both.
775 : : * }
776 : : *
777 : : * my_update_state(@css)
778 : : * {
779 : : * css_for_each_descendant_pre(@pos, @css) {
780 : : * Lock @pos;
781 : : * if (@pos == @css)
782 : : * Update @css's state;
783 : : * else
784 : : * Verify @pos is alive and inherit state from its parent;
785 : : * Unlock @pos;
786 : : * }
787 : : * }
788 : : *
789 : : * As long as the inheriting step, including checking the parent state, is
790 : : * enclosed inside @pos locking, double-locking the parent isn't necessary
791 : : * while inheriting. The state update to the parent is guaranteed to be
792 : : * visible by walking order and, as long as inheriting operations to the
793 : : * same @pos are atomic to each other, multiple updates racing each other
794 : : * still result in the correct state. It's guaranteed that at least one
795 : : * inheritance happens for any css after the latest update to its parent.
796 : : *
797 : : * If checking parent's state requires locking the parent, each inheriting
798 : : * iteration should lock and unlock both @pos->parent and @pos.
799 : : *
800 : : * Alternatively, a subsystem may choose to use a single global lock to
801 : : * synchronize ->css_online() and ->css_offline() against tree-walking
802 : : * operations.
803 : : *
804 : : * It is allowed to temporarily drop RCU read lock during iteration. The
805 : : * caller is responsible for ensuring that @pos remains accessible until
806 : : * the start of the next iteration by, for example, bumping the css refcnt.
807 : : */
808 : : #define css_for_each_descendant_pre(pos, css) \
809 : : for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
810 : : (pos) = css_next_descendant_pre((pos), (css)))
811 : :
812 : : struct cgroup_subsys_state *
813 : : css_next_descendant_post(struct cgroup_subsys_state *pos,
814 : : struct cgroup_subsys_state *css);
815 : :
816 : : /**
817 : : * css_for_each_descendant_post - post-order walk of a css's descendants
818 : : * @pos: the css * to use as the loop cursor
819 : : * @css: css whose descendants to walk
820 : : *
821 : : * Similar to css_for_each_descendant_pre() but performs post-order
822 : : * traversal instead. @css is included in the iteration and the last
823 : : * node to be visited. Note that the walk visibility guarantee described
824 : : * in pre-order walk doesn't apply the same to post-order walks.
825 : : */
826 : : #define css_for_each_descendant_post(pos, css) \
827 : : for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
828 : : (pos) = css_next_descendant_post((pos), (css)))
829 : :
830 : : /* A css_task_iter should be treated as an opaque object */
831 : : struct css_task_iter {
832 : : struct cgroup_subsys_state *origin_css;
833 : : struct list_head *cset_link;
834 : : struct list_head *task;
835 : : };
836 : :
837 : : void css_task_iter_start(struct cgroup_subsys_state *css,
838 : : struct css_task_iter *it);
839 : : struct task_struct *css_task_iter_next(struct css_task_iter *it);
840 : : void css_task_iter_end(struct css_task_iter *it);
841 : :
842 : : int css_scan_tasks(struct cgroup_subsys_state *css,
843 : : bool (*test)(struct task_struct *, void *),
844 : : void (*process)(struct task_struct *, void *),
845 : : void *data, struct ptr_heap *heap);
846 : :
847 : : int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
848 : : int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
849 : :
850 : : struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
851 : : struct cgroup_subsys *ss);
852 : :
853 : : #else /* !CONFIG_CGROUPS */
854 : :
855 : : static inline int cgroup_init_early(void) { return 0; }
856 : : static inline int cgroup_init(void) { return 0; }
857 : : static inline void cgroup_fork(struct task_struct *p) {}
858 : : static inline void cgroup_post_fork(struct task_struct *p) {}
859 : : static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
860 : :
861 : : static inline int cgroupstats_build(struct cgroupstats *stats,
862 : : struct dentry *dentry)
863 : : {
864 : : return -EINVAL; /* !CONFIG_CGROUPS stub: always fails and leaves @stats untouched */
865 : : }
866 : :
867 : : /* No cgroups - nothing to do */
868 : : static inline int cgroup_attach_task_all(struct task_struct *from,
869 : : struct task_struct *t)
870 : : {
871 : : return 0;
872 : : }
873 : :
874 : : #endif /* !CONFIG_CGROUPS */
875 : :
876 : : #endif /* _LINUX_CGROUP_H */
|