LCOV - code coverage report
Current view: top level - kernel - cgroup.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 556 1494 37.2 %
Date: 2014-04-07 Functions: 65 137 47.4 %
Branches: 233 1070 21.8 %

           Branch data     Line data    Source code
       1                 :            : /*
       2                 :            :  *  Generic process-grouping system.
       3                 :            :  *
       4                 :            :  *  Based originally on the cpuset system, extracted by Paul Menage
       5                 :            :  *  Copyright (C) 2006 Google, Inc
       6                 :            :  *
       7                 :            :  *  Notifications support
       8                 :            :  *  Copyright (C) 2009 Nokia Corporation
       9                 :            :  *  Author: Kirill A. Shutemov
      10                 :            :  *
      11                 :            :  *  Copyright notices from the original cpuset code:
      12                 :            :  *  --------------------------------------------------
      13                 :            :  *  Copyright (C) 2003 BULL SA.
      14                 :            :  *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
      15                 :            :  *
      16                 :            :  *  Portions derived from Patrick Mochel's sysfs code.
      17                 :            :  *  sysfs is Copyright (c) 2001-3 Patrick Mochel
      18                 :            :  *
      19                 :            :  *  2003-10-10 Written by Simon Derr.
      20                 :            :  *  2003-10-22 Updates by Stephen Hemminger.
      21                 :            :  *  2004 May-July Rework by Paul Jackson.
      22                 :            :  *  ---------------------------------------------------
      23                 :            :  *
      24                 :            :  *  This file is subject to the terms and conditions of the GNU General Public
      25                 :            :  *  License.  See the file COPYING in the main directory of the Linux
      26                 :            :  *  distribution for more details.
      27                 :            :  */
      28                 :            : 
      29                 :            : #include <linux/cgroup.h>
      30                 :            : #include <linux/cred.h>
      31                 :            : #include <linux/ctype.h>
      32                 :            : #include <linux/errno.h>
      33                 :            : #include <linux/init_task.h>
      34                 :            : #include <linux/kernel.h>
      35                 :            : #include <linux/list.h>
      36                 :            : #include <linux/mm.h>
      37                 :            : #include <linux/mutex.h>
      38                 :            : #include <linux/mount.h>
      39                 :            : #include <linux/pagemap.h>
      40                 :            : #include <linux/proc_fs.h>
      41                 :            : #include <linux/rcupdate.h>
      42                 :            : #include <linux/sched.h>
      43                 :            : #include <linux/backing-dev.h>
      44                 :            : #include <linux/seq_file.h>
      45                 :            : #include <linux/slab.h>
      46                 :            : #include <linux/magic.h>
      47                 :            : #include <linux/spinlock.h>
      48                 :            : #include <linux/string.h>
      49                 :            : #include <linux/sort.h>
      50                 :            : #include <linux/kmod.h>
      51                 :            : #include <linux/module.h>
      52                 :            : #include <linux/delayacct.h>
      53                 :            : #include <linux/cgroupstats.h>
      54                 :            : #include <linux/hashtable.h>
      55                 :            : #include <linux/namei.h>
      56                 :            : #include <linux/pid_namespace.h>
      57                 :            : #include <linux/idr.h>
      58                 :            : #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
      59                 :            : #include <linux/eventfd.h>
      60                 :            : #include <linux/poll.h>
      61                 :            : #include <linux/flex_array.h> /* used in cgroup_attach_task */
      62                 :            : #include <linux/kthread.h>
      63                 :            : #include <linux/file.h>
      64                 :            : 
      65                 :            : #include <linux/atomic.h>
      66                 :            : 
      67                 :            : /*
      68                 :            :  * cgroup_mutex is the master lock.  Any modification to cgroup or its
      69                 :            :  * hierarchy must be performed while holding it.
      70                 :            :  *
      71                 :            :  * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
      72                 :            :  * cgroupfs_root of any cgroup hierarchy - subsys list, flags,
      73                 :            :  * release_agent_path and so on.  Modifying requires both cgroup_mutex and
      74                 :            :  * cgroup_root_mutex.  Readers can acquire either of the two.  This is to
      75                 :            :  * break the following locking order cycle.
      76                 :            :  *
      77                 :            :  *  A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
      78                 :            :  *  B. namespace_sem -> cgroup_mutex
      79                 :            :  *
      80                 :            :  * B happens only through cgroup_show_options() and using cgroup_root_mutex
      81                 :            :  * breaks it.
      82                 :            :  */
      83                 :            : #ifdef CONFIG_PROVE_RCU
      84                 :            : DEFINE_MUTEX(cgroup_mutex);
      85                 :            : EXPORT_SYMBOL_GPL(cgroup_mutex);        /* only for lockdep */
      86                 :            : #else
      87                 :            : static DEFINE_MUTEX(cgroup_mutex);
      88                 :            : #endif
      89                 :            : 
      90                 :            : static DEFINE_MUTEX(cgroup_root_mutex);
      91                 :            : 
      92                 :            : /*
      93                 :            :  * cgroup destruction makes heavy use of work items and there can be a lot
      94                 :            :  * of concurrent destructions.  Use a separate workqueue so that cgroup
      95                 :            :  * destruction work items don't end up filling up max_active of system_wq
      96                 :            :  * which may lead to deadlock.
      97                 :            :  */
      98                 :            : static struct workqueue_struct *cgroup_destroy_wq;
      99                 :            : 
     100                 :            : /*
     101                 :            :  * Generate an array of cgroup subsystem pointers. At boot time, this is
     102                 :            :  * populated with the built in subsystems, and modular subsystems are
     103                 :            :  * registered after that. The mutable section of this array is protected by
     104                 :            :  * cgroup_mutex.
     105                 :            :  */
     106                 :            : #define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
     107                 :            : #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
     108                 :            : static struct cgroup_subsys *cgroup_subsys[CGROUP_SUBSYS_COUNT] = {
     109                 :            : #include <linux/cgroup_subsys.h>
     110                 :            : };
     111                 :            : 
     112                 :            : /*
     113                 :            :  * The dummy hierarchy, reserved for the subsystems that are otherwise
     114                 :            :  * unattached - it never has more than a single cgroup, and all tasks are
     115                 :            :  * part of that cgroup.
     116                 :            :  */
     117                 :            : static struct cgroupfs_root cgroup_dummy_root;
     118                 :            : 
     119                 :            : /* cgroup_dummy_top is a shorthand for the dummy hierarchy's top cgroup */
     120                 :            : static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
     121                 :            : 
     122                 :            : /*
     123                 :            :  * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
     124                 :            :  */
     125                 :            : struct cfent {
     126                 :            :         struct list_head                node;
     127                 :            :         struct dentry                   *dentry;
     128                 :            :         struct cftype                   *type;
     129                 :            :         struct cgroup_subsys_state      *css;
     130                 :            : 
     131                 :            :         /* file xattrs */
     132                 :            :         struct simple_xattrs            xattrs;
     133                 :            : };
     134                 :            : 
     135                 :            : /*
     136                 :            :  * cgroup_event represents events which userspace wants to receive.
     137                 :            :  */
     138                 :            : struct cgroup_event {
     139                 :            :         /*
     140                 :            :          * css which the event belongs to.
     141                 :            :          */
     142                 :            :         struct cgroup_subsys_state *css;
     143                 :            :         /*
     144                 :            :          * Control file with which the event is associated.
     145                 :            :          */
     146                 :            :         struct cftype *cft;
     147                 :            :         /*
     148                 :            :          * eventfd to signal userspace about the event.
     149                 :            :          */
     150                 :            :         struct eventfd_ctx *eventfd;
     151                 :            :         /*
     152                 :            :          * Each of these is stored in a list by the cgroup.
     153                 :            :          */
     154                 :            :         struct list_head list;
     155                 :            :         /*
     156                 :            :          * All fields below are needed to unregister the event when
     157                 :            :          * userspace closes eventfd.
     158                 :            :          */
     159                 :            :         poll_table pt;
     160                 :            :         wait_queue_head_t *wqh;
     161                 :            :         wait_queue_t wait;
     162                 :            :         struct work_struct remove;
     163                 :            : };
     164                 :            : 
     165                 :            : /* The list of hierarchy roots */
     166                 :            : 
     167                 :            : static LIST_HEAD(cgroup_roots);
     168                 :            : static int cgroup_root_count;
     169                 :            : 
     170                 :            : /*
     171                 :            :  * Hierarchy ID allocation and mapping.  It follows the same exclusion
     172                 :            :  * rules as other root ops - both cgroup_mutex and cgroup_root_mutex for
     173                 :            :  * writes, either for reads.
     174                 :            :  */
     175                 :            : static DEFINE_IDR(cgroup_hierarchy_idr);
     176                 :            : 
     177                 :            : static struct cgroup_name root_cgroup_name = { .name = "/" };
     178                 :            : 
     179                 :            : /*
     180                 :            :  * Assign a monotonically increasing serial number to cgroups.  It
     181                 :            :  * guarantees cgroups with bigger numbers are newer than those with smaller
     182                 :            :  * numbers.  Also, as cgroups are always appended to the parent's
     183                 :            :  * ->children list, it guarantees that sibling cgroups are always sorted in
     184                 :            :  * the ascending serial number order on the list.  Protected by
     185                 :            :  * cgroup_mutex.
     186                 :            :  */
     187                 :            : static u64 cgroup_serial_nr_next = 1;
     188                 :            : 
     189                 :            : /* This flag indicates whether tasks in the fork and exit paths should
     190                 :            :  * check for fork/exit handlers to call. This avoids us having to do
     191                 :            :  * extra work in the fork/exit path if none of the subsystems need to
     192                 :            :  * be called.
     193                 :            :  */
     194                 :            : static int need_forkexit_callback __read_mostly;
     195                 :            : 
     196                 :            : static struct cftype cgroup_base_files[];
     197                 :            : 
     198                 :            : static void cgroup_destroy_css_killed(struct cgroup *cgrp);
     199                 :            : static int cgroup_destroy_locked(struct cgroup *cgrp);
     200                 :            : static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
     201                 :            :                               bool is_add);
     202                 :            : static int cgroup_file_release(struct inode *inode, struct file *file);
     203                 :            : 
     204                 :            : /**
     205                 :            :  * cgroup_css - obtain a cgroup's css for the specified subsystem
     206                 :            :  * @cgrp: the cgroup of interest
     207                 :            :  * @ss: the subsystem of interest (%NULL returns the dummy_css)
     208                 :            :  *
     209                 :            :  * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
     210                 :            :  * function must be called either under cgroup_mutex or rcu_read_lock() and
     211                 :            :  * the caller is responsible for pinning the returned css if it wants to
     212                 :            :  * keep accessing it outside the said locks.  This function may return
     213                 :            :  * %NULL if @cgrp doesn't have @ss enabled.
     214                 :            :  */
     215                 :            : static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
     216                 :            :                                               struct cgroup_subsys *ss)
     217                 :            : {
     218 [ #  # ][ #  #  :          5 :         if (ss)
          #  #  #  #  #  
              # ][ #  # ]
         [ #  # ][ #  # ]
           [ #  #  #  # ]
                 [ #  # ]
           [ #  #  #  # ]
           [ #  #  -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     219                 :          0 :                 return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
     220                 :            :                                              lockdep_is_held(&cgroup_mutex));
     221                 :            :         else
     222                 :          5 :                 return &cgrp->dummy_css;
     223                 :            : }
     224                 :            : 
     225                 :            : /* convenient tests for these bits */
     226                 :            : static inline bool cgroup_is_dead(const struct cgroup *cgrp)
     227                 :            : {
     228                 :          0 :         return test_bit(CGRP_DEAD, &cgrp->flags);
     229                 :            : }
     230                 :            : 
     231                 :            : /**
     232                 :            :  * cgroup_is_descendant - test ancestry
     233                 :            :  * @cgrp: the cgroup to be tested
     234                 :            :  * @ancestor: possible ancestor of @cgrp
     235                 :            :  *
     236                 :            :  * Test whether @cgrp is a descendant of @ancestor.  It also returns %true
     237                 :            :  * if @cgrp == @ancestor.  This function is safe to call as long as @cgrp
     238                 :            :  * and @ancestor are accessible.
     239                 :            :  */
     240                 :          0 : bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
     241                 :            : {
     242         [ #  # ]:          0 :         while (cgrp) {
     243         [ #  # ]:          0 :                 if (cgrp == ancestor)
     244                 :            :                         return true;
     245                 :          0 :                 cgrp = cgrp->parent;
     246                 :            :         }
     247                 :            :         return false;
     248                 :            : }
     249                 :            : EXPORT_SYMBOL_GPL(cgroup_is_descendant);
     250                 :            : 
     251                 :            : static int cgroup_is_releasable(const struct cgroup *cgrp)
     252                 :            : {
     253                 :            :         const int bits =
     254                 :            :                 (1 << CGRP_RELEASABLE) |
     255                 :            :                 (1 << CGRP_NOTIFY_ON_RELEASE);
     256                 :          2 :         return (cgrp->flags & bits) == bits;
     257                 :            : }
     258                 :            : 
     259                 :            : static int notify_on_release(const struct cgroup *cgrp)
     260                 :            : {
     261                 :            :         return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
     262                 :            : }
     263                 :            : 
     264                 :            : /**
     265                 :            :  * for_each_subsys - iterate all loaded cgroup subsystems
     266                 :            :  * @ss: the iteration cursor
     267                 :            :  * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
     268                 :            :  *
     269                 :            :  * Should be called under cgroup_mutex.
     270                 :            :  */
     271                 :            : #define for_each_subsys(ss, i)                                          \
     272                 :            :         for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++)                      \
     273                 :            :                 if (({ lockdep_assert_held(&cgroup_mutex);          \
     274                 :            :                        !((ss) = cgroup_subsys[i]); })) { }              \
     275                 :            :                 else
     276                 :            : 
     277                 :            : /**
     278                 :            :  * for_each_builtin_subsys - iterate all built-in cgroup subsystems
     279                 :            :  * @ss: the iteration cursor
     280                 :            :  * @i: the index of @ss, CGROUP_BUILTIN_SUBSYS_COUNT after reaching the end
     281                 :            :  *
     282                 :            :  * Built-in subsystems are always present and iteration itself doesn't
     283                 :            :  * require any synchronization.
     284                 :            :  */
     285                 :            : #define for_each_builtin_subsys(ss, i)                                  \
     286                 :            :         for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT &&           \
     287                 :            :              (((ss) = cgroup_subsys[i]) || true); (i)++)
     288                 :            : 
     289                 :            : /* iterate each subsystem attached to a hierarchy */
     290                 :            : #define for_each_root_subsys(root, ss)                                  \
     291                 :            :         list_for_each_entry((ss), &(root)->subsys_list, sibling)
     292                 :            : 
     293                 :            : /* iterate across the active hierarchies */
     294                 :            : #define for_each_active_root(root)                                      \
     295                 :            :         list_for_each_entry((root), &cgroup_roots, root_list)
     296                 :            : 
     297                 :            : static inline struct cgroup *__d_cgrp(struct dentry *dentry)
     298                 :            : {
     299                 :            :         return dentry->d_fsdata;
     300                 :            : }
     301                 :            : 
     302                 :            : static inline struct cfent *__d_cfe(struct dentry *dentry)
     303                 :            : {
     304                 :            :         return dentry->d_fsdata;
     305                 :            : }
     306                 :            : 
     307                 :          0 : static inline struct cftype *__d_cft(struct dentry *dentry)
     308                 :            : {
     309                 :          0 :         return __d_cfe(dentry)->type;
     310                 :            : }
     311                 :            : 
     312                 :            : /**
     313                 :            :  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
     314                 :            :  * @cgrp: the cgroup to be checked for liveness
     315                 :            :  *
     316                 :            :  * On success, returns true; the mutex should be later unlocked.  On
     317                 :            :  * failure returns false with no lock held.
     318                 :            :  */
     319                 :          0 : static bool cgroup_lock_live_group(struct cgroup *cgrp)
     320                 :            : {
     321                 :          2 :         mutex_lock(&cgroup_mutex);
     322         [ -  + ]:          2 :         if (cgroup_is_dead(cgrp)) {
     323                 :          0 :                 mutex_unlock(&cgroup_mutex);
     324                 :          0 :                 return false;
     325                 :            :         }
     326                 :            :         return true;
     327                 :            : }
     328                 :            : 
     329                 :            : /* the list of cgroups eligible for automatic release. Protected by
     330                 :            :  * release_list_lock */
     331                 :            : static LIST_HEAD(release_list);
     332                 :            : static DEFINE_RAW_SPINLOCK(release_list_lock);
     333                 :            : static void cgroup_release_agent(struct work_struct *work);
     334                 :            : static DECLARE_WORK(release_agent_work, cgroup_release_agent);
     335                 :            : static void check_for_release(struct cgroup *cgrp);
     336                 :            : 
     337                 :            : /*
     338                 :            :  * A cgroup can be associated with multiple css_sets as different tasks may
     339                 :            :  * belong to different cgroups on different hierarchies.  In the other
     340                 :            :  * direction, a css_set is naturally associated with multiple cgroups.
     341                 :            :  * This M:N relationship is represented by the following link structure
     342                 :            :  * which exists for each association and allows traversing the associations
     343                 :            :  * from both sides.
     344                 :            :  */
     345                 :            : struct cgrp_cset_link {
     346                 :            :         /* the cgroup and css_set this link associates */
     347                 :            :         struct cgroup           *cgrp;
     348                 :            :         struct css_set          *cset;
     349                 :            : 
     350                 :            :         /* list of cgrp_cset_links anchored at cgrp->cset_links */
     351                 :            :         struct list_head        cset_link;
     352                 :            : 
     353                 :            :         /* list of cgrp_cset_links anchored at css_set->cgrp_links */
     354                 :            :         struct list_head        cgrp_link;
     355                 :            : };
     356                 :            : 
     357                 :            : /* The default css_set - used by init and its children prior to any
     358                 :            :  * hierarchies being mounted. It contains a pointer to the root state
     359                 :            :  * for each subsystem. Also used to anchor the list of css_sets. Not
     360                 :            :  * reference-counted, to improve performance when child cgroups
     361                 :            :  * haven't been created.
     362                 :            :  */
     363                 :            : 
     364                 :            : static struct css_set init_css_set;
     365                 :            : static struct cgrp_cset_link init_cgrp_cset_link;
     366                 :            : 
     367                 :            : /*
     368                 :            :  * css_set_lock protects the list of css_set objects, and the chain of
     369                 :            :  * tasks off each css_set.  Nests outside task->alloc_lock due to
     370                 :            :  * css_task_iter_start().
     371                 :            :  */
     372                 :            : static DEFINE_RWLOCK(css_set_lock);
     373                 :            : static int css_set_count;
     374                 :            : 
     375                 :            : /*
     376                 :            :  * hash table for cgroup groups. This improves the performance to find
     377                 :            :  * an existing css_set. This hash doesn't (currently) take into
     378                 :            :  * account cgroups in empty hierarchies.
     379                 :            :  */
     380                 :            : #define CSS_SET_HASH_BITS       7
     381                 :            : static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
     382                 :            : 
     383                 :            : static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
     384                 :            : {
     385                 :            :         unsigned long key = 0UL;
     386                 :            :         struct cgroup_subsys *ss;
     387                 :            :         int i;
     388                 :            : 
     389                 :            :         for_each_subsys(ss, i)
     390                 :            :                 key += (unsigned long)css[i];
     391                 :            :         key = (key >> 16) ^ key;
     392                 :            : 
     393                 :            :         return key;
     394                 :            : }
     395                 :            : 
     396                 :            : /*
     397                 :            :  * We don't maintain the lists running through each css_set to its task
     398                 :            :  * until after the first call to css_task_iter_start().  This reduces the
     399                 :            :  * fork()/exit() overhead for people who have cgroups compiled into their
     400                 :            :  * kernel but not actually in use.
     401                 :            :  */
     402                 :            : static int use_task_css_set_links __read_mostly;
     403                 :            : 
     404                 :          0 : static void __put_css_set(struct css_set *cset, int taskexit)
     405                 :            : {
     406                 :            :         struct cgrp_cset_link *link, *tmp_link;
     407                 :            : 
     408                 :            :         /*
     409                 :            :          * Ensure that the refcount doesn't hit zero while any readers
     410                 :            :          * can see it. Similar to atomic_dec_and_lock(), but for an
     411                 :            :          * rwlock
     412                 :            :          */
     413         [ -  + ]:    1122957 :         if (atomic_add_unless(&cset->refcount, -1, 1))
     414                 :            :                 return;
     415                 :          0 :         write_lock(&css_set_lock);
     416         [ #  # ]:    1122967 :         if (!atomic_dec_and_test(&cset->refcount)) {
     417                 :            :                 write_unlock(&css_set_lock);
     418                 :            :                 return;
     419                 :            :         }
     420                 :            : 
     421                 :            :         /* This css_set is dead. unlink it and release cgroup refcounts */
     422                 :            :         hash_del(&cset->hlist);
     423                 :          0 :         css_set_count--;
     424                 :            : 
     425         [ #  # ]:          0 :         list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
     426                 :          0 :                 struct cgroup *cgrp = link->cgrp;
     427                 :            : 
     428                 :            :                 list_del(&link->cset_link);
     429                 :            :                 list_del(&link->cgrp_link);
     430                 :            : 
     431                 :            :                 /* @cgrp can't go away while we're holding css_set_lock */
     432 [ #  # ][ #  # ]:          0 :                 if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
     433         [ #  # ]:          0 :                         if (taskexit)
     434                 :          0 :                                 set_bit(CGRP_RELEASABLE, &cgrp->flags);
     435                 :          0 :                         check_for_release(cgrp);
     436                 :            :                 }
     437                 :            : 
     438                 :          0 :                 kfree(link);
     439                 :            :         }
     440                 :            : 
     441                 :            :         write_unlock(&css_set_lock);
     442                 :          0 :         kfree_rcu(cset, rcu_head);
     443                 :            : }
     444                 :            : 
     445                 :            : /*
     446                 :            :  * refcounted get/put for css_set objects
     447                 :            :  */
     448                 :            : static inline void get_css_set(struct css_set *cset)
     449                 :            : {
     450                 :    1122974 :         atomic_inc(&cset->refcount);
     451                 :            : }
     452                 :            : 
     453                 :            : static inline void put_css_set(struct css_set *cset)
     454                 :            : {
     455                 :          0 :         __put_css_set(cset, 0);
     456                 :            : }
     457                 :            : 
     458                 :            : static inline void put_css_set_taskexit(struct css_set *cset)
     459                 :            : {
     460                 :    1122970 :         __put_css_set(cset, 1);
     461                 :            : }
     462                 :            : 
     463                 :            : /**
     464                 :            :  * compare_css_sets - helper function for find_existing_css_set().
     465                 :            :  * @cset: candidate css_set being tested
     466                 :            :  * @old_cset: existing css_set for a task
     467                 :            :  * @new_cgrp: cgroup that's being entered by the task
     468                 :            :  * @template: desired set of css pointers in css_set (pre-calculated)
     469                 :            :  *
     470                 :            :  * Returns true if "cset" matches "old_cset" except for the hierarchy
     471                 :            :  * which "new_cgrp" belongs to, for which it should match "new_cgrp".
     472                 :            :  */
     473                 :          0 : static bool compare_css_sets(struct css_set *cset,
     474                 :            :                              struct css_set *old_cset,
     475                 :            :                              struct cgroup *new_cgrp,
     476                 :            :                              struct cgroup_subsys_state *template[])
     477                 :            : {
     478                 :            :         struct list_head *l1, *l2;
     479                 :            : 
     480                 :            :         if (memcmp(template, cset->subsys, sizeof(cset->subsys))) {
     481                 :            :                 /* Not all subsystems matched */
     482                 :            :                 return false;
     483                 :            :         }
     484                 :            : 
     485                 :            :         /*
     486                 :            :          * Compare cgroup pointers in order to distinguish between
     487                 :            :          * different cgroups in hierarchies with no subsystems. We
     488                 :            :          * could get by with just this check alone (and skip the
     489                 :            :          * memcmp above) but on most setups the memcmp check will
     490                 :            :          * avoid the need for this more expensive check on almost all
     491                 :            :          * candidates.
     492                 :            :          */
     493                 :            : 
     494                 :          0 :         l1 = &cset->cgrp_links;
     495                 :          0 :         l2 = &old_cset->cgrp_links;
     496                 :            :         while (1) {
     497                 :            :                 struct cgrp_cset_link *link1, *link2;
     498                 :            :                 struct cgroup *cgrp1, *cgrp2;
     499                 :            : 
     500                 :          0 :                 l1 = l1->next;
     501                 :          0 :                 l2 = l2->next;
     502                 :            :                 /* See if we reached the end - both lists are equal length. */
     503         [ #  # ]:          0 :                 if (l1 == &cset->cgrp_links) {
     504         [ #  # ]:          0 :                         BUG_ON(l2 != &old_cset->cgrp_links);
     505                 :            :                         break;
     506                 :            :                 } else {
     507         [ #  # ]:          0 :                         BUG_ON(l2 == &old_cset->cgrp_links);
     508                 :            :                 }
     509                 :            :                 /* Locate the cgroups associated with these links. */
     510                 :            :                 link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
     511                 :            :                 link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
     512                 :          0 :                 cgrp1 = link1->cgrp;
     513                 :          0 :                 cgrp2 = link2->cgrp;
     514                 :            :                 /* Hierarchies should be linked in the same order. */
     515         [ #  # ]:          0 :                 BUG_ON(cgrp1->root != cgrp2->root);
     516                 :            : 
     517                 :            :                 /*
     518                 :            :                  * If this hierarchy is the hierarchy of the cgroup
     519                 :            :                  * that's changing, then we need to check that this
     520                 :            :                  * css_set points to the new cgroup; if it's any other
     521                 :            :                  * hierarchy, then this css_set should point to the
     522                 :            :                  * same cgroup as the old css_set.
     523                 :            :                  */
     524         [ #  # ]:          0 :                 if (cgrp1->root == new_cgrp->root) {
     525         [ #  # ]:          0 :                         if (cgrp1 != new_cgrp)
     526                 :            :                                 return false;
     527                 :            :                 } else {
     528         [ #  # ]:          0 :                         if (cgrp1 != cgrp2)
     529                 :            :                                 return false;
     530                 :            :                 }
     531                 :            :         }
     532                 :            :         return true;
     533                 :            : }
     534                 :            : 
     535                 :            : /**
     536                 :            :  * find_existing_css_set - init css array and find the matching css_set
     537                 :            :  * @old_cset: the css_set that we're using before the cgroup transition
     538                 :            :  * @cgrp: the cgroup that we're moving into
     539                 :            :  * @template: out param for the new set of csses, should be clear on entry
     540                 :            :  */
     541                 :          0 : static struct css_set *find_existing_css_set(struct css_set *old_cset,
     542                 :            :                                         struct cgroup *cgrp,
     543                 :            :                                         struct cgroup_subsys_state *template[])
     544                 :            : {
     545                 :            :         struct cgroupfs_root *root = cgrp->root;
     546                 :            :         struct cgroup_subsys *ss;
     547                 :            :         struct css_set *cset;
     548                 :            :         unsigned long key;
     549                 :            :         int i;
     550                 :            : 
     551                 :            :         /*
     552                 :            :          * Build the set of subsystem state objects that we want to see in the
     553                 :            :          * new css_set. While subsystems can change globally, the entries here
     554                 :            :          * won't change, so no need for locking.
     555                 :            :          */
     556                 :            :         for_each_subsys(ss, i) {
     557                 :            :                 if (root->subsys_mask & (1UL << i)) {
     558                 :            :                         /* Subsystem is in this hierarchy. So we want
     559                 :            :                          * the subsystem state from the new
     560                 :            :                          * cgroup */
     561                 :            :                         template[i] = cgroup_css(cgrp, ss);
     562                 :            :                 } else {
     563                 :            :                         /* Subsystem is not in this hierarchy, so we
     564                 :            :                          * don't want to change the subsystem state */
     565                 :            :                         template[i] = old_cset->subsys[i];
     566                 :            :                 }
     567                 :            :         }
     568                 :            : 
     569                 :            :         key = css_set_hash(template);
     570 [ #  # ][ #  # ]:          0 :         hash_for_each_possible(css_set_table, cset, hlist, key) {
                 [ #  # ]
     571         [ #  # ]:          0 :                 if (!compare_css_sets(cset, old_cset, cgrp, template))
     572                 :          0 :                         continue;
     573                 :            : 
     574                 :            :                 /* This css_set matches what we need */
     575                 :            :                 return cset;
     576                 :            :         }
     577                 :            : 
     578                 :            :         /* No existing css_set matched */
     579                 :            :         return NULL;
     580                 :            : }
     581                 :            : 
     582                 :          0 : static void free_cgrp_cset_links(struct list_head *links_to_free)
     583                 :            : {
     584                 :            :         struct cgrp_cset_link *link, *tmp_link;
     585                 :            : 
     586         [ -  + ]:          3 :         list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
     587                 :            :                 list_del(&link->cset_link);
     588                 :          0 :                 kfree(link);
     589                 :            :         }
     590                 :          3 : }
     591                 :            : 
     592                 :            : /**
     593                 :            :  * allocate_cgrp_cset_links - allocate cgrp_cset_links
     594                 :            :  * @count: the number of links to allocate
     595                 :            :  * @tmp_links: list_head the allocated links are put on
     596                 :            :  *
     597                 :            :  * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
     598                 :            :  * through ->cset_link.  Returns 0 on success or -errno.
     599                 :            :  */
     600                 :          0 : static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
     601                 :            : {
     602                 :            :         struct cgrp_cset_link *link;
     603                 :            :         int i;
     604                 :            : 
     605                 :            :         INIT_LIST_HEAD(tmp_links);
     606                 :            : 
     607         [ +  + ]:          6 :         for (i = 0; i < count; i++) {
     608                 :            :                 link = kzalloc(sizeof(*link), GFP_KERNEL);
     609         [ -  + ]:          3 :                 if (!link) {
     610                 :          0 :                         free_cgrp_cset_links(tmp_links);
     611                 :          0 :                         return -ENOMEM;
     612                 :            :                 }
     613                 :          3 :                 list_add(&link->cset_link, tmp_links);
     614                 :            :         }
     615                 :            :         return 0;
     616                 :            : }
     617                 :            : 
     618                 :            : /**
     619                 :            :  * link_css_set - a helper function to link a css_set to a cgroup
     620                 :            :  * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
     621                 :            :  * @cset: the css_set to be linked
     622                 :            :  * @cgrp: the destination cgroup
     623                 :            :  */
     624                 :          0 : static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
     625                 :            :                          struct cgroup *cgrp)
     626                 :            : {
     627                 :            :         struct cgrp_cset_link *link;
     628                 :            : 
     629         [ -  + ]:          3 :         BUG_ON(list_empty(tmp_links));
     630                 :            :         link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
     631                 :          3 :         link->cset = cset;
     632                 :          3 :         link->cgrp = cgrp;
     633                 :          3 :         list_move(&link->cset_link, &cgrp->cset_links);
     634                 :            :         /*
     635                 :            :          * Always add links to the tail of the list so that the list
     636                 :            :          * is sorted by order of hierarchy creation
     637                 :            :          */
     638                 :          3 :         list_add_tail(&link->cgrp_link, &cset->cgrp_links);
     639                 :          3 : }
     640                 :            : 
     641                 :            : /**
     642                 :            :  * find_css_set - return a new css_set with one cgroup updated
     643                 :            :  * @old_cset: the baseline css_set
     644                 :            :  * @cgrp: the cgroup to be updated
     645                 :            :  *
     646                 :            :  * Return a css_set (existing or newly allocated) that's equivalent to
     647                 :            :  * @old_cset, but with @cgrp substituted into the appropriate hierarchy.
     648                 :            :  */
     649                 :          0 : static struct css_set *find_css_set(struct css_set *old_cset,
     650                 :            :                                     struct cgroup *cgrp)
     651                 :            : {
     652                 :            :         struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
     653                 :            :         struct css_set *cset;
     654                 :            :         struct list_head tmp_links;
     655                 :            :         struct cgrp_cset_link *link;
     656                 :            :         unsigned long key;
     657                 :            : 
     658                 :            :         lockdep_assert_held(&cgroup_mutex);
     659                 :            : 
     660                 :            :         /* First see if we already have a css_set that matches
     661                 :            :          * the desired set */
     662                 :          0 :         read_lock(&css_set_lock);
     663                 :          0 :         cset = find_existing_css_set(old_cset, cgrp, template);
     664         [ #  # ]:          0 :         if (cset)
     665                 :            :                 get_css_set(cset);
     666                 :            :         read_unlock(&css_set_lock);
     667                 :            : 
     668         [ #  # ]:          0 :         if (cset)
     669                 :            :                 return cset;
     670                 :            : 
     671                 :            :         cset = kzalloc(sizeof(*cset), GFP_KERNEL);
     672         [ #  # ]:          0 :         if (!cset)
     673                 :            :                 return NULL;
     674                 :            : 
     675                 :            :         /* Allocate all the cgrp_cset_link objects that we'll need */
     676         [ #  # ]:          0 :         if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
     677                 :          0 :                 kfree(cset);
     678                 :          0 :                 return NULL;
     679                 :            :         }
     680                 :            : 
     681                 :          0 :         atomic_set(&cset->refcount, 1);
     682                 :          0 :         INIT_LIST_HEAD(&cset->cgrp_links);
     683                 :          0 :         INIT_LIST_HEAD(&cset->tasks);
     684                 :            :         INIT_HLIST_NODE(&cset->hlist);
     685                 :            : 
     686                 :            :         /* Copy the set of subsystem state objects generated in
     687                 :            :          * find_existing_css_set() */
     688                 :            :         memcpy(cset->subsys, template, sizeof(cset->subsys));
     689                 :            : 
     690                 :          0 :         write_lock(&css_set_lock);
     691                 :            :         /* Add reference counts and links from the new css_set. */
     692         [ #  # ]:          0 :         list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
     693                 :          0 :                 struct cgroup *c = link->cgrp;
     694                 :            : 
     695         [ #  # ]:          0 :                 if (c->root == cgrp->root)
     696                 :            :                         c = cgrp;
     697                 :          0 :                 link_css_set(&tmp_links, cset, c);
     698                 :            :         }
     699                 :            : 
     700         [ #  # ]:          0 :         BUG_ON(!list_empty(&tmp_links));
     701                 :            : 
     702                 :          0 :         css_set_count++;
     703                 :            : 
     704                 :            :         /* Add this css_set to the hash table */
     705                 :            :         key = css_set_hash(cset->subsys);
     706                 :          0 :         hash_add(css_set_table, &cset->hlist, key);
     707                 :            : 
     708                 :            :         write_unlock(&css_set_lock);
     709                 :            : 
     710                 :          0 :         return cset;
     711                 :            : }
     712                 :            : 
     713                 :            : /*
     714                 :            :  * Return the cgroup for "task" from the given hierarchy. Must be
     715                 :            :  * called with cgroup_mutex held.
     716                 :            :  */
     717                 :          0 : static struct cgroup *task_cgroup_from_root(struct task_struct *task,
     718                 :            :                                             struct cgroupfs_root *root)
     719                 :            : {
     720                 :            :         struct css_set *cset;
     721                 :            :         struct cgroup *res = NULL;
     722                 :            : 
     723         [ #  # ]:          0 :         BUG_ON(!mutex_is_locked(&cgroup_mutex));
     724                 :          0 :         read_lock(&css_set_lock);
     725                 :            :         /*
     726                 :            :          * No need to lock the task - since we hold cgroup_mutex the
     727                 :            :          * task can't change groups, so the only thing that can happen
     728                 :            :          * is that it exits and its css is set back to init_css_set.
     729                 :            :          */
     730                 :            :         cset = task_css_set(task);
     731         [ #  # ]:          0 :         if (cset == &init_css_set) {
     732                 :          0 :                 res = &root->top_cgroup;
     733                 :            :         } else {
     734                 :            :                 struct cgrp_cset_link *link;
     735                 :            : 
     736         [ #  # ]:          0 :                 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
     737                 :          0 :                         struct cgroup *c = link->cgrp;
     738                 :            : 
     739         [ #  # ]:          0 :                         if (c->root == root) {
     740                 :            :                                 res = c;
     741                 :            :                                 break;
     742                 :            :                         }
     743                 :            :                 }
     744                 :            :         }
     745                 :            :         read_unlock(&css_set_lock);
     746         [ #  # ]:          0 :         BUG_ON(!res);
     747                 :          0 :         return res;
     748                 :            : }
     749                 :            : 
     750                 :            : /*
     751                 :            :  * There is one global cgroup mutex. We also require taking
     752                 :            :  * task_lock() when dereferencing a task's cgroup subsys pointers.
     753                 :            :  * See "The task_lock() exception", at the end of this comment.
     754                 :            :  *
     755                 :            :  * A task must hold cgroup_mutex to modify cgroups.
     756                 :            :  *
     757                 :            :  * Any task can increment and decrement the count field without lock.
     758                 :            :  * So in general, code holding cgroup_mutex can't rely on the count
     759                 :            :  * field not changing.  However, if the count goes to zero, then only
     760                 :            :  * cgroup_attach_task() can increment it again.  Because a count of zero
     761                 :            :  * means that no tasks are currently attached, therefore there is no
     762                 :            :  * way a task attached to that cgroup can fork (the other way to
     763                 :            :  * increment the count).  So code holding cgroup_mutex can safely
     764                 :            :  * assume that if the count is zero, it will stay zero. Similarly, if
     765                 :            :  * a task holds cgroup_mutex on a cgroup with zero count, it
     766                 :            :  * knows that the cgroup won't be removed, as cgroup_rmdir()
     767                 :            :  * needs that mutex.
     768                 :            :  *
     769                 :            :  * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
     770                 :            :  * (usually) take cgroup_mutex.  These are the two most performance
     771                 :            :  * critical pieces of code here.  The exception occurs on cgroup_exit(),
     772                 :            :  * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
     773                 :            :  * is taken, and if the cgroup count is zero, a usermode call is made
     774                 :            :  * to the release agent with the name of the cgroup (path relative to
     775                 :            :  * the root of cgroup file system) as the argument.
     776                 :            :  *
     777                 :            :  * A cgroup can only be deleted if both its 'count' of using tasks
     778                 :            :  * is zero, and its list of 'children' cgroups is empty.  Since all
     779                 :            :  * tasks in the system use _some_ cgroup, and since there is always at
     780                 :            :  * least one task in the system (init, pid == 1), therefore, top_cgroup
     781                 :            :  * always has either children cgroups and/or using tasks.  So we don't
     782                 :            :  * need a special hack to ensure that top_cgroup cannot be deleted.
     783                 :            :  *
     784                 :            :  *      The task_lock() exception
     785                 :            :  *
     786                 :            :  * The need for this exception arises from the action of
     787                 :            :  * cgroup_attach_task(), which overwrites one task's cgroup pointer with
     788                 :            :  * another.  It does so using cgroup_mutex, however there are
     789                 :            :  * several performance critical places that need to reference
     790                 :            :  * task->cgroup without the expense of grabbing a system global
     791                 :            :  * mutex.  Therefore except as noted below, when dereferencing or, as
     792                 :            :  * in cgroup_attach_task(), modifying a task's cgroup pointer we use
     793                 :            :  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
     794                 :            :  * the task_struct routinely used for such matters.
     795                 :            :  *
     796                 :            :  * P.S.  One more locking exception.  RCU is used to guard the
     797                 :            :  * update of a task's cgroup pointer by cgroup_attach_task()
     798                 :            :  */
     799                 :            : 
     800                 :            : /*
     801                 :            :  * A couple of forward declarations required, due to cyclic reference loop:
     802                 :            :  * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
     803                 :            :  * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
     804                 :            :  * -> cgroup_mkdir.
     805                 :            :  */
     806                 :            : 
     807                 :            : static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
     808                 :            : static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
     809                 :            : static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
     810                 :            : static const struct inode_operations cgroup_dir_inode_operations;
     811                 :            : static const struct file_operations proc_cgroupstats_operations;
     812                 :            : 
     813                 :            : static struct backing_dev_info cgroup_backing_dev_info = {
     814                 :            :         .name           = "cgroup",
     815                 :            :         .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
     816                 :            : };
     817                 :            : 
     818                 :          0 : static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
     819                 :            : {
     820                 :         36 :         struct inode *inode = new_inode(sb);
     821                 :            : 
     822         [ +  - ]:         36 :         if (inode) {
     823                 :         36 :                 inode->i_ino = get_next_ino();
     824                 :         36 :                 inode->i_mode = mode;
     825                 :         36 :                 inode->i_uid = current_fsuid();
     826                 :         36 :                 inode->i_gid = current_fsgid();
     827                 :         36 :                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
     828                 :         36 :                 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
     829                 :            :         }
     830                 :          0 :         return inode;
     831                 :            : }
     832                 :            : 
     833                 :          0 : static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
     834                 :            : {
     835                 :            :         struct cgroup_name *name;
     836                 :            : 
     837                 :          2 :         name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
     838         [ +  - ]:          2 :         if (!name)
     839                 :            :                 return NULL;
     840                 :          2 :         strcpy(name->name, dentry->d_name.name);
     841                 :            :         return name;
     842                 :            : }
     843                 :            : 
     844                 :          0 : static void cgroup_free_fn(struct work_struct *work)
     845                 :            : {
     846                 :          2 :         struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
     847                 :            : 
     848                 :          2 :         mutex_lock(&cgroup_mutex);
     849                 :          2 :         cgrp->root->number_of_cgroups--;
     850                 :          2 :         mutex_unlock(&cgroup_mutex);
     851                 :            : 
     852                 :            :         /*
     853                 :            :          * We get a ref to the parent's dentry, and put the ref when
     854                 :            :          * this cgroup is being freed, so it's guaranteed that the
     855                 :            :          * parent won't be destroyed before its children.
     856                 :            :          */
     857                 :          2 :         dput(cgrp->parent->dentry);
     858                 :            : 
     859                 :            :         /*
     860                 :            :          * Drop the active superblock reference that we took when we
     861                 :            :          * created the cgroup. This will free cgrp->root, if we are
     862                 :            :          * holding the last reference to @sb.
     863                 :            :          */
     864                 :          2 :         deactivate_super(cgrp->root->sb);
     865                 :            : 
     866                 :            :         /*
     867                 :            :          * if we're getting rid of the cgroup, refcount should ensure
     868                 :            :          * that there are no pidlists left.
     869                 :            :          */
     870         [ -  + ]:          4 :         BUG_ON(!list_empty(&cgrp->pidlists));
     871                 :            : 
     872                 :          2 :         simple_xattrs_free(&cgrp->xattrs);
     873                 :            : 
     874                 :          2 :         kfree(rcu_dereference_raw(cgrp->name));
     875                 :          2 :         kfree(cgrp);
     876                 :          2 : }
     877                 :            : 
     878                 :          0 : static void cgroup_free_rcu(struct rcu_head *head)
     879                 :            : {
     880                 :            :         struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
     881                 :            : 
     882                 :          4 :         INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
     883                 :          2 :         queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
     884                 :          2 : }
     885                 :            : 
     886                 :          0 : static void cgroup_diput(struct dentry *dentry, struct inode *inode)
     887                 :            : {
     888                 :            :         /* is dentry a directory ? if so, kfree() associated cgroup */
     889         [ +  + ]:         33 :         if (S_ISDIR(inode->i_mode)) {
     890                 :          2 :                 struct cgroup *cgrp = dentry->d_fsdata;
     891                 :            : 
     892         [ -  + ]:          2 :                 BUG_ON(!(cgroup_is_dead(cgrp)));
     893                 :            : 
     894                 :            :                 /*
     895                 :            :                  * XXX: cgrp->id is only used to look up css's.  As cgroup
     896                 :            :                  * and css's lifetimes will be decoupled, it should be made
     897                 :            :                  * per-subsystem and moved to css->id so that lookups are
     898                 :            :                  * successful until the target css is released.
     899                 :            :                  */
     900                 :          2 :                 idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
     901                 :          2 :                 cgrp->id = -1;
     902                 :            : 
     903                 :          2 :                 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
     904                 :            :         } else {
     905                 :            :                 struct cfent *cfe = __d_cfe(dentry);
     906                 :         31 :                 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
     907                 :            : 
     908 [ +  + ][ +  - ]:         31 :                 WARN_ONCE(!list_empty(&cfe->node) &&
         [ -  + ][ #  # ]
                 [ -  - ]
     909                 :            :                           cgrp != &cgrp->root->top_cgroup,
     910                 :            :                           "cfe still linked for %s\n", cfe->type->name);
     911                 :         31 :                 simple_xattrs_free(&cfe->xattrs);
     912                 :         31 :                 kfree(cfe);
     913                 :            :         }
     914                 :         33 :         iput(inode);
     915                 :         33 : }
     916                 :            : 
     917                 :          0 : static void remove_dir(struct dentry *d)
     918                 :            : {
     919                 :          2 :         struct dentry *parent = dget(d->d_parent);
     920                 :            : 
     921                 :          2 :         d_delete(d);
     922                 :          2 :         simple_rmdir(parent->d_inode, d);
     923                 :          2 :         dput(parent);
     924                 :          2 : }
     925                 :            : 
     926                 :          0 : static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
     927                 :            : {
     928                 :            :         struct cfent *cfe;
     929                 :            : 
     930                 :            :         lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
     931                 :            :         lockdep_assert_held(&cgroup_mutex);
     932                 :            : 
     933                 :            :         /*
     934                 :            :          * If we're doing cleanup due to failure of cgroup_create(),
     935                 :            :          * the corresponding @cfe may not exist.
     936                 :            :          */
     937         [ +  - ]:         10 :         list_for_each_entry(cfe, &cgrp->files, node) {
     938                 :         10 :                 struct dentry *d = cfe->dentry;
     939                 :            : 
     940 [ +  - ][ -  + ]:         10 :                 if (cft && cfe->type != cft)
     941                 :          0 :                         continue;
     942                 :            : 
     943                 :            :                 dget(d);
     944                 :         10 :                 d_delete(d);
     945                 :         10 :                 simple_unlink(cgrp->dentry->d_inode, d);
     946                 :            :                 list_del_init(&cfe->node);
     947                 :         10 :                 dput(d);
     948                 :            : 
     949                 :         10 :                 break;
     950                 :            :         }
     951                 :         10 : }
     952                 :            : 
     953                 :            : /**
     954                 :            :  * cgroup_clear_dir - remove subsys files in a cgroup directory
     955                 :            :  * @cgrp: target cgroup
     956                 :            :  * @subsys_mask: mask of the subsystem ids whose files should be removed
     957                 :            :  */
     958                 :            : static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
     959                 :            : {
     960                 :            :         struct cgroup_subsys *ss;
     961                 :            :         int i;
     962                 :            : 
     963                 :            :         for_each_subsys(ss, i) {
     964                 :            :                 struct cftype_set *set;
     965                 :            : 
     966                 :            :                 if (!test_bit(i, &subsys_mask))
     967                 :            :                         continue;
     968                 :            :                 list_for_each_entry(set, &ss->cftsets, node)
     969                 :            :                         cgroup_addrm_files(cgrp, set->cfts, false);
     970                 :            :         }
     971                 :            : }
     972                 :            : 
     973                 :            : /*
     974                 :            :  * NOTE : the dentry must have been dget()'ed
     975                 :            :  */
     976                 :          0 : static void cgroup_d_remove_dir(struct dentry *dentry)
     977                 :            : {
     978                 :            :         struct dentry *parent;
     979                 :            : 
     980                 :          2 :         parent = dentry->d_parent;
     981                 :            :         spin_lock(&parent->d_lock);
     982                 :          2 :         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
     983                 :          2 :         list_del_init(&dentry->d_u.d_child);
     984                 :            :         spin_unlock(&dentry->d_lock);
     985                 :            :         spin_unlock(&parent->d_lock);
     986                 :          2 :         remove_dir(dentry);
     987                 :          2 : }
     988                 :            : 
     989                 :            : /*
     990                 :            :  * Call with cgroup_mutex held. Drops reference counts on modules, including
     991                 :            :  * any duplicate ones that parse_cgroupfs_options took. If this function
     992                 :            :  * returns an error, no reference counts are touched.
     993                 :            :  */
     994                 :          0 : static int rebind_subsystems(struct cgroupfs_root *root,
     995                 :            :                              unsigned long added_mask, unsigned removed_mask)
     996                 :            : {
     997                 :            :         struct cgroup *cgrp = &root->top_cgroup;
     998                 :            :         struct cgroup_subsys *ss;
     999                 :            :         unsigned long pinned = 0;
    1000                 :            :         int i, ret;
    1001                 :            : 
    1002         [ -  + ]:          6 :         BUG_ON(!mutex_is_locked(&cgroup_mutex));
    1003         [ -  + ]:          6 :         BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
    1004                 :            : 
    1005                 :            :         /* Check that any added subsystems are currently free */
    1006                 :            :         for_each_subsys(ss, i) {
    1007                 :            :                 if (!(added_mask & (1 << i)))
    1008                 :            :                         continue;
    1009                 :            : 
    1010                 :            :                 /* is the subsystem mounted elsewhere? */
    1011                 :            :                 if (ss->root != &cgroup_dummy_root) {
    1012                 :            :                         ret = -EBUSY;
    1013                 :            :                         goto out_put;
    1014                 :            :                 }
    1015                 :            : 
    1016                 :            :                 /* pin the module */
    1017                 :            :                 if (!try_module_get(ss->module)) {
    1018                 :            :                         ret = -ENOENT;
    1019                 :            :                         goto out_put;
    1020                 :            :                 }
    1021                 :            :                 pinned |= 1 << i;
    1022                 :            :         }
    1023                 :            : 
    1024                 :            :         /* subsys could be missing if unloaded between parsing and here */
    1025         [ +  - ]:          6 :         if (added_mask != pinned) {
    1026                 :            :                 ret = -ENOENT;
    1027                 :            :                 goto out_put;
    1028                 :            :         }
    1029                 :            : 
    1030                 :            :         ret = cgroup_populate_dir(cgrp, added_mask);
    1031                 :            :         if (ret)
    1032                 :            :                 goto out_put;
    1033                 :            : 
    1034                 :            :         /*
    1035                 :            :          * Nothing can fail from this point on.  Remove files for the
    1036                 :            :          * removed subsystems and rebind each subsystem.
    1037                 :            :          */
    1038                 :            :         cgroup_clear_dir(cgrp, removed_mask);
    1039                 :            : 
    1040                 :            :         for_each_subsys(ss, i) {
    1041                 :            :                 unsigned long bit = 1UL << i;
    1042                 :            : 
    1043                 :            :                 if (bit & added_mask) {
    1044                 :            :                         /* We're binding this subsystem to this hierarchy */
    1045                 :            :                         BUG_ON(cgroup_css(cgrp, ss));
    1046                 :            :                         BUG_ON(!cgroup_css(cgroup_dummy_top, ss));
    1047                 :            :                         BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top);
    1048                 :            : 
    1049                 :            :                         rcu_assign_pointer(cgrp->subsys[i],
    1050                 :            :                                            cgroup_css(cgroup_dummy_top, ss));
    1051                 :            :                         cgroup_css(cgrp, ss)->cgroup = cgrp;
    1052                 :            : 
    1053                 :            :                         list_move(&ss->sibling, &root->subsys_list);
    1054                 :            :                         ss->root = root;
    1055                 :            :                         if (ss->bind)
    1056                 :            :                                 ss->bind(cgroup_css(cgrp, ss));
    1057                 :            : 
    1058                 :            :                         /* refcount was already taken, and we're keeping it */
    1059                 :            :                         root->subsys_mask |= bit;
    1060                 :            :                 } else if (bit & removed_mask) {
    1061                 :            :                         /* We're removing this subsystem */
    1062                 :            :                         BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss));
    1063                 :            :                         BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp);
    1064                 :            : 
    1065                 :            :                         if (ss->bind)
    1066                 :            :                                 ss->bind(cgroup_css(cgroup_dummy_top, ss));
    1067                 :            : 
    1068                 :            :                         cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top;
    1069                 :            :                         RCU_INIT_POINTER(cgrp->subsys[i], NULL);
    1070                 :            : 
    1071                 :            :                         cgroup_subsys[i]->root = &cgroup_dummy_root;
    1072                 :            :                         list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);
    1073                 :            : 
    1074                 :            :                         /* subsystem is now free - drop reference on module */
    1075                 :            :                         module_put(ss->module);
    1076                 :            :                         root->subsys_mask &= ~bit;
    1077                 :            :                 }
    1078                 :            :         }
    1079                 :            : 
    1080                 :            :         /*
    1081                 :            :          * Mark @root has finished binding subsystems.  @root->subsys_mask
    1082                 :            :          * now matches the bound subsystems.
    1083                 :            :          */
    1084                 :          6 :         root->flags |= CGRP_ROOT_SUBSYS_BOUND;
    1085                 :            : 
    1086                 :            :         return 0;
    1087                 :            : 
    1088                 :            : out_put:
    1089                 :            :         for_each_subsys(ss, i)
    1090                 :            :                 if (pinned & (1 << i))
    1091                 :            :                         module_put(ss->module);
    1092                 :            :         return ret;
    1093                 :            : }
    1094                 :            : 
    1095                 :          0 : static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
    1096                 :            : {
    1097                 :          0 :         struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
    1098                 :            :         struct cgroup_subsys *ss;
    1099                 :            : 
    1100                 :          0 :         mutex_lock(&cgroup_root_mutex);
    1101         [ #  # ]:          0 :         for_each_root_subsys(root, ss)
    1102                 :          0 :                 seq_printf(seq, ",%s", ss->name);
    1103         [ #  # ]:          0 :         if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
    1104                 :          0 :                 seq_puts(seq, ",sane_behavior");
    1105         [ #  # ]:          0 :         if (root->flags & CGRP_ROOT_NOPREFIX)
    1106                 :          0 :                 seq_puts(seq, ",noprefix");
    1107         [ #  # ]:          0 :         if (root->flags & CGRP_ROOT_XATTR)
    1108                 :          0 :                 seq_puts(seq, ",xattr");
    1109         [ #  # ]:          0 :         if (strlen(root->release_agent_path))
    1110                 :          0 :                 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
    1111         [ #  # ]:          0 :         if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
    1112                 :          0 :                 seq_puts(seq, ",clone_children");
    1113         [ #  # ]:          0 :         if (strlen(root->name))
    1114                 :          0 :                 seq_printf(seq, ",name=%s", root->name);
    1115                 :          0 :         mutex_unlock(&cgroup_root_mutex);
    1116                 :          0 :         return 0;
    1117                 :            : }
    1118                 :            : 
    1119                 :            : struct cgroup_sb_opts {
    1120                 :            :         unsigned long subsys_mask;
    1121                 :            :         unsigned long flags;
    1122                 :            :         char *release_agent;
    1123                 :            :         bool cpuset_clone_children;
    1124                 :            :         char *name;
    1125                 :            :         /* User explicitly requested empty subsystem */
    1126                 :            :         bool none;
    1127                 :            : 
    1128                 :            :         struct cgroupfs_root *new_root;
    1129                 :            : 
    1130                 :            : };
    1131                 :            : 
    1132                 :            : /*
    1133                 :            :  * Convert a hierarchy specifier into a bitmask of subsystems and
    1134                 :            :  * flags. Call with cgroup_mutex held to protect the cgroup_subsys[]
    1135                 :            :  * array. This function takes refcounts on subsystems to be used, unless it
    1136                 :            :  * returns error, in which case no refcounts are taken.
    1137                 :            :  */
    1138                 :          0 : static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
    1139                 :            : {
    1140                 :       3193 :         char *token, *o = data;
    1141                 :            :         bool all_ss = false, one_ss = false;
    1142                 :            :         unsigned long mask = (unsigned long)-1;
    1143                 :            :         struct cgroup_subsys *ss;
    1144                 :            :         int i;
    1145                 :            : 
    1146         [ -  + ]:       3193 :         BUG_ON(!mutex_is_locked(&cgroup_mutex));
    1147                 :            : 
    1148                 :            : #ifdef CONFIG_CPUSETS
    1149                 :            :         mask = ~(1UL << cpuset_subsys_id);
    1150                 :            : #endif
    1151                 :            : 
    1152                 :       3193 :         memset(opts, 0, sizeof(*opts));
    1153                 :            : 
    1154         [ +  + ]:       3199 :         while ((token = strsep(&o, ",")) != NULL) {
    1155            [ + ]:          6 :                 if (!*token)
    1156                 :            :                         return -EINVAL;
    1157         [ +  + ]:       3199 :                 if (!strcmp(token, "none")) {
    1158                 :            :                         /* Explicitly have no subsystems */
    1159                 :          3 :                         opts->none = true;
    1160                 :          3 :                         continue;
    1161                 :            :                 }
    1162         [ -  + ]:       3196 :                 if (!strcmp(token, "all")) {
    1163                 :            :                         /* Mutually exclusive option 'all' + subsystem name */
    1164                 :            :                         if (one_ss)
    1165                 :            :                                 return -EINVAL;
    1166                 :            :                         all_ss = true;
    1167                 :          0 :                         continue;
    1168                 :            :                 }
    1169         [ -  + ]:          3 :                 if (!strcmp(token, "__DEVEL__sane_behavior")) {
    1170                 :          0 :                         opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
    1171                 :          0 :                         continue;
    1172                 :            :                 }
    1173         [ -  + ]:          3 :                 if (!strcmp(token, "noprefix")) {
    1174                 :          0 :                         opts->flags |= CGRP_ROOT_NOPREFIX;
    1175                 :          0 :                         continue;
    1176                 :            :                 }
    1177         [ -  + ]:          3 :                 if (!strcmp(token, "clone_children")) {
    1178                 :          0 :                         opts->cpuset_clone_children = true;
    1179                 :          0 :                         continue;
    1180                 :            :                 }
    1181         [ -  + ]:          3 :                 if (!strcmp(token, "xattr")) {
    1182                 :          0 :                         opts->flags |= CGRP_ROOT_XATTR;
    1183                 :          0 :                         continue;
    1184                 :            :                 }
    1185         [ -  + ]:          3 :                 if (!strncmp(token, "release_agent=", 14)) {
    1186                 :            :                         /* Specifying two release agents is forbidden */
    1187         [ #  # ]:          0 :                         if (opts->release_agent)
    1188                 :            :                                 return -EINVAL;
    1189                 :          0 :                         opts->release_agent =
    1190                 :          0 :                                 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
    1191         [ #  # ]:          0 :                         if (!opts->release_agent)
    1192                 :            :                                 return -ENOMEM;
    1193                 :          0 :                         continue;
    1194                 :            :                 }
    1195         [ +  - ]:          3 :                 if (!strncmp(token, "name=", 5)) {
    1196                 :          3 :                         const char *name = token + 5;
    1197                 :            :                         /* Can't specify an empty name */
    1198         [ +  - ]:          3 :                         if (!strlen(name))
    1199                 :            :                                 return -EINVAL;
    1200                 :            :                         /* Must match [\w.-]+ */
    1201         [ +  + ]:         12 :                         for (i = 0; i < strlen(name); i++) {
    1202                 :          9 :                                 char c = name[i];
    1203         [ +  - ]:          9 :                                 if (isalnum(c))
    1204                 :          9 :                                         continue;
    1205         [ #  # ]:          0 :                                 if ((c == '.') || (c == '-') || (c == '_'))
    1206                 :          0 :                                         continue;
    1207                 :            :                                 return -EINVAL;
    1208                 :            :                         }
    1209                 :            :                         /* Specifying two names is forbidden */
    1210         [ +  - ]:          3 :                         if (opts->name)
    1211                 :            :                                 return -EINVAL;
    1212                 :          3 :                         opts->name = kstrndup(name,
    1213                 :            :                                               MAX_CGROUP_ROOT_NAMELEN - 1,
    1214                 :            :                                               GFP_KERNEL);
    1215         [ +  - ]:          3 :                         if (!opts->name)
    1216                 :            :                                 return -ENOMEM;
    1217                 :            : 
    1218                 :          6 :                         continue;
    1219                 :            :                 }
    1220                 :            : 
    1221                 :            :                 for_each_subsys(ss, i) {
    1222                 :            :                         if (strcmp(token, ss->name))
    1223                 :            :                                 continue;
    1224                 :            :                         if (ss->disabled)
    1225                 :            :                                 continue;
    1226                 :            : 
    1227                 :            :                         /* Mutually exclusive option 'all' + subsystem name */
    1228                 :            :                         if (all_ss)
    1229                 :            :                                 return -EINVAL;
    1230                 :            :                         set_bit(i, &opts->subsys_mask);
    1231                 :            :                         one_ss = true;
    1232                 :            : 
    1233                 :            :                         break;
    1234                 :            :                 }
    1235                 :            :                 if (i == CGROUP_SUBSYS_COUNT)
    1236                 :            :                         return -ENOENT;
    1237                 :            :         }
    1238                 :            : 
    1239                 :            :         /*
    1240                 :            :          * If the 'all' option was specified select all the subsystems,
    1241                 :            :          * otherwise if 'none', 'name=' and a subsystem name options
    1242                 :            :          * were not specified, let's default to 'all'
    1243                 :            :          */
    1244                 :            :         if (all_ss || (!one_ss && !opts->none && !opts->name))
    1245                 :            :                 for_each_subsys(ss, i)
    1246                 :            :                         if (!ss->disabled)
    1247                 :            :                                 set_bit(i, &opts->subsys_mask);
    1248                 :            : 
    1249                 :            :         /* Consistency checks */
    1250                 :            : 
    1251         [ -  + ]:       3193 :         if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
    1252                 :          0 :                 pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
    1253                 :            : 
    1254         [ #  # ]:          0 :                 if (opts->flags & CGRP_ROOT_NOPREFIX) {
    1255                 :          0 :                         pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
    1256                 :          0 :                         return -EINVAL;
    1257                 :            :                 }
    1258                 :            : 
    1259         [ #  # ]:          0 :                 if (opts->cpuset_clone_children) {
    1260                 :          0 :                         pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
    1261                 :          0 :                         return -EINVAL;
    1262                 :            :                 }
    1263                 :            :         }
    1264                 :            : 
    1265                 :            :         /*
    1266                 :            :          * Option noprefix was introduced just for backward compatibility
    1267                 :            :          * with the old cpuset, so we allow noprefix only if mounting just
    1268                 :            :          * the cpuset subsystem.
    1269                 :            :          */
    1270 [ -  + ][ #  # ]:       3193 :         if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
    1271                 :            :                 return -EINVAL;
    1272                 :            : 
    1273                 :            : 
    1274                 :            :         /* Can't specify "none" and some subsystems */
    1275 [ -  + ][ #  # ]:       3193 :         if (opts->subsys_mask && opts->none)
    1276                 :            :                 return -EINVAL;
    1277                 :            : 
    1278                 :            :         /*
    1279                 :            :          * We either have to specify by name or by subsystems. (So all
    1280                 :            :          * empty hierarchies must have a name).
    1281                 :            :          */
    1282 [ +  - ][ +  + ]:       3193 :         if (!opts->subsys_mask && !opts->name)
    1283                 :            :                 return -EINVAL;
    1284                 :            : 
    1285                 :          3 :         return 0;
    1286                 :            : }
    1287                 :            : 
    1288                 :          0 : static int cgroup_remount(struct super_block *sb, int *flags, char *data)
    1289                 :            : {
    1290                 :            :         int ret = 0;
    1291                 :          0 :         struct cgroupfs_root *root = sb->s_fs_info;
    1292                 :            :         struct cgroup *cgrp = &root->top_cgroup;
    1293                 :            :         struct cgroup_sb_opts opts;
    1294                 :            :         unsigned long added_mask, removed_mask;
    1295                 :            : 
    1296         [ #  # ]:          0 :         if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
    1297                 :          0 :                 pr_err("cgroup: sane_behavior: remount is not allowed\n");
    1298                 :          0 :                 return -EINVAL;
    1299                 :            :         }
    1300                 :            : 
    1301                 :          0 :         mutex_lock(&cgrp->dentry->d_inode->i_mutex);
    1302                 :          0 :         mutex_lock(&cgroup_mutex);
    1303                 :          0 :         mutex_lock(&cgroup_root_mutex);
    1304                 :            : 
    1305                 :            :         /* See what subsystems are wanted */
    1306                 :          0 :         ret = parse_cgroupfs_options(data, &opts);
    1307         [ #  # ]:          0 :         if (ret)
    1308                 :            :                 goto out_unlock;
    1309                 :            : 
    1310 [ #  # ][ #  # ]:          0 :         if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
    1311                 :          0 :                 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
    1312                 :            :                            task_tgid_nr(current), current->comm);
    1313                 :            : 
    1314                 :          0 :         added_mask = opts.subsys_mask & ~root->subsys_mask;
    1315                 :            :         removed_mask = root->subsys_mask & ~opts.subsys_mask;
    1316                 :            : 
    1317                 :            :         /* Don't allow flags or name to change at remount */
    1318 [ #  # ][ #  # ]:          0 :         if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
    1319         [ #  # ]:          0 :             (opts.name && strcmp(opts.name, root->name))) {
    1320         [ #  # ]:          0 :                 pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
    1321                 :            :                        opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
    1322                 :            :                        root->flags & CGRP_ROOT_OPTION_MASK, root->name);
    1323                 :            :                 ret = -EINVAL;
    1324                 :          0 :                 goto out_unlock;
    1325                 :            :         }
    1326                 :            : 
    1327                 :            :         /* remounting is not allowed for populated hierarchies */
    1328         [ #  # ]:          0 :         if (root->number_of_cgroups > 1) {
    1329                 :            :                 ret = -EBUSY;
    1330                 :            :                 goto out_unlock;
    1331                 :            :         }
    1332                 :            : 
    1333                 :          0 :         ret = rebind_subsystems(root, added_mask, removed_mask);
    1334         [ #  # ]:          0 :         if (ret)
    1335                 :            :                 goto out_unlock;
    1336                 :            : 
    1337         [ #  # ]:          0 :         if (opts.release_agent)
    1338                 :          0 :                 strcpy(root->release_agent_path, opts.release_agent);
    1339                 :            :  out_unlock:
    1340                 :          0 :         kfree(opts.release_agent);
    1341                 :          0 :         kfree(opts.name);
    1342                 :          0 :         mutex_unlock(&cgroup_root_mutex);
    1343                 :          0 :         mutex_unlock(&cgroup_mutex);
    1344                 :          0 :         mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
    1345                 :          0 :         return ret;
    1346                 :            : }
    1347                 :            : 
    1348                 :            : static const struct super_operations cgroup_ops = {
    1349                 :            :         .statfs = simple_statfs,
    1350                 :            :         .drop_inode = generic_delete_inode,
    1351                 :            :         .show_options = cgroup_show_options,
    1352                 :            :         .remount_fs = cgroup_remount,
    1353                 :            : };
    1354                 :            : 
    1355                 :          0 : static void init_cgroup_housekeeping(struct cgroup *cgrp)
    1356                 :            : {
    1357                 :          5 :         INIT_LIST_HEAD(&cgrp->sibling);
    1358                 :          5 :         INIT_LIST_HEAD(&cgrp->children);
    1359                 :          5 :         INIT_LIST_HEAD(&cgrp->files);
    1360                 :          5 :         INIT_LIST_HEAD(&cgrp->cset_links);
    1361                 :          5 :         INIT_LIST_HEAD(&cgrp->release_list);
    1362                 :          5 :         INIT_LIST_HEAD(&cgrp->pidlists);
    1363                 :          5 :         mutex_init(&cgrp->pidlist_mutex);
    1364                 :          5 :         cgrp->dummy_css.cgroup = cgrp;
    1365                 :          5 :         INIT_LIST_HEAD(&cgrp->event_list);
    1366                 :          5 :         spin_lock_init(&cgrp->event_list_lock);
    1367                 :            :         simple_xattrs_init(&cgrp->xattrs);
    1368                 :          5 : }
    1369                 :            : 
    1370                 :          0 : static void init_cgroup_root(struct cgroupfs_root *root)
    1371                 :            : {
    1372                 :          3 :         struct cgroup *cgrp = &root->top_cgroup;
    1373                 :            : 
    1374                 :          3 :         INIT_LIST_HEAD(&root->subsys_list);
    1375                 :          3 :         INIT_LIST_HEAD(&root->root_list);
    1376                 :          3 :         root->number_of_cgroups = 1;
    1377                 :          3 :         cgrp->root = root;
    1378                 :          3 :         RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
    1379                 :          3 :         init_cgroup_housekeeping(cgrp);
    1380                 :          3 :         idr_init(&root->cgroup_idr);
    1381                 :          3 : }
    1382                 :            : 
    1383                 :            : static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
    1384                 :            : {
    1385                 :            :         int id;
    1386                 :            : 
    1387                 :            :         lockdep_assert_held(&cgroup_mutex);
    1388                 :            :         lockdep_assert_held(&cgroup_root_mutex);
    1389                 :            : 
    1390                 :          3 :         id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, start, end,
    1391                 :            :                               GFP_KERNEL);
    1392   [ #  #  +  - ]:          3 :         if (id < 0)
    1393                 :            :                 return id;
    1394                 :            : 
    1395                 :          3 :         root->hierarchy_id = id;
    1396                 :            :         return 0;
    1397                 :            : }
    1398                 :            : 
    1399                 :            : static void cgroup_exit_root_id(struct cgroupfs_root *root)
    1400                 :            : {
    1401                 :            :         lockdep_assert_held(&cgroup_mutex);
    1402                 :            :         lockdep_assert_held(&cgroup_root_mutex);
    1403                 :            : 
    1404 [ +  - ][ #  # ]:          3 :         if (root->hierarchy_id) {
    1405                 :          3 :                 idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
    1406                 :          3 :                 root->hierarchy_id = 0;
    1407                 :            :         }
    1408                 :            : }
    1409                 :            : 
    1410                 :          0 : static int cgroup_test_super(struct super_block *sb, void *data)
    1411                 :            : {
    1412                 :            :         struct cgroup_sb_opts *opts = data;
    1413                 :          0 :         struct cgroupfs_root *root = sb->s_fs_info;
    1414                 :            : 
    1415                 :            :         /* If we asked for a name then it must match */
    1416 [ #  # ][ #  # ]:          0 :         if (opts->name && strcmp(opts->name, root->name))
    1417                 :            :                 return 0;
    1418                 :            : 
    1419                 :            :         /*
    1420                 :            :          * If we asked for subsystems (or explicitly for no
    1421                 :            :          * subsystems) then they must match
    1422                 :            :          */
    1423 [ #  # ][ #  # ]:          0 :         if ((opts->subsys_mask || opts->none)
    1424         [ #  # ]:          0 :             && (opts->subsys_mask != root->subsys_mask))
    1425                 :            :                 return 0;
    1426                 :            : 
    1427                 :          0 :         return 1;
    1428                 :            : }
    1429                 :            : 
    1430                 :          0 : static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
    1431                 :            : {
    1432                 :            :         struct cgroupfs_root *root;
    1433                 :            : 
    1434 [ +  - ][ +  - ]:          3 :         if (!opts->subsys_mask && !opts->none)
    1435                 :            :                 return NULL;
    1436                 :            : 
    1437                 :            :         root = kzalloc(sizeof(*root), GFP_KERNEL);
    1438         [ +  - ]:          3 :         if (!root)
    1439                 :            :                 return ERR_PTR(-ENOMEM);
    1440                 :            : 
    1441                 :          3 :         init_cgroup_root(root);
    1442                 :            : 
    1443                 :            :         /*
    1444                 :            :          * We need to set @root->subsys_mask now so that @root can be
    1445                 :            :          * matched by cgroup_test_super() before it finishes
    1446                 :            :          * initialization; otherwise, competing mounts with the same
    1447                 :            :          * options may try to bind the same subsystems instead of waiting
    1448                 :            :          * for the first one leading to unexpected mount errors.
    1449                 :            :          * SUBSYS_BOUND will be set once actual binding is complete.
    1450                 :            :          */
    1451                 :          3 :         root->subsys_mask = opts->subsys_mask;
    1452                 :          3 :         root->flags = opts->flags;
    1453         [ -  + ]:          3 :         if (opts->release_agent)
    1454                 :          0 :                 strcpy(root->release_agent_path, opts->release_agent);
    1455         [ +  - ]:          3 :         if (opts->name)
    1456                 :          3 :                 strcpy(root->name, opts->name);
    1457         [ #  # ]:          3 :         if (opts->cpuset_clone_children)
    1458                 :          0 :                 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
    1459                 :          3 :         return root;
    1460                 :            : }
    1461                 :            : 
    1462                 :          0 : static void cgroup_free_root(struct cgroupfs_root *root)
    1463                 :            : {
    1464         [ +  - ]:          3 :         if (root) {
    1465                 :            :                 /* hierarchy ID should already have been released */
    1466 [ -  + ][ #  # ]:          3 :                 WARN_ON_ONCE(root->hierarchy_id);
                 [ #  # ]
    1467                 :            : 
    1468                 :          3 :                 idr_destroy(&root->cgroup_idr);
    1469                 :          3 :                 kfree(root);
    1470                 :            :         }
    1471                 :          3 : }
    1472                 :            : 
    1473                 :          0 : static int cgroup_set_super(struct super_block *sb, void *data)
    1474                 :            : {
    1475                 :            :         int ret;
    1476                 :            :         struct cgroup_sb_opts *opts = data;
    1477                 :            : 
    1478                 :            :         /* If we don't have a new root, we can't set up a new sb */
    1479         [ +  - ]:          3 :         if (!opts->new_root)
    1480                 :            :                 return -EINVAL;
    1481                 :            : 
    1482 [ +  - ][ -  + ]:          3 :         BUG_ON(!opts->subsys_mask && !opts->none);
    1483                 :            : 
    1484                 :          3 :         ret = set_anon_super(sb, NULL);
    1485         [ +  - ]:          3 :         if (ret)
    1486                 :            :                 return ret;
    1487                 :            : 
    1488                 :          3 :         sb->s_fs_info = opts->new_root;
    1489                 :          3 :         opts->new_root->sb = sb;
    1490                 :            : 
    1491                 :          3 :         sb->s_blocksize = PAGE_CACHE_SIZE;
    1492                 :          3 :         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
    1493                 :          3 :         sb->s_magic = CGROUP_SUPER_MAGIC;
    1494                 :          3 :         sb->s_op = &cgroup_ops;
    1495                 :            : 
    1496                 :          3 :         return 0;
    1497                 :            : }
    1498                 :            : 
    1499                 :          0 : static int cgroup_get_rootdir(struct super_block *sb)
    1500                 :            : {
    1501                 :            :         static const struct dentry_operations cgroup_dops = {
    1502                 :            :                 .d_iput = cgroup_diput,
    1503                 :            :                 .d_delete = always_delete_dentry,
    1504                 :            :         };
    1505                 :            : 
    1506                 :          3 :         struct inode *inode =
    1507                 :            :                 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
    1508                 :            : 
    1509         [ +  - ]:          3 :         if (!inode)
    1510                 :            :                 return -ENOMEM;
    1511                 :            : 
    1512                 :          3 :         inode->i_fop = &simple_dir_operations;
    1513                 :          3 :         inode->i_op = &cgroup_dir_inode_operations;
    1514                 :            :         /* directories start off with i_nlink == 2 (for "." entry) */
    1515                 :          3 :         inc_nlink(inode);
    1516                 :          3 :         sb->s_root = d_make_root(inode);
    1517         [ +  - ]:          3 :         if (!sb->s_root)
    1518                 :            :                 return -ENOMEM;
    1519                 :            :         /* for everything else we want ->d_op set */
    1520                 :          3 :         sb->s_d_op = &cgroup_dops;
    1521                 :          3 :         return 0;
    1522                 :            : }
    1523                 :            : 
    1524                 :          0 : static struct dentry *cgroup_mount(struct file_system_type *fs_type,
    1525                 :            :                          int flags, const char *unused_dev_name,
    1526                 :            :                          void *data)
    1527                 :            : {
    1528                 :            :         struct cgroup_sb_opts opts;
    1529                 :            :         struct cgroupfs_root *root;
    1530                 :            :         int ret = 0;
    1531                 :            :         struct super_block *sb;
    1532                 :            :         struct cgroupfs_root *new_root;
    1533                 :            :         struct list_head tmp_links;
    1534                 :            :         struct inode *inode;
    1535                 :            :         const struct cred *cred;
    1536                 :            : 
    1537                 :            :         /* First find the desired set of subsystems */
    1538                 :       3193 :         mutex_lock(&cgroup_mutex);
    1539                 :       3193 :         ret = parse_cgroupfs_options(data, &opts);
    1540                 :       3193 :         mutex_unlock(&cgroup_mutex);
    1541         [ +  + ]:       3193 :         if (ret)
    1542                 :            :                 goto out_err;
    1543                 :            : 
    1544                 :            :         /*
    1545                 :            :          * Allocate a new cgroup root. We may not need it if we're
    1546                 :            :          * reusing an existing hierarchy.
    1547                 :            :          */
    1548                 :          3 :         new_root = cgroup_root_from_opts(&opts);
    1549         [ -  + ]:          3 :         if (IS_ERR(new_root)) {
    1550                 :            :                 ret = PTR_ERR(new_root);
    1551                 :          0 :                 goto out_err;
    1552                 :            :         }
    1553                 :          3 :         opts.new_root = new_root;
    1554                 :            : 
    1555                 :            :         /* Locate an existing or new sb for this hierarchy */
    1556                 :          3 :         sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
    1557         [ -  + ]:          3 :         if (IS_ERR(sb)) {
    1558                 :            :                 ret = PTR_ERR(sb);
    1559                 :          0 :                 cgroup_free_root(opts.new_root);
    1560                 :          0 :                 goto out_err;
    1561                 :            :         }
    1562                 :            : 
    1563                 :          3 :         root = sb->s_fs_info;
    1564         [ -  + ]:          3 :         BUG_ON(!root);
    1565         [ +  - ]:          3 :         if (root == opts.new_root) {
    1566                 :            :                 /* We used the new root structure, so this is a new hierarchy */
    1567                 :          3 :                 struct cgroup *root_cgrp = &root->top_cgroup;
    1568                 :            :                 struct cgroupfs_root *existing_root;
    1569                 :            :                 int i;
    1570                 :            :                 struct css_set *cset;
    1571                 :            : 
    1572         [ -  + ]:          3 :                 BUG_ON(sb->s_root != NULL);
    1573                 :            : 
    1574                 :          3 :                 ret = cgroup_get_rootdir(sb);
    1575         [ +  - ]:          3 :                 if (ret)
    1576                 :            :                         goto drop_new_super;
    1577                 :          3 :                 inode = sb->s_root->d_inode;
    1578                 :            : 
    1579                 :          3 :                 mutex_lock(&inode->i_mutex);
    1580                 :          3 :                 mutex_lock(&cgroup_mutex);
    1581                 :          3 :                 mutex_lock(&cgroup_root_mutex);
    1582                 :            : 
    1583                 :          3 :                 root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp,
    1584                 :            :                                            0, 1, GFP_KERNEL);
    1585         [ +  - ]:          3 :                 if (root_cgrp->id < 0)
    1586                 :            :                         goto unlock_drop;
    1587                 :            : 
    1588                 :            :                 /* Check for name clashes with existing mounts */
    1589                 :            :                 ret = -EBUSY;
    1590         [ +  - ]:          3 :                 if (strlen(root->name))
    1591         [ -  + ]:          3 :                         for_each_active_root(existing_root)
    1592         [ #  # ]:          0 :                                 if (!strcmp(existing_root->name, root->name))
    1593                 :            :                                         goto unlock_drop;
    1594                 :            : 
    1595                 :            :                 /*
    1596                 :            :                  * We're accessing css_set_count without locking
    1597                 :            :                  * css_set_lock here, but that's OK - it can only be
    1598                 :            :                  * increased by someone holding cgroup_lock, and
    1599                 :            :                  * that's us. The worst that can happen is that we
    1600                 :            :                  * have some link structures left over
    1601                 :            :                  */
    1602                 :          3 :                 ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
    1603         [ +  - ]:          3 :                 if (ret)
    1604                 :            :                         goto unlock_drop;
    1605                 :            : 
    1606                 :            :                 /* ID 0 is reserved for dummy root, 1 for unified hierarchy */
    1607                 :            :                 ret = cgroup_init_root_id(root, 2, 0);
    1608         [ +  - ]:          3 :                 if (ret)
    1609                 :            :                         goto unlock_drop;
    1610                 :            : 
    1611                 :          3 :                 sb->s_root->d_fsdata = root_cgrp;
    1612                 :          3 :                 root_cgrp->dentry = sb->s_root;
    1613                 :            : 
    1614                 :            :                 /*
    1615                 :            :                  * We're inside get_sb() and will call lookup_one_len() to
    1616                 :            :                  * create the root files, which doesn't work if SELinux is
    1617                 :            :                  * in use.  The following cred dancing somehow works around
    1618                 :            :                  * it.  See 2ce9738ba ("cgroupfs: use init_cred when
    1619                 :            :                  * populating new cgroupfs mount") for more details.
    1620                 :            :                  */
    1621                 :          3 :                 cred = override_creds(&init_cred);
    1622                 :            : 
    1623                 :          3 :                 ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
    1624         [ +  - ]:          3 :                 if (ret)
    1625                 :            :                         goto rm_base_files;
    1626                 :            : 
    1627                 :          3 :                 ret = rebind_subsystems(root, root->subsys_mask, 0);
    1628         [ +  - ]:          3 :                 if (ret)
    1629                 :            :                         goto rm_base_files;
    1630                 :            : 
    1631                 :          3 :                 revert_creds(cred);
    1632                 :            : 
    1633                 :            :                 /*
    1634                 :            :                  * There must be no failure case after here, since rebinding
    1635                 :            :                  * takes care of subsystems' refcounts, which are explicitly
    1636                 :            :                  * dropped in the failure exit path.
    1637                 :            :                  */
    1638                 :            : 
    1639                 :          3 :                 list_add(&root->root_list, &cgroup_roots);
    1640                 :          3 :                 cgroup_root_count++;
    1641                 :            : 
    1642                 :            :                 /* Link the top cgroup in this hierarchy into all
    1643                 :            :                  * the css_set objects */
    1644                 :          3 :                 write_lock(&css_set_lock);
    1645 [ +  + ][ -  + ]:       3583 :                 hash_for_each(css_set_table, i, cset, hlist)
         [ +  + ][ +  + ]
    1646                 :          3 :                         link_css_set(&tmp_links, cset, root_cgrp);
    1647                 :            :                 write_unlock(&css_set_lock);
    1648                 :            : 
    1649                 :          3 :                 free_cgrp_cset_links(&tmp_links);
    1650                 :            : 
    1651         [ -  + ]:          3 :                 BUG_ON(!list_empty(&root_cgrp->children));
    1652         [ -  + ]:          3 :                 BUG_ON(root->number_of_cgroups != 1);
    1653                 :            : 
    1654                 :          3 :                 mutex_unlock(&cgroup_root_mutex);
    1655                 :          3 :                 mutex_unlock(&cgroup_mutex);
    1656                 :          3 :                 mutex_unlock(&inode->i_mutex);
    1657                 :            :         } else {
    1658                 :            :                 /*
    1659                 :            :                  * We re-used an existing hierarchy - the new root (if
    1660                 :            :                  * any) is not needed
    1661                 :            :                  */
    1662                 :          0 :                 cgroup_free_root(opts.new_root);
    1663                 :            : 
    1664         [ #  # ]:          0 :                 if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
    1665         [ #  # ]:          0 :                         if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
    1666                 :          0 :                                 pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
    1667                 :            :                                 ret = -EINVAL;
    1668                 :          0 :                                 goto drop_new_super;
    1669                 :            :                         } else {
    1670                 :          0 :                                 pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
    1671                 :            :                         }
    1672                 :            :                 }
    1673                 :            :         }
    1674                 :            : 
    1675                 :          3 :         kfree(opts.release_agent);
    1676                 :          3 :         kfree(opts.name);
    1677                 :          3 :         return dget(sb->s_root);
    1678                 :            : 
    1679                 :            :  rm_base_files:
    1680                 :          0 :         free_cgrp_cset_links(&tmp_links);
    1681                 :          0 :         cgroup_addrm_files(&root->top_cgroup, cgroup_base_files, false);
    1682                 :          0 :         revert_creds(cred);
    1683                 :            :  unlock_drop:
    1684                 :            :         cgroup_exit_root_id(root);
    1685                 :          0 :         mutex_unlock(&cgroup_root_mutex);
    1686                 :          0 :         mutex_unlock(&cgroup_mutex);
    1687                 :          0 :         mutex_unlock(&inode->i_mutex);
    1688                 :            :  drop_new_super:
    1689                 :          0 :         deactivate_locked_super(sb);
    1690                 :            :  out_err:
    1691                 :       3190 :         kfree(opts.release_agent);
    1692                 :       3190 :         kfree(opts.name);
    1693                 :       3190 :         return ERR_PTR(ret);
    1694                 :            : }
    1695                 :            : /*
                         :            :  * cgroup_kill_sb - ->kill_sb callback of cgroup_fs_type
                         :            :  * @sb: superblock being shut down; sb->s_fs_info holds its cgroupfs_root
                         :            :  *
                         :            :  * BUGs unless the hierarchy is down to its top cgroup.  With the top
                         :            :  * cgroup inode's i_mutex, cgroup_mutex and cgroup_root_mutex held it
                         :            :  * rebinds the root's subsystems (if any are bound), frees every
                         :            :  * cgrp_cset_link on the top cgroup, takes the root off root_list and
                         :            :  * releases its root id.  After dropping the locks it frees the top
                         :            :  * cgroup's xattrs, kills the superblock and frees the root.
                         :            :  */
    1696                 :          0 : static void cgroup_kill_sb(struct super_block *sb) {
    1697                 :          3 :         struct cgroupfs_root *root = sb->s_fs_info;
    1698                 :            :         struct cgroup *cgrp = &root->top_cgroup;
    1699                 :            :         struct cgrp_cset_link *link, *tmp_link;
    1700                 :            :         int ret;
    1701                 :            : 
    1702         [ -  + ]:          3 :         BUG_ON(!root);
    1703                 :            :         /* nothing but the top cgroup may be left in this hierarchy */
    1704         [ -  + ]:          3 :         BUG_ON(root->number_of_cgroups != 1);
    1705         [ -  + ]:          3 :         BUG_ON(!list_empty(&cgrp->children));
    1706                 :            : 
    1707                 :          3 :         mutex_lock(&cgrp->dentry->d_inode->i_mutex);
    1708                 :          3 :         mutex_lock(&cgroup_mutex);
    1709                 :          3 :         mutex_lock(&cgroup_root_mutex);
    1710                 :            : 
    1711                 :            :         /* Rebind all subsystems back to the default hierarchy */
    1712         [ +  - ]:          3 :         if (root->flags & CGRP_ROOT_SUBSYS_BOUND) {
    1713                 :          3 :                 ret = rebind_subsystems(root, 0, root->subsys_mask);
    1714                 :            :                 /* Shouldn't be able to fail ... */
    1715         [ -  + ]:          3 :                 BUG_ON(ret);
    1716                 :            :         }
    1717                 :            : 
    1718                 :            :         /*
    1719                 :            :          * Release all the links from cset_links to this hierarchy's
    1720                 :            :          * root cgroup
    1721                 :            :          */
    1722                 :          3 :         write_lock(&css_set_lock);
    1723                 :            : 
    1724         [ +  + ]:          6 :         list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
    1725                 :            :                 list_del(&link->cset_link);
    1726                 :            :                 list_del(&link->cgrp_link);
    1727                 :          3 :                 kfree(link);
    1728                 :            :         }
    1729                 :            :         write_unlock(&css_set_lock);
    1730                 :            :         /* take the root off root_list, if it is on it, and fix the count */
    1731         [ +  - ]:          3 :         if (!list_empty(&root->root_list)) {
    1732                 :            :                 list_del(&root->root_list);
    1733                 :          3 :                 cgroup_root_count--;
    1734                 :            :         }
    1735                 :            : 
    1736                 :            :         cgroup_exit_root_id(root);
    1737                 :            : 
    1738                 :          3 :         mutex_unlock(&cgroup_root_mutex);
    1739                 :          3 :         mutex_unlock(&cgroup_mutex);
    1740                 :          3 :         mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
    1741                 :            :         /* the rest of the teardown runs with the three mutexes released */
    1742                 :          3 :         simple_xattrs_free(&cgrp->xattrs);
    1743                 :            : 
    1744                 :          3 :         kill_litter_super(sb);
    1745                 :          3 :         cgroup_free_root(root);
    1746                 :          3 : }
    1747                 :            : /*
                         :            :  * Descriptor for the "cgroup" filesystem type: mounting goes through
                         :            :  * cgroup_mount() and superblock teardown through cgroup_kill_sb().
                         :            :  */
    1748                 :            : static struct file_system_type cgroup_fs_type = {
    1749                 :            :         .name = "cgroup",
    1750                 :            :         .mount = cgroup_mount,
    1751                 :            :         .kill_sb = cgroup_kill_sb,
    1752                 :            : };
    1753                 :            : /*
                         :            :  * NOTE(review): not referenced in this part of the file; presumably the
                         :            :  * kobject behind the cgroup sysfs entry - confirm where it is assigned.
                         :            :  */
    1754                 :            : static struct kobject *cgroup_kobj;
    1755                 :            : 
    1756                 :            : /**
    1757                 :            :  * cgroup_path - generate the path of a cgroup
    1758                 :            :  * @cgrp: the cgroup in question
    1759                 :            :  * @buf: the buffer to write the path into
    1760                 :            :  * @buflen: the length of the buffer
    1761                 :            :  *
    1762                 :            :  * Writes the path of @cgrp into @buf.  Returns 0 on success, or
                         :            :  * -ENAMETOOLONG if the path does not fit into @buflen bytes.
                         :            :  *
                         :            :  * A cgroup without a parent is reported as "/".  For any other cgroup the
                         :            :  * path is assembled right-to-left at the end of @buf, one '/' plus name
                         :            :  * per level while walking up the ->parent chain, and is then moved to the
                         :            :  * start of @buf.  The walk runs under rcu_read_lock().
    1763                 :            :  *
    1764                 :            :  * We can't generate cgroup path using dentry->d_name, as accessing
    1765                 :            :  * dentry->d_name must be protected by irq-unsafe dentry->d_lock or parent
    1766                 :            :  * inode's i_mutex, while on the other hand cgroup_path() can be called
    1767                 :            :  * with some irq-safe spinlocks held.
    1768                 :            :  */
    1769                 :          0 : int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
    1770                 :            : {
    1771                 :            :         int ret = -ENAMETOOLONG;
    1772                 :            :         char *start;
    1773                 :            :         /* a cgroup with no parent is the root of its hierarchy: "/" */
    1774         [ #  # ]:          0 :         if (!cgrp->parent) {
    1775         [ #  # ]:          0 :                 if (strlcpy(buf, "/", buflen) >= buflen)
    1776                 :            :                         return -ENAMETOOLONG;
    1777                 :          0 :                 return 0;
    1778                 :            :         }
    1779                 :            :         /* build the path backwards, beginning with the terminating NUL */
    1780                 :          0 :         start = buf + buflen - 1;
    1781                 :          0 :         *start = '\0';
    1782                 :            : 
    1783                 :            :         rcu_read_lock();
    1784                 :            :         do {
    1785                 :          0 :                 const char *name = cgroup_name(cgrp);
    1786                 :            :                 int len;
    1787                 :            :                 /* step back over the name; bail out if it does not fit */
    1788                 :          0 :                 len = strlen(name);
    1789         [ #  # ]:          0 :                 if ((start -= len) < buf)
    1790                 :            :                         goto out;
    1791                 :          0 :                 memcpy(start, name, len);
    1792                 :            :                 /* one more byte for the separator in front of the name */
    1793         [ #  # ]:          0 :                 if (--start < buf)
    1794                 :            :                         goto out;
    1795                 :          0 :                 *start = '/';
    1796                 :            : 
    1797                 :          0 :                 cgrp = cgrp->parent;
    1798         [ #  # ]:          0 :         } while (cgrp->parent);
    1799                 :            :         ret = 0;
    1800                 :          0 :         memmove(buf, start, buf + buflen - start); /* shift the path, NUL included, to the front */
    1801                 :            : out:
    1802                 :            :         rcu_read_unlock();
    1803                 :          0 :         return ret;
    1804                 :            : }
    1805                 :            : EXPORT_SYMBOL_GPL(cgroup_path);
    1806                 :            : 
    1807                 :            : /**
    1808                 :            :  * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
    1809                 :            :  * @task: target task
    1810                 :            :  * @buf: the buffer to write the path into
    1811                 :            :  * @buflen: the length of the buffer
    1812                 :            :  *
    1813                 :            :  * Determine @task's cgroup on the first (the one with the lowest non-zero
    1814                 :            :  * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
    1815                 :            :  * function grabs cgroup_mutex and shouldn't be used inside locks used by
    1816                 :            :  * cgroup controller callbacks.
    1817                 :            :  *
                         :            :  * If no such hierarchy exists, "/" is written to @buf and 0 is returned.
                         :            :  *
    1818                 :            :  * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
                         :            :  *
                         :            :  * NOTE(review): @buflen is a size_t here but is handed to cgroup_path(),
                         :            :  * which takes an int - confirm callers never pass values above INT_MAX.
    1819                 :            :  */
    1820                 :          0 : int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
    1821                 :            : {
    1822                 :            :         struct cgroupfs_root *root;
    1823                 :            :         struct cgroup *cgrp;
    1824                 :          0 :         int hierarchy_id = 1, ret = 0;
    1825                 :            :         /* the shortest result, "/" plus its NUL, already needs two bytes */
    1826         [ #  # ]:          0 :         if (buflen < 2)
    1827                 :            :                 return -ENAMETOOLONG;
    1828                 :            : 
    1829                 :          0 :         mutex_lock(&cgroup_mutex);
    1830                 :            :         /* search the hierarchy idr starting from id 1 */
    1831                 :          0 :         root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
    1832                 :            : 
    1833         [ #  # ]:          0 :         if (root) {
    1834                 :          0 :                 cgrp = task_cgroup_from_root(task, root);
    1835                 :          0 :                 ret = cgroup_path(cgrp, buf, buflen);
    1836                 :            :         } else {
    1837                 :            :                 /* if no hierarchy exists, everyone is in "/" */
    1838                 :          0 :                 memcpy(buf, "/", 2);
    1839                 :            :         }
    1840                 :            : 
    1841                 :          0 :         mutex_unlock(&cgroup_mutex);
    1842                 :          0 :         return ret;
    1843                 :            : }
    1844                 :            : EXPORT_SYMBOL_GPL(task_cgroup_path);
    1845                 :            : 
    1846                 :            : /*
    1847                 :            :  * Control Group taskset
                         :            :  *
                         :            :  * struct task_and_cgroup is one entry of a taskset: a task picked for
                         :            :  * migration together with its source cgroup and destination css_set.
    1848                 :            :  */
    1849                 :            : struct task_and_cgroup {
    1850                 :            :         struct task_struct      *task; /* task to be migrated */
    1851                 :            :         struct cgroup           *cgrp; /* cgroup the task is leaving */
    1852                 :            :         struct css_set          *cset; /* css_set the task is switched to */
    1853                 :            : };
    1854                 :            : 
    1855                 :            : struct cgroup_taskset { /* set of tasks walked via cgroup_taskset_first/next() */
    1856                 :            :         struct task_and_cgroup  single; /* the only entry when tc_array is NULL */
    1857                 :            :         struct flex_array       *tc_array; /* array of struct task_and_cgroup, or NULL */
    1858                 :            :         int                     tc_array_len; /* number of entries in tc_array */
    1859                 :            :         int                     idx; /* index of the next tc_array entry to return */
    1860                 :            :         struct cgroup           *cur_cgrp; /* ->cgrp of the entry returned last */
    1861                 :            : };
    1862                 :            : 
    1863                 :            : /**
    1864                 :            :  * cgroup_taskset_first - reset taskset and return the first task
    1865                 :            :  * @tset: taskset of interest
    1866                 :            :  *
    1867                 :            :  * @tset iteration is initialized and the first task is returned.
                         :            :  *
                         :            :  * With a tc_array attached the index is rewound and the result is that of
                         :            :  * cgroup_taskset_next(); otherwise the embedded single entry is used and
                         :            :  * its cgroup becomes the current one.
    1868                 :            :  */
    1869                 :          0 : struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
    1870                 :            : {
    1871         [ #  # ]:          0 :         if (tset->tc_array) {
    1872                 :          0 :                 tset->idx = 0; /* restart the walk over tc_array */
    1873                 :          0 :                 return cgroup_taskset_next(tset);
    1874                 :            :         } else {
    1875                 :          0 :                 tset->cur_cgrp = tset->single.cgrp;
    1876                 :          0 :                 return tset->single.task;
    1877                 :            :         }
    1878                 :            : }
    1879                 :            : EXPORT_SYMBOL_GPL(cgroup_taskset_first);
    1880                 :            : 
    1881                 :            : /**
    1882                 :            :  * cgroup_taskset_next - iterate to the next task in taskset
    1883                 :            :  * @tset: taskset of interest
    1884                 :            :  *
    1885                 :            :  * Return the next task in @tset.  Iteration must have been initialized
    1886                 :            :  * with cgroup_taskset_first().
                         :            :  *
                         :            :  * Returns NULL when @tset has no tc_array or every entry has already been
                         :            :  * returned.  Otherwise the index advances by one and the returned task's
                         :            :  * cgroup is remembered in @tset->cur_cgrp.
    1887                 :            :  */
    1888                 :          0 : struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
    1889                 :            : {
    1890                 :            :         struct task_and_cgroup *tc;
    1891                 :            : 
    1892 [ #  # ][ #  # ]:          0 :         if (!tset->tc_array || tset->idx >= tset->tc_array_len)
    1893                 :            :                 return NULL;
    1894                 :            : 
    1895                 :          0 :         tc = flex_array_get(tset->tc_array, tset->idx++);
    1896                 :          0 :         tset->cur_cgrp = tc->cgrp; /* read back by cgroup_taskset_cur_css() */
    1897                 :          0 :         return tc->task;
    1898                 :            : }
    1899                 :            : EXPORT_SYMBOL_GPL(cgroup_taskset_next);
    1900                 :            : 
    1901                 :            : /**
    1902                 :            :  * cgroup_taskset_cur_css - return the matching css for the current task
    1903                 :            :  * @tset: taskset of interest
    1904                 :            :  * @subsys_id: the ID of the target subsystem
    1905                 :            :  *
    1906                 :            :  * Return the css for the current (last returned) task of @tset for
    1907                 :            :  * subsystem specified by @subsys_id.  This function must be preceded by
    1908                 :            :  * either cgroup_taskset_first() or cgroup_taskset_next().
                         :            :  *
                         :            :  * The lookup is cgroup_css() on @tset->cur_cgrp, the cgroup recorded by
                         :            :  * those iterators, with @subsys_id used as an index into cgroup_subsys[].
    1909                 :            :  */
    1910                 :          0 : struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
    1911                 :            :                                                    int subsys_id)
    1912                 :            : {
    1913                 :          0 :         return cgroup_css(tset->cur_cgrp, cgroup_subsys[subsys_id]);
    1914                 :            : }
    1915                 :            : EXPORT_SYMBOL_GPL(cgroup_taskset_cur_css);
    1916                 :            : 
    1917                 :            : /**
    1918                 :            :  * cgroup_taskset_size - return the number of tasks in taskset
    1919                 :            :  * @tset: taskset of interest
                         :            :  *
                         :            :  * Returns tc_array_len when a tc_array is attached, otherwise 1 for the
                         :            :  * embedded single entry.
    1920                 :            :  */
    1921                 :          0 : int cgroup_taskset_size(struct cgroup_taskset *tset)
    1922                 :            : {
    1923         [ #  # ]:          0 :         return tset->tc_array ? tset->tc_array_len : 1;
    1924                 :            : }
    1925                 :            : EXPORT_SYMBOL_GPL(cgroup_taskset_size);
    1926                 :            : 
    1927                 :            : 
    1928                 :            : /*
    1929                 :            :  * cgroup_task_migrate - move a task from one cgroup to another.
                         :            :  * @old_cgrp: cgroup the task leaves; gets CGRP_RELEASABLE set
                         :            :  * @tsk: task being moved
                         :            :  * @new_cset: css_set installed as @tsk->cgroups
                         :            :  *
                         :            :  * Swaps @tsk->cgroups to @new_cset under task_lock(), moves @tsk onto
                         :            :  * @new_cset's task list if @tsk is linked on one, and puts the css_set
                         :            :  * that @tsk used before.
    1930                 :            :  *
    1931                 :            :  * Must be called with cgroup_mutex and threadgroup locked.
    1932                 :            :  */
    1933                 :          0 : static void cgroup_task_migrate(struct cgroup *old_cgrp,
    1934                 :            :                                 struct task_struct *tsk,
    1935                 :            :                                 struct css_set *new_cset)
    1936                 :            : {
    1937                 :            :         struct css_set *old_cset;
    1938                 :            : 
    1939                 :            :         /*
    1940                 :            :          * We are synchronized through threadgroup_lock() against PF_EXITING
    1941                 :            :          * setting such that we can't race against cgroup_exit() changing the
    1942                 :            :          * css_set to init_css_set and dropping the old one.
    1943                 :            :          */
    1944 [ #  # ][ #  # ]:          0 :         WARN_ON_ONCE(tsk->flags & PF_EXITING);
                 [ #  # ]
    1945                 :            :         old_cset = task_css_set(tsk); /* kept so it can be put once the swap is done */
    1946                 :            : 
    1947                 :            :         task_lock(tsk);
    1948                 :          0 :         rcu_assign_pointer(tsk->cgroups, new_cset);
    1949                 :            :         task_unlock(tsk);
    1950                 :            : 
    1951                 :            :         /* Update the css_set linked lists if we're using them */
    1952                 :          0 :         write_lock(&css_set_lock);
    1953         [ #  # ]:          0 :         if (!list_empty(&tsk->cg_list))
    1954                 :          0 :                 list_move(&tsk->cg_list, &new_cset->tasks);
    1955                 :            :         write_unlock(&css_set_lock);
    1956                 :            : 
    1957                 :            :         /*
    1958                 :            :          * We just gained a reference on old_cset by taking it from the
    1959                 :            :          * task. As trading it for new_cset is protected by cgroup_mutex,
    1960                 :            :          * we're safe to drop it here; it will be freed under RCU.
    1961                 :            :          */
    1962                 :          0 :         set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
    1963                 :            :         put_css_set(old_cset);
    1964                 :          0 : }
    1965                 :            : 
    1966                 :            : /**
    1967                 :            :  * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
    1968                 :            :  * @cgrp: the cgroup to attach to
    1969                 :            :  * @tsk: the task or the leader of the threadgroup to be attached
    1970                 :            :  * @threadgroup: attach the whole threadgroup?
    1971                 :            :  *
    1972                 :            :  * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
    1973                 :            :  * task_lock of @tsk or each thread in the threadgroup individually in turn.
                         :            :  *
                         :            :  * Returns 0 on success, including the case where no task needs to move.
                         :            :  * Returns -ENOMEM if the task array cannot be allocated or find_css_set()
                         :            :  * fails, and otherwise passes on the error from flex_array_prealloc() or
                         :            :  * from a subsystem's ->can_attach().  On a failure after step 1 the
                         :            :  * subsystems whose ->can_attach() had succeeded get ->cancel_attach().
    1974                 :            :  */
    1975                 :          0 : static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
    1976                 :            :                               bool threadgroup)
    1977                 :            : {
    1978                 :            :         int retval, i, group_size;
    1979                 :            :         struct cgroup_subsys *ss, *failed_ss = NULL;
    1980                 :          0 :         struct cgroupfs_root *root = cgrp->root;
    1981                 :            :         /* threadgroup list cursor and array */
    1982                 :            :         struct task_struct *leader = tsk;
    1983                 :            :         struct task_and_cgroup *tc;
    1984                 :            :         struct flex_array *group;
    1985                 :          0 :         struct cgroup_taskset tset = { };
    1986                 :            : 
    1987                 :            :         /*
    1988                 :            :          * step 0: in order to do expensive, possibly blocking operations for
    1989                 :            :          * every thread, we cannot iterate the thread group list, since it needs
    1990                 :            :          * rcu or tasklist locked. instead, build an array of all threads in the
    1991                 :            :          * group - group_rwsem prevents new threads from appearing, and if
    1992                 :            :          * threads exit, this will just be an over-estimate.
    1993                 :            :          */
    1994         [ #  # ]:          0 :         if (threadgroup)
    1995                 :            :                 group_size = get_nr_threads(tsk);
    1996                 :            :         else
    1997                 :            :                 group_size = 1;
    1998                 :            :         /* flex_array supports very large thread-groups better than kmalloc. */
    1999                 :          0 :         group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
    2000         [ #  # ]:          0 :         if (!group)
    2001                 :            :                 return -ENOMEM;
    2002                 :            :         /* pre-allocate to guarantee space while iterating in rcu read-side. */
    2003                 :          0 :         retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
    2004         [ #  # ]:          0 :         if (retval)
    2005                 :            :                 goto out_free_group_list;
    2006                 :            : 
    2007                 :            :         i = 0;
    2008                 :            :         /*
    2009                 :            :          * Prevent freeing of tasks while we take a snapshot. Tasks that are
    2010                 :            :          * already PF_EXITING could be freed from underneath us unless we
    2011                 :            :          * take an rcu_read_lock.
    2012                 :            :          */
    2013                 :            :         rcu_read_lock();
    2014                 :            :         do {
    2015                 :            :                 struct task_and_cgroup ent; /* ->cset stays unset here; step 2 fills it in */
    2016                 :            : 
    2017                 :            :                 /* @tsk either already exited or can't exit until the end */
    2018         [ #  # ]:          0 :                 if (tsk->flags & PF_EXITING)
    2019                 :            :                         goto next;
    2020                 :            : 
    2021                 :            :                 /* as per above, nr_threads may decrease, but not increase. */
    2022         [ #  # ]:          0 :                 BUG_ON(i >= group_size);
    2023                 :          0 :                 ent.task = tsk;
    2024                 :          0 :                 ent.cgrp = task_cgroup_from_root(tsk, root);
    2025                 :            :                 /* nothing to do if this task is already in the cgroup */
    2026         [ #  # ]:          0 :                 if (ent.cgrp == cgrp)
    2027                 :            :                         goto next;
    2028                 :            :                 /*
    2029                 :            :                  * saying GFP_ATOMIC has no effect here because we did prealloc
    2030                 :            :                  * earlier, but it's good form to communicate our expectations.
    2031                 :            :                  */
    2032                 :          0 :                 retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
    2033         [ #  # ]:          0 :                 BUG_ON(retval != 0);
    2034                 :          0 :                 i++;
    2035                 :            :         next:
    2036         [ #  # ]:          0 :                 if (!threadgroup)
    2037                 :            :                         break;
    2038         [ #  # ]:          0 :         } while_each_thread(leader, tsk);
    2039                 :            :         rcu_read_unlock();
    2040                 :            :         /* remember the number of threads in the array for later. */
    2041                 :            :         group_size = i;
    2042                 :          0 :         tset.tc_array = group;
    2043                 :          0 :         tset.tc_array_len = group_size;
    2044                 :            : 
    2045                 :            :         /* methods shouldn't be called if no task is actually migrating */
    2046                 :            :         retval = 0;
    2047         [ #  # ]:          0 :         if (!group_size)
    2048                 :            :                 goto out_free_group_list;
    2049                 :            : 
    2050                 :            :         /*
    2051                 :            :          * step 1: check that we can legitimately attach to the cgroup.
    2052                 :            :          */
    2053         [ #  # ]:          0 :         for_each_root_subsys(root, ss) {
    2054                 :            :                 struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
    2055                 :            : 
    2056         [ #  # ]:          0 :                 if (ss->can_attach) {
    2057                 :          0 :                         retval = ss->can_attach(css, &tset);
    2058         [ #  # ]:          0 :                         if (retval) {
    2059                 :            :                                 failed_ss = ss; /* the cancel loop stops at this subsystem */
    2060                 :            :                                 goto out_cancel_attach;
    2061                 :            :                         }
    2062                 :            :                 }
    2063                 :            :         }
    2064                 :            : 
    2065                 :            :         /*
    2066                 :            :          * step 2: make sure css_sets exist for all threads to be migrated.
    2067                 :            :          * we use find_css_set, which allocates a new one if necessary.
    2068                 :            :          */
    2069         [ #  # ]:          0 :         for (i = 0; i < group_size; i++) {
    2070                 :            :                 struct css_set *old_cset;
    2071                 :            : 
    2072                 :          0 :                 tc = flex_array_get(group, i);
    2073                 :          0 :                 old_cset = task_css_set(tc->task);
    2074                 :          0 :                 tc->cset = find_css_set(old_cset, cgrp);
    2075         [ #  # ]:          0 :                 if (!tc->cset) {
    2076                 :            :                         retval = -ENOMEM;
    2077                 :            :                         goto out_put_css_set_refs;
    2078                 :            :                 }
    2079                 :            :         }
    2080                 :            : 
    2081                 :            :         /*
    2082                 :            :          * step 3: now that we're guaranteed success wrt the css_sets,
    2083                 :            :          * proceed to move all tasks to the new cgroup.  There are no
    2084                 :            :          * failure cases after here, so this is the commit point.
    2085                 :            :          */
    2086         [ #  # ]:          0 :         for (i = 0; i < group_size; i++) {
    2087                 :          0 :                 tc = flex_array_get(group, i);
    2088                 :          0 :                 cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
    2089                 :            :         }
    2090                 :            :         /* nothing is sensitive to fork() after this point. */
    2091                 :            : 
    2092                 :            :         /*
    2093                 :            :          * step 4: do subsystem attach callbacks.
    2094                 :            :          */
    2095         [ #  # ]:          0 :         for_each_root_subsys(root, ss) {
    2096                 :            :                 struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
    2097                 :            : 
    2098         [ #  # ]:          0 :                 if (ss->attach)
    2099                 :          0 :                         ss->attach(css, &tset);
    2100                 :            :         }
    2101                 :            : 
    2102                 :            :         /*
    2103                 :            :          * step 5: success! and cleanup
    2104                 :            :          */
    2105                 :            :         retval = 0;
    2106                 :            : out_put_css_set_refs: /* on failure, put the css_sets obtained in step 2 */
    2107         [ #  # ]:          0 :         if (retval) {
    2108         [ #  # ]:          0 :                 for (i = 0; i < group_size; i++) {
    2109                 :          0 :                         tc = flex_array_get(group, i);
    2110         [ #  # ]:          0 :                         if (!tc->cset) /* the entry whose find_css_set() failed ends the walk */
    2111                 :            :                                 break;
    2112                 :            :                         put_css_set(tc->cset);
    2113                 :            :                 }
    2114                 :            :         }
    2115                 :            : out_cancel_attach: /* on failure, undo can_attach up to failed_ss (all subsystems if it is NULL) */
    2116         [ #  # ]:          0 :         if (retval) {
    2117         [ #  # ]:          0 :                 for_each_root_subsys(root, ss) {
    2118                 :            :                         struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
    2119                 :            : 
    2120         [ #  # ]:          0 :                         if (ss == failed_ss)
    2121                 :            :                                 break;
    2122         [ #  # ]:          0 :                         if (ss->cancel_attach)
    2123                 :          0 :                                 ss->cancel_attach(css, &tset);
    2124                 :            :                 }
    2125                 :            :         }
    2126                 :            : out_free_group_list:
    2127                 :          0 :         flex_array_free(group);
    2128                 :          0 :         return retval;
    2129                 :            : }
    2130                 :            : 
    2131                 :            : /*
    2132                 :            :  * Find the task_struct of the task to attach by vpid and pass it along to the
    2133                 :            :  * function to attach either it or all tasks in its threadgroup. Will lock
    2134                 :            :  * cgroup_mutex and threadgroup; may take task_lock of task.
    2135                 :            :  */
    2136                 :          0 : static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
    2137                 :            : {
    2138                 :            :         struct task_struct *tsk;
    2139                 :          0 :         const struct cred *cred = current_cred(), *tcred;
    2140                 :            :         int ret;
    2141                 :            : 
    2142         [ #  # ]:          0 :         if (!cgroup_lock_live_group(cgrp))
    2143                 :            :                 return -ENODEV;
    2144                 :            : 
    2145                 :            : retry_find_task:
    2146                 :            :         rcu_read_lock();
    2147         [ #  # ]:          0 :         if (pid) {
    2148                 :          0 :                 tsk = find_task_by_vpid(pid);
    2149         [ #  # ]:          0 :                 if (!tsk) {
    2150                 :            :                         rcu_read_unlock();
    2151                 :            :                         ret= -ESRCH;
    2152                 :          0 :                         goto out_unlock_cgroup;
    2153                 :            :                 }
    2154                 :            :                 /*
    2155                 :            :                  * even if we're attaching all tasks in the thread group, we
    2156                 :            :                  * only need to check permissions on one of them.
    2157                 :            :                  */
    2158                 :          0 :                 tcred = __task_cred(tsk);
    2159 [ #  # ][ #  # ]:          0 :                 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
    2160         [ #  # ]:          0 :                     !uid_eq(cred->euid, tcred->uid) &&
    2161                 :          0 :                     !uid_eq(cred->euid, tcred->suid)) {
    2162                 :            :                         rcu_read_unlock();
    2163                 :            :                         ret = -EACCES;
    2164                 :          0 :                         goto out_unlock_cgroup;
    2165                 :            :                 }
    2166                 :            :         } else
    2167                 :          0 :                 tsk = current;
    2168                 :            : 
    2169         [ #  # ]:          0 :         if (threadgroup)
    2170                 :          0 :                 tsk = tsk->group_leader;
    2171                 :            : 
    2172                 :            :         /*
    2173                 :            :          * Workqueue threads may acquire PF_NO_SETAFFINITY and become
    2174                 :            :          * trapped in a cpuset, or RT worker may be born in a cgroup
    2175                 :            :          * with no rt_runtime allocated.  Just say no.
    2176                 :            :          */
    2177 [ #  # ][ #  # ]:          0 :         if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
    2178                 :            :                 ret = -EINVAL;
    2179                 :            :                 rcu_read_unlock();
    2180                 :            :                 goto out_unlock_cgroup;
    2181                 :            :         }
    2182                 :            : 
    2183                 :          0 :         get_task_struct(tsk);
    2184                 :            :         rcu_read_unlock();
    2185                 :            : 
    2186                 :            :         threadgroup_lock(tsk);
    2187         [ #  # ]:          0 :         if (threadgroup) {
    2188         [ #  # ]:          0 :                 if (!thread_group_leader(tsk)) {
    2189                 :            :                         /*
    2190                 :            :                          * a race with de_thread from another thread's exec()
    2191                 :            :                          * may strip us of our leadership; if this happens,
    2192                 :            :                          * there is no choice but to throw this task away and
    2193                 :            :                          * try again; this is
    2194                 :            :                          * "double-double-toil-and-trouble-check locking".
    2195                 :            :                          */
    2196                 :            :                         threadgroup_unlock(tsk);
    2197                 :            :                         put_task_struct(tsk);
    2198                 :            :                         goto retry_find_task;
    2199                 :            :                 }
    2200                 :            :         }
    2201                 :            : 
    2202                 :          0 :         ret = cgroup_attach_task(cgrp, tsk, threadgroup);
    2203                 :            : 
    2204                 :            :         threadgroup_unlock(tsk);
    2205                 :            : 
    2206                 :            :         put_task_struct(tsk);
    2207                 :            : out_unlock_cgroup:
    2208                 :          0 :         mutex_unlock(&cgroup_mutex);
    2209                 :          0 :         return ret;
    2210                 :            : }
    2211                 :            : 
    2212                 :            : /**
    2213                 :            :  * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
    2214                 :            :  * @from: attach to all cgroups of a given task
    2215                 :            :  * @tsk: the task to be attached
    2216                 :            :  */
    2217                 :          0 : int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
    2218                 :            : {
    2219                 :            :         struct cgroupfs_root *root;
    2220                 :            :         int retval = 0;
    2221                 :            : 
    2222                 :          0 :         mutex_lock(&cgroup_mutex);
    2223         [ #  # ]:          0 :         for_each_active_root(root) {
    2224                 :          0 :                 struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
    2225                 :            : 
    2226                 :          0 :                 retval = cgroup_attach_task(from_cgrp, tsk, false);
    2227         [ #  # ]:          0 :                 if (retval)
    2228                 :            :                         break;
    2229                 :            :         }
    2230                 :          0 :         mutex_unlock(&cgroup_mutex);
    2231                 :            : 
    2232                 :          0 :         return retval;
    2233                 :            : }
    2234                 :            : EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
    2235                 :            : 
    2236                 :          0 : static int cgroup_tasks_write(struct cgroup_subsys_state *css,
    2237                 :            :                               struct cftype *cft, u64 pid)
    2238                 :            : {
    2239                 :          0 :         return attach_task_by_pid(css->cgroup, pid, false);
    2240                 :            : }
    2241                 :            : 
    2242                 :          0 : static int cgroup_procs_write(struct cgroup_subsys_state *css,
    2243                 :            :                               struct cftype *cft, u64 tgid)
    2244                 :            : {
    2245                 :          0 :         return attach_task_by_pid(css->cgroup, tgid, true);
    2246                 :            : }
    2247                 :            : 
    2248                 :          0 : static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
    2249                 :            :                                       struct cftype *cft, const char *buffer)
    2250                 :            : {
    2251                 :            :         BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX);
    2252         [ #  # ]:          0 :         if (strlen(buffer) >= PATH_MAX)
    2253                 :            :                 return -EINVAL;
    2254         [ #  # ]:          0 :         if (!cgroup_lock_live_group(css->cgroup))
    2255                 :            :                 return -ENODEV;
    2256                 :          0 :         mutex_lock(&cgroup_root_mutex);
    2257                 :          0 :         strcpy(css->cgroup->root->release_agent_path, buffer);
    2258                 :          0 :         mutex_unlock(&cgroup_root_mutex);
    2259                 :          0 :         mutex_unlock(&cgroup_mutex);
    2260                 :          0 :         return 0;
    2261                 :            : }
    2262                 :            : 
    2263                 :          0 : static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
    2264                 :            :                                      struct cftype *cft, struct seq_file *seq)
    2265                 :            : {
    2266                 :          0 :         struct cgroup *cgrp = css->cgroup;
    2267                 :            : 
    2268         [ #  # ]:          0 :         if (!cgroup_lock_live_group(cgrp))
    2269                 :            :                 return -ENODEV;
    2270                 :          0 :         seq_puts(seq, cgrp->root->release_agent_path);
    2271                 :          0 :         seq_putc(seq, '\n');
    2272                 :          0 :         mutex_unlock(&cgroup_mutex);
    2273                 :          0 :         return 0;
    2274                 :            : }
    2275                 :            : 
    2276                 :          0 : static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
    2277                 :            :                                      struct cftype *cft, struct seq_file *seq)
    2278                 :            : {
    2279                 :          0 :         seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
    2280                 :          0 :         return 0;
    2281                 :            : }
    2282                 :            : 
    2283                 :            : /* A buffer size big enough for numbers or short strings */
    2284                 :            : #define CGROUP_LOCAL_BUFFER_SIZE 64
    2285                 :            : 
    2286                 :          2 : static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
    2287                 :            :                                 struct cftype *cft, struct file *file,
    2288                 :            :                                 const char __user *userbuf, size_t nbytes,
    2289                 :            :                                 loff_t *unused_ppos)
    2290                 :            : {
    2291                 :            :         char buffer[CGROUP_LOCAL_BUFFER_SIZE];
    2292                 :            :         int retval = 0;
    2293                 :            :         char *end;
    2294                 :            : 
    2295         [ +  - ]:          2 :         if (!nbytes)
    2296                 :            :                 return -EINVAL;
    2297         [ +  - ]:          2 :         if (nbytes >= sizeof(buffer))
    2298                 :            :                 return -E2BIG;
    2299         [ +  - ]:          2 :         if (copy_from_user(buffer, userbuf, nbytes))
    2300                 :            :                 return -EFAULT;
    2301                 :            : 
    2302                 :          2 :         buffer[nbytes] = 0;     /* nul-terminate */
    2303         [ +  - ]:          2 :         if (cft->write_u64) {
    2304                 :          2 :                 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
    2305         [ +  - ]:          2 :                 if (*end)
    2306                 :            :                         return -EINVAL;
    2307                 :          2 :                 retval = cft->write_u64(css, cft, val);
    2308                 :            :         } else {
    2309                 :          0 :                 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
    2310         [ #  # ]:          0 :                 if (*end)
    2311                 :            :                         return -EINVAL;
    2312                 :          0 :                 retval = cft->write_s64(css, cft, val);
    2313                 :            :         }
    2314         [ +  - ]:          4 :         if (!retval)
    2315                 :          2 :                 retval = nbytes;
    2316                 :            :         return retval;
    2317                 :            : }
    2318                 :            : 
    2319                 :          0 : static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
    2320                 :            :                                    struct cftype *cft, struct file *file,
    2321                 :            :                                    const char __user *userbuf, size_t nbytes,
    2322                 :            :                                    loff_t *unused_ppos)
    2323                 :            : {
    2324                 :            :         char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
    2325                 :            :         int retval = 0;
    2326                 :          0 :         size_t max_bytes = cft->max_write_len;
    2327                 :            :         char *buffer = local_buffer;
    2328                 :            : 
    2329         [ #  # ]:          0 :         if (!max_bytes)
    2330                 :            :                 max_bytes = sizeof(local_buffer) - 1;
    2331         [ #  # ]:          0 :         if (nbytes >= max_bytes)
    2332                 :            :                 return -E2BIG;
    2333                 :            :         /* Allocate a dynamic buffer if we need one */
    2334         [ #  # ]:          0 :         if (nbytes >= sizeof(local_buffer)) {
    2335                 :          0 :                 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
    2336         [ #  # ]:          0 :                 if (buffer == NULL)
    2337                 :            :                         return -ENOMEM;
    2338                 :            :         }
    2339 [ #  # ][ #  # ]:          0 :         if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
    2340                 :            :                 retval = -EFAULT;
    2341                 :            :                 goto out;
    2342                 :            :         }
    2343                 :            : 
    2344                 :          0 :         buffer[nbytes] = 0;     /* nul-terminate */
    2345                 :          0 :         retval = cft->write_string(css, cft, strstrip(buffer));
    2346         [ #  # ]:          0 :         if (!retval)
    2347                 :          0 :                 retval = nbytes;
    2348                 :            : out:
    2349         [ #  # ]:          0 :         if (buffer != local_buffer)
    2350                 :          0 :                 kfree(buffer);
    2351                 :            :         return retval;
    2352                 :            : }
    2353                 :            : 
    2354                 :          0 : static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
    2355                 :            :                                  size_t nbytes, loff_t *ppos)
    2356                 :            : {
    2357                 :          2 :         struct cfent *cfe = __d_cfe(file->f_dentry);
    2358                 :            :         struct cftype *cft = __d_cft(file->f_dentry);
    2359                 :          2 :         struct cgroup_subsys_state *css = cfe->css;
    2360                 :            : 
    2361         [ -  + ]:          2 :         if (cft->write)
    2362                 :          0 :                 return cft->write(css, cft, file, buf, nbytes, ppos);
    2363 [ -  + ][ #  # ]:          2 :         if (cft->write_u64 || cft->write_s64)
    2364                 :          2 :                 return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
    2365         [ #  # ]:          0 :         if (cft->write_string)
    2366                 :          0 :                 return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
    2367         [ #  # ]:          0 :         if (cft->trigger) {
    2368                 :          0 :                 int ret = cft->trigger(css, (unsigned int)cft->private);
    2369         [ #  # ]:          2 :                 return ret ? ret : nbytes;
    2370                 :            :         }
    2371                 :            :         return -EINVAL;
    2372                 :            : }
    2373                 :            : 
    2374                 :          4 : static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
    2375                 :            :                                struct cftype *cft, struct file *file,
    2376                 :            :                                char __user *buf, size_t nbytes, loff_t *ppos)
    2377                 :            : {
    2378                 :            :         char tmp[CGROUP_LOCAL_BUFFER_SIZE];
    2379                 :          4 :         u64 val = cft->read_u64(css, cft);
    2380                 :          4 :         int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
    2381                 :            : 
    2382                 :          4 :         return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
    2383                 :            : }
    2384                 :            : 
    2385                 :          0 : static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
    2386                 :            :                                struct cftype *cft, struct file *file,
    2387                 :            :                                char __user *buf, size_t nbytes, loff_t *ppos)
    2388                 :            : {
    2389                 :            :         char tmp[CGROUP_LOCAL_BUFFER_SIZE];
    2390                 :          0 :         s64 val = cft->read_s64(css, cft);
    2391                 :          0 :         int len = sprintf(tmp, "%lld\n", (long long) val);
    2392                 :            : 
    2393                 :          0 :         return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
    2394                 :            : }
    2395                 :            : 
    2396                 :          0 : static ssize_t cgroup_file_read(struct file *file, char __user *buf,
    2397                 :            :                                 size_t nbytes, loff_t *ppos)
    2398                 :            : {
    2399                 :          4 :         struct cfent *cfe = __d_cfe(file->f_dentry);
    2400                 :            :         struct cftype *cft = __d_cft(file->f_dentry);
    2401                 :          4 :         struct cgroup_subsys_state *css = cfe->css;
    2402                 :            : 
    2403         [ -  + ]:          4 :         if (cft->read)
    2404                 :          0 :                 return cft->read(css, cft, file, buf, nbytes, ppos);
    2405         [ +  - ]:          4 :         if (cft->read_u64)
    2406                 :          4 :                 return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
    2407         [ #  # ]:          0 :         if (cft->read_s64)
    2408                 :          0 :                 return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
    2409                 :            :         return -EINVAL;
    2410                 :            : }
    2411                 :            : 
    2412                 :            : /*
    2413                 :            :  * seqfile ops/methods for returning structured data. Currently just
    2414                 :            :  * supports string->u64 maps, but can be extended in future.
    2415                 :            :  */
    2416                 :            : 
    2417                 :          0 : static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
    2418                 :            : {
    2419                 :          0 :         struct seq_file *sf = cb->state;
    2420                 :          0 :         return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
    2421                 :            : }
    2422                 :            : 
    2423                 :          0 : static int cgroup_seqfile_show(struct seq_file *m, void *arg)
    2424                 :            : {
    2425                 :          0 :         struct cfent *cfe = m->private;
    2426                 :          0 :         struct cftype *cft = cfe->type;
    2427                 :          0 :         struct cgroup_subsys_state *css = cfe->css;
    2428                 :            : 
    2429         [ #  # ]:          0 :         if (cft->read_map) {
    2430                 :          0 :                 struct cgroup_map_cb cb = {
    2431                 :            :                         .fill = cgroup_map_add,
    2432                 :            :                         .state = m,
    2433                 :            :                 };
    2434                 :          0 :                 return cft->read_map(css, cft, &cb);
    2435                 :            :         }
    2436                 :          0 :         return cft->read_seq_string(css, cft, m);
    2437                 :            : }
    2438                 :            : 
    2439                 :            : static const struct file_operations cgroup_seqfile_operations = {
    2440                 :            :         .read = seq_read,
    2441                 :            :         .write = cgroup_file_write,
    2442                 :            :         .llseek = seq_lseek,
    2443                 :            :         .release = cgroup_file_release,
    2444                 :            : };
    2445                 :            : 
    2446                 :          0 : static int cgroup_file_open(struct inode *inode, struct file *file)
    2447                 :            : {
    2448                 :          5 :         struct cfent *cfe = __d_cfe(file->f_dentry);
    2449                 :            :         struct cftype *cft = __d_cft(file->f_dentry);
    2450                 :          5 :         struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
    2451                 :            :         struct cgroup_subsys_state *css;
    2452                 :            :         int err;
    2453                 :            : 
    2454                 :          5 :         err = generic_file_open(inode, file);
    2455         [ +  - ]:          5 :         if (err)
    2456                 :            :                 return err;
    2457                 :            : 
    2458                 :            :         /*
    2459                 :            :          * If the file belongs to a subsystem, pin the css.  Will be
    2460                 :            :          * unpinned either on open failure or release.  This ensures that
    2461                 :            :          * @css stays alive for all file operations.
    2462                 :            :          */
    2463                 :            :         rcu_read_lock();
    2464                 :          5 :         css = cgroup_css(cgrp, cft->ss);
    2465 [ -  + ][ #  # ]:          5 :         if (cft->ss && !css_tryget(css))
    2466                 :            :                 css = NULL;
    2467                 :            :         rcu_read_unlock();
    2468                 :            : 
    2469         [ +  - ]:          5 :         if (!css)
    2470                 :            :                 return -ENODEV;
    2471                 :            : 
    2472                 :            :         /*
    2473                 :            :          * @cfe->css is used by read/write/close to determine the
    2474                 :            :          * associated css.  @file->private_data would be a better place but
    2475                 :            :          * that's already used by seqfile.  Multiple accessors may use it
    2476                 :            :          * simultaneously which is okay as the association never changes.
    2477                 :            :          */
    2478 [ +  + ][ +  - ]:          5 :         WARN_ON_ONCE(cfe->css && cfe->css != css);
         [ -  + ][ #  # ]
                 [ #  # ]
    2479                 :          5 :         cfe->css = css;
    2480                 :            : 
    2481 [ +  - ][ -  + ]:          5 :         if (cft->read_map || cft->read_seq_string) {
    2482                 :          0 :                 file->f_op = &cgroup_seqfile_operations;
    2483                 :          0 :                 err = single_open(file, cgroup_seqfile_show, cfe);
    2484         [ +  + ]:          5 :         } else if (cft->open) {
    2485                 :          1 :                 err = cft->open(inode, file);
    2486                 :            :         }
    2487                 :            : 
    2488 [ -  + ][ #  # ]:          5 :         if (css->ss && err)
    2489                 :            :                 css_put(css);
    2490                 :          5 :         return err;
    2491                 :            : }
    2492                 :            : 
    2493                 :          0 : static int cgroup_file_release(struct inode *inode, struct file *file)
    2494                 :            : {
    2495                 :          4 :         struct cfent *cfe = __d_cfe(file->f_dentry);
    2496                 :            :         struct cftype *cft = __d_cft(file->f_dentry);
    2497                 :          4 :         struct cgroup_subsys_state *css = cfe->css;
    2498                 :            :         int ret = 0;
    2499                 :            : 
    2500         [ -  + ]:          4 :         if (cft->release)
    2501                 :          0 :                 ret = cft->release(inode, file);
    2502         [ -  + ]:          8 :         if (css->ss)
    2503                 :            :                 css_put(css);
    2504         [ -  + ]:          4 :         if (file->f_op == &cgroup_seqfile_operations)
    2505                 :          0 :                 single_release(inode, file);
    2506                 :          4 :         return ret;
    2507                 :            : }
    2508                 :            : 
    2509                 :            : /*
    2510                 :            :  * cgroup_rename - Only allow simple rename of directories in place.
    2511                 :            :  */
    2512                 :          0 : static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
    2513                 :          0 :                             struct inode *new_dir, struct dentry *new_dentry)
    2514                 :            : {
    2515                 :            :         int ret;
    2516                 :            :         struct cgroup_name *name, *old_name;
    2517                 :          0 :         struct cgroup *cgrp;
    2518                 :            : 
    2519                 :            :         /*
    2520                 :            :          * It's convenient to use parent dir's i_mutex to protect
    2521                 :            :          * cgrp->name.
    2522                 :            :          */
    2523                 :            :         lockdep_assert_held(&old_dir->i_mutex);
    2524                 :            : 
    2525         [ #  # ]:          0 :         if (!S_ISDIR(old_dentry->d_inode->i_mode))
    2526                 :            :                 return -ENOTDIR;
    2527         [ #  # ]:          0 :         if (new_dentry->d_inode)
    2528                 :            :                 return -EEXIST;
    2529         [ #  # ]:          0 :         if (old_dir != new_dir)
    2530                 :            :                 return -EIO;
    2531                 :            : 
    2532                 :            :         cgrp = __d_cgrp(old_dentry);
    2533                 :            : 
    2534                 :            :         /*
    2535                 :            :          * This isn't a proper migration and its usefulness is very
    2536                 :            :          * limited.  Disallow if sane_behavior.
    2537                 :            :          */
    2538         [ #  # ]:          0 :         if (cgroup_sane_behavior(cgrp))
    2539                 :            :                 return -EPERM;
    2540                 :            : 
    2541                 :          0 :         name = cgroup_alloc_name(new_dentry);
    2542         [ #  # ]:          0 :         if (!name)
    2543                 :            :                 return -ENOMEM;
    2544                 :            : 
    2545                 :          0 :         ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
    2546         [ #  # ]:          0 :         if (ret) {
    2547                 :          0 :                 kfree(name);
    2548                 :          0 :                 return ret;
    2549                 :            :         }
    2550                 :            : 
    2551                 :          0 :         old_name = rcu_dereference_protected(cgrp->name, true);
    2552                 :          0 :         rcu_assign_pointer(cgrp->name, name);
    2553                 :            : 
    2554                 :          0 :         kfree_rcu(old_name, rcu_head);
    2555                 :          0 :         return 0;
    2556                 :            : }
    2557                 :            : 
    2558                 :          0 : static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
    2559                 :            : {
    2560 [ #  # ][ #  # ]:          0 :         if (S_ISDIR(dentry->d_inode->i_mode))
         [ #  # ][ #  # ]
    2561                 :          0 :                 return &__d_cgrp(dentry)->xattrs;
    2562                 :            :         else
    2563                 :          0 :                 return &__d_cfe(dentry)->xattrs;
    2564                 :            : }
    2565                 :            : 
    2566                 :            : static inline int xattr_enabled(struct dentry *dentry)
    2567                 :            : {
    2568                 :          0 :         struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
    2569                 :          0 :         return root->flags & CGRP_ROOT_XATTR;
    2570                 :            : }
    2571                 :            : 
    2572                 :          0 : static bool is_valid_xattr(const char *name)
    2573                 :            : {
    2574 [ #  # ][ #  # ]:          0 :         if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
    2575                 :          0 :             !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
    2576                 :            :                 return true;
    2577                 :          0 :         return false;
    2578                 :            : }
    2579                 :            : 
    2580                 :          0 : static int cgroup_setxattr(struct dentry *dentry, const char *name,
    2581                 :            :                            const void *val, size_t size, int flags)
    2582                 :            : {
    2583         [ #  # ]:          0 :         if (!xattr_enabled(dentry))
    2584                 :            :                 return -EOPNOTSUPP;
    2585         [ #  # ]:          0 :         if (!is_valid_xattr(name))
    2586                 :            :                 return -EINVAL;
    2587                 :          0 :         return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
    2588                 :            : }
    2589                 :            : 
    2590                 :          0 : static int cgroup_removexattr(struct dentry *dentry, const char *name)
    2591                 :            : {
    2592         [ #  # ]:          0 :         if (!xattr_enabled(dentry))
    2593                 :            :                 return -EOPNOTSUPP;
    2594         [ #  # ]:          0 :         if (!is_valid_xattr(name))
    2595                 :            :                 return -EINVAL;
    2596                 :          0 :         return simple_xattr_remove(__d_xattrs(dentry), name);
    2597                 :            : }
    2598                 :            : 
    2599                 :          0 : static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
    2600                 :            :                                void *buf, size_t size)
    2601                 :            : {
    2602         [ #  # ]:          0 :         if (!xattr_enabled(dentry))
    2603                 :            :                 return -EOPNOTSUPP;
    2604         [ #  # ]:          0 :         if (!is_valid_xattr(name))
    2605                 :            :                 return -EINVAL;
    2606                 :          0 :         return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
    2607                 :            : }
    2608                 :            : 
    2609                 :          0 : static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
    2610                 :            : {
    2611         [ #  # ]:          0 :         if (!xattr_enabled(dentry))
    2612                 :            :                 return -EOPNOTSUPP;
    2613                 :          0 :         return simple_xattr_list(__d_xattrs(dentry), buf, size);
    2614                 :            : }
    2615                 :            : 
    2616                 :            : static const struct file_operations cgroup_file_operations = {
    2617                 :            :         .read = cgroup_file_read,
    2618                 :            :         .write = cgroup_file_write,
    2619                 :            :         .llseek = generic_file_llseek,
    2620                 :            :         .open = cgroup_file_open,
    2621                 :            :         .release = cgroup_file_release,
    2622                 :            : };
    2623                 :            : 
    2624                 :            : static const struct inode_operations cgroup_file_inode_operations = {
    2625                 :            :         .setxattr = cgroup_setxattr,
    2626                 :            :         .getxattr = cgroup_getxattr,
    2627                 :            :         .listxattr = cgroup_listxattr,
    2628                 :            :         .removexattr = cgroup_removexattr,
    2629                 :            : };
    2630                 :            : 
    2631                 :            : static const struct inode_operations cgroup_dir_inode_operations = {
    2632                 :            :         .lookup = simple_lookup,
    2633                 :            :         .mkdir = cgroup_mkdir,
    2634                 :            :         .rmdir = cgroup_rmdir,
    2635                 :            :         .rename = cgroup_rename,
    2636                 :            :         .setxattr = cgroup_setxattr,
    2637                 :            :         .getxattr = cgroup_getxattr,
    2638                 :            :         .listxattr = cgroup_listxattr,
    2639                 :            :         .removexattr = cgroup_removexattr,
    2640                 :            : };
    2641                 :            : 
    2642                 :            : /*
    2643                 :            :  * Check if a file is a control file
    2644                 :            :  */
    2645                 :          0 : static inline struct cftype *__file_cft(struct file *file)
    2646                 :            : {
    2647         [ #  # ]:          0 :         if (file_inode(file)->i_fop != &cgroup_file_operations)
    2648                 :            :                 return ERR_PTR(-EINVAL);
    2649                 :          0 :         return __d_cft(file->f_dentry);
    2650                 :            : }
    2651                 :            : 
    2652                 :          0 : static int cgroup_create_file(struct dentry *dentry, umode_t mode,
    2653                 :            :                                 struct super_block *sb)
    2654                 :            : {
    2655                 :            :         struct inode *inode;
    2656                 :            : 
    2657         [ +  - ]:         33 :         if (!dentry)
    2658                 :            :                 return -ENOENT;
    2659         [ +  - ]:         33 :         if (dentry->d_inode)
    2660                 :            :                 return -EEXIST;
    2661                 :            : 
    2662                 :         33 :         inode = cgroup_new_inode(mode, sb);
    2663         [ +  - ]:         33 :         if (!inode)
    2664                 :            :                 return -ENOMEM;
    2665                 :            : 
    2666         [ +  + ]:         33 :         if (S_ISDIR(mode)) {
    2667                 :          2 :                 inode->i_op = &cgroup_dir_inode_operations;
    2668                 :          2 :                 inode->i_fop = &simple_dir_operations;
    2669                 :            : 
    2670                 :            :                 /* start off with i_nlink == 2 (for "." entry) */
    2671                 :          2 :                 inc_nlink(inode);
    2672                 :          2 :                 inc_nlink(dentry->d_parent->d_inode);
    2673                 :            : 
    2674                 :            :                 /*
    2675                 :            :                  * Control reaches here with cgroup_mutex held.
    2676                 :            :                  * @inode->i_mutex should nest outside cgroup_mutex but we
    2677                 :            :                  * want to populate it immediately without releasing
    2678                 :            :                  * cgroup_mutex.  As @inode isn't visible to anyone else
    2679                 :            :                  * yet, trylock will always succeed without affecting
    2680                 :            :                  * lockdep checks.
    2681                 :            :                  */
    2682 [ -  + ][ #  # ]:          2 :                 WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
                 [ #  # ]
    2683         [ +  - ]:         31 :         } else if (S_ISREG(mode)) {
    2684                 :         31 :                 inode->i_size = 0;
    2685                 :         31 :                 inode->i_fop = &cgroup_file_operations;
    2686                 :         31 :                 inode->i_op = &cgroup_file_inode_operations;
    2687                 :            :         }
    2688                 :         33 :         d_instantiate(dentry, inode);
    2689                 :            :         dget(dentry);   /* Extra count - pin the dentry in core */
    2690                 :            :         return 0;
    2691                 :            : }
    2692                 :            : 
    2693                 :            : /**
    2694                 :            :  * cgroup_file_mode - deduce file mode of a control file
    2695                 :            :  * @cft: the control file in question
    2696                 :            :  *
    2697                 :            :  * returns cft->mode if ->mode is not 0
    2698                 :            :  * returns S_IRUGO|S_IWUSR if it has both a read and a write handler
    2699                 :            :  * returns S_IRUGO if it has only a read handler
    2700                 :            :  * returns S_IWUSR if it has only a write handler
    2701                 :            :  */
    2702                 :          0 : static umode_t cgroup_file_mode(const struct cftype *cft)
    2703                 :            : {
    2704                 :            :         umode_t mode = 0;
    2705                 :            : 
    2706            [ + ]:         31 :         if (cft->mode)
    2707                 :            :                 return cft->mode;
    2708                 :            : 
    2709 [ +  - ][ +  + ]:         47 :         if (cft->read || cft->read_u64 || cft->read_s64 ||
         [ +  - ][ +  - ]
    2710         [ +  - ]:         16 :             cft->read_map || cft->read_seq_string)
    2711                 :            :                 mode |= S_IRUGO;
    2712                 :            : 
    2713 [ +  - ][ +  + ]:         16 :         if (cft->write || cft->write_u64 || cft->write_s64 ||
         [ +  - ][ +  + ]
    2714         [ -  + ]:          3 :             cft->write_string || cft->trigger)
    2715                 :         13 :                 mode |= S_IWUSR;
    2716                 :            : 
    2717                 :         16 :         return mode;
    2718                 :            : }
    2719                 :            : 
    2720                 :          0 : static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
    2721                 :            : {
    2722                 :         31 :         struct dentry *dir = cgrp->dentry;
    2723                 :            :         struct cgroup *parent = __d_cgrp(dir);
    2724                 :            :         struct dentry *dentry;
    2725                 :            :         struct cfent *cfe;
    2726                 :            :         int error;
    2727                 :            :         umode_t mode;
    2728                 :         31 :         char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
    2729                 :            : 
    2730 [ -  + ][ #  # ]:         31 :         if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
                 [ #  # ]
    2731                 :          0 :             !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
    2732                 :          0 :                 strcpy(name, cft->ss->name);
    2733                 :          0 :                 strcat(name, ".");
    2734                 :            :         }
    2735                 :         31 :         strcat(name, cft->name);
    2736                 :            : 
    2737         [ -  + ]:         31 :         BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
    2738                 :            : 
    2739                 :            :         cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
    2740         [ +  - ]:         31 :         if (!cfe)
    2741                 :            :                 return -ENOMEM;
    2742                 :            : 
    2743                 :         31 :         dentry = lookup_one_len(name, dir, strlen(name));
    2744         [ -  + ]:         31 :         if (IS_ERR(dentry)) {
    2745                 :            :                 error = PTR_ERR(dentry);
    2746                 :            :                 goto out;
    2747                 :            :         }
    2748                 :            : 
    2749                 :         31 :         cfe->type = (void *)cft;
    2750                 :         31 :         cfe->dentry = dentry;
    2751                 :         31 :         dentry->d_fsdata = cfe;
    2752                 :            :         simple_xattrs_init(&cfe->xattrs);
    2753                 :            : 
    2754                 :         31 :         mode = cgroup_file_mode(cft);
    2755                 :         31 :         error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
    2756         [ +  - ]:         31 :         if (!error) {
    2757                 :         31 :                 list_add_tail(&cfe->node, &parent->files);
    2758                 :            :                 cfe = NULL;
    2759                 :            :         }
    2760                 :         31 :         dput(dentry);
    2761                 :            : out:
    2762                 :         31 :         kfree(cfe);
    2763                 :            :         return error;
    2764                 :            : }
    2765                 :            : 
    2766                 :            : /**
    2767                 :            :  * cgroup_addrm_files - add or remove files to a cgroup directory
    2768                 :            :  * @cgrp: the target cgroup
    2769                 :            :  * @cfts: array of cftypes to be added or removed
    2770                 :            :  * @is_add: whether to add or remove
    2771                 :            :  *
    2772                 :            :  * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
    2773                 :            :  * For removals, this function never fails.  If addition fails, this
    2774                 :            :  * function doesn't remove files already added.  The caller is responsible
    2775                 :            :  * for cleaning up.
    2776                 :            :  */
    2777                 :          0 : static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
    2778                 :            :                               bool is_add)
    2779                 :            : {
    2780                 :            :         struct cftype *cft;
    2781                 :            :         int ret;
    2782                 :            : 
    2783                 :            :         lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
    2784                 :            :         lockdep_assert_held(&cgroup_mutex);
    2785                 :            : 
    2786         [ +  + ]:         56 :         for (cft = cfts; cft->name[0] != '\0'; cft++) {
    2787                 :            :                 /* does cft->flags tell us to skip this file on @cgrp? */
    2788 [ +  + ][ -  + ]:         49 :                 if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
    2789                 :          0 :                         continue;
    2790 [ -  + ][ #  # ]:         49 :                 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
    2791                 :          0 :                         continue;
    2792 [ +  + ][ +  + ]:         49 :                 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
    2793                 :          8 :                         continue;
    2794                 :            : 
    2795         [ +  + ]:         41 :                 if (is_add) {
    2796                 :         31 :                         ret = cgroup_add_file(cgrp, cft);
    2797         [ -  + ]:         31 :                         if (ret) {
    2798                 :          0 :                                 pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
    2799                 :            :                                         cft->name, ret);
    2800                 :          0 :                                 return ret;
    2801                 :            :                         }
    2802                 :            :                 } else {
    2803                 :         10 :                         cgroup_rm_file(cgrp, cft);
    2804                 :            :                 }
    2805                 :            :         }
    2806                 :            :         return 0;
    2807                 :            : }
    2808                 :            : 
    2809                 :            : static void cgroup_cfts_prepare(void)
    2810                 :            :         __acquires(&cgroup_mutex)
    2811                 :            : {
    2812                 :            :         /*
    2813                 :            :          * Thanks to the entanglement with vfs inode locking, we can't walk
    2814                 :            :          * the existing cgroups under cgroup_mutex and create files.
    2815                 :            :          * Instead, we use css_for_each_descendant_pre() and drop RCU read
    2816                 :            :          * lock before calling cgroup_addrm_files().
    2817                 :            :          */
    2818                 :          0 :         mutex_lock(&cgroup_mutex);
    2819                 :            : }
    2820                 :            : 
    2821                 :          0 : static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
    2822                 :            :         __releases(&cgroup_mutex)
    2823                 :            : {
    2824                 :          0 :         LIST_HEAD(pending);
    2825                 :          0 :         struct cgroup_subsys *ss = cfts[0].ss;
    2826                 :          0 :         struct cgroup *root = &ss->root->top_cgroup;
    2827                 :          0 :         struct super_block *sb = ss->root->sb;
    2828                 :            :         struct dentry *prev = NULL;
    2829                 :            :         struct inode *inode;
    2830                 :            :         struct cgroup_subsys_state *css;
    2831                 :            :         u64 update_before;
    2832                 :            :         int ret = 0;
    2833                 :            : 
    2834                 :            :         /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
    2835 [ #  # ][ #  # ]:          0 :         if (!cfts || ss->root == &cgroup_dummy_root ||
                 [ #  # ]
    2836                 :          0 :             !atomic_inc_not_zero(&sb->s_active)) {
    2837                 :          0 :                 mutex_unlock(&cgroup_mutex);
    2838                 :          0 :                 return 0;
    2839                 :            :         }
    2840                 :            : 
    2841                 :            :         /*
    2842                 :            :          * All cgroups which are created after we drop cgroup_mutex will
    2843                 :            :          * have the updated set of files, so we only need to update the
    2844                 :            :          * cgroups created before the current @cgroup_serial_nr_next.
    2845                 :            :          */
    2846                 :          0 :         update_before = cgroup_serial_nr_next;
    2847                 :            : 
    2848                 :          0 :         mutex_unlock(&cgroup_mutex);
    2849                 :            : 
    2850                 :            :         /* add/rm files for all cgroups created before */
    2851                 :            :         rcu_read_lock();
    2852         [ #  # ]:          0 :         css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
    2853                 :          0 :                 struct cgroup *cgrp = css->cgroup;
    2854                 :            : 
    2855         [ #  # ]:          0 :                 if (cgroup_is_dead(cgrp))
    2856                 :          0 :                         continue;
    2857                 :            : 
    2858                 :          0 :                 inode = cgrp->dentry->d_inode;
    2859                 :            :                 dget(cgrp->dentry);
    2860                 :            :                 rcu_read_unlock();
    2861                 :            : 
    2862                 :          0 :                 dput(prev);
    2863                 :          0 :                 prev = cgrp->dentry;
    2864                 :            : 
    2865                 :          0 :                 mutex_lock(&inode->i_mutex);
    2866                 :          0 :                 mutex_lock(&cgroup_mutex);
    2867 [ #  # ][ #  # ]:          0 :                 if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
    2868                 :          0 :                         ret = cgroup_addrm_files(cgrp, cfts, is_add);
    2869                 :          0 :                 mutex_unlock(&cgroup_mutex);
    2870                 :          0 :                 mutex_unlock(&inode->i_mutex);
    2871                 :            : 
    2872                 :            :                 rcu_read_lock();
    2873         [ #  # ]:          0 :                 if (ret)
    2874                 :            :                         break;
    2875                 :            :         }
    2876                 :            :         rcu_read_unlock();
    2877                 :          0 :         dput(prev);
    2878                 :          0 :         deactivate_super(sb);
    2879                 :          0 :         return ret;
    2880                 :            : }
    2881                 :            : 
    2882                 :            : /**
    2883                 :            :  * cgroup_add_cftypes - add an array of cftypes to a subsystem
    2884                 :            :  * @ss: target cgroup subsystem
    2885                 :            :  * @cfts: zero-length name terminated array of cftypes
    2886                 :            :  *
    2887                 :            :  * Register @cfts to @ss.  Files described by @cfts are created for all
    2888                 :            :  * existing cgroups to which @ss is attached and all future cgroups will
    2889                 :            :  * have them too.  This function can be called anytime whether @ss is
    2890                 :            :  * attached or not.
    2891                 :            :  *
    2892                 :            :  * Returns 0 on successful registration, -errno on failure.  If creating
    2893                 :            :  * the files on an existing cgroup fails, @cfts is unregistered again and
    2894                 :            :  * the error is returned.
    2895                 :            :  */
    2896                 :          0 : int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
    2897                 :            : {
    2898                 :            :         struct cftype_set *set;
    2899                 :            :         struct cftype *cft;
    2900                 :            :         int ret;
    2901                 :            : 
    2902                 :            :         set = kzalloc(sizeof(*set), GFP_KERNEL);
    2903         [ #  # ]:          0 :         if (!set)
    2904                 :            :                 return -ENOMEM;
    2905                 :            : 
    2906         [ #  # ]:          0 :         for (cft = cfts; cft->name[0] != '\0'; cft++)
    2907                 :          0 :                 cft->ss = ss;
    2908                 :            : 
    2909                 :            :         cgroup_cfts_prepare();
    2910                 :          0 :         set->cfts = cfts;
    2911                 :          0 :         list_add_tail(&set->node, &ss->cftsets);
    2912                 :          0 :         ret = cgroup_cfts_commit(cfts, true);
    2913         [ #  # ]:          0 :         if (ret)
    2914                 :          0 :                 cgroup_rm_cftypes(cfts);
    2915                 :          0 :         return ret;
    2916                 :            : }
    2917                 :            : EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
    2918                 :            : 
    2919                 :            : /**
    2920                 :            :  * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
    2921                 :            :  * @cfts: zero-length name terminated array of cftypes
    2922                 :            :  *
    2923                 :            :  * Unregister @cfts.  Files described by @cfts are removed from all
    2924                 :            :  * existing cgroups and all future cgroups won't have them either.  This
    2925                 :            :  * function can be called anytime whether @cfts' subsys is attached or not.
    2926                 :            :  *
    2927                 :            :  * Returns 0 on successful unregistration, -ENOENT if @cfts is not
    2928                 :            :  * registered.
    2929                 :            :  */
    2930                 :          0 : int cgroup_rm_cftypes(struct cftype *cfts)
    2931                 :            : {
    2932                 :            :         struct cftype_set *set;
    2933                 :            : 
    2934 [ #  # ][ #  # ]:          0 :         if (!cfts || !cfts[0].ss)
    2935                 :            :                 return -ENOENT;
    2936                 :            : 
    2937                 :            :         cgroup_cfts_prepare();
    2938                 :            : 
    2939         [ #  # ]:          0 :         list_for_each_entry(set, &cfts[0].ss->cftsets, node) {
    2940         [ #  # ]:          0 :                 if (set->cfts == cfts) {
    2941                 :            :                         list_del(&set->node);
    2942                 :          0 :                         kfree(set);
    2943                 :          0 :                         cgroup_cfts_commit(cfts, false);
    2944                 :          0 :                         return 0;
    2945                 :            :                 }
    2946                 :            :         }
    2947                 :            : 
    2948                 :          0 :         cgroup_cfts_commit(NULL, false);
    2949                 :          0 :         return -ENOENT;
    2950                 :            : }
    2951                 :            : 
    2952                 :            : /**
    2953                 :            :  * cgroup_task_count - count the number of tasks in a cgroup.
    2954                 :            :  * @cgrp: the cgroup in question
    2955                 :            :  *
    2956                 :            :  * Return the number of tasks in the cgroup.
    2957                 :            :  */
    2958                 :          0 : int cgroup_task_count(const struct cgroup *cgrp)
    2959                 :            : {
    2960                 :            :         int count = 0;
    2961                 :            :         struct cgrp_cset_link *link;
    2962                 :            : 
    2963                 :          1 :         read_lock(&css_set_lock);
    2964         [ +  + ]:          2 :         list_for_each_entry(link, &cgrp->cset_links, cset_link)
    2965                 :          1 :                 count += atomic_read(&link->cset->refcount);
    2966                 :            :         read_unlock(&css_set_lock);
    2967                 :          1 :         return count;
    2968                 :            : }
    2969                 :            : 
    2970                 :            : /*
    2971                 :            :  * To reduce the fork() overhead for systems that are not actually using
    2972                 :            :  * their cgroups capability, we don't maintain the lists running through
    2973                 :            :  * each css_set to its tasks until we see the list actually used - in other
    2974                 :            :  * words after the first call to css_task_iter_start().
    2975                 :            :  */
    2976                 :          0 : static void cgroup_enable_task_cg_lists(void)
    2977                 :            : {
    2978                 :            :         struct task_struct *p, *g;
    2979                 :          1 :         write_lock(&css_set_lock);
    2980                 :          1 :         use_task_css_set_links = 1;
    2981                 :            :         /*
    2982                 :            :          * We need tasklist_lock because RCU is not safe against
    2983                 :            :          * while_each_thread(). Besides, a forking task that has passed
    2984                 :            :          * cgroup_post_fork() without seeing use_task_css_set_links = 1
    2985                 :            :          * is not guaranteed to have its child immediately visible in the
    2986                 :            :          * tasklist if we walk through it with RCU.
    2987                 :            :          */
    2988                 :          1 :         read_lock(&tasklist_lock);
    2989         [ +  + ]:        283 :         do_each_thread(g, p) {
    2990                 :            :                 task_lock(p);
    2991                 :            :                 /*
    2992                 :            :                  * We should check if the process is exiting, otherwise
    2993                 :            :                  * it will race with cgroup_exit() in that the list
    2994                 :            :                  * entry won't be deleted though the process has exited.
    2995                 :            :                  */
    2996 [ +  + ][ +  - ]:        285 :                 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
    2997                 :         92 :                         list_add(&p->cg_list, &task_css_set(p)->tasks);
    2998                 :            :                 task_unlock(p);
    2999         [ +  + ]:        285 :         } while_each_thread(g, p);
    3000                 :            :         read_unlock(&tasklist_lock);
    3001                 :            :         write_unlock(&css_set_lock);
    3002                 :          1 : }
    3003                 :            : 
    3004                 :            : /**
    3005                 :            :  * css_next_child - find the next child of a given css
    3006                 :            :  * @pos_css: the current position (%NULL to initiate traversal)
    3007                 :            :  * @parent_css: css whose children to walk
    3008                 :            :  *
    3009                 :            :  * This function returns the next child of @parent_css and should be called
    3010                 :            :  * under RCU read lock.  The only requirement is that @parent_css and
    3011                 :            :  * @pos_css are accessible.  The next sibling is guaranteed to be returned
    3012                 :            :  * regardless of their states.
    3013                 :            :  */
    3014                 :            : struct cgroup_subsys_state *
    3015                 :          0 : css_next_child(struct cgroup_subsys_state *pos_css,
    3016                 :            :                struct cgroup_subsys_state *parent_css)
    3017                 :            : {
    3018         [ #  # ]:          0 :         struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
    3019                 :          0 :         struct cgroup *cgrp = parent_css->cgroup;
    3020                 :            :         struct cgroup *next;
    3021                 :            : 
    3022                 :            :         WARN_ON_ONCE(!rcu_read_lock_held());
    3023                 :            : 
    3024                 :            :         /*
    3025                 :            :          * @pos could already have been removed.  Once a cgroup is removed,
    3026                 :            :          * its ->sibling.next is no longer updated when its next sibling
    3027                 :            :          * changes.  As CGRP_DEAD assertion is serialized and happens
    3028                 :            :          * before the cgroup is taken off the ->sibling list, if we see it
    3029                 :            :          * unasserted, it's guaranteed that the next sibling hasn't
    3030                 :            :          * finished its grace period even if it's already removed, and thus
    3031                 :            :          * safe to dereference from this RCU critical section.  If
    3032                 :            :          * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
    3033                 :            :          * to be visible as %true here.
    3034                 :            :          *
    3035                 :            :          * If @pos is dead, its next pointer can't be dereferenced;
    3036                 :            :          * however, as each cgroup is given a monotonically increasing
    3037                 :            :          * unique serial number and always appended to the sibling list,
    3038                 :            :          * the next one can be found by walking the parent's children until
    3039                 :            :          * we see a cgroup with higher serial number than @pos's.  While
    3040                 :            :          * this path can be slower, it's taken only when either the current
    3041                 :            :          * cgroup is removed or iteration and removal race.
    3042                 :            :          */
    3043         [ #  # ]:          0 :         if (!pos) {
    3044                 :          0 :                 next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
    3045         [ #  # ]:          0 :         } else if (likely(!cgroup_is_dead(pos))) {
    3046                 :          0 :                 next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
    3047                 :            :         } else {
    3048         [ #  # ]:          0 :                 list_for_each_entry_rcu(next, &cgrp->children, sibling)
    3049         [ #  # ]:          0 :                         if (next->serial_nr > pos->serial_nr)
    3050                 :            :                                 break;
    3051                 :            :         }
    3052                 :            : 
    3053         [ #  # ]:          0 :         if (&next->sibling == &cgrp->children)
    3054                 :            :                 return NULL;
    3055                 :            : 
    3056                 :          0 :         return cgroup_css(next, parent_css->ss);
    3057                 :            : }
    3058                 :            : EXPORT_SYMBOL_GPL(css_next_child);
    3059                 :            : 
    3060                 :            : /**
    3061                 :            :  * css_next_descendant_pre - find the next descendant for pre-order walk
    3062                 :            :  * @pos: the current position (%NULL to initiate traversal)
    3063                 :            :  * @root: css whose descendants to walk
    3064                 :            :  *
    3065                 :            :  * To be used by css_for_each_descendant_pre().  Find the next descendant
    3066                 :            :  * to visit for pre-order traversal of @root's descendants.  @root is
    3067                 :            :  * included in the iteration and the first node to be visited.
    3068                 :            :  *
    3069                 :            :  * While this function requires RCU read locking, it doesn't require the
    3070                 :            :  * whole traversal to be contained in a single RCU critical section.  This
    3071                 :            :  * function will return the correct next descendant as long as both @pos
    3072                 :            :  * and @root are accessible and @pos is a descendant of @root.
    3073                 :            :  */
    3074                 :            : struct cgroup_subsys_state *
    3075                 :          0 : css_next_descendant_pre(struct cgroup_subsys_state *pos,
    3076                 :            :                         struct cgroup_subsys_state *root)
    3077                 :            : {
    3078                 :            :         struct cgroup_subsys_state *next;
    3079                 :            : 
    3080                 :            :         WARN_ON_ONCE(!rcu_read_lock_held());
    3081                 :            : 
    3082                 :            :         /* if first iteration, visit @root */
    3083         [ #  # ]:          0 :         if (!pos)
    3084                 :            :                 return root;
    3085                 :            : 
    3086                 :            :         /* visit the first child if exists */
    3087                 :          0 :         next = css_next_child(NULL, pos);
    3088         [ #  # ]:          0 :         if (next)
    3089                 :            :                 return next;
    3090                 :            : 
    3091                 :            :         /* no child, visit my or the closest ancestor's next sibling */
    3092         [ #  # ]:          0 :         while (pos != root) {
    3093                 :          0 :                 next = css_next_child(pos, css_parent(pos));
    3094         [ #  # ]:          0 :                 if (next)
    3095                 :            :                         return next;
    3096                 :            :                 pos = css_parent(pos);
    3097                 :            :         }
    3098                 :            : 
    3099                 :            :         return NULL;
    3100                 :            : }
    3101                 :            : EXPORT_SYMBOL_GPL(css_next_descendant_pre);
    3102                 :            : 
    3103                 :            : /**
    3104                 :            :  * css_rightmost_descendant - return the rightmost descendant of a css
    3105                 :            :  * @pos: css of interest
    3106                 :            :  *
    3107                 :            :  * Return the rightmost descendant of @pos.  If there's no descendant, @pos
    3108                 :            :  * is returned.  This can be used during pre-order traversal to skip
    3109                 :            :  * the subtree of @pos.
    3110                 :            :  *
    3111                 :            :  * While this function requires RCU read locking, it doesn't require the
    3112                 :            :  * whole traversal to be contained in a single RCU critical section.  This
    3113                 :            :  * function will return the correct rightmost descendant as long as @pos is
    3114                 :            :  * accessible.
    3115                 :            :  */
    3116                 :            : struct cgroup_subsys_state *
    3117                 :          0 : css_rightmost_descendant(struct cgroup_subsys_state *pos)
    3118                 :            : {
    3119                 :            :         struct cgroup_subsys_state *last, *tmp;
    3120                 :            : 
    3121                 :            :         WARN_ON_ONCE(!rcu_read_lock_held());
    3122                 :            : 
    3123                 :            :         do {
    3124                 :            :                 last = pos;
    3125                 :            :                 /* ->prev isn't RCU safe, walk ->next till the end */
    3126                 :            :                 pos = NULL;
    3127         [ #  # ]:          0 :                 css_for_each_child(tmp, last)
    3128                 :            :                         pos = tmp;
    3129         [ #  # ]:          0 :         } while (pos);
    3130                 :            : 
    3131                 :          0 :         return last;
    3132                 :            : }
    3133                 :            : EXPORT_SYMBOL_GPL(css_rightmost_descendant);
    3134                 :            : 
    3135                 :            : static struct cgroup_subsys_state *
    3136                 :            : css_leftmost_descendant(struct cgroup_subsys_state *pos)
    3137                 :            : {
    3138                 :            :         struct cgroup_subsys_state *last;
    3139                 :            : 
    3140                 :            :         do {
    3141                 :            :                 last = pos;
    3142                 :          0 :                 pos = css_next_child(NULL, pos);
    3143 [ #  # ][ #  # ]:          0 :         } while (pos);
    3144                 :            : 
    3145                 :            :         return last;
    3146                 :            : }
    3147                 :            : 
    3148                 :            : /**
    3149                 :            :  * css_next_descendant_post - find the next descendant for post-order walk
    3150                 :            :  * @pos: the current position (%NULL to initiate traversal)
    3151                 :            :  * @root: css whose descendants to walk
    3152                 :            :  *
    3153                 :            :  * To be used by css_for_each_descendant_post().  Find the next descendant
    3154                 :            :  * to visit for post-order traversal of @root's descendants.  @root is
    3155                 :            :  * included in the iteration and the last node to be visited.
    3156                 :            :  *
    3157                 :            :  * While this function requires RCU read locking, it doesn't require the
    3158                 :            :  * whole traversal to be contained in a single RCU critical section.  This
    3159                 :            :  * function will return the correct next descendant as long as both @pos
    3160                 :            :  * and @root are accessible and @pos is a descendant of @root.
    3161                 :            :  */
    3162                 :            : struct cgroup_subsys_state *
    3163                 :          0 : css_next_descendant_post(struct cgroup_subsys_state *pos,
    3164                 :            :                          struct cgroup_subsys_state *root)
    3165                 :            : {
    3166                 :            :         struct cgroup_subsys_state *next;
    3167                 :            : 
    3168                 :            :         WARN_ON_ONCE(!rcu_read_lock_held());
    3169                 :            : 
    3170                 :            :         /* if first iteration, visit leftmost descendant which may be @root */
    3171         [ #  # ]:          0 :         if (!pos)
    3172                 :            :                 return css_leftmost_descendant(root);
    3173                 :            : 
    3174                 :            :         /* if we visited @root, we're done */
    3175         [ #  # ]:          0 :         if (pos == root)
    3176                 :            :                 return NULL;
    3177                 :            : 
    3178                 :            :         /* if there's an unvisited sibling, visit its leftmost descendant */
    3179                 :          0 :         next = css_next_child(pos, css_parent(pos));
    3180         [ #  # ]:          0 :         if (next)
    3181                 :            :                 return css_leftmost_descendant(next);
    3182                 :            : 
    3183                 :            :         /* no sibling left, visit parent */
    3184                 :          0 :         return css_parent(pos);
    3185                 :            : }
    3186                 :            : EXPORT_SYMBOL_GPL(css_next_descendant_post);
    3187                 :            : 
    3188                 :            : /**
    3189                 :            :  * css_advance_task_iter - advance a task iterator to the next css_set
    3190                 :            :  * @it: the iterator to advance
    3191                 :            :  *
    3192                 :            :  * Advance @it to the next css_set to walk.
    3193                 :            :  */
    3194                 :            : static void css_advance_task_iter(struct css_task_iter *it)
    3195                 :            : {
    3196                 :            :         struct list_head *l = it->cset_link;
    3197                 :            :         struct cgrp_cset_link *link;
    3198                 :            :         struct css_set *cset;
    3199                 :            : 
    3200                 :            :         /* Advance to the next non-empty css_set */
    3201                 :            :         do {
    3202                 :          2 :                 l = l->next;
    3203 [ +  - ][ -  + ]:          3 :                 if (l == &it->origin_css->cgroup->cset_links) {
    3204                 :          1 :                         it->cset_link = NULL;
    3205                 :            :                         return;
    3206                 :            :                 }
    3207                 :            :                 link = list_entry(l, struct cgrp_cset_link, cset_link);
    3208                 :          1 :                 cset = link->cset;
    3209 [ #  # ][ -  + ]:          1 :         } while (list_empty(&cset->tasks));
    3210                 :          1 :         it->cset_link = l;
    3211                 :          1 :         it->task = cset->tasks.next;
    3212                 :            : }
    3213                 :            : 
    3214                 :            : /**
    3215                 :            :  * css_task_iter_start - initiate task iteration
    3216                 :            :  * @css: the css to walk tasks of
    3217                 :            :  * @it: the task iterator to use
    3218                 :            :  *
    3219                 :            :  * Initiate iteration through the tasks of @css.  The caller can call
    3220                 :            :  * css_task_iter_next() to walk through the tasks until the function
    3221                 :            :  * returns NULL.  On completion of iteration, css_task_iter_end() must be
    3222                 :            :  * called.
    3223                 :            :  *
    3224                 :            :  * Note that this function acquires a lock which is released when the
    3225                 :            :  * iteration finishes.  The caller can't sleep while iteration is in
    3226                 :            :  * progress.
    3227                 :            :  */
    3228                 :          0 : void css_task_iter_start(struct cgroup_subsys_state *css,
    3229                 :            :                          struct css_task_iter *it)
    3230                 :            :         __acquires(css_set_lock)
    3231                 :            : {
    3232                 :            :         /*
    3233                 :            :          * The first time anyone tries to iterate across a css, we need to
    3234                 :            :          * enable the list linking each css_set to its tasks, and fix up
    3235                 :            :          * all existing tasks.
    3236                 :            :          */
    3237         [ +  - ]:          1 :         if (!use_task_css_set_links)
    3238                 :          1 :                 cgroup_enable_task_cg_lists();
    3239                 :            : 
    3240                 :          1 :         read_lock(&css_set_lock);
    3241                 :            : 
    3242                 :          2 :         it->origin_css = css;
    3243                 :          2 :         it->cset_link = &css->cgroup->cset_links;
    3244                 :            : 
    3245                 :            :         css_advance_task_iter(it);
    3246                 :          1 : }
    3247                 :            : 
    3248                 :            : /**
    3249                 :            :  * css_task_iter_next - return the next task for the iterator
    3250                 :            :  * @it: the task iterator being iterated
    3251                 :            :  *
    3252                 :            :  * The "next" function for task iteration.  @it should have been
    3253                 :            :  * initialized via css_task_iter_start().  Returns NULL when the iteration
    3254                 :            :  * reaches the end.
    3255                 :            :  */
    3256                 :          0 : struct task_struct *css_task_iter_next(struct css_task_iter *it)
    3257                 :            : {
    3258                 :            :         struct task_struct *res;
    3259                 :         93 :         struct list_head *l = it->task;
    3260                 :            :         struct cgrp_cset_link *link;
    3261                 :            : 
    3262                 :            :         /* A NULL cset_link means there are no (more) tasks to return */
    3263            [ + ]:         93 :         if (!it->cset_link)
    3264                 :            :                 return NULL;
    3265                 :            :         res = list_entry(l, struct task_struct, cg_list);
    3266                 :            :         /* Advance iterator to find next entry */
    3267                 :        185 :         l = l->next;
    3268                 :            :         link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link);
    3269         [ +  + ]:        185 :         if (l == &link->cset->tasks) {
    3270                 :            :                 /*
    3271                 :            :                  * We reached the end of this task list - move on to the
    3272                 :            :                  * next cgrp_cset_link.
    3273                 :            :                  */
    3274                 :            :                 css_advance_task_iter(it);
    3275                 :            :         } else {
    3276                 :         91 :                 it->task = l;
    3277                 :            :         }
    3278                 :         92 :         return res;
    3279                 :            : }
    3280                 :            : 
    3281                 :            : /**
    3282                 :            :  * css_task_iter_end - finish task iteration
    3283                 :            :  * @it: the task iterator to finish
    3284                 :            :  *
    3285                 :            :  * Finish task iteration started by css_task_iter_start().
    3286                 :            :  */
    3287                 :          0 : void css_task_iter_end(struct css_task_iter *it)
    3288                 :            :         __releases(css_set_lock)
    3289                 :            : {
    3290                 :            :         read_unlock(&css_set_lock);
    3291                 :          1 : }
    3292                 :            : 
    3293                 :            : static inline int started_after_time(struct task_struct *t1,
    3294                 :            :                                      struct timespec *time,
    3295                 :            :                                      struct task_struct *t2)
    3296                 :            : {
    3297                 :            :         int start_diff = timespec_compare(&t1->start_time, time);
    3298 [ #  # ][ #  # ]:          0 :         if (start_diff > 0) {
    3299                 :            :                 return 1;
    3300 [ #  # ][ #  # ]:          0 :         } else if (start_diff < 0) {
    3301                 :            :                 return 0;
    3302                 :            :         } else {
    3303                 :            :                 /*
    3304                 :            :                  * Arbitrarily, if two processes started at the same
    3305                 :            :                  * time, we'll say that the lower pointer value
    3306                 :            :                  * started first. Note that t2 may have exited by now
    3307                 :            :                  * so this may not be a valid pointer any longer, but
    3308                 :            :                  * that's fine - it still serves to distinguish
    3309                 :            :                  * between two tasks started (effectively) simultaneously.
    3310                 :            :                  */
    3311                 :          0 :                 return t1 > t2;
    3312                 :            :         }
    3313                 :            : }
    3314                 :            : 
    3315                 :            : /*
    3316                 :            :  * This function is a callback from heap_insert() and is used to order
    3317                 :            :  * the heap.
    3318                 :            :  * In this case we order the heap in descending task start time.
    3319                 :            :  */
    3320                 :          0 : static inline int started_after(void *p1, void *p2)
    3321                 :            : {
    3322                 :            :         struct task_struct *t1 = p1;
    3323                 :            :         struct task_struct *t2 = p2;
    3324                 :          0 :         return started_after_time(t1, &t2->start_time, t2);
    3325                 :            : }
    3326                 :            : 
    3327                 :            : /**
    3328                 :            :  * css_scan_tasks - iterate through all the tasks in a css
    3329                 :            :  * @css: the css to iterate tasks of
    3330                 :            :  * @test: optional test callback
    3331                 :            :  * @process: process callback
    3332                 :            :  * @data: data passed to @test and @process
    3333                 :            :  * @heap: optional pre-allocated heap used for task iteration
    3334                 :            :  *
    3335                 :            :  * Iterate through all the tasks in @css, calling @test for each, and if it
    3336                 :            :  * returns %true, call @process for it also.
    3337                 :            :  *
    3338                 :            :  * @test may be NULL, meaning always true (select all tasks), which
    3339                 :            :  * effectively duplicates css_task_iter_{start,next,end}() but does not
    3340                 :            :  * lock css_set_lock for the call to @process.
    3341                 :            :  *
    3342                 :            :  * It is guaranteed that @process will act on every task that is a member
    3343                 :            :  * of @css for the duration of this call.  This function may or may not
    3344                 :            :  * call @process for tasks that exit or move to a different css during the
    3345                 :            :  * call, or are forked or move into the css during the call.
    3346                 :            :  *
    3347                 :            :  * Note that @test may be called with locks held, and may in some
    3348                 :            :  * situations be called multiple times for the same task, so it should be
    3349                 :            :  * cheap.
    3350                 :            :  *
    3351                 :            :  * If @heap is non-NULL, a heap has been pre-allocated and will be used for
    3352                 :            :  * heap operations (and its "gt" member will be overwritten), else a
    3353                 :            :  * temporary heap will be used (allocation of which may cause this function
    3354                 :            :  * to fail).
    3355                 :            :  */
    3356                 :          0 : int css_scan_tasks(struct cgroup_subsys_state *css,
    3357                 :            :                    bool (*test)(struct task_struct *, void *),
    3358                 :            :                    void (*process)(struct task_struct *, void *),
    3359                 :            :                    void *data, struct ptr_heap *heap)
    3360                 :            : {
    3361                 :            :         int retval, i;
    3362                 :            :         struct css_task_iter it;
    3363                 :            :         struct task_struct *p, *dropped;
    3364                 :            :         /* Never dereference latest_task, since it's not refcounted */
    3365                 :            :         struct task_struct *latest_task = NULL;
    3366                 :            :         struct ptr_heap tmp_heap;
    3367                 :            :         struct timespec latest_time = { 0, 0 };
    3368                 :            : 
    3369         [ #  # ]:          0 :         if (heap) {
    3370                 :            :                 /* The caller supplied our heap and pre-allocated its memory */
    3371                 :          0 :                 heap->gt = &started_after;
    3372                 :            :         } else {
    3373                 :            :                 /* We need to allocate our own heap memory */
    3374                 :            :                 heap = &tmp_heap;
    3375                 :          0 :                 retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
    3376         [ #  # ]:          0 :                 if (retval)
    3377                 :            :                         /* cannot allocate the heap */
    3378                 :            :                         return retval;
    3379                 :            :         }
    3380                 :            : 
    3381                 :            :  again:
    3382                 :            :         /*
    3383                 :            :          * Scan tasks in the css, using the @test callback to determine
    3384                 :            :          * which are of interest, and invoking @process callback on the
    3385                 :            :          * ones which need an update.  Since we don't want to hold any
    3386                 :            :          * locks during the task updates, gather tasks to be processed in a
    3387                 :            :          * heap structure.  The heap is sorted by descending task start
    3388                 :            :          * time.  If the statically-sized heap fills up, we overflow tasks
    3389                 :            :          * that started later, and in future iterations only consider tasks
    3390                 :            :          * that started after the latest task in the previous pass. This
    3391                 :            :          * guarantees forward progress and that we don't miss any tasks.
    3392                 :            :          */
    3393                 :          0 :         heap->size = 0;
    3394                 :          0 :         css_task_iter_start(css, &it);
    3395         [ #  # ]:          0 :         while ((p = css_task_iter_next(&it))) {
    3396                 :            :                 /*
    3397                 :            :                  * Only affect tasks that qualify per the caller's callback,
    3398                 :            :                  * if he provided one
    3399                 :            :                  */
    3400 [ #  # ][ #  # ]:          0 :                 if (test && !test(p, data))
    3401                 :          0 :                         continue;
    3402                 :            :                 /*
    3403                 :            :                  * Only process tasks that started after the last task
    3404                 :            :                  * we processed
    3405                 :            :                  */
    3406         [ #  # ]:          0 :                 if (!started_after_time(p, &latest_time, latest_task))
    3407                 :          0 :                         continue;
    3408                 :          0 :                 dropped = heap_insert(heap, p);
    3409         [ #  # ]:          0 :                 if (dropped == NULL) {
    3410                 :            :                         /*
    3411                 :            :                          * The new task was inserted; the heap wasn't
    3412                 :            :                          * previously full
    3413                 :            :                          */
    3414                 :          0 :                         get_task_struct(p);
    3415         [ #  # ]:          0 :                 } else if (dropped != p) {
    3416                 :            :                         /*
    3417                 :            :                          * The new task was inserted, and pushed out a
    3418                 :            :                          * different task
    3419                 :            :                          */
    3420                 :          0 :                         get_task_struct(p);
    3421                 :            :                         put_task_struct(dropped);
    3422                 :            :                 }
    3423                 :            :                 /*
    3424                 :            :                  * Else the new task was newer than anything already in
    3425                 :            :                  * the heap and wasn't inserted
    3426                 :            :                  */
    3427                 :            :         }
    3428                 :          0 :         css_task_iter_end(&it);
    3429                 :            : 
    3430         [ #  # ]:          0 :         if (heap->size) {
    3431         [ #  # ]:          0 :                 for (i = 0; i < heap->size; i++) {
    3432                 :          0 :                         struct task_struct *q = heap->ptrs[i];
    3433         [ #  # ]:          0 :                         if (i == 0) {
    3434                 :          0 :                                 latest_time = q->start_time;
    3435                 :            :                                 latest_task = q;
    3436                 :            :                         }
    3437                 :            :                         /* Process the task per the caller's callback */
    3438                 :          0 :                         process(q, data);
    3439                 :            :                         put_task_struct(q);
    3440                 :            :                 }
    3441                 :            :                 /*
    3442                 :            :                  * If we had to process any tasks at all, scan again
    3443                 :            :                  * in case some of them were in the middle of forking
    3444                 :            :                  * children that didn't get processed.
    3445                 :            :                  * Not the most efficient way to do it, but it avoids
    3446                 :            :                  * having to take callback_mutex in the fork path
    3447                 :            :                  */
    3448                 :            :                 goto again;
    3449                 :            :         }
    3450         [ #  # ]:          0 :         if (heap == &tmp_heap)
    3451                 :          0 :                 heap_free(&tmp_heap);
    3452                 :            :         return 0;
    3453                 :            : }
    3454                 :            : 
    3455                 :          0 : static void cgroup_transfer_one_task(struct task_struct *task, void *data)
    3456                 :            : {
    3457                 :            :         struct cgroup *new_cgroup = data;
    3458                 :            : 
    3459                 :          0 :         mutex_lock(&cgroup_mutex);
    3460                 :          0 :         cgroup_attach_task(new_cgroup, task, false);
    3461                 :          0 :         mutex_unlock(&cgroup_mutex);
    3462                 :          0 : }
    3463                 :            : 
    3464                 :            : /**
    3465                 :            :  * cgroup_transfer_tasks - move tasks from one cgroup to another
    3466                 :            :  * @to: cgroup to which the tasks will be moved
    3467                 :            :  * @from: cgroup in which the tasks currently reside
    3468                 :            :  */
    3469                 :          0 : int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
    3470                 :            : {
    3471                 :          0 :         return css_scan_tasks(&from->dummy_css, NULL, cgroup_transfer_one_task,
    3472                 :            :                               to, NULL);
    3473                 :            : }
    3474                 :            : 
    3475                 :            : /*
    3476                 :            :  * Stuff for reading the 'tasks'/'procs' files.
    3477                 :            :  *
    3478                 :            :  * Reading this file can return large amounts of data if a cgroup has
    3479                 :            :  * *lots* of attached tasks. So it may need several calls to read(),
    3480                 :            :  * but we cannot guarantee that the information we produce is correct
    3481                 :            :  * unless we produce it entirely atomically.
    3482                 :            :  *
    3483                 :            :  */
    3484                 :            : 
    3485                 :            : /* which pidlist file are we talking about? */
    3486                 :            : enum cgroup_filetype {
    3487                 :            :         CGROUP_FILE_PROCS,
    3488                 :            :         CGROUP_FILE_TASKS,
    3489                 :            : };
    3490                 :            : 
    3491                 :            : /*
    3492                 :            :  * A pidlist is a list of pids that virtually represents the contents of one
    3493                 :            :  * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
    3494                 :            :  * a pair (one each for procs, tasks) for each pid namespace that's relevant
    3495                 :            :  * to the cgroup.
    3496                 :            :  */
    3497                 :            : struct cgroup_pidlist {
    3498                 :            :         /*
    3499                 :            :          * used to find which pidlist is wanted. doesn't change as long as
    3500                 :            :          * this particular list stays in the list.
    3501                 :            :         */
    3502                 :            :         struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
    3503                 :            :         /* array of pids (tasks file) or tgids (procs file) */
    3504                 :            :         pid_t *list;
    3505                 :            :         /* how many elements the above list has */
    3506                 :            :         int length;
    3507                 :            :         /* how many files are using the current array */
    3508                 :            :         int use_count;
    3509                 :            :         /* each of these stored in a list by its cgroup */
    3510                 :            :         struct list_head links;
    3511                 :            :         /* pointer to the cgroup we belong to, for list removal purposes */
    3512                 :            :         struct cgroup *owner;
    3513                 :            :         /* protects the other fields */
    3514                 :            :         struct rw_semaphore rwsem;
    3515                 :            : };
    3516                 :            : 
    3517                 :            : /*
    3518                 :            :  * The following two functions "fix" the issue where there are more pids
    3519                 :            :  * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
    3520                 :            :  * TODO: replace with a kernel-wide solution to this problem
    3521                 :            :  */
    3522                 :            : #define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
    3523                 :          0 : static void *pidlist_allocate(int count)
    3524                 :            : {
    3525         [ -  + ]:          1 :         if (PIDLIST_TOO_LARGE(count))
    3526                 :          0 :                 return vmalloc(count * sizeof(pid_t));
    3527                 :            :         else
    3528                 :          1 :                 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
    3529                 :            : }
    3530                 :          0 : static void pidlist_free(void *p)
    3531                 :            : {
    3532         [ -  + ]:          2 :         if (is_vmalloc_addr(p))
    3533                 :          0 :                 vfree(p);
    3534                 :            :         else
    3535                 :          2 :                 kfree(p);
    3536                 :          2 : }
    3537                 :            : 
    3538                 :            : /*
    3539                 :            :  * pidlist_uniq - given a sorted list from pidlist_allocate(), strip out all
    3540                 :            :  * duplicate entries in place. Returns the number of unique elements.
    3541                 :            :  */
    3542                 :          0 : static int pidlist_uniq(pid_t *list, int length)
    3543                 :            : {
    3544                 :            :         int src, dest = 1;
    3545                 :            : 
    3546                 :            :         /*
    3547                 :            :          * we presume the 0th element is unique, so src and dest start at 1.
    3548                 :            :          * trivial edge cases first; no work needs to be done for either
    3549                 :            :          */
    3550         [ #  # ]:          0 :         if (length == 0 || length == 1)
    3551                 :            :                 return length;
    3552                 :            :         /* src and dest walk down the list; dest counts unique elements */
    3553         [ #  # ]:          0 :         for (src = 1; src < length; src++) {
    3554                 :            :                 /* find next unique element */
    3555         [ #  # ]:          0 :                 while (list[src] == list[src-1]) {
    3556                 :          0 :                         src++;
    3557         [ #  # ]:          0 :                         if (src == length)
    3558                 :            :                                 goto after;
    3559                 :            :                 }
    3560                 :            :                 /* dest always points to where the next unique element goes */
    3561                 :          0 :                 list[dest] = list[src];
    3562                 :          0 :                 dest++;
    3563                 :            :         }
    3564                 :            : after:
    3565                 :            :         return dest;
    3566                 :            : }
    3567                 :            : 
    3568                 :          0 : static int cmppid(const void *a, const void *b)
    3569                 :            : {
    3570                 :        880 :         return *(pid_t *)a - *(pid_t *)b;
    3571                 :            : }
    3572                 :            : 
    3573                 :            : /*
    3574                 :            :  * find the appropriate pidlist for our purpose (given procs vs tasks)
    3575                 :            :  * returns with the lock on that pidlist already held, and takes care
    3576                 :            :  * of the use count, or returns NULL with no locks held if we're out of
    3577                 :            :  * memory.
    3578                 :            :  */
    3579                 :          0 : static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
    3580                 :            :                                                   enum cgroup_filetype type)
    3581                 :            : {
    3582                 :            :         struct cgroup_pidlist *l;
    3583                 :            :         /* don't need task_nsproxy() if we're looking at ourself */
    3584                 :          1 :         struct pid_namespace *ns = task_active_pid_ns(current);
    3585                 :            : 
    3586                 :            :         /*
    3587                 :            :          * We can't drop the pidlist_mutex before taking the l->rwsem in case
    3588                 :            :          * the last ref-holder is trying to remove l from the list at the same
    3589                 :            :          * time. Holding the pidlist_mutex prevents anybody from removing the
    3590                 :            :          * list we find out from under us - compare cgroup_release_pid_array().
    3591                 :            :          */
    3592                 :          1 :         mutex_lock(&cgrp->pidlist_mutex);
    3593         [ -  + ]:          2 :         list_for_each_entry(l, &cgrp->pidlists, links) {
    3594 [ #  # ][ #  # ]:          0 :                 if (l->key.type == type && l->key.ns == ns) {
    3595                 :            :                         /* make sure l doesn't vanish out from under us */
    3596                 :          0 :                         down_write(&l->rwsem);
    3597                 :          0 :                         mutex_unlock(&cgrp->pidlist_mutex);
    3598                 :          0 :                         return l;
    3599                 :            :                 }
    3600                 :            :         }
    3601                 :            :         /* entry not found; create a new one */
    3602                 :            :         l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
    3603         [ -  + ]:          1 :         if (!l) {
    3604                 :          0 :                 mutex_unlock(&cgrp->pidlist_mutex);
    3605                 :          0 :                 return l;
    3606                 :            :         }
    3607                 :          1 :         init_rwsem(&l->rwsem);
    3608                 :          1 :         down_write(&l->rwsem);
    3609                 :          1 :         l->key.type = type;
    3610                 :          1 :         l->key.ns = get_pid_ns(ns);
    3611                 :          1 :         l->owner = cgrp;
    3612                 :          1 :         list_add(&l->links, &cgrp->pidlists);
    3613                 :          1 :         mutex_unlock(&cgrp->pidlist_mutex);
    3614                 :          1 :         return l;
    3615                 :            : }
    3616                 :            : 
    3617                 :            : /*
    3618                 :            :  * Load a cgroup's pidarray with either procs' tgids or tasks' pids
    3619                 :            :  */
    3620                 :          0 : static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
    3621                 :            :                               struct cgroup_pidlist **lp)
    3622                 :            : {
    3623                 :            :         pid_t *array;
    3624                 :            :         int length;
    3625                 :            :         int pid, n = 0; /* used for populating the array */
    3626                 :            :         struct css_task_iter it;
    3627                 :            :         struct task_struct *tsk;
    3628                 :            :         struct cgroup_pidlist *l;
    3629                 :            : 
    3630                 :            :         /*
    3631                 :            :          * If cgroup gets more users after we read count, we won't have
    3632                 :            :          * enough space - tough.  This race is indistinguishable to the
    3633                 :            :          * caller from the case that the additional cgroup users didn't
    3634                 :            :          * show up until sometime later on.
    3635                 :            :          */
    3636                 :          1 :         length = cgroup_task_count(cgrp);
    3637                 :          1 :         array = pidlist_allocate(length);
    3638         [ +  - ]:          1 :         if (!array)
    3639                 :            :                 return -ENOMEM;
    3640                 :            :         /* now, populate the array */
    3641                 :          1 :         css_task_iter_start(&cgrp->dummy_css, &it);
    3642         [ +  + ]:         94 :         while ((tsk = css_task_iter_next(&it))) {
    3643         [ +  - ]:         92 :                 if (unlikely(n == length))
    3644                 :            :                         break;
    3645                 :            :                 /* get tgid or pid for procs or tasks file respectively */
    3646         [ -  + ]:         92 :                 if (type == CGROUP_FILE_PROCS)
    3647                 :            :                         pid = task_tgid_vnr(tsk);
    3648                 :            :                 else
    3649                 :            :                         pid = task_pid_vnr(tsk);
    3650         [ +  - ]:         92 :                 if (pid > 0) /* make sure to only use valid results */
    3651                 :         93 :                         array[n++] = pid;
    3652                 :            :         }
    3653                 :          1 :         css_task_iter_end(&it);
    3654                 :            :         length = n;
    3655                 :            :         /* now sort & (if procs) strip out duplicates */
    3656                 :          1 :         sort(array, length, sizeof(pid_t), cmppid, NULL);
    3657         [ -  + ]:          1 :         if (type == CGROUP_FILE_PROCS)
    3658                 :          0 :                 length = pidlist_uniq(array, length);
    3659                 :          1 :         l = cgroup_pidlist_find(cgrp, type);
    3660         [ -  + ]:          1 :         if (!l) {
    3661                 :          0 :                 pidlist_free(array);
    3662                 :          0 :                 return -ENOMEM;
    3663                 :            :         }
    3664                 :            :         /* store array, freeing old if necessary - lock already held */
    3665                 :          1 :         pidlist_free(l->list);
    3666                 :          1 :         l->list = array;
    3667                 :          1 :         l->length = length;
    3668                 :          1 :         l->use_count++;
    3669                 :          1 :         up_write(&l->rwsem);
    3670                 :          1 :         *lp = l;
    3671                 :          1 :         return 0;
    3672                 :            : }
    3673                 :            : 
    3674                 :            : /**
    3675                 :            :  * cgroupstats_build - build and fill cgroupstats
    3676                 :            :  * @stats: cgroupstats to fill information into
    3677                 :            :  * @dentry: A dentry entry belonging to the cgroup for which stats have
    3678                 :            :  * been requested.
    3679                 :            :  *
    3680                 :            :  * Build and fill cgroupstats so that taskstats can export it to user
    3681                 :            :  * space.
    3682                 :            :  */
    3683                 :          0 : int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
    3684                 :            : {
    3685                 :            :         int ret = -EINVAL;
    3686                 :            :         struct cgroup *cgrp;
    3687                 :            :         struct css_task_iter it;
    3688                 :            :         struct task_struct *tsk;
    3689                 :            : 
    3690                 :            :         /*
    3691                 :            :          * Validate dentry by checking the superblock operations,
    3692                 :            :          * and make sure it's a directory.
    3693                 :            :          */
    3694 [ #  # ][ #  # ]:          0 :         if (dentry->d_sb->s_op != &cgroup_ops ||
    3695                 :          0 :             !S_ISDIR(dentry->d_inode->i_mode))
    3696                 :            :                  goto err;
    3697                 :            : 
    3698                 :            :         ret = 0;
    3699                 :          0 :         cgrp = dentry->d_fsdata;
    3700                 :            : 
    3701                 :          0 :         css_task_iter_start(&cgrp->dummy_css, &it);
    3702         [ #  # ]:          0 :         while ((tsk = css_task_iter_next(&it))) {
    3703   [ #  #  #  #  :          0 :                 switch (tsk->state) {
                      # ]
    3704                 :            :                 case TASK_RUNNING:
    3705                 :          0 :                         stats->nr_running++;
    3706                 :          0 :                         break;
    3707                 :            :                 case TASK_INTERRUPTIBLE:
    3708                 :          0 :                         stats->nr_sleeping++;
    3709                 :          0 :                         break;
    3710                 :            :                 case TASK_UNINTERRUPTIBLE:
    3711                 :          0 :                         stats->nr_uninterruptible++;
    3712                 :          0 :                         break;
    3713                 :            :                 case TASK_STOPPED:
    3714                 :          0 :                         stats->nr_stopped++;
    3715                 :          0 :                         break;
    3716                 :            :                 default:
    3717                 :            :                         if (delayacct_is_task_waiting_on_io(tsk))
    3718                 :            :                                 stats->nr_io_wait++;
    3719                 :            :                         break;
    3720                 :            :                 }
    3721                 :            :         }
    3722                 :          0 :         css_task_iter_end(&it);
    3723                 :            : 
    3724                 :            : err:
    3725                 :          0 :         return ret;
    3726                 :            : }
    3727                 :            : 
    3728                 :            : 
    3729                 :            : /*
    3730                 :            :  * seq_file methods for the tasks/procs files. The seq_file position is the
    3731                 :            :  * next pid to display; the seq_file iterator is a pointer to that pid's
    3732                 :            :  * entry in the pidlist's array (l->list).
    3733                 :            :  */
    3734                 :            : 
    3735                 :          0 : static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
    3736                 :            : {
    3737                 :            :         /*
    3738                 :            :          * Initially we receive a position value that corresponds to
    3739                 :            :          * one more than the last pid shown (or 0 on the first call or
    3740                 :            :          * after a seek to the start). Use a binary-search to find the
    3741                 :            :          * next pid to display, if any
    3742                 :            :          */
    3743                 :          2 :         struct cgroup_pidlist *l = s->private;
    3744                 :          2 :         int index = 0, pid = *pos;
    3745                 :            :         int *iter;
    3746                 :            : 
    3747                 :          2 :         down_read(&l->rwsem);
    3748         [ +  + ]:          2 :         if (pid) {
    3749                 :          1 :                 int end = l->length;
    3750                 :            : 
    3751         [ +  + ]:          7 :                 while (index < end) {
    3752                 :          6 :                         int mid = (index + end) / 2;
    3753         [ +  - ]:          6 :                         if (l->list[mid] == pid) {
    3754                 :            :                                 index = mid;
    3755                 :            :                                 break;
    3756         [ +  - ]:          6 :                         } else if (l->list[mid] <= pid)
    3757                 :          6 :                                 index = mid + 1;
    3758                 :            :                         else
    3759                 :            :                                 end = mid;
    3760                 :            :                 }
    3761                 :            :         }
    3762                 :            :         /* If we're off the end of the array, we're done */
    3763         [ #  # ]:          2 :         if (index >= l->length)
    3764                 :            :                 return NULL;
    3765                 :            :         /* Update the abstract position to be the actual pid that we found */
    3766                 :          1 :         iter = l->list + index;
    3767                 :          1 :         *pos = *iter;
    3768                 :          1 :         return iter;
    3769                 :            : }
    3770                 :            : 
    3771                 :          0 : static void cgroup_pidlist_stop(struct seq_file *s, void *v)
    3772                 :            : {
    3773                 :          2 :         struct cgroup_pidlist *l = s->private;
    3774                 :          2 :         up_read(&l->rwsem);
    3775                 :          2 : }
    3776                 :            : 
    3777                 :          0 : static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
    3778                 :            : {
    3779                 :         92 :         struct cgroup_pidlist *l = s->private;
    3780                 :            :         pid_t *p = v;
    3781                 :         92 :         pid_t *end = l->list + l->length;
    3782                 :            :         /*
    3783                 :            :          * Advance to the next pid in the array. If this goes off the
    3784                 :            :          * end, we're done
    3785                 :            :          */
    3786                 :         92 :         p++;
    3787         [ +  + ]:         92 :         if (p >= end) {
    3788                 :            :                 return NULL;
    3789                 :            :         } else {
    3790                 :         91 :                 *pos = *p;
    3791                 :         91 :                 return p;
    3792                 :            :         }
    3793                 :            : }
    3794                 :            : 
    3795                 :          0 : static int cgroup_pidlist_show(struct seq_file *s, void *v)
    3796                 :            : {
    3797                 :         92 :         return seq_printf(s, "%d\n", *(int *)v);
    3798                 :            : }
    3799                 :            : 
    3800                 :            : /*
    3801                 :            :  * seq_operations functions for iterating on pidlists through seq_file -
    3802                 :            :  * independent of whether it's tasks or procs
    3803                 :            :  */
    3804                 :            : static const struct seq_operations cgroup_pidlist_seq_operations = {
    3805                 :            :         .start = cgroup_pidlist_start,
    3806                 :            :         .stop = cgroup_pidlist_stop,
    3807                 :            :         .next = cgroup_pidlist_next,
    3808                 :            :         .show = cgroup_pidlist_show,
    3809                 :            : };
    3810                 :            : 
    3811                 :          0 : static void cgroup_release_pid_array(struct cgroup_pidlist *l)
    3812                 :            : {
    3813                 :            :         /*
    3814                 :            :          * the case where we're the last user of this particular pidlist will
    3815                 :            :          * have us remove it from the cgroup's list, which entails taking the
    3816                 :            :          * mutex. since cgroup_pidlist_find() takes the pidlist's rwsem while
    3817                 :            :          * holding cgroup->pidlist_mutex, we have to take pidlist_mutex first.
    3818                 :            :          */
    3819                 :          1 :         mutex_lock(&l->owner->pidlist_mutex);
    3820                 :          1 :         down_write(&l->rwsem);
    3821         [ -  + ]:          2 :         BUG_ON(!l->use_count);
    3822         [ +  - ]:          1 :         if (!--l->use_count) {
    3823                 :            :                 /* we're the last user if refcount is 0; remove and free */
    3824                 :            :                 list_del(&l->links);
    3825                 :          1 :                 mutex_unlock(&l->owner->pidlist_mutex);
    3826                 :          1 :                 pidlist_free(l->list);
    3827                 :            :                 put_pid_ns(l->key.ns);
    3828                 :          1 :                 up_write(&l->rwsem);
    3829                 :          1 :                 kfree(l);
    3830                 :          1 :                 return;
    3831                 :            :         }
    3832                 :          0 :         mutex_unlock(&l->owner->pidlist_mutex);
    3833                 :          0 :         up_write(&l->rwsem);
    3834                 :            : }
    3835                 :            : 
    3836                 :          0 : static int cgroup_pidlist_release(struct inode *inode, struct file *file)
    3837                 :            : {
    3838                 :            :         struct cgroup_pidlist *l;
    3839         [ +  - ]:          1 :         if (!(file->f_mode & FMODE_READ))
    3840                 :            :                 return 0;
    3841                 :            :         /*
    3842                 :            :          * the seq_file will only be initialized if the file was opened for
    3843                 :            :          * reading; hence we only dereference it in that case.
    3844                 :            :          */
    3845                 :          1 :         l = ((struct seq_file *)file->private_data)->private;
    3846                 :          1 :         cgroup_release_pid_array(l);
    3847                 :          1 :         return seq_release(inode, file);
    3848                 :            : }
    3849                 :            : 
    3850                 :            : static const struct file_operations cgroup_pidlist_operations = {
    3851                 :            :         .read = seq_read,
    3852                 :            :         .llseek = seq_lseek,
    3853                 :            :         .write = cgroup_file_write,
    3854                 :            :         .release = cgroup_pidlist_release,
    3855                 :            : };
    3856                 :            : 
    3857                 :            : /*
    3858                 :            :  * The following functions handle opens on a file that displays a pidlist
    3859                 :            :  * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
    3860                 :            :  * in the cgroup.
    3861                 :            :  */
    3862                 :            : /* helper function for the two below it */
    3863                 :          0 : static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
    3864                 :            : {
    3865                 :          1 :         struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
    3866                 :            :         struct cgroup_pidlist *l;
    3867                 :            :         int retval;
    3868                 :            : 
    3869                 :            :         /* Nothing to do for write-only files */
    3870         [ +  - ]:          1 :         if (!(file->f_mode & FMODE_READ))
    3871                 :            :                 return 0;
    3872                 :            : 
    3873                 :            :         /* have the array populated */
    3874                 :          1 :         retval = pidlist_array_load(cgrp, type, &l);
    3875         [ +  - ]:          1 :         if (retval)
    3876                 :            :                 return retval;
    3877                 :            :         /* configure file information */
    3878                 :          1 :         file->f_op = &cgroup_pidlist_operations;
    3879                 :            : 
    3880                 :          1 :         retval = seq_open(file, &cgroup_pidlist_seq_operations);
    3881         [ -  + ]:          2 :         if (retval) {
    3882                 :          0 :                 cgroup_release_pid_array(l);
    3883                 :          0 :                 return retval;
    3884                 :            :         }
    3885                 :          1 :         ((struct seq_file *)file->private_data)->private = l;
    3886                 :          1 :         return 0;
    3887                 :            : }
    3888                 :          0 : static int cgroup_tasks_open(struct inode *unused, struct file *file)
    3889                 :            : {
    3890                 :          1 :         return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
    3891                 :            : }
    3892                 :          0 : static int cgroup_procs_open(struct inode *unused, struct file *file)
    3893                 :            : {
    3894                 :          0 :         return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
    3895                 :            : }
    3896                 :            : 
    3897                 :          0 : static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
    3898                 :            :                                          struct cftype *cft)
    3899                 :            : {
    3900                 :          8 :         return notify_on_release(css->cgroup);
    3901                 :            : }
    3902                 :            : 
    3903                 :          0 : static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
    3904                 :            :                                           struct cftype *cft, u64 val)
    3905                 :            : {
    3906                 :          2 :         clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
    3907         [ +  + ]:          2 :         if (val)
    3908                 :          1 :                 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
    3909                 :            :         else
    3910                 :          1 :                 clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
    3911                 :          2 :         return 0;
    3912                 :            : }
    3913                 :            : 
    3914                 :            : /*
    3915                 :            :  * When dput() is called asynchronously, if umount has been done and
    3916                 :            :  * then deactivate_super() in cgroup_free_fn() kills the superblock,
    3917                 :            :  * there's a small window that vfs will see the root dentry with non-zero
    3918                 :            :  * refcnt and trigger BUG().
    3919                 :            :  *
    3920                 :            :  * That's why we hold a reference before dput() and drop it right after.
    3921                 :            :  */
    3922                 :          0 : static void cgroup_dput(struct cgroup *cgrp)
    3923                 :            : {
    3924                 :          0 :         struct super_block *sb = cgrp->root->sb;
    3925                 :            : 
    3926                 :          0 :         atomic_inc(&sb->s_active);
    3927                 :          0 :         dput(cgrp->dentry);
    3928                 :          0 :         deactivate_super(sb);
    3929                 :          0 : }
    3930                 :            : 
    3931                 :            : /*
    3932                 :            :  * Unregister event and free resources.
    3933                 :            :  *
    3934                 :            :  * Gets called from workqueue.
    3935                 :            :  */
    3936                 :          0 : static void cgroup_event_remove(struct work_struct *work)
    3937                 :            : {
    3938                 :          0 :         struct cgroup_event *event = container_of(work, struct cgroup_event,
    3939                 :            :                         remove);
    3940                 :          0 :         struct cgroup_subsys_state *css = event->css;
    3941                 :            : 
    3942                 :          0 :         remove_wait_queue(event->wqh, &event->wait);
    3943                 :            : 
    3944                 :          0 :         event->cft->unregister_event(css, event->cft, event->eventfd);
    3945                 :            : 
    3946                 :            :         /* Notify userspace the event is going away. */
    3947                 :          0 :         eventfd_signal(event->eventfd, 1);
    3948                 :            : 
    3949                 :          0 :         eventfd_ctx_put(event->eventfd);
    3950                 :          0 :         kfree(event);
    3951                 :            :         css_put(css);
    3952                 :          0 : }
    3953                 :            : 
    3954                 :            : /*
    3955                 :            :  * Gets called on POLLHUP on eventfd when user closes it.
    3956                 :            :  *
    3957                 :            :  * Called with wqh->lock held and interrupts disabled.
    3958                 :            :  */
    3959                 :          0 : static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
    3960                 :            :                 int sync, void *key)
    3961                 :            : {
    3962                 :            :         struct cgroup_event *event = container_of(wait,
    3963                 :            :                         struct cgroup_event, wait);
    3964                 :          0 :         struct cgroup *cgrp = event->css->cgroup;
    3965                 :          0 :         unsigned long flags = (unsigned long)key;
    3966                 :            : 
    3967         [ #  # ]:          0 :         if (flags & POLLHUP) {
    3968                 :            :                 /*
    3969                 :            :                  * If the event has been detached at cgroup removal, we
    3970                 :            :                  * can simply return knowing the other side will cleanup
    3971                 :            :                  * for us.
    3972                 :            :                  *
    3973                 :            :                  * We can't race against event freeing since the other
    3974                 :            :                  * side will require wqh->lock via remove_wait_queue(),
    3975                 :            :                  * which we hold.
    3976                 :            :                  */
    3977                 :            :                 spin_lock(&cgrp->event_list_lock);
    3978         [ #  # ]:          0 :                 if (!list_empty(&event->list)) {
    3979                 :            :                         list_del_init(&event->list);
    3980                 :            :                         /*
    3981                 :            :                          * We are in atomic context, but cgroup_event_remove()
    3982                 :            :                          * may sleep, so we have to call it in workqueue.
    3983                 :            :                          */
    3984                 :          0 :                         schedule_work(&event->remove);
    3985                 :            :                 }
    3986                 :            :                 spin_unlock(&cgrp->event_list_lock);
    3987                 :            :         }
    3988                 :            : 
    3989                 :          0 :         return 0;
    3990                 :            : }
    3991                 :            : 
    3992                 :          0 : static void cgroup_event_ptable_queue_proc(struct file *file,
    3993                 :            :                 wait_queue_head_t *wqh, poll_table *pt)
    3994                 :            : {
    3995                 :            :         struct cgroup_event *event = container_of(pt,
    3996                 :            :                         struct cgroup_event, pt);
    3997                 :            : 
    3998                 :          0 :         event->wqh = wqh;
    3999                 :          0 :         add_wait_queue(wqh, &event->wait);
    4000                 :          0 : }
    4001                 :            : 
    4002                 :            : /*
    4003                 :            :  * Parse input and register new cgroup event handler.
    4004                 :            :  *
    4005                 :            :  * Input must be in format '<event_fd> <control_fd> <args>'.
    4006                 :            :  * Interpretation of args is defined by control file implementation.
    4007                 :            :  */
    4008                 :          0 : static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
    4009                 :            :                                       struct cftype *cft, const char *buffer)
    4010                 :            : {
    4011                 :          0 :         struct cgroup *cgrp = dummy_css->cgroup;
    4012                 :            :         struct cgroup_event *event;
    4013                 :            :         struct cgroup_subsys_state *cfile_css;
    4014                 :            :         unsigned int efd, cfd;
    4015                 :            :         struct fd efile;
    4016                 :            :         struct fd cfile;
    4017                 :            :         char *endp;
    4018                 :            :         int ret;
    4019                 :            : 
    4020                 :          0 :         efd = simple_strtoul(buffer, &endp, 10);
    4021         [ #  # ]:          0 :         if (*endp != ' ')
    4022                 :            :                 return -EINVAL;
    4023                 :          0 :         buffer = endp + 1;
    4024                 :            : 
    4025                 :          0 :         cfd = simple_strtoul(buffer, &endp, 10);
    4026         [ #  # ]:          0 :         if ((*endp != ' ') && (*endp != '\0'))
    4027                 :            :                 return -EINVAL;
    4028                 :          0 :         buffer = endp + 1;
    4029                 :            : 
    4030                 :            :         event = kzalloc(sizeof(*event), GFP_KERNEL);
    4031         [ #  # ]:          0 :         if (!event)
    4032                 :            :                 return -ENOMEM;
    4033                 :            : 
    4034                 :          0 :         INIT_LIST_HEAD(&event->list);
    4035                 :            :         init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
    4036                 :            :         init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
    4037                 :          0 :         INIT_WORK(&event->remove, cgroup_event_remove);
    4038                 :            : 
    4039                 :            :         efile = fdget(efd);
    4040         [ #  # ]:          0 :         if (!efile.file) {
    4041                 :            :                 ret = -EBADF;
    4042                 :            :                 goto out_kfree;
    4043                 :            :         }
    4044                 :            : 
    4045                 :          0 :         event->eventfd = eventfd_ctx_fileget(efile.file);
    4046         [ #  # ]:          0 :         if (IS_ERR(event->eventfd)) {
    4047                 :            :                 ret = PTR_ERR(event->eventfd);
    4048                 :          0 :                 goto out_put_efile;
    4049                 :            :         }
    4050                 :            : 
    4051                 :            :         cfile = fdget(cfd);
    4052         [ #  # ]:          0 :         if (!cfile.file) {
    4053                 :            :                 ret = -EBADF;
    4054                 :            :                 goto out_put_eventfd;
    4055                 :            :         }
    4056                 :            : 
    4057                 :            :         /* the process need read permission on control file */
    4058                 :            :         /* AV: shouldn't we check that it's been opened for read instead? */
    4059                 :          0 :         ret = inode_permission(file_inode(cfile.file), MAY_READ);
    4060         [ #  # ]:          0 :         if (ret < 0)
    4061                 :            :                 goto out_put_cfile;
    4062                 :            : 
    4063                 :          0 :         event->cft = __file_cft(cfile.file);
    4064         [ #  # ]:          0 :         if (IS_ERR(event->cft)) {
    4065                 :            :                 ret = PTR_ERR(event->cft);
    4066                 :          0 :                 goto out_put_cfile;
    4067                 :            :         }
    4068                 :            : 
    4069         [ #  # ]:          0 :         if (!event->cft->ss) {
    4070                 :            :                 ret = -EBADF;
    4071                 :            :                 goto out_put_cfile;
    4072                 :            :         }
    4073                 :            : 
    4074                 :            :         /*
    4075                 :            :          * Determine the css of @cfile, verify it belongs to the same
    4076                 :            :          * cgroup as cgroup.event_control, and associate @event with it.
    4077                 :            :          * Remaining events are automatically removed on cgroup destruction
    4078                 :            :          * but the removal is asynchronous, so take an extra ref.
    4079                 :            :          */
    4080                 :            :         rcu_read_lock();
    4081                 :            : 
    4082                 :            :         ret = -EINVAL;
    4083                 :          0 :         event->css = cgroup_css(cgrp, event->cft->ss);
    4084                 :          0 :         cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
    4085 [ #  # ][ #  # ]:          0 :         if (event->css && event->css == cfile_css && css_tryget(event->css))
                 [ #  # ]
    4086                 :            :                 ret = 0;
    4087                 :            : 
    4088                 :            :         rcu_read_unlock();
    4089         [ #  # ]:          0 :         if (ret)
    4090                 :            :                 goto out_put_cfile;
    4091                 :            : 
    4092 [ #  # ][ #  # ]:          0 :         if (!event->cft->register_event || !event->cft->unregister_event) {
    4093                 :            :                 ret = -EINVAL;
    4094                 :            :                 goto out_put_css;
    4095                 :            :         }
    4096                 :            : 
    4097                 :          0 :         ret = event->cft->register_event(event->css, event->cft,
    4098                 :            :                         event->eventfd, buffer);
    4099         [ #  # ]:          0 :         if (ret)
    4100                 :            :                 goto out_put_css;
    4101                 :            : 
    4102                 :          0 :         efile.file->f_op->poll(efile.file, &event->pt);
    4103                 :            : 
    4104                 :            :         spin_lock(&cgrp->event_list_lock);
    4105                 :          0 :         list_add(&event->list, &cgrp->event_list);
    4106                 :            :         spin_unlock(&cgrp->event_list_lock);
    4107                 :            : 
    4108                 :            :         fdput(cfile);
    4109                 :            :         fdput(efile);
    4110                 :            : 
    4111                 :            :         return 0;
    4112                 :            : 
    4113                 :            : out_put_css:
    4114                 :          0 :         css_put(event->css);
    4115                 :            : out_put_cfile:
    4116                 :            :         fdput(cfile);
    4117                 :            : out_put_eventfd:
    4118                 :          0 :         eventfd_ctx_put(event->eventfd);
    4119                 :            : out_put_efile:
    4120                 :            :         fdput(efile);
    4121                 :            : out_kfree:
    4122                 :          0 :         kfree(event);
    4123                 :            : 
    4124                 :          0 :         return ret;
    4125                 :            : }
    4126                 :            : 
    4127                 :          0 : static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
    4128                 :            :                                       struct cftype *cft)
    4129                 :            : {
    4130                 :          0 :         return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
    4131                 :            : }
    4132                 :            : 
    4133                 :          0 : static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
    4134                 :            :                                        struct cftype *cft, u64 val)
    4135                 :            : {
    4136         [ #  # ]:          0 :         if (val)
    4137                 :          0 :                 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
    4138                 :            :         else
    4139                 :          0 :                 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
    4140                 :          0 :         return 0;
    4141                 :            : }
    4142                 :            : 
    4143                 :            : static struct cftype cgroup_base_files[] = {
    4144                 :            :         {
    4145                 :            :                 .name = "cgroup.procs",
    4146                 :            :                 .open = cgroup_procs_open,
    4147                 :            :                 .write_u64 = cgroup_procs_write,
    4148                 :            :                 .release = cgroup_pidlist_release,
    4149                 :            :                 .mode = S_IRUGO | S_IWUSR,
    4150                 :            :         },
    4151                 :            :         {
    4152                 :            :                 .name = "cgroup.event_control",
    4153                 :            :                 .write_string = cgroup_write_event_control,
    4154                 :            :                 .mode = S_IWUGO,
    4155                 :            :         },
    4156                 :            :         {
    4157                 :            :                 .name = "cgroup.clone_children",
    4158                 :            :                 .flags = CFTYPE_INSANE,
    4159                 :            :                 .read_u64 = cgroup_clone_children_read,
    4160                 :            :                 .write_u64 = cgroup_clone_children_write,
    4161                 :            :         },
    4162                 :            :         {
    4163                 :            :                 .name = "cgroup.sane_behavior",
    4164                 :            :                 .flags = CFTYPE_ONLY_ON_ROOT,
    4165                 :            :                 .read_seq_string = cgroup_sane_behavior_show,
    4166                 :            :         },
    4167                 :            : 
    4168                 :            :         /*
    4169                 :            :          * Historical crazy stuff.  These don't have "cgroup."  prefix and
    4170                 :            :          * don't exist if sane_behavior.  If you're depending on these, be
    4171                 :            :          * prepared to be burned.
    4172                 :            :          */
    4173                 :            :         {
    4174                 :            :                 .name = "tasks",
    4175                 :            :                 .flags = CFTYPE_INSANE,         /* use "procs" instead */
    4176                 :            :                 .open = cgroup_tasks_open,
    4177                 :            :                 .write_u64 = cgroup_tasks_write,
    4178                 :            :                 .release = cgroup_pidlist_release,
    4179                 :            :                 .mode = S_IRUGO | S_IWUSR,
    4180                 :            :         },
    4181                 :            :         {
    4182                 :            :                 .name = "notify_on_release",
    4183                 :            :                 .flags = CFTYPE_INSANE,
    4184                 :            :                 .read_u64 = cgroup_read_notify_on_release,
    4185                 :            :                 .write_u64 = cgroup_write_notify_on_release,
    4186                 :            :         },
    4187                 :            :         {
    4188                 :            :                 .name = "release_agent",
    4189                 :            :                 .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
    4190                 :            :                 .read_seq_string = cgroup_release_agent_show,
    4191                 :            :                 .write_string = cgroup_release_agent_write,
    4192                 :            :                 .max_write_len = PATH_MAX,
    4193                 :            :         },
    4194                 :            :         { }     /* terminate */
    4195                 :            : };
    4196                 :            : 
    4197                 :            : /**
    4198                 :            :  * cgroup_populate_dir - create subsys files in a cgroup directory
    4199                 :            :  * @cgrp: target cgroup
    4200                 :            :  * @subsys_mask: mask of the subsystem ids whose files should be added
    4201                 :            :  *
    4202                 :            :  * On failure, no file is added.
    4203                 :            :  */
    4204                 :            : static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
    4205                 :            : {
    4206                 :            :         struct cgroup_subsys *ss;
    4207                 :            :         int i, ret = 0;
    4208                 :            : 
    4209                 :            :         /* process cftsets of each subsystem */
    4210                 :            :         for_each_subsys(ss, i) {
    4211                 :            :                 struct cftype_set *set;
    4212                 :            : 
    4213                 :            :                 if (!test_bit(i, &subsys_mask))
    4214                 :            :                         continue;
    4215                 :            : 
    4216                 :            :                 list_for_each_entry(set, &ss->cftsets, node) {
    4217                 :            :                         ret = cgroup_addrm_files(cgrp, set->cfts, true);
    4218                 :            :                         if (ret < 0)
    4219                 :            :                                 goto err;
    4220                 :            :                 }
    4221                 :            :         }
    4222                 :            :         return 0;
    4223                 :            : err:
    4224                 :            :         cgroup_clear_dir(cgrp, subsys_mask);
    4225                 :            :         return ret;
    4226                 :            : }
    4227                 :            : 
    4228                 :            : /*
    4229                 :            :  * css destruction is four-stage process.
    4230                 :            :  *
    4231                 :            :  * 1. Destruction starts.  Killing of the percpu_ref is initiated.
    4232                 :            :  *    Implemented in kill_css().
    4233                 :            :  *
    4234                 :            :  * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
    4235                 :            :  *    and thus css_tryget() is guaranteed to fail, the css can be offlined
    4236                 :            :  *    by invoking offline_css().  After offlining, the base ref is put.
    4237                 :            :  *    Implemented in css_killed_work_fn().
    4238                 :            :  *
    4239                 :            :  * 3. When the percpu_ref reaches zero, the only possible remaining
    4240                 :            :  *    accessors are inside RCU read sections.  css_release() schedules the
    4241                 :            :  *    RCU callback.
    4242                 :            :  *
    4243                 :            :  * 4. After the grace period, the css can be freed.  Implemented in
    4244                 :            :  *    css_free_work_fn().
    4245                 :            :  *
    4246                 :            :  * It is actually hairier because both step 2 and 4 require process context
    4247                 :            :  * and thus involve punting to css->destroy_work adding two additional
    4248                 :            :  * steps to the already complex sequence.
    4249                 :            :  */
    4250                 :          0 : static void css_free_work_fn(struct work_struct *work)
    4251                 :            : {
    4252                 :          0 :         struct cgroup_subsys_state *css =
    4253                 :            :                 container_of(work, struct cgroup_subsys_state, destroy_work);
    4254                 :          0 :         struct cgroup *cgrp = css->cgroup;
    4255                 :            : 
    4256         [ #  # ]:          0 :         if (css->parent)
    4257                 :            :                 css_put(css->parent);
    4258                 :            : 
    4259                 :          0 :         css->ss->css_free(css);
    4260                 :          0 :         cgroup_dput(cgrp);
    4261                 :          0 : }
    4262                 :            : 
    4263                 :          0 : static void css_free_rcu_fn(struct rcu_head *rcu_head)
    4264                 :            : {
    4265                 :            :         struct cgroup_subsys_state *css =
    4266                 :            :                 container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
    4267                 :            : 
    4268                 :            :         /*
    4269                 :            :          * css holds an extra ref to @cgrp->dentry which is put on the last
    4270                 :            :          * css_put().  dput() requires process context which we don't have.
    4271                 :            :          */
    4272                 :          0 :         INIT_WORK(&css->destroy_work, css_free_work_fn);
    4273                 :          0 :         queue_work(cgroup_destroy_wq, &css->destroy_work);
    4274                 :          0 : }
    4275                 :            : 
    4276                 :          0 : static void css_release(struct percpu_ref *ref)
    4277                 :            : {
    4278                 :            :         struct cgroup_subsys_state *css =
    4279                 :            :                 container_of(ref, struct cgroup_subsys_state, refcnt);
    4280                 :            : 
    4281                 :          0 :         rcu_assign_pointer(css->cgroup->subsys[css->ss->subsys_id], NULL);
    4282                 :          0 :         call_rcu(&css->rcu_head, css_free_rcu_fn);
    4283                 :          0 : }
    4284                 :            : 
    4285                 :          0 : static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
    4286                 :            :                      struct cgroup *cgrp)
    4287                 :            : {
    4288                 :          0 :         css->cgroup = cgrp;
    4289                 :          0 :         css->ss = ss;
    4290                 :          0 :         css->flags = 0;
    4291                 :            : 
    4292         [ #  # ]:          0 :         if (cgrp->parent)
    4293                 :          0 :                 css->parent = cgroup_css(cgrp->parent, ss);
    4294                 :            :         else
    4295                 :          0 :                 css->flags |= CSS_ROOT;
    4296                 :            : 
    4297         [ #  # ]:          0 :         BUG_ON(cgroup_css(cgrp, ss));
    4298                 :          0 : }
    4299                 :            : 
    4300                 :            : /* invoke ->css_online() on a new CSS and mark it online if successful */
    4301                 :          0 : static int online_css(struct cgroup_subsys_state *css)
    4302                 :            : {
    4303                 :          0 :         struct cgroup_subsys *ss = css->ss;
    4304                 :            :         int ret = 0;
    4305                 :            : 
    4306                 :            :         lockdep_assert_held(&cgroup_mutex);
    4307                 :            : 
    4308         [ #  # ]:          0 :         if (ss->css_online)
    4309                 :          0 :                 ret = ss->css_online(css);
    4310         [ #  # ]:          0 :         if (!ret) {
    4311                 :          0 :                 css->flags |= CSS_ONLINE;
    4312                 :          0 :                 css->cgroup->nr_css++;
    4313                 :          0 :                 rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css);
    4314                 :            :         }
    4315                 :          0 :         return ret;
    4316                 :            : }
    4317                 :            : 
    4318                 :            : /* if the CSS is online, invoke ->css_offline() on it and mark it offline */
    4319                 :          0 : static void offline_css(struct cgroup_subsys_state *css)
    4320                 :            : {
    4321                 :          0 :         struct cgroup_subsys *ss = css->ss;
    4322                 :            : 
    4323                 :            :         lockdep_assert_held(&cgroup_mutex);
    4324                 :            : 
    4325         [ #  # ]:          0 :         if (!(css->flags & CSS_ONLINE))
    4326                 :          0 :                 return;
    4327                 :            : 
    4328         [ #  # ]:          0 :         if (ss->css_offline)
    4329                 :          0 :                 ss->css_offline(css);
    4330                 :            : 
    4331                 :          0 :         css->flags &= ~CSS_ONLINE;
    4332                 :          0 :         css->cgroup->nr_css--;
    4333                 :          0 :         RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
    4334                 :            : }
    4335                 :            : 
    4336                 :            : /*
    4337                 :            :  * cgroup_create - create a cgroup
    4338                 :            :  * @parent: cgroup that will be parent of the new cgroup
    4339                 :            :  * @dentry: dentry of the new cgroup
    4340                 :            :  * @mode: mode to set on new inode
    4341                 :            :  *
    4342                 :            :  * Must be called with the mutex on the parent inode held
    4343                 :            :  */
    4344                 :          0 : static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
    4345                 :            :                              umode_t mode)
    4346                 :            : {
    4347                 :            :         struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { };
    4348                 :            :         struct cgroup *cgrp;
    4349                 :            :         struct cgroup_name *name;
    4350                 :          2 :         struct cgroupfs_root *root = parent->root;
    4351                 :            :         int err = 0;
    4352                 :            :         struct cgroup_subsys *ss;
    4353                 :          2 :         struct super_block *sb = root->sb;
    4354                 :            : 
    4355                 :            :         /* allocate the cgroup and its ID, 0 is reserved for the root */
    4356                 :            :         cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
    4357         [ +  - ]:          2 :         if (!cgrp)
    4358                 :            :                 return -ENOMEM;
    4359                 :            : 
    4360                 :          2 :         name = cgroup_alloc_name(dentry);
    4361         [ +  - ]:          2 :         if (!name)
    4362                 :            :                 goto err_free_cgrp;
    4363                 :          2 :         rcu_assign_pointer(cgrp->name, name);
    4364                 :            : 
    4365                 :            :         /*
    4366                 :            :          * Temporarily set the pointer to NULL, so idr_find() won't return
    4367                 :            :          * a half-baked cgroup.
    4368                 :            :          */
    4369                 :          2 :         cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
    4370         [ +  - ]:          2 :         if (cgrp->id < 0)
    4371                 :            :                 goto err_free_name;
    4372                 :            : 
    4373                 :            :         /*
    4374                 :            :          * Only live parents can have children.  Note that the liveliness
    4375                 :            :          * check isn't strictly necessary because cgroup_mkdir() and
    4376                 :            :          * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
    4377                 :            :          * anyway so that locking is contained inside cgroup proper and we
    4378                 :            :          * don't get nasty surprises if we ever grow another caller.
    4379                 :            :          */
    4380         [ +  - ]:          2 :         if (!cgroup_lock_live_group(parent)) {
    4381                 :            :                 err = -ENODEV;
    4382                 :            :                 goto err_free_id;
    4383                 :            :         }
    4384                 :            : 
    4385                 :            :         /* Grab a reference on the superblock so the hierarchy doesn't
    4386                 :            :          * get deleted on unmount if there are child cgroups.  This
    4387                 :            :          * can be done outside cgroup_mutex, since the sb can't
    4388                 :            :          * disappear while someone has an open control file on the
    4389                 :            :          * fs */
    4390                 :          2 :         atomic_inc(&sb->s_active);
    4391                 :            : 
    4392                 :          2 :         init_cgroup_housekeeping(cgrp);
    4393                 :            : 
    4394                 :          2 :         dentry->d_fsdata = cgrp;
    4395                 :          2 :         cgrp->dentry = dentry;
    4396                 :            : 
    4397                 :          2 :         cgrp->parent = parent;
    4398                 :          2 :         cgrp->dummy_css.parent = &parent->dummy_css;
    4399                 :          2 :         cgrp->root = parent->root;
    4400                 :            : 
    4401         [ +  + ]:          2 :         if (notify_on_release(parent))
    4402                 :          1 :                 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
    4403                 :            : 
    4404         [ -  + ]:          2 :         if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
    4405                 :          0 :                 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
    4406                 :            : 
    4407         [ -  + ]:          2 :         for_each_root_subsys(root, ss) {
    4408                 :            :                 struct cgroup_subsys_state *css;
    4409                 :            : 
    4410                 :          0 :                 css = ss->css_alloc(cgroup_css(parent, ss));
    4411         [ #  # ]:          2 :                 if (IS_ERR(css)) {
    4412                 :            :                         err = PTR_ERR(css);
    4413                 :          0 :                         goto err_free_all;
    4414                 :            :                 }
    4415                 :            :                 css_ar[ss->subsys_id] = css;
    4416                 :            : 
    4417                 :          0 :                 err = percpu_ref_init(&css->refcnt, css_release);
    4418         [ #  # ]:          0 :                 if (err)
    4419                 :            :                         goto err_free_all;
    4420                 :            : 
    4421                 :          0 :                 init_css(css, ss, cgrp);
    4422                 :            :         }
    4423                 :            : 
    4424                 :            :         /*
    4425                 :            :          * Create directory.  cgroup_create_file() returns with the new
    4426                 :            :          * directory locked on success so that it can be populated without
    4427                 :            :          * dropping cgroup_mutex.
    4428                 :            :          */
    4429                 :          2 :         err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
    4430         [ +  - ]:          2 :         if (err < 0)
    4431                 :            :                 goto err_free_all;
    4432                 :            :         lockdep_assert_held(&dentry->d_inode->i_mutex);
    4433                 :            : 
    4434                 :          2 :         cgrp->serial_nr = cgroup_serial_nr_next++;
    4435                 :            : 
    4436                 :            :         /* allocation complete, commit to creation */
    4437                 :          2 :         list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
    4438                 :          2 :         root->number_of_cgroups++;
    4439                 :            : 
    4440                 :            :         /* hold a ref to the parent's dentry */
    4441                 :          2 :         dget(parent->dentry);
    4442                 :            : 
    4443                 :            :         /* creation succeeded, notify subsystems */
    4444         [ -  + ]:          2 :         for_each_root_subsys(root, ss) {
    4445                 :          0 :                 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
    4446                 :            : 
    4447                 :          0 :                 err = online_css(css);
    4448         [ #  # ]:          0 :                 if (err)
    4449                 :            :                         goto err_destroy;
    4450                 :            : 
    4451                 :            :                 /* each css holds a ref to the cgroup's dentry and parent css */
    4452                 :            :                 dget(dentry);
    4453                 :          0 :                 css_get(css->parent);
    4454                 :            : 
    4455                 :            :                 /* mark it consumed for error path */
    4456                 :            :                 css_ar[ss->subsys_id] = NULL;
    4457                 :            : 
    4458 [ #  # ][ #  # ]:          0 :                 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
                 [ #  # ]
    4459                 :          0 :                     parent->parent) {
    4460                 :          0 :                         pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
    4461                 :            :                                    current->comm, current->pid, ss->name);
    4462         [ #  # ]:          0 :                         if (!strcmp(ss->name, "memory"))
    4463                 :          0 :                                 pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
    4464                 :          0 :                         ss->warned_broken_hierarchy = true;
    4465                 :            :                 }
    4466                 :            :         }
    4467                 :            : 
    4468                 :          2 :         idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
    4469                 :            : 
    4470                 :          2 :         err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
    4471         [ +  - ]:          2 :         if (err)
    4472                 :            :                 goto err_destroy;
    4473                 :            : 
    4474                 :            :         err = cgroup_populate_dir(cgrp, root->subsys_mask);
    4475                 :            :         if (err)
    4476                 :            :                 goto err_destroy;
    4477                 :            : 
    4478                 :          2 :         mutex_unlock(&cgroup_mutex);
    4479                 :          2 :         mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
    4480                 :            : 
    4481                 :          2 :         return 0;
    4482                 :            : 
    4483                 :            : err_free_all:
    4484         [ #  # ]:          0 :         for_each_root_subsys(root, ss) {
    4485                 :          0 :                 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
    4486                 :            : 
    4487         [ #  # ]:          0 :                 if (css) {
    4488                 :          0 :                         percpu_ref_cancel_init(&css->refcnt);
    4489                 :          0 :                         ss->css_free(css);
    4490                 :            :                 }
    4491                 :            :         }
    4492                 :          0 :         mutex_unlock(&cgroup_mutex);
    4493                 :            :         /* Release the reference count that we took on the superblock */
    4494                 :          0 :         deactivate_super(sb);
    4495                 :            : err_free_id:
    4496                 :          0 :         idr_remove(&root->cgroup_idr, cgrp->id);
    4497                 :            : err_free_name:
    4498                 :          0 :         kfree(rcu_dereference_raw(cgrp->name));
    4499                 :            : err_free_cgrp:
    4500                 :          0 :         kfree(cgrp);
    4501                 :          0 :         return err;
    4502                 :            : 
    4503                 :            : err_destroy:
    4504         [ #  # ]:          0 :         for_each_root_subsys(root, ss) {
    4505                 :          0 :                 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
    4506                 :            : 
    4507         [ #  # ]:          0 :                 if (css) {
    4508                 :          0 :                         percpu_ref_cancel_init(&css->refcnt);
    4509                 :          0 :                         ss->css_free(css);
    4510                 :            :                 }
    4511                 :            :         }
    4512                 :          0 :         cgroup_destroy_locked(cgrp);
    4513                 :          0 :         mutex_unlock(&cgroup_mutex);
    4514                 :          0 :         mutex_unlock(&dentry->d_inode->i_mutex);
    4515                 :          0 :         return err;
    4516                 :            : }
    4517                 :            : 
    4518                 :          0 : static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
    4519                 :            : {
    4520                 :          2 :         struct cgroup *c_parent = dentry->d_parent->d_fsdata;
    4521                 :            : 
    4522                 :            :         /* the vfs holds inode->i_mutex already */
    4523                 :          2 :         return cgroup_create(c_parent, dentry, mode | S_IFDIR);
    4524                 :            : }
    4525                 :            : 
    4526                 :            : /*
    4527                 :            :  * This is called when the refcnt of a css is confirmed to be killed.
    4528                 :            :  * css_tryget() is now guaranteed to fail.
    4529                 :            :  */
    4530                 :          0 : static void css_killed_work_fn(struct work_struct *work)
    4531                 :            : {
    4532                 :          0 :         struct cgroup_subsys_state *css =
    4533                 :            :                 container_of(work, struct cgroup_subsys_state, destroy_work);
    4534                 :          0 :         struct cgroup *cgrp = css->cgroup;
    4535                 :            : 
    4536                 :          0 :         mutex_lock(&cgroup_mutex);
    4537                 :            : 
    4538                 :            :         /*
    4539                 :            :          * css_tryget() is guaranteed to fail now.  Tell subsystems to
    4540                 :            :          * initate destruction.
    4541                 :            :          */
    4542                 :          0 :         offline_css(css);
    4543                 :            : 
    4544                 :            :         /*
    4545                 :            :          * If @cgrp is marked dead, it's waiting for refs of all css's to
    4546                 :            :          * be disabled before proceeding to the second phase of cgroup
    4547                 :            :          * destruction.  If we are the last one, kick it off.
    4548                 :            :          */
    4549 [ #  # ][ #  # ]:          0 :         if (!cgrp->nr_css && cgroup_is_dead(cgrp))
    4550                 :          0 :                 cgroup_destroy_css_killed(cgrp);
    4551                 :            : 
    4552                 :          0 :         mutex_unlock(&cgroup_mutex);
    4553                 :            : 
    4554                 :            :         /*
    4555                 :            :          * Put the css refs from kill_css().  Each css holds an extra
    4556                 :            :          * reference to the cgroup's dentry and cgroup removal proceeds
    4557                 :            :          * regardless of css refs.  On the last put of each css, whenever
    4558                 :            :          * that may be, the extra dentry ref is put so that dentry
    4559                 :            :          * destruction happens only after all css's are released.
    4560                 :            :          */
    4561                 :            :         css_put(css);
    4562                 :          0 : }
    4563                 :            : 
    4564                 :            : /* css kill confirmation processing requires process context, bounce */
    4565                 :          0 : static void css_killed_ref_fn(struct percpu_ref *ref)
    4566                 :            : {
    4567                 :            :         struct cgroup_subsys_state *css =
    4568                 :            :                 container_of(ref, struct cgroup_subsys_state, refcnt);
    4569                 :            : 
    4570                 :          0 :         INIT_WORK(&css->destroy_work, css_killed_work_fn);
    4571                 :          0 :         queue_work(cgroup_destroy_wq, &css->destroy_work);
    4572                 :          0 : }
    4573                 :            : 
    4574                 :            : /**
    4575                 :            :  * kill_css - destroy a css
    4576                 :            :  * @css: css to destroy
    4577                 :            :  *
    4578                 :            :  * This function initiates destruction of @css by removing cgroup interface
    4579                 :            :  * files and putting its base reference.  ->css_offline() will be invoked
    4580                 :            :  * asynchronously once css_tryget() is guaranteed to fail and when the
    4581                 :            :  * reference count reaches zero, @css will be released.
    4582                 :            :  */
    4583                 :          0 : static void kill_css(struct cgroup_subsys_state *css)
    4584                 :            : {
    4585                 :            :         cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);
    4586                 :            : 
    4587                 :            :         /*
    4588                 :            :          * Killing would put the base ref, but we need to keep it alive
    4589                 :            :          * until after ->css_offline().
    4590                 :            :          */
    4591                 :            :         css_get(css);
    4592                 :            : 
    4593                 :            :         /*
    4594                 :            :          * cgroup core guarantees that, by the time ->css_offline() is
    4595                 :            :          * invoked, no new css reference will be given out via
    4596                 :            :          * css_tryget().  We can't simply call percpu_ref_kill() and
    4597                 :            :          * proceed to offlining css's because percpu_ref_kill() doesn't
    4598                 :            :          * guarantee that the ref is seen as killed on all CPUs on return.
    4599                 :            :          *
    4600                 :            :          * Use percpu_ref_kill_and_confirm() to get notifications as each
    4601                 :            :          * css is confirmed to be seen as killed on all CPUs.
    4602                 :            :          */
    4603                 :          0 :         percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
    4604                 :          0 : }
    4605                 :            : 
    4606                 :            : /**
    4607                 :            :  * cgroup_destroy_locked - the first stage of cgroup destruction
    4608                 :            :  * @cgrp: cgroup to be destroyed
    4609                 :            :  *
    4610                 :            :  * css's make use of percpu refcnts whose killing latency shouldn't be
    4611                 :            :  * exposed to userland and are RCU protected.  Also, cgroup core needs to
    4612                 :            :  * guarantee that css_tryget() won't succeed by the time ->css_offline() is
    4613                 :            :  * invoked.  To satisfy all the requirements, destruction is implemented in
    4614                 :            :  * the following two steps.
    4615                 :            :  *
    4616                 :            :  * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
    4617                 :            :  *     userland visible parts and start killing the percpu refcnts of
    4618                 :            :  *     css's.  Set up so that the next stage will be kicked off once all
    4619                 :            :  *     the percpu refcnts are confirmed to be killed.
    4620                 :            :  *
    4621                 :            :  * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
    4622                 :            :  *     rest of destruction.  Once all cgroup references are gone, the
    4623                 :            :  *     cgroup is RCU-freed.
    4624                 :            :  *
    4625                 :            :  * This function implements s1.  After this step, @cgrp is gone as far as
    4626                 :            :  * the userland is concerned and a new cgroup with the same name may be
    4627                 :            :  * created.  As cgroup doesn't care about the names internally, this
    4628                 :            :  * doesn't cause any problem.
    4629                 :            :  */
    4630                 :          0 : static int cgroup_destroy_locked(struct cgroup *cgrp)
    4631                 :            :         __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
    4632                 :            : {
    4633                 :          2 :         struct dentry *d = cgrp->dentry;
    4634                 :            :         struct cgroup_event *event, *tmp;
    4635                 :            :         struct cgroup_subsys *ss;
    4636                 :            :         struct cgroup *child;
    4637                 :            :         bool empty;
    4638                 :            : 
    4639                 :            :         lockdep_assert_held(&d->d_inode->i_mutex);
    4640                 :            :         lockdep_assert_held(&cgroup_mutex);
    4641                 :            : 
    4642                 :            :         /*
    4643                 :            :          * css_set_lock synchronizes access to ->cset_links and prevents
    4644                 :            :          * @cgrp from being removed while __put_css_set() is in progress.
    4645                 :            :          */
    4646                 :          2 :         read_lock(&css_set_lock);
    4647                 :          2 :         empty = list_empty(&cgrp->cset_links);
    4648                 :            :         read_unlock(&css_set_lock);
    4649         [ +  - ]:          2 :         if (!empty)
    4650                 :            :                 return -EBUSY;
    4651                 :            : 
    4652                 :            :         /*
    4653                 :            :          * Make sure there's no live children.  We can't test ->children
    4654                 :            :          * emptiness as dead children linger on it while being destroyed;
    4655                 :            :          * otherwise, "rmdir parent/child parent" may fail with -EBUSY.
    4656                 :            :          */
    4657                 :            :         empty = true;
    4658                 :            :         rcu_read_lock();
    4659         [ -  + ]:          2 :         list_for_each_entry_rcu(child, &cgrp->children, sibling) {
    4660                 :            :                 empty = cgroup_is_dead(child);
    4661         [ #  # ]:          0 :                 if (!empty)
    4662                 :            :                         break;
    4663                 :            :         }
    4664                 :            :         rcu_read_unlock();
    4665         [ +  - ]:          2 :         if (!empty)
    4666                 :            :                 return -EBUSY;
    4667                 :            : 
    4668                 :            :         /*
    4669                 :            :          * Initiate massacre of all css's.  cgroup_destroy_css_killed()
    4670                 :            :          * will be invoked to perform the rest of destruction once the
    4671                 :            :          * percpu refs of all css's are confirmed to be killed.
    4672                 :            :          */
    4673         [ -  + ]:          2 :         for_each_root_subsys(cgrp->root, ss) {
    4674                 :            :                 struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
    4675                 :            : 
    4676         [ #  # ]:          0 :                 if (css)
    4677                 :          0 :                         kill_css(css);
    4678                 :            :         }
    4679                 :            : 
    4680                 :            :         /*
    4681                 :            :          * Mark @cgrp dead.  This prevents further task migration and child
    4682                 :            :          * creation by disabling cgroup_lock_live_group().  Note that
    4683                 :            :          * CGRP_DEAD assertion is depended upon by css_next_child() to
    4684                 :            :          * resume iteration after dropping RCU read lock.  See
    4685                 :            :          * css_next_child() for details.
    4686                 :            :          */
    4687                 :          2 :         set_bit(CGRP_DEAD, &cgrp->flags);
    4688                 :            : 
    4689                 :            :         /* CGRP_DEAD is set, remove from ->release_list for the last time */
    4690                 :          2 :         raw_spin_lock(&release_list_lock);
    4691         [ -  + ]:          2 :         if (!list_empty(&cgrp->release_list))
    4692                 :            :                 list_del_init(&cgrp->release_list);
    4693                 :            :         raw_spin_unlock(&release_list_lock);
    4694                 :            : 
    4695                 :            :         /*
    4696                 :            :          * If @cgrp has css's attached, the second stage of cgroup
    4697                 :            :          * destruction is kicked off from css_killed_work_fn() after the
    4698                 :            :          * refs of all attached css's are killed.  If @cgrp doesn't have
    4699                 :            :          * any css, we kick it off here.
    4700                 :            :          */
    4701         [ +  - ]:          2 :         if (!cgrp->nr_css)
    4702                 :          2 :                 cgroup_destroy_css_killed(cgrp);
    4703                 :            : 
    4704                 :            :         /*
    4705                 :            :          * Clear the base files and remove @cgrp directory.  The removal
    4706                 :            :          * puts the base ref but we aren't quite done with @cgrp yet, so
    4707                 :            :          * hold onto it.
    4708                 :            :          */
    4709                 :          2 :         cgroup_addrm_files(cgrp, cgroup_base_files, false);
    4710                 :            :         dget(d);
    4711                 :          2 :         cgroup_d_remove_dir(d);
    4712                 :            : 
    4713                 :            :         /*
    4714                 :            :          * Unregister events and notify userspace.
    4715                 :            :          * Notify userspace about cgroup removing only after rmdir of cgroup
    4716                 :            :          * directory to avoid race between userspace and kernelspace.
    4717                 :            :          */
    4718                 :            :         spin_lock(&cgrp->event_list_lock);
    4719         [ -  + ]:          2 :         list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
    4720                 :            :                 list_del_init(&event->list);
    4721                 :          0 :                 schedule_work(&event->remove);
    4722                 :            :         }
    4723                 :            :         spin_unlock(&cgrp->event_list_lock);
    4724                 :            : 
    4725                 :          2 :         return 0;
    4726                 :            : };
    4727                 :            : 
    4728                 :            : /**
    4729                 :            :  * cgroup_destroy_css_killed - the second step of cgroup destruction
    4730                 :            :  * @work: cgroup->destroy_free_work
    4731                 :            :  *
    4732                 :            :  * This function is invoked from a work item for a cgroup which is being
    4733                 :            :  * destroyed after all css's are offlined and performs the rest of
    4734                 :            :  * destruction.  This is the second step of destruction described in the
    4735                 :            :  * comment above cgroup_destroy_locked().
    4736                 :            :  */
    4737                 :          0 : static void cgroup_destroy_css_killed(struct cgroup *cgrp)
    4738                 :            : {
    4739                 :          2 :         struct cgroup *parent = cgrp->parent;
    4740                 :          2 :         struct dentry *d = cgrp->dentry;
    4741                 :            : 
    4742                 :            :         lockdep_assert_held(&cgroup_mutex);
    4743                 :            : 
    4744                 :            :         /* delete this cgroup from parent->children */
    4745                 :            :         list_del_rcu(&cgrp->sibling);
    4746                 :            : 
    4747                 :          2 :         dput(d);
    4748                 :            : 
    4749                 :          2 :         set_bit(CGRP_RELEASABLE, &parent->flags);
    4750                 :          2 :         check_for_release(parent);
    4751                 :          2 : }
    4752                 :            : 
    4753                 :          0 : static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
    4754                 :            : {
    4755                 :            :         int ret;
    4756                 :            : 
    4757                 :          2 :         mutex_lock(&cgroup_mutex);
    4758                 :          2 :         ret = cgroup_destroy_locked(dentry->d_fsdata);
    4759                 :          2 :         mutex_unlock(&cgroup_mutex);
    4760                 :            : 
    4761                 :          2 :         return ret;
    4762                 :            : }
    4763                 :            : 
    4764                 :            : static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
    4765                 :            : {
    4766                 :          0 :         INIT_LIST_HEAD(&ss->cftsets);
    4767                 :            : 
    4768                 :            :         /*
    4769                 :            :          * base_cftset is embedded in subsys itself, no need to worry about
    4770                 :            :          * deregistration.
    4771                 :            :          */
    4772         [ #  # ]:          0 :         if (ss->base_cftypes) {
    4773                 :            :                 struct cftype *cft;
    4774                 :            : 
    4775         [ #  # ]:          0 :                 for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++)
    4776                 :          0 :                         cft->ss = ss;
    4777                 :            : 
    4778                 :          0 :                 ss->base_cftset.cfts = ss->base_cftypes;
    4779                 :          0 :                 list_add_tail(&ss->base_cftset.node, &ss->cftsets);
    4780                 :            :         }
    4781                 :            : }
    4782                 :            : 
    4783                 :            : static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
    4784                 :            : {
    4785                 :            :         struct cgroup_subsys_state *css;
    4786                 :            : 
    4787                 :            :         printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
    4788                 :            : 
    4789                 :            :         mutex_lock(&cgroup_mutex);
    4790                 :            : 
    4791                 :            :         /* init base cftset */
    4792                 :            :         cgroup_init_cftsets(ss);
    4793                 :            : 
    4794                 :            :         /* Create the top cgroup state for this subsystem */
    4795                 :            :         list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
    4796                 :            :         ss->root = &cgroup_dummy_root;
    4797                 :            :         css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
    4798                 :            :         /* We don't handle early failures gracefully */
    4799                 :            :         BUG_ON(IS_ERR(css));
    4800                 :            :         init_css(css, ss, cgroup_dummy_top);
    4801                 :            : 
    4802                 :            :         /* Update the init_css_set to contain a subsys
    4803                 :            :          * pointer to this state - since the subsystem is
    4804                 :            :          * newly registered, all tasks, and hence the
    4805                 :            :          * init_css_set, are in the subsystem's top cgroup. */
    4806                 :            :         init_css_set.subsys[ss->subsys_id] = css;
    4807                 :            : 
    4808                 :            :         need_forkexit_callback |= ss->fork || ss->exit;
    4809                 :            : 
    4810                 :            :         /* At system boot, before all subsystems have been
    4811                 :            :          * registered, no tasks have been forked, so we don't
    4812                 :            :          * need to invoke fork callbacks here. */
    4813                 :            :         BUG_ON(!list_empty(&init_task.tasks));
    4814                 :            : 
    4815                 :            :         BUG_ON(online_css(css));
    4816                 :            : 
    4817                 :            :         mutex_unlock(&cgroup_mutex);
    4818                 :            : 
    4819                 :            :         /* this function shouldn't be used with modular subsystems, since they
    4820                 :            :          * need to register a subsys_id, among other things */
    4821                 :            :         BUG_ON(ss->module);
    4822                 :            : }
    4823                 :            : 
    4824                 :            : /**
    4825                 :            :  * cgroup_load_subsys - load and register a modular subsystem at runtime
    4826                 :            :  * @ss: the subsystem to load
    4827                 :            :  *
    4828                 :            :  * This function should be called in a modular subsystem's initcall. If the
    4829                 :            :  * subsystem is built as a module, it will be assigned a new subsys_id and set
    4830                 :            :  * up for use. If the subsystem is built-in anyway, work is delegated to the
    4831                 :            :  * simpler cgroup_init_subsys.
    4832                 :            :  */
    4833                 :          0 : int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
    4834                 :            : {
    4835                 :            :         struct cgroup_subsys_state *css;
    4836                 :            :         int i, ret;
    4837                 :            :         struct hlist_node *tmp;
    4838                 :            :         struct css_set *cset;
    4839                 :            :         unsigned long key;
    4840                 :            : 
    4841                 :            :         /* check name and function validity */
    4842 [ #  # ][ #  # ]:          0 :         if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
                 [ #  # ]
    4843         [ #  # ]:          0 :             ss->css_alloc == NULL || ss->css_free == NULL)
    4844                 :            :                 return -EINVAL;
    4845                 :            : 
    4846                 :            :         /*
    4847                 :            :          * we don't support callbacks in modular subsystems. this check is
    4848                 :            :          * before the ss->module check for consistency; a subsystem that could
    4849                 :            :          * be a module should still have no callbacks even if the user isn't
    4850                 :            :          * compiling it as one.
    4851                 :            :          */
    4852 [ #  # ][ #  # ]:          0 :         if (ss->fork || ss->exit)
    4853                 :            :                 return -EINVAL;
    4854                 :            : 
    4855                 :            :         /*
    4856                 :            :          * an optionally modular subsystem is built-in: we want to do nothing,
    4857                 :            :          * since cgroup_init_subsys will have already taken care of it.
    4858                 :            :          */
    4859         [ #  # ]:          0 :         if (ss->module == NULL) {
    4860                 :            :                 /* a sanity check */
    4861         [ #  # ]:          0 :                 BUG_ON(cgroup_subsys[ss->subsys_id] != ss);
    4862                 :            :                 return 0;
    4863                 :            :         }
    4864                 :            : 
    4865                 :            :         /* init base cftset */
    4866                 :            :         cgroup_init_cftsets(ss);
    4867                 :            : 
    4868                 :          0 :         mutex_lock(&cgroup_mutex);
    4869                 :          0 :         cgroup_subsys[ss->subsys_id] = ss;
    4870                 :            : 
    4871                 :            :         /*
    4872                 :            :          * no ss->css_alloc seems to need anything important in the ss
    4873                 :            :          * struct, so this can happen first (i.e. before the dummy root
    4874                 :            :          * attachment).
    4875                 :            :          */
    4876                 :          0 :         css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
    4877         [ #  # ]:          0 :         if (IS_ERR(css)) {
    4878                 :            :                 /* failure case - need to deassign the cgroup_subsys[] slot. */
    4879                 :          0 :                 cgroup_subsys[ss->subsys_id] = NULL;
    4880                 :          0 :                 mutex_unlock(&cgroup_mutex);
    4881                 :          0 :                 return PTR_ERR(css);
    4882                 :            :         }
    4883                 :            : 
    4884                 :          0 :         list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
    4885                 :          0 :         ss->root = &cgroup_dummy_root;
    4886                 :            : 
    4887                 :            :         /* our new subsystem will be attached to the dummy hierarchy. */
    4888                 :          0 :         init_css(css, ss, cgroup_dummy_top);
    4889                 :            : 
    4890                 :            :         /*
    4891                 :            :          * Now we need to entangle the css into the existing css_sets. unlike
    4892                 :            :          * in cgroup_init_subsys, there are now multiple css_sets, so each one
    4893                 :            :          * will need a new pointer to it; done by iterating the css_set_table.
    4894                 :            :          * furthermore, modifying the existing css_sets will corrupt the hash
    4895                 :            :          * table state, so each changed css_set will need its hash recomputed.
    4896                 :            :          * this is all done under the css_set_lock.
    4897                 :            :          */
    4898                 :          0 :         write_lock(&css_set_lock);
    4899 [ #  # ][ #  # ]:          0 :         hash_for_each_safe(css_set_table, i, tmp, cset, hlist) {
         [ #  # ][ #  # ]
    4900                 :            :                 /* skip entries that we already rehashed */
    4901         [ #  # ]:          0 :                 if (cset->subsys[ss->subsys_id])
    4902                 :          0 :                         continue;
    4903                 :            :                 /* remove existing entry */
    4904                 :            :                 hash_del(&cset->hlist);
    4905                 :            :                 /* set new value */
    4906                 :          0 :                 cset->subsys[ss->subsys_id] = css;
    4907                 :            :                 /* recompute hash and restore entry */
    4908                 :            :                 key = css_set_hash(cset->subsys);
    4909                 :          0 :                 hash_add(css_set_table, &cset->hlist, key);
    4910                 :            :         }
    4911                 :            :         write_unlock(&css_set_lock);
    4912                 :            : 
    4913                 :          0 :         ret = online_css(css);
    4914         [ #  # ]:          0 :         if (ret)
    4915                 :            :                 goto err_unload;
    4916                 :            : 
    4917                 :            :         /* success! */
    4918                 :          0 :         mutex_unlock(&cgroup_mutex);
    4919                 :          0 :         return 0;
    4920                 :            : 
    4921                 :            : err_unload:
    4922                 :          0 :         mutex_unlock(&cgroup_mutex);
    4923                 :            :         /* @ss can't be mounted here as try_module_get() would fail */
    4924                 :          0 :         cgroup_unload_subsys(ss);
    4925                 :          0 :         return ret;
    4926                 :            : }
    4927                 :            : EXPORT_SYMBOL_GPL(cgroup_load_subsys);
    4928                 :            : 
    4929                 :            : /**
    4930                 :            :  * cgroup_unload_subsys - unload a modular subsystem
    4931                 :            :  * @ss: the subsystem to unload
    4932                 :            :  *
    4933                 :            :  * This function should be called in a modular subsystem's exitcall. When this
    4934                 :            :  * function is invoked, the refcount on the subsystem's module will be 0, so
    4935                 :            :  * the subsystem will not be attached to any hierarchy.
    4936                 :            :  */
    4937                 :          0 : void cgroup_unload_subsys(struct cgroup_subsys *ss)
    4938                 :            : {
    4939                 :            :         struct cgrp_cset_link *link;
    4940                 :            : 
    4941         [ #  # ]:          0 :         BUG_ON(ss->module == NULL);
    4942                 :            : 
    4943                 :            :         /*
    4944                 :            :          * we shouldn't be called if the subsystem is in use, and the use of
    4945                 :            :          * try_module_get() in rebind_subsystems() should ensure that it
    4946                 :            :          * doesn't start being used while we're killing it off.
    4947                 :            :          */
    4948         [ #  # ]:          0 :         BUG_ON(ss->root != &cgroup_dummy_root);
    4949                 :            : 
    4950                 :          0 :         mutex_lock(&cgroup_mutex);
    4951                 :            : 
    4952                 :          0 :         offline_css(cgroup_css(cgroup_dummy_top, ss));
    4953                 :            : 
    4954                 :            :         /* deassign the subsys_id */
    4955                 :          0 :         cgroup_subsys[ss->subsys_id] = NULL;
    4956                 :            : 
    4957                 :            :         /* remove subsystem from the dummy root's list of subsystems */
    4958                 :          0 :         list_del_init(&ss->sibling);
    4959                 :            : 
    4960                 :            :         /*
    4961                 :            :          * disentangle the css from all css_sets attached to the dummy
    4962                 :            :          * top. as in loading, we need to pay our respects to the hashtable
    4963                 :            :          * gods.
    4964                 :            :          */
    4965                 :          0 :         write_lock(&css_set_lock);
    4966         [ #  # ]:          0 :         list_for_each_entry(link, &cgroup_dummy_top->cset_links, cset_link) {
    4967                 :          0 :                 struct css_set *cset = link->cset;
    4968                 :            :                 unsigned long key;
    4969                 :            : 
    4970                 :            :                 hash_del(&cset->hlist);
    4971                 :          0 :                 cset->subsys[ss->subsys_id] = NULL;
    4972                 :            :                 key = css_set_hash(cset->subsys);
    4973                 :          0 :                 hash_add(css_set_table, &cset->hlist, key);
    4974                 :            :         }
    4975                 :            :         write_unlock(&css_set_lock);
    4976                 :            : 
    4977                 :            :         /*
    4978                 :            :          * remove subsystem's css from the cgroup_dummy_top and free it -
    4979                 :            :          * need to free before marking as null because ss->css_free needs
    4980                 :            :          * the cgrp->subsys pointer to find their state.
    4981                 :            :          */
    4982                 :          0 :         ss->css_free(cgroup_css(cgroup_dummy_top, ss));
    4983                 :          0 :         RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
    4984                 :            : 
    4985                 :          0 :         mutex_unlock(&cgroup_mutex);
    4986                 :          0 : }
    4987                 :            : EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
    4988                 :            : 
    4989                 :            : /**
    4990                 :            :  * cgroup_init_early - cgroup initialization at system boot
    4991                 :            :  *
    4992                 :            :  * Initialize cgroups at system boot, and initialize any
    4993                 :            :  * subsystems that request early init.
    4994                 :            :  */
    4995                 :          0 : int __init cgroup_init_early(void)
    4996                 :            : {
    4997                 :            :         struct cgroup_subsys *ss;
    4998                 :            :         int i;
    4999                 :            : 
    5000                 :          0 :         atomic_set(&init_css_set.refcount, 1);
    5001                 :            :         INIT_LIST_HEAD(&init_css_set.cgrp_links);
    5002                 :            :         INIT_LIST_HEAD(&init_css_set.tasks);
    5003                 :            :         INIT_HLIST_NODE(&init_css_set.hlist);
    5004                 :          0 :         css_set_count = 1;
    5005                 :          0 :         init_cgroup_root(&cgroup_dummy_root);
    5006                 :          0 :         cgroup_root_count = 1;
    5007                 :          0 :         RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
    5008                 :            : 
    5009                 :          0 :         init_cgrp_cset_link.cset = &init_css_set;
    5010                 :          0 :         init_cgrp_cset_link.cgrp = cgroup_dummy_top;
    5011                 :            :         list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
    5012                 :            :         list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
    5013                 :            : 
    5014                 :            :         /* at bootup time, we don't worry about modular subsystems */
    5015                 :            :         for_each_builtin_subsys(ss, i) {
    5016                 :            :                 BUG_ON(!ss->name);
    5017                 :            :                 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
    5018                 :            :                 BUG_ON(!ss->css_alloc);
    5019                 :            :                 BUG_ON(!ss->css_free);
    5020                 :            :                 if (ss->subsys_id != i) {
    5021                 :            :                         printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
    5022                 :            :                                ss->name, ss->subsys_id);
    5023                 :            :                         BUG();
    5024                 :            :                 }
    5025                 :            : 
    5026                 :            :                 if (ss->early_init)
    5027                 :            :                         cgroup_init_subsys(ss);
    5028                 :            :         }
    5029                 :          0 :         return 0;
    5030                 :            : }
    5031                 :            : 
    5032                 :            : /**
    5033                 :            :  * cgroup_init - cgroup initialization
    5034                 :            :  *
    5035                 :            :  * Register cgroup filesystem and /proc file, and initialize
    5036                 :            :  * any subsystems that didn't request early init.
    5037                 :            :  */
    5038                 :          0 : int __init cgroup_init(void)
    5039                 :            : {
    5040                 :            :         struct cgroup_subsys *ss;
    5041                 :            :         unsigned long key;
    5042                 :            :         int i, err;
    5043                 :            : 
    5044                 :          0 :         err = bdi_init(&cgroup_backing_dev_info);
    5045         [ #  # ]:          0 :         if (err)
    5046                 :            :                 return err;
    5047                 :            : 
    5048                 :            :         for_each_builtin_subsys(ss, i) {
    5049                 :            :                 if (!ss->early_init)
    5050                 :            :                         cgroup_init_subsys(ss);
    5051                 :            :         }
    5052                 :            : 
    5053                 :            :         /* allocate id for the dummy hierarchy */
    5054                 :          0 :         mutex_lock(&cgroup_mutex);
    5055                 :          0 :         mutex_lock(&cgroup_root_mutex);
    5056                 :            : 
    5057                 :            :         /* Add init_css_set to the hash table */
    5058                 :            :         key = css_set_hash(init_css_set.subsys);
    5059                 :            :         hash_add(css_set_table, &init_css_set.hlist, key);
    5060                 :            : 
    5061         [ #  # ]:          0 :         BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));
    5062                 :            : 
    5063                 :          0 :         err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top,
    5064                 :            :                         0, 1, GFP_KERNEL);
    5065         [ #  # ]:          0 :         BUG_ON(err < 0);
    5066                 :            : 
    5067                 :          0 :         mutex_unlock(&cgroup_root_mutex);
    5068                 :          0 :         mutex_unlock(&cgroup_mutex);
    5069                 :            : 
    5070                 :          0 :         cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
    5071         [ #  # ]:          0 :         if (!cgroup_kobj) {
    5072                 :            :                 err = -ENOMEM;
    5073                 :            :                 goto out;
    5074                 :            :         }
    5075                 :            : 
    5076                 :          0 :         err = register_filesystem(&cgroup_fs_type);
    5077         [ #  # ]:          0 :         if (err < 0) {
    5078                 :          0 :                 kobject_put(cgroup_kobj);
    5079                 :          0 :                 goto out;
    5080                 :            :         }
    5081                 :            : 
    5082                 :            :         proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
    5083                 :            : 
    5084                 :            : out:
    5085         [ #  # ]:          0 :         if (err)
    5086                 :          0 :                 bdi_destroy(&cgroup_backing_dev_info);
    5087                 :            : 
    5088                 :          0 :         return err;
    5089                 :            : }
    5090                 :            : 
    5091                 :          0 : static int __init cgroup_wq_init(void)
    5092                 :            : {
    5093                 :            :         /*
    5094                 :            :          * There isn't much point in executing the destruction path in
    5095                 :            :          * parallel.  A good chunk is serialized with cgroup_mutex anyway.
    5096                 :            :          * Use 1 for @max_active.
    5097                 :            :          *
    5098                 :            :          * We would prefer to do this in cgroup_init() above, but that
    5099                 :            :          * is called before init_workqueues(): so leave this until after.
    5100                 :            :          */
    5101                 :          0 :         cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
    5102         [ #  # ]:          0 :         BUG_ON(!cgroup_destroy_wq);
    5103                 :          0 :         return 0;
    5104                 :            : }
    5105                 :            : core_initcall(cgroup_wq_init);
    5106                 :            : 
    5107                 :            : /*
    5108                 :            :  * proc_cgroup_show()
    5109                 :            :  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
    5110                 :            :  *  - Used for /proc/<pid>/cgroup.
    5111                 :            :  *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
    5112                 :            :  *    doesn't really matter if tsk->cgroup changes after we read it,
    5113                 :            :  *    and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
    5114                 :            :  *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
    5115                 :            :  *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting task's
    5116                 :            :  *    cgroup to top_cgroup.
    5117                 :            :  */
    5118                 :            : 
    5119                 :            : /* TODO: Use a proper seq_file iterator */
    5120                 :          0 : int proc_cgroup_show(struct seq_file *m, void *v)
    5121                 :            : {
    5122                 :            :         struct pid *pid;
    5123                 :            :         struct task_struct *tsk;
    5124                 :            :         char *buf;
    5125                 :            :         int retval;
    5126                 :            :         struct cgroupfs_root *root;
    5127                 :            : 
    5128                 :            :         retval = -ENOMEM;
    5129                 :            :         buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
    5130         [ +  - ]:          2 :         if (!buf)
    5131                 :            :                 goto out;
    5132                 :            : 
    5133                 :            :         retval = -ESRCH;
    5134                 :          2 :         pid = m->private;
    5135                 :          2 :         tsk = get_pid_task(pid, PIDTYPE_PID);
    5136         [ +  - ]:          2 :         if (!tsk)
    5137                 :            :                 goto out_free;
    5138                 :            : 
    5139                 :            :         retval = 0;
    5140                 :            : 
    5141                 :          2 :         mutex_lock(&cgroup_mutex);
    5142                 :            : 
    5143         [ -  + ]:          2 :         for_each_active_root(root) {
    5144                 :            :                 struct cgroup_subsys *ss;
    5145                 :            :                 struct cgroup *cgrp;
    5146                 :            :                 int count = 0;
    5147                 :            : 
    5148                 :          0 :                 seq_printf(m, "%d:", root->hierarchy_id);
    5149         [ #  # ]:          0 :                 for_each_root_subsys(root, ss)
    5150         [ #  # ]:          0 :                         seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
    5151         [ #  # ]:          0 :                 if (strlen(root->name))
    5152         [ #  # ]:          0 :                         seq_printf(m, "%sname=%s", count ? "," : "",
    5153                 :          0 :                                    root->name);
    5154                 :          0 :                 seq_putc(m, ':');
    5155                 :          0 :                 cgrp = task_cgroup_from_root(tsk, root);
    5156                 :          0 :                 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
    5157         [ #  # ]:          0 :                 if (retval < 0)
    5158                 :            :                         goto out_unlock;
    5159                 :          0 :                 seq_puts(m, buf);
    5160                 :          0 :                 seq_putc(m, '\n');
    5161                 :            :         }
    5162                 :            : 
    5163                 :            : out_unlock:
    5164                 :          2 :         mutex_unlock(&cgroup_mutex);
    5165                 :            :         put_task_struct(tsk);
    5166                 :            : out_free:
    5167                 :          2 :         kfree(buf);
    5168                 :            : out:
    5169                 :          2 :         return retval;
    5170                 :            : }
    5171                 :            : 
    5172                 :            : /* Display information about each subsystem and each hierarchy */
    5173                 :          0 : static int proc_cgroupstats_show(struct seq_file *m, void *v)
    5174                 :            : {
    5175                 :            :         struct cgroup_subsys *ss;
    5176                 :            :         int i;
    5177                 :            : 
    5178                 :         56 :         seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
    5179                 :            :         /*
    5180                 :            :          * ideally we don't want subsystems moving around while we do this.
    5181                 :            :          * cgroup_mutex is also necessary to guarantee an atomic snapshot of
    5182                 :            :          * subsys/hierarchy state.
    5183                 :            :          */
    5184                 :         56 :         mutex_lock(&cgroup_mutex);
    5185                 :            : 
    5186                 :            :         for_each_subsys(ss, i)
    5187                 :            :                 seq_printf(m, "%s\t%d\t%d\t%d\n",
    5188                 :            :                            ss->name, ss->root->hierarchy_id,
    5189                 :            :                            ss->root->number_of_cgroups, !ss->disabled);
    5190                 :            : 
    5191                 :         56 :         mutex_unlock(&cgroup_mutex);
    5192                 :         56 :         return 0;
    5193                 :            : }
    5194                 :            : 
    5195                 :          0 : static int cgroupstats_open(struct inode *inode, struct file *file)
    5196                 :            : {
    5197                 :         56 :         return single_open(file, proc_cgroupstats_show, NULL);
    5198                 :            : }
    5199                 :            : 
    5200                 :            : static const struct file_operations proc_cgroupstats_operations = {
    5201                 :            :         .open = cgroupstats_open,
    5202                 :            :         .read = seq_read,
    5203                 :            :         .llseek = seq_lseek,
    5204                 :            :         .release = single_release,
    5205                 :            : };
    5206                 :            : 
    5207                 :            : /**
    5208                 :            :  * cgroup_fork - attach newly forked task to its parent's cgroup.
    5209                 :            :  * @child: pointer to task_struct of the newly forked child process.
    5210                 :            :  *
    5211                 :            :  * Description: A task inherits its parent's cgroup at fork().
    5212                 :            :  *
    5213                 :            :  * A pointer to the shared css_set was automatically copied in
    5214                 :            :  * fork.c by dup_task_struct().  However, we ignore that copy, since
    5215                 :            :  * it was not made under the protection of RCU or cgroup_mutex, so
    5216                 :            :  * might no longer be a valid css_set pointer.  cgroup_attach_task() might
    5217                 :            :  * have already changed current->cgroups, allowing the previously
    5218                 :            :  * referenced css_set to be removed and freed.
    5219                 :            :  *
    5220                 :            :  * At the point that cgroup_fork() is called, 'current' is the parent
    5221                 :            :  * task, and the passed argument 'child' points to the child task.
    5222                 :            :  */
    5223                 :          0 : void cgroup_fork(struct task_struct *child)
    5224                 :            : {
    5225                 :    1122967 :         task_lock(current);
    5226                 :    1122974 :         get_css_set(task_css_set(current));
    5227                 :    1122984 :         child->cgroups = current->cgroups;
    5228                 :    1122984 :         task_unlock(current);
    5229                 :    1122983 :         INIT_LIST_HEAD(&child->cg_list);
    5230                 :    1122983 : }
    5231                 :            : 
    5232                 :            : /**
    5233                 :            :  * cgroup_post_fork - called on a new task after adding it to the task list
    5234                 :            :  * @child: the task in question
    5235                 :            :  *
    5236                 :            :  * Adds the task to the list running through its css_set if necessary and
    5237                 :            :  * calls the subsystem fork() callbacks.  Has to be after the task is
    5238                 :            :  * visible on the task list in case we race with the first call to
    5239                 :            :  * cgroup_task_iter_start() - to guarantee that the new task ends up on its
    5240                 :            :  * list.
    5241                 :            :  */
    5242                 :          0 : void cgroup_post_fork(struct task_struct *child)
    5243                 :            : {
    5244                 :            :         struct cgroup_subsys *ss;
    5245                 :            :         int i;
    5246                 :            : 
    5247                 :            :         /*
    5248                 :            :          * use_task_css_set_links is set to 1 before we walk the tasklist
    5249                 :            :          * under the tasklist_lock and we read it here after we added the child
    5250                 :            :          * to the tasklist under the tasklist_lock as well. If the child wasn't
    5251                 :            :          * yet in the tasklist when we walked through it from
    5252                 :            :          * cgroup_enable_task_cg_lists(), then use_task_css_set_links value
    5253                 :            :          * should be visible now due to the paired locking and barriers implied
    5254                 :            :          * by LOCK/UNLOCK: it is written before the tasklist_lock unlock
    5255                 :            :          * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock
    5256                 :            :          * lock on fork.
    5257                 :            :          */
    5258         [ +  + ]:    1122972 :         if (use_task_css_set_links) {
    5259                 :      14720 :                 write_lock(&css_set_lock);
    5260                 :            :                 task_lock(child);
    5261         [ +  - ]:      14720 :                 if (list_empty(&child->cg_list))
    5262                 :      14720 :                         list_add(&child->cg_list, &task_css_set(child)->tasks);
    5263                 :            :                 task_unlock(child);
    5264                 :            :                 write_unlock(&css_set_lock);
    5265                 :            :         }
    5266                 :            : 
    5267                 :            :         /*
    5268                 :            :          * Call ss->fork().  This must happen after @child is linked on
    5269                 :            :          * css_set; otherwise, @child might change state between ->fork()
    5270                 :            :          * and addition to css_set.
    5271                 :            :          */
    5272                 :            :         if (need_forkexit_callback) {
    5273                 :            :                 /*
    5274                 :            :                  * fork/exit callbacks are supported only for builtin
    5275                 :            :                  * subsystems, and the builtin section of the subsys
    5276                 :            :                  * array is immutable, so we don't need to lock the
    5277                 :            :                  * subsys array here. On the other hand, modular section
    5278                 :            :                  * of the array can be freed at module unload, so we
    5279                 :            :                  * can't touch that.
    5280                 :            :                  */
    5281                 :            :                 for_each_builtin_subsys(ss, i)
    5282                 :            :                         if (ss->fork)
    5283                 :            :                                 ss->fork(child);
    5284                 :            :         }
    5285                 :    1122972 : }
    5286                 :            : 
    5287                 :            : /**
    5288                 :            :  * cgroup_exit - detach cgroup from exiting task
    5289                 :            :  * @tsk: pointer to task_struct of exiting process
    5290                 :            :  * @run_callbacks: run exit callbacks?
    5291                 :            :  *
    5292                 :            :  * Description: Detach cgroup from @tsk and release it.
    5293                 :            :  *
    5294                 :            :  * Note that cgroups marked notify_on_release force every task in
    5295                 :            :  * them to take the global cgroup_mutex mutex when exiting.
    5296                 :            :  * This could impact scaling on very large systems.  Be reluctant to
    5297                 :            :  * use notify_on_release cgroups where very high task exit scaling
    5298                 :            :  * is required on large systems.
    5299                 :            :  *
    5300                 :            :  * the_top_cgroup_hack:
    5301                 :            :  *
    5302                 :            :  *    Set the exiting task's cgroup to the root cgroup (top_cgroup).
    5303                 :            :  *
    5304                 :            :  *    We call cgroup_exit() while the task is still competent to
    5305                 :            :  *    handle notify_on_release(), then leave the task attached to the
    5306                 :            :  *    root cgroup in each hierarchy for the remainder of its exit.
    5307                 :            :  *
    5308                 :            :  *    To do this properly, we would increment the reference count on
    5309                 :            :  *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
    5310                 :            :  *    code we would add a second cgroup function call, to drop that
    5311                 :            :  *    reference.  This would just create an unnecessary hot spot on
    5312                 :            :  *    the top_cgroup reference count, to no avail.
    5313                 :            :  *
    5314                 :            :  *    Normally, holding a reference to a cgroup without bumping its
    5315                 :            :  *    count is unsafe.   The cgroup could go away, or someone could
    5316                 :            :  *    attach us to a different cgroup, decrementing the count on
    5317                 :            :  *    the first cgroup that we never incremented.  But in this case,
    5318                 :            :  *    top_cgroup isn't going away, and either task has PF_EXITING set,
    5319                 :            :  *    which wards off any cgroup_attach_task() attempts, or task is a failed
    5320                 :            :  *    fork, never visible to cgroup_attach_task.
    5321                 :            :  */
    5322                 :          0 : void cgroup_exit(struct task_struct *tsk, int run_callbacks)
    5323                 :            : {
    5324                 :            :         struct cgroup_subsys *ss;
    5325                 :            :         struct css_set *cset;
    5326                 :            :         int i;
    5327                 :            : 
    5328                 :            :         /*
    5329                 :            :          * Unlink from the css_set task list if necessary.
    5330                 :            :          * Optimistically check cg_list before taking
    5331                 :            :          * css_set_lock
    5332                 :            :          */
    5333         [ +  + ]:    1122968 :         if (!list_empty(&tsk->cg_list)) {
    5334                 :      14726 :                 write_lock(&css_set_lock);
    5335         [ +  - ]:      14726 :                 if (!list_empty(&tsk->cg_list))
    5336                 :            :                         list_del_init(&tsk->cg_list);
    5337                 :            :                 write_unlock(&css_set_lock);
    5338                 :            :         }
    5339                 :            : 
    5340                 :            :         /* Reassign the task to the init_css_set. */
    5341                 :            :         task_lock(tsk);
    5342                 :            :         cset = task_css_set(tsk);
    5343                 :    1122948 :         RCU_INIT_POINTER(tsk->cgroups, &init_css_set);
    5344                 :            : 
    5345                 :            :         if (run_callbacks && need_forkexit_callback) {
    5346                 :            :                 /*
    5347                 :            :                  * fork/exit callbacks are supported only for builtin
    5348                 :            :                  * subsystems, see cgroup_post_fork() for details.
    5349                 :            :                  */
    5350                 :            :                 for_each_builtin_subsys(ss, i) {
    5351                 :            :                         if (ss->exit) {
    5352                 :            :                                 struct cgroup_subsys_state *old_css = cset->subsys[i];
    5353                 :            :                                 struct cgroup_subsys_state *css = task_css(tsk, i);
    5354                 :            : 
    5355                 :            :                                 ss->exit(css, old_css, tsk);
    5356                 :            :                         }
    5357                 :            :                 }
    5358                 :            :         }
    5359                 :            :         task_unlock(tsk);
    5360                 :            : 
    5361                 :            :         put_css_set_taskexit(cset);
    5362                 :    1122983 : }
    5363                 :            : 
    5364                 :          0 : static void check_for_release(struct cgroup *cgrp)
    5365                 :            : {
    5366 [ +  - ][ -  + ]:          2 :         if (cgroup_is_releasable(cgrp) &&
    5367         [ #  # ]:          0 :             list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) {
    5368                 :            :                 /*
    5369                 :            :                  * Control Group is currently removable. If it's not
    5370                 :            :                  * already queued for a userspace notification, queue
    5371                 :            :                  * it now.
    5372                 :            :                  */
    5373                 :            :                 int need_schedule_work = 0;
    5374                 :            : 
    5375                 :          0 :                 raw_spin_lock(&release_list_lock);
    5376 [ #  # ][ #  # ]:          0 :                 if (!cgroup_is_dead(cgrp) &&
    5377                 :          0 :                     list_empty(&cgrp->release_list)) {
    5378                 :            :                         list_add(&cgrp->release_list, &release_list);
    5379                 :            :                         need_schedule_work = 1;
    5380                 :            :                 }
    5381                 :            :                 raw_spin_unlock(&release_list_lock);
    5382         [ #  # ]:          0 :                 if (need_schedule_work)
    5383                 :            :                         schedule_work(&release_agent_work);
    5384                 :            :         }
    5385                 :          2 : }
    5386                 :            : 
    5387                 :            : /*
    5388                 :            :  * Notify userspace when a cgroup is released, by running the
    5389                 :            :  * configured release agent with the name of the cgroup (path
    5390                 :            :  * relative to the root of cgroup file system) as the argument.
    5391                 :            :  *
    5392                 :            :  * Most likely, this user command will try to rmdir this cgroup.
    5393                 :            :  *
    5394                 :            :  * This races with the possibility that some other task will be
    5395                 :            :  * attached to this cgroup before it is removed, or that some other
    5396                 :            :  * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
    5397                 :            :  * The presumed 'rmdir' will fail quietly if this cgroup is no longer
    5398                 :            :  * unused, and this cgroup will be reprieved from its death sentence,
    5399                 :            :  * to continue to serve a useful existence.  Next time it's released,
    5400                 :            :  * we will get notified again, if it still has 'notify_on_release' set.
    5401                 :            :  *
    5402                 :            :  * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
    5403                 :            :  * means only wait until the task is successfully execve()'d.  The
    5404                 :            :  * separate release agent task is forked by call_usermodehelper(),
    5405                 :            :  * then control in this thread returns here, without waiting for the
    5406                 :            :  * release agent task.  We don't bother to wait because the caller of
    5407                 :            :  * this routine has no use for the exit status of the release agent
    5408                 :            :  * task, so no sense holding our caller up for that.
    5409                 :            :  */
    5410                 :          0 : static void cgroup_release_agent(struct work_struct *work)
    5411                 :            : {
    5412         [ #  # ]:          0 :         BUG_ON(work != &release_agent_work);
    5413                 :          0 :         mutex_lock(&cgroup_mutex);
    5414                 :          0 :         raw_spin_lock(&release_list_lock);
    5415         [ #  # ]:          0 :         while (!list_empty(&release_list)) {
    5416                 :            :                 char *argv[3], *envp[3];
    5417                 :            :                 int i;
    5418                 :            :                 char *pathbuf = NULL, *agentbuf = NULL;
    5419                 :          0 :                 struct cgroup *cgrp = list_entry(release_list.next,
    5420                 :            :                                                     struct cgroup,
    5421                 :            :                                                     release_list);
    5422                 :          0 :                 list_del_init(&cgrp->release_list);
    5423                 :            :                 raw_spin_unlock(&release_list_lock);
    5424                 :            :                 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
    5425         [ #  # ]:          0 :                 if (!pathbuf)
    5426                 :            :                         goto continue_free;
    5427         [ #  # ]:          0 :                 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
    5428                 :            :                         goto continue_free;
    5429                 :          0 :                 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
    5430         [ #  # ]:          0 :                 if (!agentbuf)
    5431                 :            :                         goto continue_free;
    5432                 :            : 
    5433                 :            :                 i = 0;
    5434                 :          0 :                 argv[i++] = agentbuf;
    5435                 :          0 :                 argv[i++] = pathbuf;
    5436                 :          0 :                 argv[i] = NULL;
    5437                 :            : 
    5438                 :            :                 i = 0;
    5439                 :            :                 /* minimal command environment */
    5440                 :          0 :                 envp[i++] = "HOME=/";
    5441                 :          0 :                 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
    5442                 :          0 :                 envp[i] = NULL;
    5443                 :            : 
    5444                 :            :                 /* Drop the lock while we invoke the usermode helper,
    5445                 :            :                  * since the exec could involve hitting disk and hence
    5446                 :            :                  * be a slow process */
    5447                 :          0 :                 mutex_unlock(&cgroup_mutex);
    5448                 :          0 :                 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
    5449                 :          0 :                 mutex_lock(&cgroup_mutex);
    5450                 :            :  continue_free:
    5451                 :          0 :                 kfree(pathbuf);
    5452                 :          0 :                 kfree(agentbuf);
    5453                 :          0 :                 raw_spin_lock(&release_list_lock);
    5454                 :            :         }
    5455                 :            :         raw_spin_unlock(&release_list_lock);
    5456                 :          0 :         mutex_unlock(&cgroup_mutex);
    5457                 :          0 : }
    5458                 :            : 
    5459                 :          0 : static int __init cgroup_disable(char *str)
    5460                 :            : {
    5461                 :            :         struct cgroup_subsys *ss;
    5462                 :            :         char *token;
    5463                 :            :         int i;
    5464                 :            : 
    5465         [ #  # ]:          0 :         while ((token = strsep(&str, ",")) != NULL) {
    5466                 :            :                 if (!*token)
    5467                 :            :                         continue;
    5468                 :            : 
    5469                 :            :                 /*
    5470                 :            :                  * cgroup_disable, being at boot time, can't know about
    5471                 :            :                  * module subsystems, so we don't worry about them.
    5472                 :            :                  */
    5473                 :            :                 for_each_builtin_subsys(ss, i) {
    5474                 :            :                         if (!strcmp(token, ss->name)) {
    5475                 :            :                                 ss->disabled = 1;
    5476                 :            :                                 printk(KERN_INFO "Disabling %s control group"
    5477                 :            :                                         " subsystem\n", ss->name);
    5478                 :            :                                 break;
    5479                 :            :                         }
    5480                 :            :                 }
    5481                 :            :         }
    5482                 :          0 :         return 1;
    5483                 :            : }
    5484                 :            : __setup("cgroup_disable=", cgroup_disable);
    5485                 :            : 
    5486                 :            : /**
    5487                 :            :  * css_from_dir - get corresponding css from the dentry of a cgroup dir
    5488                 :            :  * @dentry: directory dentry of interest
    5489                 :            :  * @ss: subsystem of interest
    5490                 :            :  *
    5491                 :            :  * Must be called under RCU read lock.  The caller is responsible for
    5492                 :            :  * pinning the returned css if it needs to be accessed outside the RCU
    5493                 :            :  * critical section.
    5494                 :            :  */
    5495                 :          0 : struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
    5496                 :            :                                          struct cgroup_subsys *ss)
    5497                 :            : {
    5498                 :            :         struct cgroup *cgrp;
    5499                 :            : 
    5500                 :            :         WARN_ON_ONCE(!rcu_read_lock_held());
    5501                 :            : 
    5502                 :            :         /* is @dentry a cgroup dir? */
    5503 [ #  # ][ #  # ]:          0 :         if (!dentry->d_inode ||
         [ #  # ][ #  # ]
    5504                 :          0 :             dentry->d_inode->i_op != &cgroup_dir_inode_operations)
    5505                 :            :                 return ERR_PTR(-EBADF);
    5506                 :            : 
    5507                 :            :         cgrp = __d_cgrp(dentry);
    5508 [ #  # ][ #  # ]:          0 :         return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT);
    5509                 :            : }
    5510                 :            : 
    5511                 :            : /**
    5512                 :            :  * css_from_id - lookup css by id
    5513                 :            :  * @id: the cgroup id
    5514                 :            :  * @ss: cgroup subsys to be looked into
    5515                 :            :  *
    5516                 :            :  * Returns the css if there's a valid one with @id, otherwise returns NULL.
    5517                 :            :  * Should be called under rcu_read_lock().
    5518                 :            :  */
    5519                 :          0 : struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
    5520                 :            : {
    5521                 :            :         struct cgroup *cgrp;
    5522                 :            : 
    5523                 :            :         rcu_lockdep_assert(rcu_read_lock_held() ||
    5524                 :            :                            lockdep_is_held(&cgroup_mutex),
    5525                 :            :                            "css_from_id() needs proper protection");
    5526                 :            : 
    5527                 :          0 :         cgrp = idr_find(&ss->root->cgroup_idr, id);
    5528         [ #  # ]:          0 :         if (cgrp)
    5529                 :          0 :                 return cgroup_css(cgrp, ss);
    5530                 :            :         return NULL;
    5531                 :            : }
    5532                 :            : 
    5533                 :            : #ifdef CONFIG_CGROUP_DEBUG
    5534                 :            : static struct cgroup_subsys_state *
    5535                 :            : debug_css_alloc(struct cgroup_subsys_state *parent_css)
    5536                 :            : {
    5537                 :            :         struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
    5538                 :            : 
    5539                 :            :         if (!css)
    5540                 :            :                 return ERR_PTR(-ENOMEM);
    5541                 :            : 
    5542                 :            :         return css;
    5543                 :            : }
    5544                 :            : 
    5545                 :            : static void debug_css_free(struct cgroup_subsys_state *css)
    5546                 :            : {
    5547                 :            :         kfree(css);
    5548                 :            : }
    5549                 :            : 
    5550                 :            : static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
    5551                 :            :                                 struct cftype *cft)
    5552                 :            : {
    5553                 :            :         return cgroup_task_count(css->cgroup);
    5554                 :            : }
    5555                 :            : 
    5556                 :            : static u64 current_css_set_read(struct cgroup_subsys_state *css,
    5557                 :            :                                 struct cftype *cft)
    5558                 :            : {
    5559                 :            :         return (u64)(unsigned long)current->cgroups;
    5560                 :            : }
    5561                 :            : 
    5562                 :            : static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
    5563                 :            :                                          struct cftype *cft)
    5564                 :            : {
    5565                 :            :         u64 count;
    5566                 :            : 
    5567                 :            :         rcu_read_lock();
    5568                 :            :         count = atomic_read(&task_css_set(current)->refcount);
    5569                 :            :         rcu_read_unlock();
    5570                 :            :         return count;
    5571                 :            : }
    5572                 :            : 
    5573                 :            : static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
    5574                 :            :                                          struct cftype *cft,
    5575                 :            :                                          struct seq_file *seq)
    5576                 :            : {
    5577                 :            :         struct cgrp_cset_link *link;
    5578                 :            :         struct css_set *cset;
    5579                 :            : 
    5580                 :            :         read_lock(&css_set_lock);
    5581                 :            :         rcu_read_lock();
    5582                 :            :         cset = rcu_dereference(current->cgroups);
    5583                 :            :         list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
    5584                 :            :                 struct cgroup *c = link->cgrp;
    5585                 :            :                 const char *name;
    5586                 :            : 
    5587                 :            :                 if (c->dentry)
    5588                 :            :                         name = c->dentry->d_name.name;
    5589                 :            :                 else
    5590                 :            :                         name = "?";
    5591                 :            :                 seq_printf(seq, "Root %d group %s\n",
    5592                 :            :                            c->root->hierarchy_id, name);
    5593                 :            :         }
    5594                 :            :         rcu_read_unlock();
    5595                 :            :         read_unlock(&css_set_lock);
    5596                 :            :         return 0;
    5597                 :            : }
    5598                 :            : 
    5599                 :            : #define MAX_TASKS_SHOWN_PER_CSS 25
    5600                 :            : static int cgroup_css_links_read(struct cgroup_subsys_state *css,
    5601                 :            :                                  struct cftype *cft, struct seq_file *seq)
    5602                 :            : {
    5603                 :            :         struct cgrp_cset_link *link;
    5604                 :            : 
    5605                 :            :         read_lock(&css_set_lock);
    5606                 :            :         list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
    5607                 :            :                 struct css_set *cset = link->cset;
    5608                 :            :                 struct task_struct *task;
    5609                 :            :                 int count = 0;
    5610                 :            :                 seq_printf(seq, "css_set %p\n", cset);
    5611                 :            :                 list_for_each_entry(task, &cset->tasks, cg_list) {
    5612                 :            :                         if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
    5613                 :            :                                 seq_puts(seq, "  ...\n");
    5614                 :            :                                 break;
    5615                 :            :                         } else {
    5616                 :            :                                 seq_printf(seq, "  task %d\n",
    5617                 :            :                                            task_pid_vnr(task));
    5618                 :            :                         }
    5619                 :            :                 }
    5620                 :            :         }
    5621                 :            :         read_unlock(&css_set_lock);
    5622                 :            :         return 0;
    5623                 :            : }
    5624                 :            : 
    5625                 :            : static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
    5626                 :            : {
    5627                 :            :         return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
    5628                 :            : }
    5629                 :            : 
    5630                 :            : static struct cftype debug_files[] =  {
    5631                 :            :         {
    5632                 :            :                 .name = "taskcount",
    5633                 :            :                 .read_u64 = debug_taskcount_read,
    5634                 :            :         },
    5635                 :            : 
    5636                 :            :         {
    5637                 :            :                 .name = "current_css_set",
    5638                 :            :                 .read_u64 = current_css_set_read,
    5639                 :            :         },
    5640                 :            : 
    5641                 :            :         {
    5642                 :            :                 .name = "current_css_set_refcount",
    5643                 :            :                 .read_u64 = current_css_set_refcount_read,
    5644                 :            :         },
    5645                 :            : 
    5646                 :            :         {
    5647                 :            :                 .name = "current_css_set_cg_links",
    5648                 :            :                 .read_seq_string = current_css_set_cg_links_read,
    5649                 :            :         },
    5650                 :            : 
    5651                 :            :         {
    5652                 :            :                 .name = "cgroup_css_links",
    5653                 :            :                 .read_seq_string = cgroup_css_links_read,
    5654                 :            :         },
    5655                 :            : 
    5656                 :            :         {
    5657                 :            :                 .name = "releasable",
    5658                 :            :                 .read_u64 = releasable_read,
    5659                 :            :         },
    5660                 :            : 
    5661                 :            :         { }     /* terminate */
    5662                 :            : };
    5663                 :            : 
    5664                 :            : struct cgroup_subsys debug_subsys = {
    5665                 :            :         .name = "debug",
    5666                 :            :         .css_alloc = debug_css_alloc,
    5667                 :            :         .css_free = debug_css_free,
    5668                 :            :         .subsys_id = debug_subsys_id,
    5669                 :            :         .base_cftypes = debug_files,
    5670                 :            : };
    5671                 :            : #endif /* CONFIG_CGROUP_DEBUG */

Generated by: LCOV version 1.9