LCOV - coverage.info - kernel/sched/proc.c

LCOV - code coverage report

Current view:	top level - kernel/sched - proc.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	81	124	65.3 %
Date:	2014-02-18	Functions:	11	14	78.6 %
		Branches:	41	56	73.2 %

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :  *  kernel/sched/proc.c
       3                 :            :  *
       4                 :            :  *  Kernel load calculations, forked from sched/core.c
       5                 :            :  */
       6                 :            : 
       7                 :            : #include <linux/export.h>
       8                 :            : 
       9                 :            : #include "sched.h"
      10                 :            : 
      11                 :          0 : unsigned long this_cpu_load(void)
      12                 :            : {
      13                 :          0 :         struct rq *this = this_rq();
      14                 :          0 :         return this->cpu_load[0];
      15                 :            : }
      16                 :            : 
      17                 :            : 
      18                 :            : /*
      19                 :            :  * Global load-average calculations
      20                 :            :  *
      21                 :            :  * We take a distributed and async approach to calculating the global load-avg
      22                 :            :  * in order to minimize overhead.
      23                 :            :  *
      24                 :            :  * The global load average is an exponentially decaying average of nr_running +
      25                 :            :  * nr_uninterruptible.
      26                 :            :  *
      27                 :            :  * Once every LOAD_FREQ:
      28                 :            :  *
      29                 :            :  *   nr_active = 0;
      30                 :            :  *   for_each_possible_cpu(cpu)
      31                 :            :  *      nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
      32                 :            :  *
      33                 :            :  *   avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
      34                 :            :  *
      35                 :            :  * Due to a number of reasons the above turns in the mess below:
      36                 :            :  *
      37                 :            :  *  - for_each_possible_cpu() is prohibitively expensive on machines with
      38                 :            :  *    serious number of cpus, therefore we need to take a distributed approach
      39                 :            :  *    to calculating nr_active.
      40                 :            :  *
      41                 :            :  *        \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
      42                 :            :  *                      = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
      43                 :            :  *
      44                 :            :  *    So assuming nr_active := 0 when we start out -- true per definition, we
      45                 :            :  *    can simply take per-cpu deltas and fold those into a global accumulate
      46                 :            :  *    to obtain the same result. See calc_load_fold_active().
      47                 :            :  *
      48                 :            :  *    Furthermore, in order to avoid synchronizing all per-cpu delta folding
      49                 :            :  *    across the machine, we assume 10 ticks is sufficient time for every
      50                 :            :  *    cpu to have completed this task.
      51                 :            :  *
      52                 :            :  *    This places an upper-bound on the IRQ-off latency of the machine. Then
      53                 :            :  *    again, being late doesn't loose the delta, just wrecks the sample.
      54                 :            :  *
      55                 :            :  *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
      56                 :            :  *    this would add another cross-cpu cacheline miss and atomic operation
      57                 :            :  *    to the wakeup path. Instead we increment on whatever cpu the task ran
      58                 :            :  *    when it went into uninterruptible state and decrement on whatever cpu
      59                 :            :  *    did the wakeup. This means that only the sum of nr_uninterruptible over
      60                 :            :  *    all cpus yields the correct result.
      61                 :            :  *
      62                 :            :  *  This covers the NO_HZ=n code, for extra head-aches, see the comment below.
      63                 :            :  */
      64                 :            : 
      65                 :            : /* Variables and functions for calc_load */
      66                 :            : atomic_long_t calc_load_tasks;
      67                 :            : unsigned long calc_load_update;
      68                 :            : unsigned long avenrun[3];
      69                 :            : EXPORT_SYMBOL(avenrun); /* should be removed */
      70                 :            : 
      71                 :            : /**
      72                 :            :  * get_avenrun - get the load average array
      73                 :            :  * @loads:      pointer to dest load array
      74                 :            :  * @offset:     offset to add
      75                 :            :  * @shift:      shift count to shift the result left
      76                 :            :  *
      77                 :            :  * These values are estimates at best, so no need for locking.
      78                 :            :  */
      79                 :          0 : void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
      80                 :            : {
      81                 :         29 :         loads[0] = (avenrun[0] + offset) << shift;
      82                 :         29 :         loads[1] = (avenrun[1] + offset) << shift;
      83                 :         29 :         loads[2] = (avenrun[2] + offset) << shift;
      84                 :         29 : }
      85                 :            : 
      86                 :          0 : long calc_load_fold_active(struct rq *this_rq)
      87                 :            : {
      88                 :            :         long nr_active, delta = 0;
      89                 :            : 
      90                 :    2017209 :         nr_active = this_rq->nr_running;
      91                 :    2017209 :         nr_active += (long) this_rq->nr_uninterruptible;
      92                 :            : 
      93 [ +  + ][ +  + ]:    2017209 :         if (nr_active != this_rq->calc_load_active) {
                 [ #  # ]
      94                 :     534177 :                 delta = nr_active - this_rq->calc_load_active;
      95                 :     534177 :                 this_rq->calc_load_active = nr_active;
      96                 :            :         }
      97                 :            : 
      98                 :          0 :         return delta;
      99                 :            : }
     100                 :            : 
     101                 :            : /*
     102                 :            :  * a1 = a0 * e + a * (1 - e)
     103                 :            :  */
     104                 :            : static unsigned long
     105                 :            : calc_load(unsigned long load, unsigned long exp, unsigned long active)
     106                 :            : {
     107                 :      52236 :         load *= exp;
     108                 :      52236 :         load += active * (FIXED_1 - exp);
     109                 :      52236 :         load += 1UL << (FSHIFT - 1);
     110                 :      52236 :         return load >> FSHIFT;
     111                 :            : }
     112                 :            : 
     113                 :            : #ifdef CONFIG_NO_HZ_COMMON
     114                 :            : /*
     115                 :            :  * Handle NO_HZ for the global load-average.
     116                 :            :  *
     117                 :            :  * Since the above described distributed algorithm to compute the global
     118                 :            :  * load-average relies on per-cpu sampling from the tick, it is affected by
     119                 :            :  * NO_HZ.
     120                 :            :  *
     121                 :            :  * The basic idea is to fold the nr_active delta into a global idle-delta upon
     122                 :            :  * entering NO_HZ state such that we can include this as an 'extra' cpu delta
     123                 :            :  * when we read the global state.
     124                 :            :  *
     125                 :            :  * Obviously reality has to ruin such a delightfully simple scheme:
     126                 :            :  *
     127                 :            :  *  - When we go NO_HZ idle during the window, we can negate our sample
     128                 :            :  *    contribution, causing under-accounting.
     129                 :            :  *
     130                 :            :  *    We avoid this by keeping two idle-delta counters and flipping them
     131                 :            :  *    when the window starts, thus separating old and new NO_HZ load.
     132                 :            :  *
     133                 :            :  *    The only trick is the slight shift in index flip for read vs write.
     134                 :            :  *
     135                 :            :  *        0s            5s            10s           15s
     136                 :            :  *          +10           +10           +10           +10
     137                 :            :  *        |-|-----------|-|-----------|-|-----------|-|
     138                 :            :  *    r:0 0 1           1 0           0 1           1 0
     139                 :            :  *    w:0 1 1           0 0           1 1           0 0
     140                 :            :  *
     141                 :            :  *    This ensures we'll fold the old idle contribution in this window while
     142                 :            :  *    accumlating the new one.
     143                 :            :  *
     144                 :            :  *  - When we wake up from NO_HZ idle during the window, we push up our
     145                 :            :  *    contribution, since we effectively move our sample point to a known
     146                 :            :  *    busy state.
     147                 :            :  *
     148                 :            :  *    This is solved by pushing the window forward, and thus skipping the
     149                 :            :  *    sample, for this cpu (effectively using the idle-delta for this cpu which
     150                 :            :  *    was in effect at the time the window opened). This also solves the issue
     151                 :            :  *    of having to deal with a cpu having been in NOHZ idle for multiple
     152                 :            :  *    LOAD_FREQ intervals.
     153                 :            :  *
     154                 :            :  * When making the ILB scale, we should try to pull this in as well.
     155                 :            :  */
     156                 :            : static atomic_long_t calc_load_idle[2];
     157                 :            : static int calc_load_idx;
     158                 :            : 
     159                 :            : static inline int calc_load_write_idx(void)
     160                 :            : {
     161                 :     532405 :         int idx = calc_load_idx;
     162                 :            : 
     163                 :            :         /*
     164                 :            :          * See calc_global_nohz(), if we observe the new index, we also
     165                 :            :          * need to observe the new update time.
     166                 :            :          */
     167                 :     532405 :         smp_rmb();
     168                 :            : 
     169                 :            :         /*
     170                 :            :          * If the folding window started, make sure we start writing in the
     171                 :            :          * next idle-delta.
     172                 :            :          */
     173         [ +  + ]:     532396 :         if (!time_before(jiffies, calc_load_update))
     174                 :      11094 :                 idx++;
     175                 :            : 
     176                 :     532396 :         return idx & 1;
     177                 :            : }
     178                 :            : 
     179                 :            : static inline int calc_load_read_idx(void)
     180                 :            : {
     181                 :      17412 :         return calc_load_idx & 1;
     182                 :            : }
     183                 :            : 
     184                 :          0 : void calc_load_enter_idle(void)
     185                 :            : {
     186                 :    3979896 :         struct rq *this_rq = this_rq();
     187                 :            :         long delta;
     188                 :            : 
     189                 :            :         /*
     190                 :            :          * We're going into NOHZ mode, if there's any pending delta, fold it
     191                 :            :          * into the pending idle delta.
     192                 :            :          */
     193                 :            :         delta = calc_load_fold_active(this_rq);
     194         [ +  + ]:    1989948 :         if (delta) {
     195                 :            :                 int idx = calc_load_write_idx();
     196                 :     532396 :                 atomic_long_add(delta, &calc_load_idle[idx]);
     197                 :            :         }
     198                 :         27 : }
     199                 :            : 
     200                 :          0 : void calc_load_exit_idle(void)
     201                 :            : {
     202                 :    3980098 :         struct rq *this_rq = this_rq();
     203                 :            : 
     204                 :            :         /*
     205                 :            :          * If we're still before the sample window, we're done.
     206                 :            :          */
     207         [ +  + ]:    1990049 :         if (time_before(jiffies, this_rq->calc_load_update))
     208                 :          0 :                 return;
     209                 :            : 
     210                 :            :         /*
     211                 :            :          * We woke inside or after the sample window, this means we're already
     212                 :            :          * accounted through the nohz accounting, so skip the entire deal and
     213                 :            :          * sync up for the next window.
     214                 :            :          */
     215                 :       4299 :         this_rq->calc_load_update = calc_load_update;
     216         [ +  - ]:       4299 :         if (time_before(jiffies, this_rq->calc_load_update + 10))
     217                 :       4299 :                 this_rq->calc_load_update += LOAD_FREQ;
     218                 :            : }
     219                 :            : 
     220                 :            : static long calc_load_fold_idle(void)
     221                 :            : {
     222                 :            :         int idx = calc_load_read_idx();
     223                 :            :         long delta = 0;
     224                 :            : 
     225         [ +  + ]:      17412 :         if (atomic_long_read(&calc_load_idle[idx]))
     226                 :       1586 :                 delta = atomic_long_xchg(&calc_load_idle[idx], 0);
     227                 :            : 
     228                 :            :         return delta;
     229                 :            : }
     230                 :            : 
     231                 :            : /**
     232                 :            :  * fixed_power_int - compute: x^n, in O(log n) time
     233                 :            :  *
     234                 :            :  * @x:         base of the power
     235                 :            :  * @frac_bits: fractional bits of @x
     236                 :            :  * @n:         power to raise @x to.
     237                 :            :  *
     238                 :            :  * By exploiting the relation between the definition of the natural power
     239                 :            :  * function: x^n := x*x*...*x (x multiplied by itself for n times), and
     240                 :            :  * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
     241                 :            :  * (where: n_i \elem {0, 1}, the binary vector representing n),
     242                 :            :  * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
     243                 :            :  * of course trivially computable in O(log_2 n), the length of our binary
     244                 :            :  * vector.
     245                 :            :  */
     246                 :            : static unsigned long
     247                 :          0 : fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
     248                 :            : {
     249                 :          0 :         unsigned long result = 1UL << frac_bits;
     250                 :            : 
     251         [ #  # ]:          0 :         if (n) for (;;) {
     252         [ #  # ]:          0 :                 if (n & 1) {
     253                 :          0 :                         result *= x;
     254                 :          0 :                         result += 1UL << (frac_bits - 1);
     255                 :          0 :                         result >>= frac_bits;
     256                 :            :                 }
     257                 :          0 :                 n >>= 1;
     258         [ #  # ]:          0 :                 if (!n)
     259                 :            :                         break;
     260                 :          0 :                 x *= x;
     261                 :          0 :                 x += 1UL << (frac_bits - 1);
     262                 :          0 :                 x >>= frac_bits;
     263                 :          0 :         }
     264                 :            : 
     265                 :          0 :         return result;
     266                 :            : }
     267                 :            : 
     268                 :            : /*
     269                 :            :  * a1 = a0 * e + a * (1 - e)
     270                 :            :  *
     271                 :            :  * a2 = a1 * e + a * (1 - e)
     272                 :            :  *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
     273                 :            :  *    = a0 * e^2 + a * (1 - e) * (1 + e)
     274                 :            :  *
     275                 :            :  * a3 = a2 * e + a * (1 - e)
     276                 :            :  *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
     277                 :            :  *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
     278                 :            :  *
     279                 :            :  *  ...
     280                 :            :  *
     281                 :            :  * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
     282                 :            :  *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
     283                 :            :  *    = a0 * e^n + a * (1 - e^n)
     284                 :            :  *
     285                 :            :  * [1] application of the geometric series:
     286                 :            :  *
     287                 :            :  *              n         1 - x^(n+1)
     288                 :            :  *     S_n := \Sum x^i = -------------
     289                 :            :  *             i=0          1 - x
     290                 :            :  */
     291                 :            : static unsigned long
     292                 :            : calc_load_n(unsigned long load, unsigned long exp,
     293                 :            :             unsigned long active, unsigned int n)
     294                 :            : {
     295                 :            : 
     296                 :          0 :         return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
     297                 :            : }
     298                 :            : 
     299                 :            : /*
     300                 :            :  * NO_HZ can leave us missing all per-cpu ticks calling
     301                 :            :  * calc_load_account_active(), but since an idle CPU folds its delta into
     302                 :            :  * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
     303                 :            :  * in the pending idle delta if our idle period crossed a load cycle boundary.
     304                 :            :  *
     305                 :            :  * Once we've updated the global active value, we need to apply the exponential
     306                 :            :  * weights adjusted to the number of cycles missed.
     307                 :            :  */
     308                 :          0 : static void calc_global_nohz(void)
     309                 :            : {
     310                 :            :         long delta, active, n;
     311                 :            : 
     312         [ -  + ]:      17412 :         if (!time_before(jiffies, calc_load_update + 10)) {
     313                 :            :                 /*
     314                 :            :                  * Catch-up, fold however many we are behind still
     315                 :            :                  */
     316                 :          0 :                 delta = jiffies - calc_load_update - 10;
     317                 :          0 :                 n = 1 + (delta / LOAD_FREQ);
     318                 :            : 
     319                 :            :                 active = atomic_long_read(&calc_load_tasks);
     320         [ #  # ]:          0 :                 active = active > 0 ? active * FIXED_1 : 0;
     321                 :            : 
     322                 :          0 :                 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
     323                 :          0 :                 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
     324                 :          0 :                 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
     325                 :            : 
     326                 :          0 :                 calc_load_update += n * LOAD_FREQ;
     327                 :            :         }
     328                 :            : 
     329                 :            :         /*
     330                 :            :          * Flip the idle index...
     331                 :            :          *
     332                 :            :          * Make sure we first write the new time then flip the index, so that
     333                 :            :          * calc_load_write_idx() will see the new time when it reads the new
     334                 :            :          * index, this avoids a double flip messing things up.
     335                 :            :          */
     336                 :      17412 :         smp_wmb();
     337                 :      17412 :         calc_load_idx++;
     338                 :      17412 : }
     339                 :            : #else /* !CONFIG_NO_HZ_COMMON */
     340                 :            : 
     341                 :            : static inline long calc_load_fold_idle(void) { return 0; }
     342                 :            : static inline void calc_global_nohz(void) { }
     343                 :            : 
     344                 :            : #endif /* CONFIG_NO_HZ_COMMON */
     345                 :            : 
     346                 :            : /*
     347                 :            :  * calc_load - update the avenrun load estimates 10 ticks after the
     348                 :            :  * CPUs have updated calc_load_tasks.
     349                 :            :  */
     350                 :          0 : void calc_global_load(unsigned long ticks)
     351                 :            : {
     352                 :            :         long active, delta;
     353                 :            : 
     354         [ +  + ]:    7264402 :         if (time_before(jiffies, calc_load_update + 10))
     355                 :    7264402 :                 return;
     356                 :            : 
     357                 :            :         /*
     358                 :            :          * Fold the 'old' idle-delta to include all NO_HZ cpus.
     359                 :            :          */
     360                 :            :         delta = calc_load_fold_idle();
     361         [ +  + ]:      17412 :         if (delta)
     362                 :            :                 atomic_long_add(delta, &calc_load_tasks);
     363                 :            : 
     364                 :            :         active = atomic_long_read(&calc_load_tasks);
     365         [ +  + ]:    7281814 :         active = active > 0 ? active * FIXED_1 : 0;
     366                 :            : 
     367                 :      34824 :         avenrun[0] = calc_load(avenrun[0], EXP_1, active);
     368                 :      34824 :         avenrun[1] = calc_load(avenrun[1], EXP_5, active);
     369                 :      34824 :         avenrun[2] = calc_load(avenrun[2], EXP_15, active);
     370                 :            : 
     371                 :      17412 :         calc_load_update += LOAD_FREQ;
     372                 :            : 
     373                 :            :         /*
     374                 :            :          * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
     375                 :            :          */
     376                 :      17412 :         calc_global_nohz();
     377                 :            : }
     378                 :            : 
     379                 :            : /*
     380                 :            :  * Called from update_cpu_load() to periodically update this CPU's
     381                 :            :  * active count.
     382                 :            :  */
     383                 :          0 : static void calc_load_account_active(struct rq *this_rq)
     384                 :            : {
     385                 :            :         long delta;
     386                 :            : 
     387         [ +  + ]:    7847846 :         if (time_before(jiffies, this_rq->calc_load_update))
     388                 :    7847853 :                 return;
     389                 :            : 
     390                 :            :         delta  = calc_load_fold_active(this_rq);
     391            [ + ]:      27261 :         if (delta)
     392                 :            :                 atomic_long_add(delta, &calc_load_tasks);
     393                 :            : 
     394                 :      27268 :         this_rq->calc_load_update += LOAD_FREQ;
     395                 :            : }
     396                 :            : 
     397                 :            : /*
     398                 :            :  * End of global load-average stuff
     399                 :            :  */
     400                 :            : 
     401                 :            : /*
     402                 :            :  * The exact cpuload at various idx values, calculated at every tick would be
     403                 :            :  * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
     404                 :            :  *
     405                 :            :  * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
     406                 :            :  * on nth tick when cpu may be busy, then we have:
     407                 :            :  * load = ((2^idx - 1) / 2^idx)^(n-1) * load
     408                 :            :  * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
     409                 :            :  *
     410                 :            :  * decay_load_missed() below does efficient calculation of
     411                 :            :  * load = ((2^idx - 1) / 2^idx)^(n-1) * load
     412                 :            :  * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
     413                 :            :  *
     414                 :            :  * The calculation is approximated on a 128 point scale.
     415                 :            :  * degrade_zero_ticks is the number of ticks after which load at any
     416                 :            :  * particular idx is approximated to be zero.
     417                 :            :  * degrade_factor is a precomputed table, a row for each load idx.
     418                 :            :  * Each column corresponds to degradation factor for a power of two ticks,
     419                 :            :  * based on 128 point scale.
     420                 :            :  * Example:
     421                 :            :  * row 2, col 3 (=12) says that the degradation at load idx 2 after
     422                 :            :  * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
     423                 :            :  *
     424                 :            :  * With this power of 2 load factors, we can degrade the load n times
     425                 :            :  * by looking at 1 bits in n and doing as many mult/shift instead of
     426                 :            :  * n mult/shifts needed by the exact degradation.
     427                 :            :  */
     428                 :            : #define DEGRADE_SHIFT           7
     429                 :            : static const unsigned char
     430                 :            :                 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
     431                 :            : static const unsigned char
     432                 :            :                 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
     433                 :            :                                         {0, 0, 0, 0, 0, 0, 0, 0},
     434                 :            :                                         {64, 32, 8, 0, 0, 0, 0, 0},
     435                 :            :                                         {96, 72, 40, 12, 1, 0, 0},
     436                 :            :                                         {112, 98, 75, 43, 15, 1, 0},
     437                 :            :                                         {120, 112, 98, 76, 45, 16, 2} };
     438                 :            : 
     439                 :            : /*
     440                 :            :  * Update cpu_load for any missed ticks, due to tickless idle. The backlog
     441                 :            :  * would be when CPU is idle and so we just decay the old load without
     442                 :            :  * adding any new load.
     443                 :            :  */
     444                 :            : static unsigned long
     445                 :          0 : decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
     446                 :            : {
     447                 :            :         int j = 0;
     448                 :            : 
     449         [ +  + ]:   32532752 :         if (!missed_updates)
     450                 :            :                 return load;
     451                 :            : 
     452         [ +  + ]:     258168 :         if (missed_updates >= degrade_zero_ticks[idx])
     453                 :            :                 return 0;
     454                 :            : 
     455         [ +  + ]:     170941 :         if (idx == 1)
     456                 :      25595 :                 return load >> missed_updates;
     457                 :            : 
     458            [ + ]:     641075 :         while (missed_updates) {
     459         [ +  + ]:     495729 :                 if (missed_updates % 2)
     460                 :     289639 :                         load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
     461                 :            : 
     462                 :     495729 :                 missed_updates >>= 1;
     463                 :     495729 :                 j++;
     464                 :            :         }
     465                 :            :         return load;
     466                 :            : }
     467                 :            : 
     468                 :            : /*
     469                 :            :  * Update rq->cpu_load[] statistics. This function is usually called every
     470                 :            :  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
     471                 :            :  * every tick. We fix it up based on jiffies.
     472                 :            :  */
     473                 :          0 : static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
     474                 :            :                               unsigned long pending_updates)
     475                 :            : {
     476                 :            :         int i, scale;
     477                 :            : 
     478                 :    8149644 :         this_rq->nr_load_updates++;
     479                 :            : 
     480                 :            :         /* Update our load: */
     481                 :    8149644 :         this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
     482         [ +  + ]:   40704508 :         for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
     483                 :            :                 unsigned long old_load, new_load;
     484                 :            : 
     485                 :            :                 /* scale is effectively 1 << i now, and >> i divides by scale */
     486                 :            : 
     487                 :   32561741 :                 old_load = this_rq->cpu_load[i];
     488                 :   32561741 :                 old_load = decay_load_missed(old_load, pending_updates - 1, i);
     489                 :            :                 new_load = this_load;
     490                 :            :                 /*
     491                 :            :                  * Round up the averaging division if load is increasing. This
     492                 :            :                  * prevents us from getting stuck on 9 if the load is 10, for
     493                 :            :                  * example.
     494                 :            :                  */
     495         [ +  + ]:   40704508 :                 if (new_load > old_load)
     496                 :    2067911 :                         new_load += scale - 1;
     497                 :            : 
     498                 :   32554864 :                 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
     499                 :            :         }
     500                 :            : 
     501                 :    8142767 :         sched_avg_update(this_rq);
     502                 :    8140182 : }
     503                 :            : 
     504                 :            : #ifdef CONFIG_SMP
     505                 :            : static inline unsigned long get_rq_runnable_load(struct rq *rq)
     506                 :            : {
     507                 :            :         return rq->cfs.runnable_load_avg;
     508                 :            : }
     509                 :            : #else
     510                 :            : static inline unsigned long get_rq_runnable_load(struct rq *rq)
     511                 :            : {
     512                 :            :         return rq->load.weight;
     513                 :            : }
     514                 :            : #endif
     515                 :            : 
     516                 :            : #ifdef CONFIG_NO_HZ_COMMON
     517                 :            : /*
     518                 :            :  * There is no sane way to deal with nohz on smp when using jiffies because the
     519                 :            :  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
     520                 :            :  * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
     521                 :            :  *
     522                 :            :  * Therefore we cannot use the delta approach from the regular tick since that
     523                 :            :  * would seriously skew the load calculation. However we'll make do for those
     524                 :            :  * updates happening while idle (nohz_idle_balance) or coming out of idle
     525                 :            :  * (tick_nohz_idle_exit).
     526                 :            :  *
     527                 :            :  * This means we might still be one tick off for nohz periods.
     528                 :            :  */
     529                 :            : 
     530                 :            : /*
     531                 :            :  * Called from nohz_idle_balance() to update the load ratings before doing the
     532                 :            :  * idle balance.
     533                 :            :  */
     534                 :          0 : void update_idle_cpu_load(struct rq *this_rq)
     535                 :            : {
     536                 :         10 :         unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
     537                 :            :         unsigned long load = get_rq_runnable_load(this_rq);
     538                 :            :         unsigned long pending_updates;
     539                 :            : 
     540                 :            :         /*
     541                 :            :          * bail if there's load or we're actually up-to-date.
     542                 :            :          */
     543 [ +  - ][ -  + ]:         10 :         if (load || curr_jiffies == this_rq->last_load_update_tick)
     544                 :          0 :                 return;
     545                 :            : 
     546                 :          0 :         pending_updates = curr_jiffies - this_rq->last_load_update_tick;
     547                 :          0 :         this_rq->last_load_update_tick = curr_jiffies;
     548                 :            : 
     549                 :          0 :         __update_cpu_load(this_rq, load, pending_updates);
     550                 :            : }
     551                 :            : 
     552                 :            : /*
     553                 :            :  * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
     554                 :            :  */
     555                 :          0 : void update_cpu_load_nohz(void)
     556                 :            : {
     557                 :    3980098 :         struct rq *this_rq = this_rq();
     558                 :    1990049 :         unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
     559                 :            :         unsigned long pending_updates;
     560                 :            : 
     561         [ +  + ]:    1990049 :         if (curr_jiffies == this_rq->last_load_update_tick)
     562                 :    1990049 :                 return;
     563                 :            : 
     564                 :     292726 :         raw_spin_lock(&this_rq->lock);
     565                 :     292726 :         pending_updates = curr_jiffies - this_rq->last_load_update_tick;
     566         [ +  - ]:     292726 :         if (pending_updates) {
     567                 :     292726 :                 this_rq->last_load_update_tick = curr_jiffies;
     568                 :            :                 /*
     569                 :            :                  * We were idle, this means load 0, the current load might be
     570                 :            :                  * !0 due to remote wakeups and the sort.
     571                 :            :                  */
     572                 :     292726 :                 __update_cpu_load(this_rq, 0, pending_updates);
     573                 :            :         }
     574                 :            :         raw_spin_unlock(&this_rq->lock);
     575                 :            : }
     576                 :            : #endif /* CONFIG_NO_HZ */
     577                 :            : 
     578                 :            : /*
     579                 :            :  * Called from scheduler_tick()
     580                 :            :  */
     581                 :          0 : void update_cpu_load_active(struct rq *this_rq)
     582                 :            : {
     583                 :            :         unsigned long load = get_rq_runnable_load(this_rq);
     584                 :            :         /*
     585                 :            :          * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
     586                 :            :          */
     587                 :    7849436 :         this_rq->last_load_update_tick = jiffies;
     588                 :    7849436 :         __update_cpu_load(this_rq, load, 1);
     589                 :            : 
     590                 :    7848011 :         calc_load_account_active(this_rq);
     591                 :    7848208 : }

Generated by: LCOV version 1.9