Branch data Line data Source code
1 : : /*
2 : : * Read-Copy Update mechanism for mutual exclusion
3 : : *
4 : : * This program is free software; you can redistribute it and/or modify
5 : : * it under the terms of the GNU General Public License as published by
6 : : * the Free Software Foundation; either version 2 of the License, or
7 : : * (at your option) any later version.
8 : : *
9 : : * This program is distributed in the hope that it will be useful,
10 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : : * GNU General Public License for more details.
13 : : *
14 : : * You should have received a copy of the GNU General Public License
15 : : * along with this program; if not, write to the Free Software
16 : : * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 : : *
18 : : * Copyright IBM Corporation, 2008
19 : : *
20 : : * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 : : * Manfred Spraul <manfred@colorfullife.com>
22 : : * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
23 : : *
24 : : * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
25 : : * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
26 : : *
27 : : * For detailed explanation of Read-Copy Update mechanism see -
28 : : * Documentation/RCU
29 : : */
30 : : #include <linux/types.h>
31 : : #include <linux/kernel.h>
32 : : #include <linux/init.h>
33 : : #include <linux/spinlock.h>
34 : : #include <linux/smp.h>
35 : : #include <linux/rcupdate.h>
36 : : #include <linux/interrupt.h>
37 : : #include <linux/sched.h>
38 : : #include <linux/nmi.h>
39 : : #include <linux/atomic.h>
40 : : #include <linux/bitops.h>
41 : : #include <linux/export.h>
42 : : #include <linux/completion.h>
43 : : #include <linux/moduleparam.h>
44 : : #include <linux/module.h>
45 : : #include <linux/percpu.h>
46 : : #include <linux/notifier.h>
47 : : #include <linux/cpu.h>
48 : : #include <linux/mutex.h>
49 : : #include <linux/time.h>
50 : : #include <linux/kernel_stat.h>
51 : : #include <linux/wait.h>
52 : : #include <linux/kthread.h>
53 : : #include <linux/prefetch.h>
54 : : #include <linux/delay.h>
55 : : #include <linux/stop_machine.h>
56 : : #include <linux/random.h>
57 : : #include <linux/ftrace_event.h>
58 : : #include <linux/suspend.h>
59 : :
60 : : #include "tree.h"
61 : : #include <trace/events/rcu.h>
62 : :
63 : : #include "rcu.h"
64 : :
65 : : MODULE_ALIAS("rcutree");
66 : : #ifdef MODULE_PARAM_PREFIX
67 : : #undef MODULE_PARAM_PREFIX
68 : : #endif
69 : : #define MODULE_PARAM_PREFIX "rcutree."
70 : :
71 : : /* Data structures. */
72 : :
73 : : static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
74 : : static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
75 : :
76 : : /*
77 : : * In order to export the rcu_state name to the tracing tools, it
78 : : * needs to be added in the __tracepoint_string section.
79 : : * This requires defining a separate variable tp_<sname>_varname
 80 : : * that points to the string being used, which allows the
 81 : : * userspace tracing tools to map the string address back to
 82 : : * the matching string.
83 : : */
84 : : #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
85 : : static char sname##_varname[] = #sname; \
86 : : static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
87 : : struct rcu_state sname##_state = { \
88 : : .level = { &sname##_state.node[0] }, \
89 : : .call = cr, \
90 : : .fqs_state = RCU_GP_IDLE, \
91 : : .gpnum = 0UL - 300UL, \
92 : : .completed = 0UL - 300UL, \
93 : : .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
94 : : .orphan_nxttail = &sname##_state.orphan_nxtlist, \
95 : : .orphan_donetail = &sname##_state.orphan_donelist, \
96 : : .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
97 : : .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
98 : : .name = sname##_varname, \
99 : : .abbr = sabbr, \
100 : : }; \
101 : : DEFINE_PER_CPU(struct rcu_data, sname##_data)
102 : :
103 : : RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
104 : : RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
105 : :
106 : : static struct rcu_state *rcu_state;
107 : : LIST_HEAD(rcu_struct_flavors);
108 : :
109 : : /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
110 : : static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
111 : : module_param(rcu_fanout_leaf, int, 0444);
112 : : int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
113 : : static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
114 : : NUM_RCU_LVL_0,
115 : : NUM_RCU_LVL_1,
116 : : NUM_RCU_LVL_2,
117 : : NUM_RCU_LVL_3,
118 : : NUM_RCU_LVL_4,
119 : : };
120 : : int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
121 : :
122 : : /*
123 : : * The rcu_scheduler_active variable transitions from zero to one just
124 : : * before the first task is spawned. So when this variable is zero, RCU
125 : : * can assume that there is but one task, allowing RCU to (for example)
126 : : * optimize synchronize_sched() to a simple barrier(). When this variable
127 : : * is one, RCU must actually do all the hard work required to detect real
128 : : * grace periods. This variable is also used to suppress boot-time false
129 : : * positives from lockdep-RCU error checking.
130 : : */
131 : : int rcu_scheduler_active __read_mostly;
132 : : EXPORT_SYMBOL_GPL(rcu_scheduler_active);
133 : :
134 : : /*
135 : : * The rcu_scheduler_fully_active variable transitions from zero to one
136 : : * during the early_initcall() processing, which is after the scheduler
137 : : * is capable of creating new tasks. So RCU processing (for example,
138 : : * creating tasks for RCU priority boosting) must be delayed until after
139 : : * rcu_scheduler_fully_active transitions from zero to one. We also
140 : : * currently delay invocation of any RCU callbacks until after this point.
141 : : *
142 : : * It might later prove better for people registering RCU callbacks during
143 : : * early boot to take responsibility for these callbacks, but one step at
144 : : * a time.
145 : : */
146 : : static int rcu_scheduler_fully_active __read_mostly;
147 : :
148 : : #ifdef CONFIG_RCU_BOOST
149 : :
150 : : /*
151 : : * Control variables for per-CPU and per-rcu_node kthreads. These
152 : : * handle all flavors of RCU.
153 : : */
154 : : static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
155 : : DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
156 : : DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
157 : : DEFINE_PER_CPU(char, rcu_cpu_has_work);
158 : :
159 : : #endif /* #ifdef CONFIG_RCU_BOOST */
160 : :
161 : : static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
162 : : static void invoke_rcu_core(void);
163 : : static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
164 : :
165 : : /*
166 : : * Track the rcutorture test sequence number and the update version
167 : : * number within a given test. The rcutorture_testseq is incremented
168 : : * on every rcutorture module load and unload, so has an odd value
169 : : * when a test is running. The rcutorture_vernum is set to zero
170 : : * when rcutorture starts and is incremented on each rcutorture update.
171 : : * These variables enable correlating rcutorture output with the
172 : : * RCU tracing information.
173 : : */
174 : : unsigned long rcutorture_testseq;
175 : : unsigned long rcutorture_vernum;
176 : :
177 : : /*
178 : : * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
179 : : * permit this function to be invoked without holding the root rcu_node
180 : : * structure's ->lock, but of course results can be subject to change.
181 : : */
182 : : static int rcu_gp_in_progress(struct rcu_state *rsp)
183 : : {
184 : 11859159 : return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
185 : : }
186 : :
187 : : /*
188 : : * Note a quiescent state. Because we do not need to know
 189 : : * how many quiescent states passed, only whether there was at least
190 : : * one since the start of the grace period, this just sets a flag.
191 : : * The caller must have disabled preemption.
192 : : */
193 : 0 : void rcu_sched_qs(int cpu)
194 : : {
195 : 103008766 : struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
196 : :
197 : : if (rdp->passed_quiesce == 0)
198 : : trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
199 : 103008766 : rdp->passed_quiesce = 1;
200 : 0 : }
201 : :
202 : 0 : void rcu_bh_qs(int cpu)
203 : : {
204 : 8367434 : struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
205 : :
206 : : if (rdp->passed_quiesce == 0)
207 : : trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
208 : 2586641 : rdp->passed_quiesce = 1;
209 : 5780793 : }
210 : :
211 : : /*
212 : : * Note a context switch. This is a quiescent state for RCU-sched,
213 : : * and requires special handling for preemptible RCU.
214 : : * The caller must have disabled preemption.
215 : : */
216 : 0 : void rcu_note_context_switch(int cpu)
217 : : {
218 : 101054276 : trace_rcu_utilization(TPS("Start context switch"));
219 : : rcu_sched_qs(cpu);
220 : : rcu_preempt_note_context_switch(cpu);
221 : 101031131 : trace_rcu_utilization(TPS("End context switch"));
222 : 101031131 : }
223 : : EXPORT_SYMBOL_GPL(rcu_note_context_switch);
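The quiescent-state hooks above reduce to a per-CPU flag that the grace-period machinery later inspects. As a rough, hedged illustration (the names below are invented for this sketch and are not the kernel's), a standalone model of the idea could look like this: each CPU sets its flag when it notes a quiescent state, and the grace period can end only once every CPU has done so.

#include <stdbool.h>
#include <stdio.h>

#define NR_TOY_CPUS 4

/* Toy per-CPU flag: did this CPU pass a quiescent state during this GP? */
static bool passed_quiesce[NR_TOY_CPUS];

/* Rough analogue of rcu_sched_qs(): record "at least one" quiescent state. */
static void toy_note_qs(int cpu)
{
	passed_quiesce[cpu] = true;	/* how many does not matter */
}

/* A grace period can end only after every CPU has reported. */
static bool toy_gp_complete(void)
{
	for (int cpu = 0; cpu < NR_TOY_CPUS; cpu++)
		if (!passed_quiesce[cpu])
			return false;
	return true;
}

int main(void)
{
	toy_note_qs(0);
	toy_note_qs(1);
	printf("complete after 2 CPUs: %d\n", toy_gp_complete());	/* prints 0 */
	toy_note_qs(2);
	toy_note_qs(3);
	printf("complete after 4 CPUs: %d\n", toy_gp_complete());	/* prints 1 */
	return 0;
}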
224 : :
225 : : static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
226 : : .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
227 : : .dynticks = ATOMIC_INIT(1),
228 : : #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
229 : : .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
230 : : .dynticks_idle = ATOMIC_INIT(1),
231 : : #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
232 : : };
233 : :
234 : : static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
235 : : static long qhimark = 10000; /* If this many pending, ignore blimit. */
236 : : static long qlowmark = 100; /* Once only this many pending, use blimit. */
237 : :
238 : : module_param(blimit, long, 0444);
239 : : module_param(qhimark, long, 0444);
240 : : module_param(qlowmark, long, 0444);
241 : :
242 : : static ulong jiffies_till_first_fqs = ULONG_MAX;
243 : : static ulong jiffies_till_next_fqs = ULONG_MAX;
244 : :
245 : : module_param(jiffies_till_first_fqs, ulong, 0644);
246 : : module_param(jiffies_till_next_fqs, ulong, 0644);
247 : :
248 : : static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
249 : : struct rcu_data *rdp);
250 : : static void force_qs_rnp(struct rcu_state *rsp,
251 : : int (*f)(struct rcu_data *rsp, bool *isidle,
252 : : unsigned long *maxj),
253 : : bool *isidle, unsigned long *maxj);
254 : : static void force_quiescent_state(struct rcu_state *rsp);
255 : : static int rcu_pending(int cpu);
256 : :
257 : : /*
258 : : * Return the number of RCU-sched batches processed thus far for debug & stats.
259 : : */
260 : 0 : long rcu_batches_completed_sched(void)
261 : : {
262 : 0 : return rcu_sched_state.completed;
263 : : }
264 : : EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
265 : :
266 : : /*
267 : : * Return the number of RCU BH batches processed thus far for debug & stats.
268 : : */
269 : 0 : long rcu_batches_completed_bh(void)
270 : : {
271 : 0 : return rcu_bh_state.completed;
272 : : }
273 : : EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
274 : :
275 : : /*
276 : : * Force a quiescent state for RCU BH.
277 : : */
278 : 0 : void rcu_bh_force_quiescent_state(void)
279 : : {
280 : 0 : force_quiescent_state(&rcu_bh_state);
281 : 0 : }
282 : : EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
283 : :
284 : : /*
285 : : * Record the number of times rcutorture tests have been initiated and
286 : : * terminated. This information allows the debugfs tracing stats to be
287 : : * correlated to the rcutorture messages, even when the rcutorture module
288 : : * is being repeatedly loaded and unloaded. In other words, we cannot
289 : : * store this state in rcutorture itself.
290 : : */
291 : 0 : void rcutorture_record_test_transition(void)
292 : : {
293 : 0 : rcutorture_testseq++;
294 : 0 : rcutorture_vernum = 0;
295 : 0 : }
296 : : EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
297 : :
298 : : /*
299 : : * Record the number of writer passes through the current rcutorture test.
300 : : * This is also used to correlate debugfs tracing stats with the rcutorture
301 : : * messages.
302 : : */
303 : 0 : void rcutorture_record_progress(unsigned long vernum)
304 : : {
305 : 0 : rcutorture_vernum++;
306 : 0 : }
307 : : EXPORT_SYMBOL_GPL(rcutorture_record_progress);
308 : :
309 : : /*
310 : : * Force a quiescent state for RCU-sched.
311 : : */
312 : 0 : void rcu_sched_force_quiescent_state(void)
313 : : {
314 : 0 : force_quiescent_state(&rcu_sched_state);
315 : 0 : }
316 : : EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
317 : :
318 : : /*
319 : : * Does the CPU have callbacks ready to be invoked?
320 : : */
321 : : static int
322 : : cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
323 : : {
324 [ + + ][ + ]: 12041464 : return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
[ + + ][ + + ]
[ + + ][ + ]
[ + + ][ + + ]
325 : : rdp->nxttail[RCU_DONE_TAIL] != NULL;
326 : : }
327 : :
328 : : /*
329 : : * Does the current CPU require a not-yet-started grace period?
330 : : * The caller must have disabled interrupts to prevent races with
331 : : * normal callback registry.
332 : : */
333 : : static int
334 : 0 : cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
335 : : {
336 : : int i;
337 : :
338 [ + + ]: 6996704 : if (rcu_gp_in_progress(rsp))
339 : : return 0; /* No, a grace period is already in progress. */
340 : : if (rcu_nocb_needs_gp(rsp))
341 : : return 1; /* Yes, a no-CBs CPU needs one. */
342 [ + ]: 4968117 : if (!rdp->nxttail[RCU_NEXT_TAIL])
343 : : return 0; /* No, this is a no-CBs (or offline) CPU. */
344 [ + + ]: 4970612 : if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
345 : : return 1; /* Yes, this CPU has newly registered callbacks. */
346 [ + + ]: 13331355 : for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
347 [ + + ][ + + ]: 9094087 : if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
348 : 658031 : ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
349 : : rdp->nxtcompleted[i]))
350 : : return 1; /* Yes, CBs for future grace period. */
351 : : return 0; /* No grace period needed. */
352 : : }
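Both helpers above interrogate the same structure: one singly linked list of callbacks (->nxtlist) partitioned into segments by tail pointers (->nxttail[RCU_DONE_TAIL] through ->nxttail[RCU_NEXT_TAIL]). The following standalone sketch is a simplified userspace model with invented names, not the kernel implementation; it shows how comparing tail pointers answers "are callbacks ready to invoke?" and "have new callbacks arrived?".

#include <stdio.h>

/* Toy model of the segmented callback list kept in rcu_data. */
struct toy_cb { struct toy_cb *next; };

enum { DONE_TAIL, WAIT_TAIL, NEXT_READY_TAIL, NEXT_TAIL, NSEGS };

struct toy_rdp {
	struct toy_cb *nxtlist;		/* head of the single list */
	struct toy_cb **nxttail[NSEGS];	/* tail pointer of each segment */
};

static void toy_init(struct toy_rdp *rdp)
{
	rdp->nxtlist = NULL;
	for (int i = 0; i < NSEGS; i++)
		rdp->nxttail[i] = &rdp->nxtlist;	/* all segments empty */
}

/* New callbacks always land in the NEXT segment, at the very end. */
static void toy_enqueue(struct toy_rdp *rdp, struct toy_cb *cb)
{
	cb->next = NULL;
	*rdp->nxttail[NEXT_TAIL] = cb;
	rdp->nxttail[NEXT_TAIL] = &cb->next;
}

/* "Ready to invoke" means the DONE segment is non-empty. */
static int toy_has_done_cbs(struct toy_rdp *rdp)
{
	return &rdp->nxtlist != rdp->nxttail[DONE_TAIL];
}

/* "Newly registered" means NEXT extends past NEXT_READY. */
static int toy_has_new_cbs(struct toy_rdp *rdp)
{
	return *rdp->nxttail[NEXT_READY_TAIL] != NULL;
}

int main(void)
{
	struct toy_rdp rdp;
	struct toy_cb cb;

	toy_init(&rdp);
	printf("done=%d new=%d\n", toy_has_done_cbs(&rdp), toy_has_new_cbs(&rdp)); /* 0 0 */
	toy_enqueue(&rdp, &cb);
	printf("done=%d new=%d\n", toy_has_done_cbs(&rdp), toy_has_new_cbs(&rdp)); /* 0 1 */
	return 0;
}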
353 : :
354 : : /*
355 : : * Return the root node of the specified rcu_state structure.
356 : : */
357 : : static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
358 : : {
359 : : return &rsp->node[0];
360 : : }
361 : :
362 : : /*
363 : : * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
364 : : *
365 : : * If the new value of the ->dynticks_nesting counter now is zero,
366 : : * we really have entered idle, and must do the appropriate accounting.
367 : : * The caller must have disabled interrupts.
368 : : */
369 : 0 : static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
370 : : bool user)
371 : : {
372 : : struct rcu_state *rsp;
373 : : struct rcu_data *rdp;
374 : :
375 : : trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
376 [ + + ][ - + ]: 12460720 : if (!user && !is_idle_task(current)) {
377 : 0 : struct task_struct *idle __maybe_unused =
378 : 0 : idle_task(smp_processor_id());
379 : :
380 : : trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
381 : 0 : ftrace_dump(DUMP_ORIG);
382 [ # # ][ - + ]: 0 : WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
383 : : current->pid, current->comm,
384 : : idle->pid, idle->comm); /* must be idle task! */
385 : : }
386 [ + + ]: 36561877 : for_each_rcu_flavor(rsp) {
387 : 24093803 : rdp = this_cpu_ptr(rsp->rda);
388 : : do_nocb_deferred_wakeup(rdp);
389 : : }
390 : : rcu_prepare_for_idle(smp_processor_id());
391 : : /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
392 : 12468074 : smp_mb__before_atomic_inc(); /* See above. */
393 : 12068171 : atomic_inc(&rdtp->dynticks);
394 : 12352955 : smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
395 [ - + ][ # # ]: 12435203 : WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
[ # # ]
396 : :
397 : : /*
398 : : * It is illegal to enter an extended quiescent state while
399 : : * in an RCU read-side critical section.
400 : : */
401 : : rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
402 : : "Illegal idle entry in RCU read-side critical section.");
403 : : rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
404 : : "Illegal idle entry in RCU-bh read-side critical section.");
405 : : rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
406 : : "Illegal idle entry in RCU-sched read-side critical section.");
407 : 12435203 : }
408 : :
409 : : /*
410 : : * Enter an RCU extended quiescent state, which can be either the
411 : : * idle loop or adaptive-tickless usermode execution.
412 : : */
413 : 0 : static void rcu_eqs_enter(bool user)
414 : : {
415 : : long long oldval;
416 : : struct rcu_dynticks *rdtp;
417 : :
418 : 16550804 : rdtp = this_cpu_ptr(&rcu_dynticks);
419 : 8275402 : oldval = rdtp->dynticks_nesting;
420 [ - + ][ # # ]: 8275402 : WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
[ - + ]
421 [ + - ]: 8275434 : if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
422 : 8275434 : rdtp->dynticks_nesting = 0;
423 : 8275434 : rcu_eqs_enter_common(rdtp, oldval, user);
424 : : } else {
425 : 0 : rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
426 : : }
427 : 8583562 : }
428 : :
429 : : /**
430 : : * rcu_idle_enter - inform RCU that current CPU is entering idle
431 : : *
432 : : * Enter idle mode, in other words, -leave- the mode in which RCU
433 : : * read-side critical sections can occur. (Though RCU read-side
434 : : * critical sections can occur in irq handlers in idle, a possibility
435 : : * handled by irq_enter() and irq_exit().)
436 : : *
437 : : * We crowbar the ->dynticks_nesting field to zero to allow for
438 : : * the possibility of usermode upcalls having messed up our count
439 : : * of interrupt nesting level during the prior busy period.
440 : : */
441 : 0 : void rcu_idle_enter(void)
442 : : {
443 : : unsigned long flags;
444 : :
445 : : local_irq_save(flags);
446 : 8244246 : rcu_eqs_enter(false);
447 : 8579591 : rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0);
448 [ + - ]: 8579591 : local_irq_restore(flags);
449 : 8580022 : }
450 : : EXPORT_SYMBOL_GPL(rcu_idle_enter);
451 : :
452 : : #ifdef CONFIG_RCU_USER_QS
453 : : /**
454 : : * rcu_user_enter - inform RCU that we are resuming userspace.
455 : : *
456 : : * Enter RCU idle mode right before resuming userspace. No use of RCU
457 : : * is permitted between this call and rcu_user_exit(). This way the
458 : : * CPU doesn't need to maintain the tick for RCU maintenance purposes
459 : : * when the CPU runs in userspace.
460 : : */
461 : : void rcu_user_enter(void)
462 : : {
463 : : rcu_eqs_enter(1);
464 : : }
465 : : #endif /* CONFIG_RCU_USER_QS */
466 : :
467 : : /**
468 : : * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
469 : : *
470 : : * Exit from an interrupt handler, which might possibly result in entering
471 : : * idle mode, in other words, leaving the mode in which read-side critical
472 : : * sections can occur.
473 : : *
474 : : * This code assumes that the idle loop never does anything that might
475 : : * result in unbalanced calls to irq_enter() and irq_exit(). If your
476 : : * architecture violates this assumption, RCU will give you what you
477 : : * deserve, good and hard. But very infrequently and irreproducibly.
478 : : *
479 : : * Use things like work queues to work around this limitation.
480 : : *
481 : : * You have been warned.
482 : : */
483 : 0 : void rcu_irq_exit(void)
484 : : {
485 : : unsigned long flags;
486 : : long long oldval;
487 : : struct rcu_dynticks *rdtp;
488 : :
489 : : local_irq_save(flags);
490 : 14071538 : rdtp = this_cpu_ptr(&rcu_dynticks);
491 : : oldval = rdtp->dynticks_nesting;
492 : 7035769 : rdtp->dynticks_nesting--;
493 [ - + ][ # # ]: 7035769 : WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
[ - ]
494 [ + + ]: 7032013 : if (rdtp->dynticks_nesting)
495 : : trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
496 : : else
497 : 3829942 : rcu_eqs_enter_common(rdtp, oldval, true);
498 : : rcu_sysidle_enter(rdtp, 1);
499 [ + - ]: 7030992 : local_irq_restore(flags);
500 : 7034694 : }
501 : :
502 : : /*
503 : : * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
504 : : *
505 : : * If the new value of the ->dynticks_nesting counter was previously zero,
506 : : * we really have exited idle, and must do the appropriate accounting.
507 : : * The caller must have disabled interrupts.
508 : : */
509 : 0 : static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
510 : : int user)
511 : : {
512 : 12366630 : smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
513 : 12418060 : atomic_inc(&rdtp->dynticks);
514 : : /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
515 : 12437410 : smp_mb__after_atomic_inc(); /* See above. */
516 [ - + ][ # # ]: 12422645 : WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
[ - ]
517 : : rcu_cleanup_after_idle(smp_processor_id());
518 : : trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
519 [ + + ][ - + ]: 12360081 : if (!user && !is_idle_task(current)) {
520 : 0 : struct task_struct *idle __maybe_unused =
521 : 0 : idle_task(smp_processor_id());
522 : :
523 : : trace_rcu_dyntick(TPS("Error on exit: not idle task"),
524 : : oldval, rdtp->dynticks_nesting);
525 : 0 : ftrace_dump(DUMP_ORIG);
526 [ # # ][ # # ]: 0 : WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
527 : : current->pid, current->comm,
528 : : idle->pid, idle->comm); /* must be idle task! */
529 : : }
530 : 12360081 : }
531 : :
532 : : /*
533 : : * Exit an RCU extended quiescent state, which can be either the
534 : : * idle loop or adaptive-tickless usermode execution.
535 : : */
536 : 0 : static void rcu_eqs_exit(bool user)
537 : : {
538 : : struct rcu_dynticks *rdtp;
539 : : long long oldval;
540 : :
541 : 17295902 : rdtp = this_cpu_ptr(&rcu_dynticks);
542 : 8647951 : oldval = rdtp->dynticks_nesting;
543 [ - + ][ # # ]: 8647951 : WARN_ON_ONCE(oldval < 0);
[ - ]
544 [ - + ]: 8647804 : if (oldval & DYNTICK_TASK_NEST_MASK) {
545 : 0 : rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
546 : : } else {
547 : 8647804 : rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
548 : 8647804 : rcu_eqs_exit_common(rdtp, oldval, user);
549 : : }
550 : 8647213 : }
551 : :
552 : : /**
553 : : * rcu_idle_exit - inform RCU that current CPU is leaving idle
554 : : *
555 : : * Exit idle mode, in other words, -enter- the mode in which RCU
556 : : * read-side critical sections can occur.
557 : : *
558 : : * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
559 : : * allow for the possibility of usermode upcalls messing up our count
560 : : * of interrupt nesting level during the busy period that is just
561 : : * now starting.
562 : : */
563 : 0 : void rcu_idle_exit(void)
564 : : {
565 : : unsigned long flags;
566 : :
567 : : local_irq_save(flags);
568 : 8647282 : rcu_eqs_exit(false);
569 : 8647634 : rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0);
570 [ - + ]: 8647634 : local_irq_restore(flags);
571 : 8647965 : }
572 : : EXPORT_SYMBOL_GPL(rcu_idle_exit);
573 : :
574 : : #ifdef CONFIG_RCU_USER_QS
575 : : /**
576 : : * rcu_user_exit - inform RCU that we are exiting userspace.
577 : : *
578 : : * Exit RCU idle mode while entering the kernel because it can
579 : : * run a RCU read side critical section anytime.
580 : : */
581 : : void rcu_user_exit(void)
582 : : {
583 : : rcu_eqs_exit(1);
584 : : }
585 : : #endif /* CONFIG_RCU_USER_QS */
586 : :
587 : : /**
588 : : * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
589 : : *
590 : : * Enter an interrupt handler, which might possibly result in exiting
591 : : * idle mode, in other words, entering the mode in which read-side critical
592 : : * sections can occur.
593 : : *
594 : : * Note that the Linux kernel is fully capable of entering an interrupt
595 : : * handler that it never exits, for example when doing upcalls to
596 : : * user mode! This code assumes that the idle loop never does upcalls to
597 : : * user mode. If your architecture does do upcalls from the idle loop (or
598 : : * does anything else that results in unbalanced calls to the irq_enter()
599 : : * and irq_exit() functions), RCU will give you what you deserve, good
600 : : * and hard. But very infrequently and irreproducibly.
601 : : *
602 : : * Use things like work queues to work around this limitation.
603 : : *
604 : : * You have been warned.
605 : : */
606 : 0 : void rcu_irq_enter(void)
607 : : {
608 : : unsigned long flags;
609 : : struct rcu_dynticks *rdtp;
610 : : long long oldval;
611 : :
612 : : local_irq_save(flags);
613 : 13764818 : rdtp = this_cpu_ptr(&rcu_dynticks);
614 : 6882409 : oldval = rdtp->dynticks_nesting;
615 : 6882409 : rdtp->dynticks_nesting++;
616 [ - + ][ # # ]: 6882409 : WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
[ - + ]
617 [ + + ]: 6909329 : if (oldval)
618 : : trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
619 : : else
620 : 3735379 : rcu_eqs_exit_common(rdtp, oldval, true);
621 : : rcu_sysidle_exit(rdtp, 1);
622 [ + - ]: 6830036 : local_irq_restore(flags);
623 : 6993016 : }
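rcu_irq_exit() and rcu_irq_enter() keep ->dynticks_nesting balanced so that only the outermost transition actually enters or leaves the extended quiescent state. A minimal sketch of that bookkeeping follows; the names are invented, and the task-level DYNTICK_TASK_* encoding and the atomic ->dynticks counter are deliberately ignored.

#include <stdio.h>

/* Toy nesting counter for one CPU: 0 means idle, i.e. in the extended
 * quiescent state (EQS). */
static long long nesting;
static int in_eqs = 1;

static void toy_eqs_enter(void) { in_eqs = 1; }	/* stands in for rcu_eqs_enter_common() */
static void toy_eqs_exit(void)  { in_eqs = 0; }	/* stands in for rcu_eqs_exit_common() */

/* Analogue of rcu_irq_enter(): only the 0 -> 1 transition leaves EQS. */
static void toy_irq_enter(void)
{
	long long oldval = nesting++;

	if (!oldval)
		toy_eqs_exit();
}

/* Analogue of rcu_irq_exit(): only the 1 -> 0 transition enters EQS. */
static void toy_irq_exit(void)
{
	if (--nesting == 0)
		toy_eqs_enter();
}

int main(void)
{
	toy_irq_enter();	/* interrupt taken from idle: leave EQS */
	printf("in irq, in_eqs=%d\n", in_eqs);		/* 0 */
	toy_irq_enter();	/* nested interrupt: no transition */
	toy_irq_exit();		/* back to the first handler */
	toy_irq_exit();		/* handler done, CPU idle again: enter EQS */
	printf("idle again, in_eqs=%d\n", in_eqs);	/* 1 */
	return 0;
}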
624 : :
625 : : /**
626 : : * rcu_nmi_enter - inform RCU of entry to NMI context
627 : : *
628 : : * If the CPU was idle with dynamic ticks active, and there is no
629 : : * irq handler running, this updates rdtp->dynticks_nmi to let the
630 : : * RCU grace-period handling know that the CPU is active.
631 : : */
632 : 0 : void rcu_nmi_enter(void)
633 : : {
634 : 0 : struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
635 : :
636 [ # # ][ # # ]: 0 : if (rdtp->dynticks_nmi_nesting == 0 &&
637 : 0 : (atomic_read(&rdtp->dynticks) & 0x1))
638 : 0 : return;
639 : 0 : rdtp->dynticks_nmi_nesting++;
640 : 0 : smp_mb__before_atomic_inc(); /* Force delay from prior write. */
641 : 0 : atomic_inc(&rdtp->dynticks);
642 : : /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
643 : 0 : smp_mb__after_atomic_inc(); /* See above. */
644 [ # # ][ # # ]: 0 : WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
[ # # ]
645 : : }
646 : :
647 : : /**
648 : : * rcu_nmi_exit - inform RCU of exit from NMI context
649 : : *
650 : : * If the CPU was idle with dynamic ticks active, and there is no
651 : : * irq handler running, this updates rdtp->dynticks_nmi to let the
652 : : * RCU grace-period handling know that the CPU is no longer active.
653 : : */
654 : 0 : void rcu_nmi_exit(void)
655 : : {
656 : 0 : struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
657 : :
658 [ # # ][ # # ]: 0 : if (rdtp->dynticks_nmi_nesting == 0 ||
659 : 0 : --rdtp->dynticks_nmi_nesting != 0)
660 : 0 : return;
661 : : /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
662 : 0 : smp_mb__before_atomic_inc(); /* See above. */
663 : 0 : atomic_inc(&rdtp->dynticks);
664 : 0 : smp_mb__after_atomic_inc(); /* Force delay to next write. */
665 [ # # ][ # # ]: 0 : WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
[ # # ]
666 : : }
667 : :
668 : : /**
669 : : * __rcu_is_watching - are RCU read-side critical sections safe?
670 : : *
671 : : * Return true if RCU is watching the running CPU, which means that
672 : : * this CPU can safely enter RCU read-side critical sections. Unlike
673 : : * rcu_is_watching(), the caller of __rcu_is_watching() must have at
674 : : * least disabled preemption.
675 : : */
676 : 0 : bool notrace __rcu_is_watching(void)
677 : : {
678 : 21316066 : return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
679 : : }
680 : :
681 : : /**
 682 : : * rcu_is_watching - see if RCU thinks that the current CPU is not idle
 683 : : *
 684 : : * If the current CPU is in its idle loop and is neither in an interrupt
 685 : : * nor an NMI handler, return false; otherwise return true.
686 : : */
687 : 0 : bool notrace rcu_is_watching(void)
688 : : {
689 : : int ret;
690 : :
691 : 10656004 : preempt_disable();
692 : : ret = __rcu_is_watching();
693 : 10658033 : preempt_enable();
694 : 0 : return ret;
695 : : }
696 : : EXPORT_SYMBOL_GPL(rcu_is_watching);
697 : :
698 : : #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
699 : :
700 : : /*
701 : : * Is the current CPU online? Disable preemption to avoid false positives
702 : : * that could otherwise happen due to the current CPU number being sampled,
703 : : * this task being preempted, its old CPU being taken offline, resuming
704 : : * on some other CPU, then determining that its old CPU is now offline.
705 : : * It is OK to use RCU on an offline processor during initial boot, hence
706 : : * the check for rcu_scheduler_fully_active. Note also that it is OK
707 : : * for a CPU coming online to use RCU for one jiffy prior to marking itself
708 : : * online in the cpu_online_mask. Similarly, it is OK for a CPU going
709 : : * offline to continue to use RCU for one jiffy after marking itself
710 : : * offline in the cpu_online_mask. This leniency is necessary given the
711 : : * non-atomic nature of the online and offline processing, for example,
712 : : * the fact that a CPU enters the scheduler after completing the CPU_DYING
713 : : * notifiers.
714 : : *
715 : : * This is also why RCU internally marks CPUs online during the
716 : : * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
717 : : *
718 : : * Disable checking if in an NMI handler because we cannot safely report
719 : : * errors from NMI handlers anyway.
720 : : */
721 : : bool rcu_lockdep_current_cpu_online(void)
722 : : {
723 : : struct rcu_data *rdp;
724 : : struct rcu_node *rnp;
725 : : bool ret;
726 : :
727 : : if (in_nmi())
728 : : return true;
729 : : preempt_disable();
730 : : rdp = this_cpu_ptr(&rcu_sched_data);
731 : : rnp = rdp->mynode;
732 : : ret = (rdp->grpmask & rnp->qsmaskinit) ||
733 : : !rcu_scheduler_fully_active;
734 : : preempt_enable();
735 : : return ret;
736 : : }
737 : : EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
738 : :
739 : : #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
740 : :
741 : : /**
742 : : * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
743 : : *
744 : : * If the current CPU is idle or running at a first-level (not nested)
745 : : * interrupt from idle, return true. The caller must have at least
746 : : * disabled preemption.
747 : : */
748 : : static int rcu_is_cpu_rrupt_from_idle(void)
749 : : {
750 : 3798942 : return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
751 : : }
752 : :
753 : : /*
754 : : * Snapshot the specified CPU's dynticks counter so that we can later
755 : : * credit them with an implicit quiescent state. Return 1 if this CPU
756 : : * is in dynticks idle mode, which is an extended quiescent state.
757 : : */
758 : 0 : static int dyntick_save_progress_counter(struct rcu_data *rdp,
759 : : bool *isidle, unsigned long *maxj)
760 : : {
761 : 1696632 : rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
762 : : rcu_sysidle_check_cpu(rdp, isidle, maxj);
763 : 848316 : return (rdp->dynticks_snap & 0x1) == 0;
764 : : }
765 : :
766 : : /*
767 : : * This function really isn't for public consumption, but RCU is special in
768 : : * that context switches can allow the state machine to make progress.
769 : : */
770 : : extern void resched_cpu(int cpu);
771 : :
772 : : /*
773 : : * Return true if the specified CPU has passed through a quiescent
 774 : : * state by virtue of being in or having passed through a dynticks
775 : : * idle state since the last call to dyntick_save_progress_counter()
776 : : * for this same CPU, or by virtue of having been offline.
777 : : */
778 : 0 : static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
779 : : bool *isidle, unsigned long *maxj)
780 : : {
781 : : unsigned int curr;
782 : : unsigned int snap;
783 : :
784 : 439156 : curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
785 : 219578 : snap = (unsigned int)rdp->dynticks_snap;
786 : :
787 : : /*
788 : : * If the CPU passed through or entered a dynticks idle phase with
789 : : * no active irq/NMI handlers, then we can safely pretend that the CPU
790 : : * already acknowledged the request to pass through a quiescent
791 : : * state. Either way, that CPU cannot possibly be in an RCU
792 : : * read-side critical section that started before the beginning
793 : : * of the current RCU grace period.
794 : : */
795 [ + + ][ + + ]: 219578 : if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
796 : : trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
797 : 171272 : rdp->dynticks_fqs++;
798 : 171272 : return 1;
799 : : }
800 : :
801 : : /*
802 : : * Check for the CPU being offline, but only if the grace period
803 : : * is old enough. We don't need to worry about the CPU changing
804 : : * state: If we see it offline even once, it has been through a
805 : : * quiescent state.
806 : : *
807 : : * The reason for insisting that the grace period be at least
808 : : * one jiffy old is that CPUs that are not quite online and that
809 : : * have just gone offline can still execute RCU read-side critical
810 : : * sections.
811 : : */
812 [ + + ]: 48306 : if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
813 : : return 0; /* Grace period is not old enough. */
814 : 32990 : barrier();
815 [ + + ]: 32990 : if (cpu_is_offline(rdp->cpu)) {
816 : : trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
817 : 11 : rdp->offline_fqs++;
818 : 11 : return 1;
819 : : }
820 : :
821 : : /*
822 : : * There is a possibility that a CPU in adaptive-ticks state
823 : : * might run in the kernel with the scheduling-clock tick disabled
824 : : * for an extended time period. Invoke rcu_kick_nohz_cpu() to
 825 : : * force the CPU to restart the scheduling-clock tick if the
 826 : : * CPU is in this state.
827 : : */
828 : : rcu_kick_nohz_cpu(rdp->cpu);
829 : :
830 : : /*
831 : : * Alternatively, the CPU might be running in the kernel
832 : : * for an extended period of time without a quiescent state.
833 : : * Attempt to force the CPU through the scheduler to gain the
834 : : * needed quiescent state, but only if the grace period has gone
835 : : * on for an uncommonly long time. If there are many stuck CPUs,
836 : : * we will beat on the first one until it gets unstuck, then move
837 : : * to the next. Only do this for the primary flavor of RCU.
838 : : */
839 [ + - ][ - + ]: 32979 : if (rdp->rsp == rcu_state &&
840 : 32979 : ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) {
841 : 0 : rdp->rsp->jiffies_resched += 5;
842 : 0 : resched_cpu(rdp->cpu);
843 : : }
844 : :
845 : : return 0;
846 : : }
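The ->dynticks counter consulted by the two functions above is incremented on every transition into or out of the extended quiescent state, so an even value means "idle right now" and an advance of two or more since the snapshot means "crossed an EQS boundary in the meantime". The standalone sketch below uses invented names and C11 atomics in place of the kernel's atomic_t and memory barriers, and a plain unsigned subtraction stands in for UINT_CMP_GE(); it only illustrates the snapshot-and-recheck test.

#include <stdatomic.h>
#include <stdio.h>

/* Toy ->dynticks counter: even = in EQS (idle), odd = not idle. */
static atomic_uint dynticks = 1;	/* CPU starts out non-idle */

/* Flip into/out of the extended quiescent state. */
static void toy_eqs_enter(void) { atomic_fetch_add(&dynticks, 1); }	/* becomes even */
static void toy_eqs_exit(void)  { atomic_fetch_add(&dynticks, 1); }	/* becomes odd  */

/* Analogue of dyntick_save_progress_counter(). */
static unsigned int toy_snapshot(void)
{
	return atomic_load(&dynticks);
}

/* Analogue of rcu_implicit_dynticks_qs(): has this CPU been through a QS? */
static int toy_in_qs_since(unsigned int snap)
{
	unsigned int curr = atomic_load(&dynticks);

	/* Idle right now, or passed through idle since the snapshot. */
	return (curr & 0x1) == 0 || (curr - snap) >= 2;
}

int main(void)
{
	unsigned int snap = toy_snapshot();		/* CPU busy: odd */

	printf("qs yet? %d\n", toy_in_qs_since(snap));	/* 0 */
	toy_eqs_enter();				/* CPU goes idle */
	printf("qs now? %d\n", toy_in_qs_since(snap));	/* 1: counter is even */
	toy_eqs_exit();					/* CPU busy again */
	printf("qs still? %d\n", toy_in_qs_since(snap));/* 1: advanced by >= 2 */
	return 0;
}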
847 : :
848 : 0 : static void record_gp_stall_check_time(struct rcu_state *rsp)
849 : : {
850 : 223588 : unsigned long j = ACCESS_ONCE(jiffies);
851 : : unsigned long j1;
852 : :
853 : 223588 : rsp->gp_start = j;
854 : 223588 : smp_wmb(); /* Record start time before stall time. */
855 : 223588 : j1 = rcu_jiffies_till_stall_check();
856 : 223588 : rsp->jiffies_stall = j + j1;
857 : 223588 : rsp->jiffies_resched = j + j1 / 2;
858 : 223588 : }
859 : :
860 : : /*
861 : : * Dump stacks of all tasks running on stalled CPUs. This is a fallback
862 : : * for architectures that do not implement trigger_all_cpu_backtrace().
863 : : * The NMI-triggered stack traces are more accurate because they are
864 : : * printed by the target CPU.
865 : : */
866 : : static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
867 : : {
868 : : int cpu;
869 : : unsigned long flags;
870 : : struct rcu_node *rnp;
871 : :
872 : : rcu_for_each_leaf_node(rsp, rnp) {
873 : : raw_spin_lock_irqsave(&rnp->lock, flags);
874 : : if (rnp->qsmask != 0) {
875 : : for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
876 : : if (rnp->qsmask & (1UL << cpu))
877 : : dump_cpu_task(rnp->grplo + cpu);
878 : : }
879 : : raw_spin_unlock_irqrestore(&rnp->lock, flags);
880 : : }
881 : : }
882 : :
883 : 0 : static void print_other_cpu_stall(struct rcu_state *rsp)
884 : : {
885 : : int cpu;
886 : : long delta;
887 : : unsigned long flags;
888 : : int ndetected = 0;
889 : : struct rcu_node *rnp = rcu_get_root(rsp);
890 : : long totqlen = 0;
891 : :
892 : : /* Only let one CPU complain about others per time interval. */
893 : :
894 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
895 : 0 : delta = jiffies - rsp->jiffies_stall;
896 [ # # ][ # # ]: 0 : if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
897 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
898 : 0 : return;
899 : : }
900 : 0 : rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
901 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
902 : :
903 : : /*
904 : : * OK, time to rat on our buddy...
905 : : * See Documentation/RCU/stallwarn.txt for info on how to debug
906 : : * RCU CPU stall warnings.
907 : : */
908 : 0 : pr_err("INFO: %s detected stalls on CPUs/tasks:",
909 : : rsp->name);
910 : : print_cpu_stall_info_begin();
911 [ # # ]: 0 : rcu_for_each_leaf_node(rsp, rnp) {
912 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
913 : : ndetected += rcu_print_task_stall(rnp);
914 [ # # ]: 0 : if (rnp->qsmask != 0) {
915 [ # # ]: 0 : for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
916 [ # # ]: 0 : if (rnp->qsmask & (1UL << cpu)) {
917 : 0 : print_cpu_stall_info(rsp,
918 : : rnp->grplo + cpu);
919 : 0 : ndetected++;
920 : : }
921 : : }
922 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
923 : : }
924 : :
925 : : /*
926 : : * Now rat on any tasks that got kicked up to the root rcu_node
927 : : * due to CPU offlining.
928 : : */
929 : : rnp = rcu_get_root(rsp);
930 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
931 : : ndetected += rcu_print_task_stall(rnp);
932 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
933 : :
934 : : print_cpu_stall_info_end();
935 [ # # ]: 0 : for_each_possible_cpu(cpu)
936 : 0 : totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
937 : 0 : pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
938 : : smp_processor_id(), (long)(jiffies - rsp->gp_start),
939 : : rsp->gpnum, rsp->completed, totqlen);
940 [ # # ]: 0 : if (ndetected == 0)
941 : 0 : pr_err("INFO: Stall ended before state dump start\n");
942 : : else if (!trigger_all_cpu_backtrace())
943 : : rcu_dump_cpu_stacks(rsp);
944 : :
945 : : /* Complain about tasks blocking the grace period. */
946 : :
947 : : rcu_print_detail_task_stall(rsp);
948 : :
949 : 0 : force_quiescent_state(rsp); /* Kick them all. */
950 : : }
951 : :
952 : : /*
953 : : * This function really isn't for public consumption, but RCU is special in
954 : : * that context switches can allow the state machine to make progress.
955 : : */
956 : : extern void resched_cpu(int cpu);
957 : :
958 : 0 : static void print_cpu_stall(struct rcu_state *rsp)
959 : : {
960 : : int cpu;
961 : : unsigned long flags;
962 : : struct rcu_node *rnp = rcu_get_root(rsp);
963 : : long totqlen = 0;
964 : :
965 : : /*
966 : : * OK, time to rat on ourselves...
967 : : * See Documentation/RCU/stallwarn.txt for info on how to debug
968 : : * RCU CPU stall warnings.
969 : : */
970 : 0 : pr_err("INFO: %s self-detected stall on CPU", rsp->name);
971 : : print_cpu_stall_info_begin();
972 : 0 : print_cpu_stall_info(rsp, smp_processor_id());
973 : : print_cpu_stall_info_end();
974 [ # # ]: 0 : for_each_possible_cpu(cpu)
975 : 0 : totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
976 : 0 : pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
977 : : jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
978 : : if (!trigger_all_cpu_backtrace())
979 : : dump_stack();
980 : :
981 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
982 [ # # ]: 0 : if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
983 : 0 : rsp->jiffies_stall = jiffies +
984 : 0 : 3 * rcu_jiffies_till_stall_check() + 3;
985 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
986 : :
987 : : /*
988 : : * Attempt to revive the RCU machinery by forcing a context switch.
989 : : *
990 : : * A context switch would normally allow the RCU state machine to make
991 : : * progress and it could be we're stuck in kernel space without context
992 : : * switches for an entirely unreasonable amount of time.
993 : : */
994 : 0 : resched_cpu(smp_processor_id());
995 : 0 : }
996 : :
997 : 3700059 : static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
998 : : {
999 : : unsigned long completed;
1000 : : unsigned long gpnum;
1001 : : unsigned long gps;
1002 : : unsigned long j;
1003 : : unsigned long js;
1004 : : struct rcu_node *rnp;
1005 : :
1006 [ + + ][ + + ]: 3700059 : if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
1007 : : return;
1008 : 1242065 : j = ACCESS_ONCE(jiffies);
1009 : :
1010 : : /*
1011 : : * Lots of memory barriers to reject false positives.
1012 : : *
1013 : : * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall,
1014 : : * then rsp->gp_start, and finally rsp->completed. These values
1015 : : * are updated in the opposite order with memory barriers (or
1016 : : * equivalent) during grace-period initialization and cleanup.
 1017 : : * Now, a false positive can occur if we get a new value of
 1018 : : * rsp->gp_start and an old value of rsp->jiffies_stall. But given
1019 : : * the memory barriers, the only way that this can happen is if one
1020 : : * grace period ends and another starts between these two fetches.
1021 : : * Detect this by comparing rsp->completed with the previous fetch
1022 : : * from rsp->gpnum.
1023 : : *
1024 : : * Given this check, comparisons of jiffies, rsp->jiffies_stall,
1025 : : * and rsp->gp_start suffice to forestall false positives.
1026 : : */
1027 : 1242065 : gpnum = ACCESS_ONCE(rsp->gpnum);
1028 : 1242065 : smp_rmb(); /* Pick up ->gpnum first... */
1029 : 1347540 : js = ACCESS_ONCE(rsp->jiffies_stall);
1030 : 1347540 : smp_rmb(); /* ...then ->jiffies_stall before the rest... */
1031 : 1370704 : gps = ACCESS_ONCE(rsp->gp_start);
1032 : 1370704 : smp_rmb(); /* ...and finally ->gp_start before ->completed. */
1033 : 1383545 : completed = ACCESS_ONCE(rsp->completed);
1034 [ + + ][ - + ]: 1383545 : if (ULONG_CMP_GE(completed, gpnum) ||
1035 [ # # ]: 0 : ULONG_CMP_LT(j, js) ||
1036 : 0 : ULONG_CMP_GE(gps, js))
1037 : : return; /* No stall or GP completed since entering function. */
1038 : 0 : rnp = rdp->mynode;
1039 [ # # ][ # # ]: 0 : if (rcu_gp_in_progress(rsp) &&
1040 : 0 : (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) {
1041 : :
1042 : : /* We haven't checked in, so go dump stack. */
1043 : 0 : print_cpu_stall(rsp);
1044 : :
1045 [ # # ][ # # ]: 0 : } else if (rcu_gp_in_progress(rsp) &&
1046 : 0 : ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
1047 : :
1048 : : /* They had a few time units to dump stack, so complain. */
1049 : 0 : print_other_cpu_stall(rsp);
1050 : : }
1051 : : }
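The barrier sequence in check_cpu_stall() is an instance of a general pattern: the grace-period code publishes related fields in one order (separated by write barriers), and the stall check samples them in the opposite order (separated by read barriers), then discards any sample in which ->completed has already caught up with the ->gpnum it read first. The sketch below is not kernel code: the names are invented, C11 fences stand in for smp_wmb()/smp_rmb(), the wraparound-safe ULONG_CMP_*() comparisons are simplified, and it runs single-threaded purely to show the access order.

#include <stdatomic.h>
#include <stdio.h>

/* Toy grace-period state, published by the writer in a fixed order. */
static atomic_ulong completed, gp_start, jiffies_stall, gpnum;

/* Writer side: update ->completed first and ->gpnum last, with release
 * fences standing in for the kernel's smp_wmb(). */
static void toy_start_gp(unsigned long now)
{
	atomic_store(&completed, atomic_load(&gpnum));
	atomic_thread_fence(memory_order_release);
	atomic_store(&gp_start, now);
	atomic_store(&jiffies_stall, now + 21);	/* arbitrary stall timeout */
	atomic_thread_fence(memory_order_release);
	atomic_store(&gpnum, atomic_load(&gpnum) + 1);
}

/* Reader side: sample in the opposite order, then reject the sample if a
 * grace period completed in between (completed caught up with gpnum). */
static int toy_maybe_stalled(unsigned long now)
{
	unsigned long g = atomic_load(&gpnum);
	atomic_thread_fence(memory_order_acquire);
	unsigned long js = atomic_load(&jiffies_stall);
	unsigned long gs = atomic_load(&gp_start);
	atomic_thread_fence(memory_order_acquire);
	unsigned long c = atomic_load(&completed);

	if (c >= g || now < js || gs >= js)
		return 0;	/* no GP in progress, or sample inconsistent */
	return 1;		/* GP is old enough to complain about */
}

int main(void)
{
	toy_start_gp(100);
	printf("stalled at 110? %d\n", toy_maybe_stalled(110));	/* 0: too early */
	printf("stalled at 200? %d\n", toy_maybe_stalled(200));	/* 1 */
	return 0;
}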
1052 : :
1053 : : /**
1054 : : * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
1055 : : *
1056 : : * Set the stall-warning timeout way off into the future, thus preventing
1057 : : * any RCU CPU stall-warning messages from appearing in the current set of
1058 : : * RCU grace periods.
1059 : : *
1060 : : * The caller must disable hard irqs.
1061 : : */
1062 : 0 : void rcu_cpu_stall_reset(void)
1063 : : {
1064 : : struct rcu_state *rsp;
1065 : :
1066 [ # # ]: 0 : for_each_rcu_flavor(rsp)
1067 : 0 : rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
1068 : 0 : }
1069 : :
1070 : : /*
1071 : : * Initialize the specified rcu_data structure's callback list to empty.
1072 : : */
1073 : : static void init_callback_list(struct rcu_data *rdp)
1074 : : {
1075 : : int i;
1076 : :
1077 : : if (init_nocb_callback_list(rdp))
1078 : : return;
1079 : 474 : rdp->nxtlist = NULL;
1080 [ + + ][ + + ]: 2370 : for (i = 0; i < RCU_NEXT_SIZE; i++)
[ # # ][ + + ]
1081 : 1896 : rdp->nxttail[i] = &rdp->nxtlist;
1082 : : }
1083 : :
1084 : : /*
1085 : : * Determine the value that ->completed will have at the end of the
1086 : : * next subsequent grace period. This is used to tag callbacks so that
1087 : : * a CPU can invoke callbacks in a timely fashion even if that CPU has
1088 : : * been dyntick-idle for an extended period with callbacks under the
1089 : : * influence of RCU_FAST_NO_HZ.
1090 : : *
1091 : : * The caller must hold rnp->lock with interrupts disabled.
1092 : : */
1093 : : static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1094 : : struct rcu_node *rnp)
1095 : : {
1096 : : /*
1097 : : * If RCU is idle, we just wait for the next grace period.
1098 : : * But we can only be sure that RCU is idle if we are looking
1099 : : * at the root rcu_node structure -- otherwise, a new grace
1100 : : * period might have started, but just not yet gotten around
1101 : : * to initializing the current non-root rcu_node structure.
1102 : : */
1103 [ + - ][ + + ]: 5896192 : if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
[ + - ][ + + ]
[ + + ][ + ]
1104 : 2706490 : return rnp->completed + 1;
1105 : :
1106 : : /*
1107 : : * Otherwise, wait for a possible partial grace period and
1108 : : * then the subsequent full grace period.
1109 : : */
1110 : 1572634 : return rnp->completed + 2;
1111 : : }
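The +1 versus +2 rule above is easiest to see with concrete numbers. The helper below is a hypothetical standalone restatement of the same rule (not the kernel function); the two calls in main() walk through both cases.

#include <stdio.h>

/*
 * Toy restatement of rcu_cbs_completed(): given the ->gpnum and ->completed
 * of the rcu_node being examined, and whether it is the root, return the
 * ->completed value after which new callbacks are safe to invoke.
 */
static unsigned long toy_cbs_completed(int is_root, unsigned long gpnum,
				       unsigned long completed)
{
	/* Root and idle: the very next grace period suffices. */
	if (is_root && gpnum == completed)
		return completed + 1;
	/* Otherwise a grace period may already be under way (perhaps not yet
	 * visible at this non-root node), so wait for the one after next. */
	return completed + 2;
}

int main(void)
{
	/* RCU idle, asking at the root: completed=100, gpnum=100 -> wait for 101. */
	printf("%lu\n", toy_cbs_completed(1, 100, 100));
	/* Asking at a leaf while GP 101 is in progress: completed=100 -> wait for 102. */
	printf("%lu\n", toy_cbs_completed(0, 101, 100));
	return 0;
}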
1112 : :
1113 : : /*
1114 : : * Trace-event helper function for rcu_start_future_gp() and
1115 : : * rcu_nocb_wait_gp().
1116 : : */
1117 : : static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1118 : : unsigned long c, const char *s)
1119 : : {
1120 : : trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
1121 : : rnp->completed, c, rnp->level,
1122 : : rnp->grplo, rnp->grphi, s);
1123 : : }
1124 : :
1125 : : /*
1126 : : * Start some future grace period, as needed to handle newly arrived
1127 : : * callbacks. The required future grace periods are recorded in each
1128 : : * rcu_node structure's ->need_future_gp field.
1129 : : *
1130 : : * The caller must hold the specified rcu_node structure's ->lock.
1131 : : */
1132 : : static unsigned long __maybe_unused
1133 : 0 : rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1134 : : {
1135 : : unsigned long c;
1136 : : int i;
1137 : 1426357 : struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1138 : :
1139 : : /*
1140 : : * Pick up grace-period number for new callbacks. If this
1141 : : * grace period is already marked as needed, return to the caller.
1142 : : */
1143 : : c = rcu_cbs_completed(rdp->rsp, rnp);
1144 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1145 [ + + ]: 1426357 : if (rnp->need_future_gp[c & 0x1]) {
1146 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1147 : : return c;
1148 : : }
1149 : :
1150 : : /*
1151 : : * If either this rcu_node structure or the root rcu_node structure
1152 : : * believe that a grace period is in progress, then we must wait
1153 : : * for the one following, which is in "c". Because our request
1154 : : * will be noticed at the end of the current grace period, we don't
1155 : : * need to explicitly start one.
1156 : : */
1157 [ + + ][ - + ]: 223588 : if (rnp->gpnum != rnp->completed ||
1158 : 53 : ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
1159 : 223535 : rnp->need_future_gp[c & 0x1]++;
1160 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1161 : 223535 : return c;
1162 : : }
1163 : :
1164 : : /*
1165 : : * There might be no grace period in progress. If we don't already
1166 : : * hold it, acquire the root rcu_node structure's lock in order to
1167 : : * start one (if needed).
1168 : : */
1169 [ - + ]: 53 : if (rnp != rnp_root) {
1170 : 0 : raw_spin_lock(&rnp_root->lock);
1171 : : smp_mb__after_unlock_lock();
1172 : : }
1173 : :
1174 : : /*
1175 : : * Get a new grace-period number. If there really is no grace
1176 : : * period in progress, it will be smaller than the one we obtained
1177 : : * earlier. Adjust callbacks as needed. Note that even no-CBs
1178 : : * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
1179 : : */
1180 : 1426410 : c = rcu_cbs_completed(rdp->rsp, rnp_root);
1181 [ + + ]: 212 : for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
1182 [ - + ]: 159 : if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
1183 : 0 : rdp->nxtcompleted[i] = c;
1184 : :
1185 : : /*
 1186 : : * If the need for the required grace period is already
1187 : : * recorded, trace and leave.
1188 : : */
1189 [ + - ]: 53 : if (rnp_root->need_future_gp[c & 0x1]) {
1190 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
1191 : : goto unlock_out;
1192 : : }
1193 : :
1194 : : /* Record the need for the future grace period. */
1195 : 53 : rnp_root->need_future_gp[c & 0x1]++;
1196 : :
1197 : : /* If a grace period is not already in progress, start one. */
1198 [ + - ]: 53 : if (rnp_root->gpnum != rnp_root->completed) {
1199 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1200 : : } else {
1201 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1202 : 53 : rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1203 : : }
1204 : : unlock_out:
1205 [ - + ]: 53 : if (rnp != rnp_root)
1206 : : raw_spin_unlock(&rnp_root->lock);
1207 : 53 : return c;
1208 : : }
1209 : :
1210 : : /*
1211 : : * Clean up any old requests for the just-ended grace period. Also return
 1212 : : * whether any additional grace periods have been requested, and invoke
1213 : : * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
1214 : : * waiting for this grace period to complete.
1215 : : */
1216 : : static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1217 : : {
1218 : 223589 : int c = rnp->completed;
1219 : : int needmore;
1220 : 223589 : struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1221 : :
1222 : : rcu_nocb_gp_cleanup(rsp, rnp);
1223 : 223589 : rnp->need_future_gp[c & 0x1] = 0;
1224 : : needmore = rnp->need_future_gp[(c + 1) & 0x1];
1225 : : trace_rcu_future_gp(rnp, rdp, c,
1226 : : needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1227 : : return needmore;
1228 : : }
1229 : :
1230 : : /*
1231 : : * If there is room, assign a ->completed number to any callbacks on
1232 : : * this CPU that have not already been assigned. Also accelerate any
1233 : : * callbacks that were previously assigned a ->completed number that has
1234 : : * since proven to be too conservative, which can happen if callbacks get
1235 : : * assigned a ->completed number while RCU is idle, but with reference to
1236 : : * a non-root rcu_node structure. This function is idempotent, so it does
1237 : : * not hurt to call it repeatedly.
1238 : : *
1239 : : * The caller must hold rnp->lock with interrupts disabled.
1240 : : */
1241 : 0 : static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1242 : : struct rcu_data *rdp)
1243 : : {
1244 : : unsigned long c;
1245 : : int i;
1246 : :
1247 : : /* If the CPU has no callbacks, nothing to do. */
1248 [ + - ][ + ]: 1617068 : if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1249 : : return;
1250 : :
1251 : : /*
1252 : : * Starting from the sublist containing the callbacks most
1253 : : * recently assigned a ->completed number and working down, find the
1254 : : * first sublist that is not assignable to an upcoming grace period.
1255 : : * Such a sublist has something in it (first two tests) and has
1256 : : * a ->completed number assigned that will complete sooner than
1257 : : * the ->completed number for newly arrived callbacks (last test).
1258 : : *
1259 : : * The key point is that any later sublist can be assigned the
1260 : : * same ->completed number as the newly arrived callbacks, which
 1261 : : * means that the callbacks in any of these later sublists can be
1262 : : * grouped into a single sublist, whether or not they have already
1263 : : * been assigned a ->completed number.
1264 : : */
1265 : : c = rcu_cbs_completed(rsp, rnp);
1266 [ + + ]: 3544405 : for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
1267 [ + + ][ + + ]: 2852714 : if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
1268 : 1487769 : !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
1269 : : break;
1270 : :
1271 : : /*
 1272 : : * If there is no sublist for unassigned callbacks, leave.
1273 : : * At the same time, advance "i" one sublist, so that "i" will
1274 : : * index into the sublist where all the remaining callbacks should
1275 : : * be grouped into.
1276 : : */
1277 [ + - ]: 1426357 : if (++i >= RCU_NEXT_TAIL)
1278 : : return;
1279 : :
1280 : : /*
1281 : : * Assign all subsequent callbacks' ->completed number to the next
1282 : : * full grace period and group them all in the sublist initially
1283 : : * indexed by "i".
1284 : : */
1285 [ + + ]: 4970762 : for (; i <= RCU_NEXT_TAIL; i++) {
1286 : 3544405 : rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
1287 : 3544405 : rdp->nxtcompleted[i] = c;
1288 : : }
1289 : : /* Record any needed additional grace periods. */
1290 : 1426357 : rcu_start_future_gp(rnp, rdp);
1291 : :
1292 : : /* Trace depending on how much we were able to accelerate. */
1293 : : if (!*rdp->nxttail[RCU_WAIT_TAIL])
1294 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1295 : : else
1296 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
1297 : : }
1298 : :
1299 : : /*
1300 : : * Move any callbacks whose grace period has completed to the
1301 : : * RCU_DONE_TAIL sublist, then compact the remaining sublists and
1302 : : * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1303 : : * sublist. This function is idempotent, so it does not hurt to
1304 : : * invoke it repeatedly. As long as it is not invoked -too- often...
1305 : : *
1306 : : * The caller must hold rnp->lock with interrupts disabled.
1307 : : */
1308 : 0 : static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1309 : : struct rcu_data *rdp)
1310 : : {
1311 : : int i, j;
1312 : :
1313 : : /* If the CPU has no callbacks, nothing to do. */
1314 [ + - ][ + ]: 1279810 : if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1315 : 0 : return;
1316 : :
1317 : : /*
1318 : : * Find all callbacks whose ->completed numbers indicate that they
1319 : : * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
1320 : : */
1321 [ + + ]: 2236595 : for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
1322 [ + + ]: 1347505 : if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
1323 : : break;
1324 : 485116 : rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
1325 : : }
1326 : : /* Clean up any sublist tail pointers that were misordered above. */
1327 [ + + ]: 1374206 : for (j = RCU_WAIT_TAIL; j < i; j++)
1328 : 485116 : rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
1329 : :
1330 : : /* Copy down callbacks to fill in empty sublists. */
1331 [ + + ]: 1335059 : for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
1332 [ + + ]: 891624 : if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
1333 : : break;
1334 : 445969 : rdp->nxttail[j] = rdp->nxttail[i];
1335 : 445969 : rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
1336 : : }
1337 : :
1338 : : /* Classify any remaining callbacks. */
1339 : 889090 : rcu_accelerate_cbs(rsp, rnp, rdp);
1340 : : }
1341 : :
1342 : : /*
1343 : : * Update CPU-local rcu_data state to record the beginnings and ends of
1344 : : * grace periods. The caller must hold the ->lock of the leaf rcu_node
1345 : : * structure corresponding to the current CPU, and must have irqs disabled.
1346 : : */
1347 : 0 : static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1348 : : {
1349 : : /* Handle the ends of any preceding grace periods first. */
1350 [ + + ]: 1117217 : if (rdp->completed == rnp->completed) {
1351 : :
1352 : : /* No grace period end, so just accelerate recent callbacks. */
1353 : 325928 : rcu_accelerate_cbs(rsp, rnp, rdp);
1354 : :
1355 : : } else {
1356 : :
1357 : : /* Advance callbacks. */
1358 : 791289 : rcu_advance_cbs(rsp, rnp, rdp);
1359 : :
1360 : : /* Remember that we saw this grace-period completion. */
1361 : 791289 : rdp->completed = rnp->completed;
1362 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1363 : : }
1364 : :
1365 [ + + ]: 2234434 : if (rdp->gpnum != rnp->gpnum) {
1366 : : /*
1367 : : * If the current grace period is waiting for this CPU,
1368 : : * set up to detect a quiescent state, otherwise don't
1369 : : * go looking for one.
1370 : : */
1371 : 789326 : rdp->gpnum = rnp->gpnum;
1372 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1373 : 789326 : rdp->passed_quiesce = 0;
1374 : 789326 : rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1375 : : zero_cpu_stall_ticks(rdp);
1376 : : }
1377 : 1117217 : }
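__note_gp_changes() reconciles the CPU's private view of the grace-period numbers (rcu_data's ->gpnum and ->completed) with the leaf rcu_node's view. The toy restatement below (invented struct and function names, callback handling omitted) keeps only the two comparisons: one to notice that a grace period has ended, and one to notice that a new one has started so a fresh quiescent state must be reported.

#include <stdio.h>

struct toy_node { unsigned long gpnum, completed, qsmask; };
struct toy_rdp  { unsigned long gpnum, completed, grpmask; int qs_pending, passed_quiesce; };

/* Analogue of __note_gp_changes(): catch up with ends and starts of GPs. */
static void toy_note_gp_changes(struct toy_rdp *rdp, const struct toy_node *rnp)
{
	if (rdp->completed != rnp->completed)
		rdp->completed = rnp->completed;	/* a GP we were in has ended */

	if (rdp->gpnum != rnp->gpnum) {			/* a new GP has started */
		rdp->gpnum = rnp->gpnum;
		rdp->passed_quiesce = 0;		/* must report a fresh QS... */
		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); /* ...if being waited on */
	}
}

int main(void)
{
	struct toy_node rnp = { .gpnum = 5, .completed = 4, .qsmask = 0x3 };
	struct toy_rdp rdp = { .gpnum = 4, .completed = 4, .grpmask = 0x1 };

	toy_note_gp_changes(&rdp, &rnp);
	printf("gpnum=%lu qs_pending=%d\n", rdp.gpnum, rdp.qs_pending); /* 5 1 */
	return 0;
}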
1378 : :
1379 : 0 : static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1380 : : {
1381 : : unsigned long flags;
1382 : : struct rcu_node *rnp;
1383 : :
1384 : : local_irq_save(flags);
1385 : 3740361 : rnp = rdp->mynode;
1386 [ + + ][ + + ]: 3740361 : if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
1387 [ + + ]: 736098 : rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
1388 : 708310 : !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1389 [ + + ]: 3098109 : local_irq_restore(flags);
1390 : 3768800 : return;
1391 : : }
1392 : : smp_mb__after_unlock_lock();
1393 : 670040 : __note_gp_changes(rsp, rnp, rdp);
1394 : 670040 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1395 : : }
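note_gp_changes() is deliberately opportunistic: it compares its snapshots against the shared rcu_node values without the lock, and then only trylocks rnp->lock, deferring the update rather than spinning if the lock is busy. A pthreads analogue of that pattern, with invented struct names and GCC atomic builtins standing in for ACCESS_ONCE():

#include <pthread.h>
#include <stdbool.h>

struct shared_gp { pthread_mutex_t lock; unsigned long gpnum, completed; };
struct local_gp  { unsigned long gpnum, completed; };

static bool maybe_note_changes(struct shared_gp *s, struct local_gp *l)
{
	if ((l->gpnum == __atomic_load_n(&s->gpnum, __ATOMIC_RELAXED) &&
	     l->completed == __atomic_load_n(&s->completed, __ATOMIC_RELAXED)) ||
	    pthread_mutex_trylock(&s->lock) != 0)
		return false;		/* nothing changed, or lock busy: retry later */
	l->gpnum = s->gpnum;		/* re-read under the lock */
	l->completed = s->completed;
	pthread_mutex_unlock(&s->lock);
	return true;
}

If either the fast-path comparison or the trylock fails, the caller simply picks the change up on a later pass, exactly as the "irqs already off, so later" comment above indicates.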
1396 : :
1397 : : /*
1398 : : * Initialize a new grace period. Return 0 if no grace period required.
1399 : : */
1400 : 0 : static int rcu_gp_init(struct rcu_state *rsp)
1401 : : {
1402 : : struct rcu_data *rdp;
1403 : 223588 : struct rcu_node *rnp = rcu_get_root(rsp);
1404 : :
1405 : : rcu_bind_gp_kthread();
1406 : 223588 : raw_spin_lock_irq(&rnp->lock);
1407 : : smp_mb__after_unlock_lock();
1408 [ - + ]: 223588 : if (rsp->gp_flags == 0) {
1409 : : /* Spurious wakeup, tell caller to go back to sleep. */
1410 : : raw_spin_unlock_irq(&rnp->lock);
1411 : 0 : return 0;
1412 : : }
1413 : 223588 : rsp->gp_flags = 0; /* Clear all flags: New grace period. */
1414 : :
1415 [ - + ][ # # ]: 223588 : if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
[ # # ][ - + ]
1416 : : /*
1417 : : * Grace period already in progress, don't start another.
1418 : : * Not supposed to be able to happen.
1419 : : */
1420 : : raw_spin_unlock_irq(&rnp->lock);
1421 : 0 : return 0;
1422 : : }
1423 : :
1424 : : /* Advance to a new grace period and initialize state. */
1425 : 223588 : record_gp_stall_check_time(rsp);
1426 : 223588 : smp_wmb(); /* Record GP times before starting GP. */
1427 : 223588 : rsp->gpnum++;
1428 : : trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
1429 : : raw_spin_unlock_irq(&rnp->lock);
1430 : :
1431 : : /* Exclude any concurrent CPU-hotplug operations. */
1432 : 223588 : mutex_lock(&rsp->onoff_mutex);
1433 : :
1434 : : /*
1435 : : * Set the quiescent-state-needed bits in all the rcu_node
1436 : : * structures for all currently online CPUs in breadth-first order,
1437 : : * starting from the root rcu_node structure, relying on the layout
1438 : : * of the tree within the rsp->node[] array. Note that other CPUs
1439 : : * will access only the leaves of the hierarchy, thus seeing that no
1440 : : * grace period is in progress, at least until the corresponding
1441 : : * leaf node has been initialized. In addition, we have excluded
1442 : : * CPU-hotplug operations.
1443 : : *
1444 : : * The grace period cannot complete until the initialization
1445 : : * process finishes, because this kthread handles both.
1446 : : */
1447 [ + + ]: 670764 : rcu_for_each_node_breadth_first(rsp, rnp) {
1448 : 223588 : raw_spin_lock_irq(&rnp->lock);
1449 : : smp_mb__after_unlock_lock();
1450 : 447176 : rdp = this_cpu_ptr(rsp->rda);
1451 : 223588 : rcu_preempt_check_blocked_tasks(rnp);
1452 : 223588 : rnp->qsmask = rnp->qsmaskinit;
1453 : 223588 : ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
1454 [ - + ][ # # ]: 223588 : WARN_ON_ONCE(rnp->completed != rsp->completed);
[ # # ]
1455 : 223588 : ACCESS_ONCE(rnp->completed) = rsp->completed;
1456 [ + - ]: 223588 : if (rnp == rdp->mynode)
1457 : 223588 : __note_gp_changes(rsp, rnp, rdp);
1458 : : rcu_preempt_boost_start_gp(rnp);
1459 : : trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1460 : : rnp->level, rnp->grplo,
1461 : : rnp->grphi, rnp->qsmask);
1462 : : raw_spin_unlock_irq(&rnp->lock);
1463 : : #ifdef CONFIG_PROVE_RCU_DELAY
1464 : : if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
1465 : : system_state == SYSTEM_RUNNING)
1466 : : udelay(200);
1467 : : #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
1468 : 223588 : cond_resched();
1469 : : }
1470 : :
1471 : 223588 : mutex_unlock(&rsp->onoff_mutex);
1472 : 223588 : return 1;
1473 : : }
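The breadth-first walk above works because the rcu_node hierarchy is laid out level by level in a single array, so visiting parents before children is just a forward scan. A toy illustration, with invented field and function names:

struct toy_node { unsigned long qsmask, qsmaskinit, gpnum, completed; };

/* Root is nodes[0]; leaves occupy the tail of the array, so a plain
 * forward loop reaches every parent before any of its children. */
static void toy_gp_init(struct toy_node *nodes, int nr_nodes,
			unsigned long gpnum, unsigned long completed)
{
	int i;

	for (i = 0; i < nr_nodes; i++) {
		nodes[i].qsmask = nodes[i].qsmaskinit;	/* who owes a QS */
		nodes[i].gpnum = gpnum;
		nodes[i].completed = completed;
	}
}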
1474 : :
1475 : : /*
1476 : : * Do one round of quiescent-state forcing.
1477 : : */
1478 : 0 : static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1479 : : {
1480 : : int fqs_state = fqs_state_in;
1481 : 401760 : bool isidle = false;
1482 : : unsigned long maxj;
1483 : : struct rcu_node *rnp = rcu_get_root(rsp);
1484 : :
1485 : 401760 : rsp->n_force_qs++;
1486 [ + + ]: 401760 : if (fqs_state == RCU_SAVE_DYNTICK) {
1487 : : /* Collect dyntick-idle snapshots. */
1488 : : if (is_sysidle_rcu_state(rsp)) {
1489 : : isidle = 1;
1490 : : maxj = jiffies - ULONG_MAX / 4;
1491 : : }
1492 : 223223 : force_qs_rnp(rsp, dyntick_save_progress_counter,
1493 : : &isidle, &maxj);
1494 : : rcu_sysidle_report_gp(rsp, isidle, maxj);
1495 : : fqs_state = RCU_FORCE_QS;
1496 : : } else {
1497 : : /* Handle dyntick-idle and offline CPUs. */
1498 : : isidle = 0;
1499 : 178537 : force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1500 : : }
1501 : : /* Clear flag to prevent immediate re-entry. */
1502 [ - + ]: 401760 : if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1503 : 0 : raw_spin_lock_irq(&rnp->lock);
1504 : : smp_mb__after_unlock_lock();
1505 : 0 : rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
1506 : : raw_spin_unlock_irq(&rnp->lock);
1507 : : }
1508 : 401760 : return fqs_state;
1509 : : }
1510 : :
1511 : : /*
1512 : : * Clean up after the old grace period.
1513 : : */
1514 : 0 : static void rcu_gp_cleanup(struct rcu_state *rsp)
1515 : : {
1516 : : unsigned long gp_duration;
1517 : : int nocb = 0;
1518 : : struct rcu_data *rdp;
1519 : : struct rcu_node *rnp = rcu_get_root(rsp);
1520 : :
1521 : 223589 : raw_spin_lock_irq(&rnp->lock);
1522 : : smp_mb__after_unlock_lock();
1523 : 223589 : gp_duration = jiffies - rsp->gp_start;
1524 [ + + ]: 223589 : if (gp_duration > rsp->gp_max)
1525 : 1 : rsp->gp_max = gp_duration;
1526 : :
1527 : : /*
1528 : : * We know the grace period is complete, but to everyone else
1529 : : * it appears to still be ongoing. But it is also the case
1530 : : * that to everyone else it looks like there is nothing that
1531 : : * they can do to advance the grace period. It is therefore
1532 : : * safe for us to drop the lock in order to mark the grace
1533 : : * period as completed in all of the rcu_node structures.
1534 : : */
1535 : : raw_spin_unlock_irq(&rnp->lock);
1536 : :
1537 : : /*
1538 : : * Propagate new ->completed value to rcu_node structures so
1539 : : * that other CPUs don't have to wait until the start of the next
1540 : : * grace period to process their callbacks. This also avoids
1541 : : * some nasty RCU grace-period initialization races by forcing
1542 : : * the end of the current grace period to be completely recorded in
1543 : : * all of the rcu_node structures before the beginning of the next
1544 : : * grace period is recorded in any of the rcu_node structures.
1545 : : */
1546 [ + + ]: 447178 : rcu_for_each_node_breadth_first(rsp, rnp) {
1547 : 223589 : raw_spin_lock_irq(&rnp->lock);
1548 : : smp_mb__after_unlock_lock();
1549 : 223589 : ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1550 : 447178 : rdp = this_cpu_ptr(rsp->rda);
1551 [ + - ]: 223589 : if (rnp == rdp->mynode)
1552 : 223589 : __note_gp_changes(rsp, rnp, rdp);
1553 : : /* smp_mb() provided by prior unlock-lock pair. */
1554 : : nocb += rcu_future_gp_cleanup(rsp, rnp);
1555 : : raw_spin_unlock_irq(&rnp->lock);
1556 : 223589 : cond_resched();
1557 : : }
1558 : : rnp = rcu_get_root(rsp);
1559 : 223589 : raw_spin_lock_irq(&rnp->lock);
1560 : : smp_mb__after_unlock_lock();
1561 : : rcu_nocb_gp_set(rnp, nocb);
1562 : :
1563 : 223589 : rsp->completed = rsp->gpnum; /* Declare grace period done. */
1564 : : trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1565 : 223589 : rsp->fqs_state = RCU_GP_IDLE;
1566 : 447178 : rdp = this_cpu_ptr(rsp->rda);
1567 : 223589 : rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
1568 [ + + ]: 223589 : if (cpu_needs_another_gp(rsp, rdp)) {
1569 : 142612 : rsp->gp_flags = RCU_GP_FLAG_INIT;
1570 : : trace_rcu_grace_period(rsp->name,
1571 : : ACCESS_ONCE(rsp->gpnum),
1572 : : TPS("newreq"));
1573 : : }
1574 : : raw_spin_unlock_irq(&rnp->lock);
1575 : 223589 : }
1576 : :
1577 : : /*
1578 : : * Body of kthread that handles grace periods.
1579 : : */
1580 : 0 : static int __noreturn rcu_gp_kthread(void *arg)
1581 : : {
1582 : : int fqs_state;
1583 : : int gf;
1584 : : unsigned long j;
1585 : : int ret;
1586 : : struct rcu_state *rsp = arg;
1587 : : struct rcu_node *rnp = rcu_get_root(rsp);
1588 : :
1589 : : for (;;) {
1590 : :
1591 : : /* Handle grace-period start. */
1592 : : for (;;) {
1593 : : trace_rcu_grace_period(rsp->name,
1594 : : ACCESS_ONCE(rsp->gpnum),
1595 : : TPS("reqwait"));
1596 [ + + ][ + + ]: 304564 : wait_event_interruptible(rsp->gp_wq,
[ + - ]
1597 : : ACCESS_ONCE(rsp->gp_flags) &
1598 : : RCU_GP_FLAG_INIT);
1599 : : /* Locking provides needed memory barrier. */
1600 [ - + ]: 223588 : if (rcu_gp_init(rsp))
1601 : : break;
1602 : 0 : cond_resched();
1603 : 0 : flush_signals(current);
1604 : : trace_rcu_grace_period(rsp->name,
1605 : : ACCESS_ONCE(rsp->gpnum),
1606 : : TPS("reqwaitsig"));
1607 : 223589 : }
1608 : :
1609 : : /* Handle quiescent-state forcing. */
1610 : : fqs_state = RCU_SAVE_DYNTICK;
1611 : 223588 : j = jiffies_till_first_fqs;
1612 [ - + ]: 223588 : if (j > HZ) {
1613 : : j = HZ;
1614 : 625348 : jiffies_till_first_fqs = HZ;
1615 : : }
1616 : : ret = 0;
1617 : : for (;;) {
1618 [ + - ]: 625348 : if (!ret)
1619 : 625348 : rsp->jiffies_force_qs = jiffies + j;
1620 : : trace_rcu_grace_period(rsp->name,
1621 : : ACCESS_ONCE(rsp->gpnum),
1622 : : TPS("fqswait"));
1623 [ + - ][ + + ]: 1097173 : ret = wait_event_interruptible_timeout(rsp->gp_wq,
[ - + ][ + + ]
[ + - ][ + + ]
[ + + ][ + + ]
[ + - ]
1624 : : ((gf = ACCESS_ONCE(rsp->gp_flags)) &
1625 : : RCU_GP_FLAG_FQS) ||
1626 : : (!ACCESS_ONCE(rnp->qsmask) &&
1627 : : !rcu_preempt_blocked_readers_cgp(rnp)),
1628 : : j);
1629 : : /* Locking provides needed memory barriers. */
1630 : : /* If grace period done, leave loop. */
1631 [ + + ]: 625349 : if (!ACCESS_ONCE(rnp->qsmask) &&
1632 : : !rcu_preempt_blocked_readers_cgp(rnp))
1633 : : break;
1634 : : /* If time for quiescent-state forcing, do it. */
1635 [ - + ][ # # ]: 401760 : if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
1636 : 0 : (gf & RCU_GP_FLAG_FQS)) {
1637 : : trace_rcu_grace_period(rsp->name,
1638 : : ACCESS_ONCE(rsp->gpnum),
1639 : : TPS("fqsstart"));
1640 : 401760 : fqs_state = rcu_gp_fqs(rsp, fqs_state);
1641 : : trace_rcu_grace_period(rsp->name,
1642 : : ACCESS_ONCE(rsp->gpnum),
1643 : : TPS("fqsend"));
1644 : 401760 : cond_resched();
1645 : : } else {
1646 : : /* Deal with stray signal. */
1647 : 0 : cond_resched();
1648 : 0 : flush_signals(current);
1649 : : trace_rcu_grace_period(rsp->name,
1650 : : ACCESS_ONCE(rsp->gpnum),
1651 : : TPS("fqswaitsig"));
1652 : : }
1653 : 401760 : j = jiffies_till_next_fqs;
1654 [ - + ]: 401760 : if (j > HZ) {
1655 : : j = HZ;
1656 : 0 : jiffies_till_next_fqs = HZ;
1657 [ + - ]: 401760 : } else if (j < 1) {
1658 : : j = 1;
1659 : 0 : jiffies_till_next_fqs = 1;
1660 : : }
1661 : : }
1662 : :
1663 : : /* Handle grace-period end. */
1664 : 223589 : rcu_gp_cleanup(rsp);
1665 : 223589 : }
1666 : : }
1667 : :
1668 : 0 : static void rsp_wakeup(struct irq_work *work)
1669 : : {
1670 : : struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1671 : :
1672 : : /* Wake up rcu_gp_kthread() to start the grace period. */
1673 : 258609 : wake_up(&rsp->gp_wq);
1674 : 258609 : }
1675 : :
1676 : : /*
1677 : : * Start a new RCU grace period if warranted, re-initializing the hierarchy
1678 : : * in preparation for detecting the next grace period. The caller must hold
1679 : : * the root node's ->lock and hard irqs must be disabled.
1680 : : *
1681 : : * Note that it is legal for a dying CPU (which is marked as offline) to
1682 : : * invoke this function. This can happen when the dying CPU reports its
1683 : : * quiescent state.
1684 : : */
1685 : : static void
1686 : 264985 : rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1687 : : struct rcu_data *rdp)
1688 : : {
1689 [ + - ][ + + ]: 264985 : if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
1690 : : /*
1691 : : * Either we have not yet spawned the grace-period
1692 : : * task, this CPU does not need another grace period,
1693 : : * or a grace period is already in progress.
1694 : : * Either way, don't start a new grace period.
1695 : : */
1696 : 0 : return;
1697 : : }
1698 : 261614 : rsp->gp_flags = RCU_GP_FLAG_INIT;
1699 : : trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
1700 : : TPS("newreq"));
1701 : :
1702 : : /*
1703 : : * We can't do wakeups while holding the rnp->lock, as that
1704 : : * could cause possible deadlocks with the rq->lock. Defer
1705 : : * the wakeup to interrupt context. And don't bother waking
1706 : : * up the running kthread.
1707 : : */
1708 [ + + ]: 261614 : if (current != rsp->gp_kthread)
1709 : 261609 : irq_work_queue(&rsp->wakeup_work);
1710 : : }
1711 : :
1712 : : /*
1713 : : * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
1714 : : * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
1715 : : * is invoked indirectly from rcu_advance_cbs(), which would result in
1716 : : * endless recursion -- or would do so if it wasn't for the self-deadlock
1717 : : * that is encountered beforehand.
1718 : : */
1719 : : static void
1720 : 0 : rcu_start_gp(struct rcu_state *rsp)
1721 : : {
1722 : 529864 : struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1723 : 264932 : struct rcu_node *rnp = rcu_get_root(rsp);
1724 : :
1725 : : /*
1726 : : * If there is no grace period in progress right now, any
1727 : : * callbacks we have up to this point will be satisfied by the
1728 : : * next grace period. Also, advancing the callbacks reduces the
1729 : : * probability of false positives from cpu_needs_another_gp()
1730 : : * resulting in pointless grace periods. So, advance callbacks
1731 : : * then start the grace period!
1732 : : */
1733 : 264932 : rcu_advance_cbs(rsp, rnp, rdp);
1734 : 264932 : rcu_start_gp_advanced(rsp, rnp, rdp);
1735 : 264932 : }
1736 : :
1737 : : /*
1738 : : * Report a full set of quiescent states to the specified rcu_state
1739 : : * data structure. This involves cleaning up after the prior grace
1740 : : * period and letting rcu_start_gp() start up the next grace period
1741 : : * if one is needed. Note that the caller must hold rnp->lock, which
1742 : : * is released before return.
1743 : : */
1744 : 0 : static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1745 : : __releases(rcu_get_root(rsp)->lock)
1746 : : {
1747 [ - + ][ # # ]: 223589 : WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
[ # # ]
1748 : 223589 : raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1749 : 223589 : wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
1750 : 223589 : }
1751 : :
1752 : : /*
1753 : : * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1754 : : * Allows quiescent states for a group of CPUs to be reported at one go
1755 : : * to the specified rcu_node structure, though all the CPUs in the group
1756 : : * must be represented by the same rcu_node structure (which need not be
1757 : : * a leaf rcu_node structure, though it often will be). That structure's
1758 : : * lock must be held upon entry, and it is released before return.
1759 : : */
1760 : : static void
1761 : 755623 : rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1762 : : struct rcu_node *rnp, unsigned long flags)
1763 : : __releases(rnp->lock)
1764 : : {
1765 : : struct rcu_node *rnp_c;
1766 : :
1767 : : /* Walk up the rcu_node hierarchy. */
1768 : : for (;;) {
1769 [ - + ]: 755623 : if (!(rnp->qsmask & mask)) {
1770 : :
1771 : : /* Our bit has already been cleared, so done. */
1772 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1773 : 0 : return;
1774 : : }
1775 : 755623 : rnp->qsmask &= ~mask;
1776 : : trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1777 : : mask, rnp->qsmask, rnp->level,
1778 : : rnp->grplo, rnp->grphi,
1779 : : !!rnp->gp_tasks);
1780 [ + + ]: 755623 : if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
1781 : :
1782 : : /* Other bits still set at this level, so done. */
1783 : 532034 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1784 : 532034 : return;
1785 : : }
1786 : 223589 : mask = rnp->grpmask;
1787 [ - + ]: 223589 : if (rnp->parent == NULL) {
1788 : :
1789 : : /* No more levels. Exit loop holding root lock. */
1790 : :
1791 : : break;
1792 : : }
1793 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1794 : : rnp_c = rnp;
1795 : 0 : rnp = rnp->parent;
1796 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
1797 : : smp_mb__after_unlock_lock();
1798 [ # # ][ # # ]: 0 : WARN_ON_ONCE(rnp_c->qsmask);
[ # # ]
1799 : : }
1800 : :
1801 : : /*
1802 : : * Get here if we are the last CPU to pass through a quiescent
1803 : : * state for this grace period. Invoke rcu_report_qs_rsp()
1804 : : * to clean up and start the next grace period if one is needed.
1805 : : */
1806 : 223589 : rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
1807 : : }
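The loop above propagates a quiescent state upward: clear this group's bit at the current level, stop as soon as any sibling bit is still set, and only the task that clears the last bit at the root gets to report the end of the grace period. The same walk as a standalone sketch, stripped of the per-level locking and tracing (struct toy_rnp is illustrative):

#include <stdbool.h>
#include <stddef.h>

struct toy_rnp {
	struct toy_rnp *parent;
	unsigned long qsmask;	/* CPUs/groups still owing a QS */
	unsigned long grpmask;	/* this node's bit in its parent */
};

static bool toy_report_qs(struct toy_rnp *rnp, unsigned long mask)
{
	for (;;) {
		if (!(rnp->qsmask & mask))
			return false;		/* already cleared: nothing to do */
		rnp->qsmask &= ~mask;
		if (rnp->qsmask != 0)
			return false;		/* siblings still pending at this level */
		if (rnp->parent == NULL)
			return true;		/* cleared the root: grace period can end */
		mask = rnp->grpmask;
		rnp = rnp->parent;
	}
}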
1808 : :
1809 : : /*
1810 : : * Record a quiescent state for the specified CPU to that CPU's rcu_data
1811 : : * structure. This must be either called from the specified CPU, or
1812 : : * called when the specified CPU is known to be offline (and when it is
1813 : : * also known that no other CPU is concurrently trying to help the offline
1814 : : * CPU). The check against rnp->gpnum makes sure we are still in the
1815 : : * grace period of interest. We don't want to end the current grace period
1816 : : * based on quiescent states detected in an earlier grace period!
1817 : : */
1818 : : static void
1819 : 766007 : rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1820 : : {
1821 : : unsigned long flags;
1822 : : unsigned long mask;
1823 : : struct rcu_node *rnp;
1824 : :
1825 : 766007 : rnp = rdp->mynode;
1826 : 766007 : raw_spin_lock_irqsave(&rnp->lock, flags);
1827 : : smp_mb__after_unlock_lock();
1828 [ + - ][ + + ]: 768974 : if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
[ + + ]
1829 : 746488 : rnp->completed == rnp->gpnum) {
1830 : :
1831 : : /*
1832 : : * The grace period in which this quiescent state was
1833 : : * recorded has ended, so don't report it upwards.
1834 : : * We will instead need a new quiescent state that lies
1835 : : * within the current grace period.
1836 : : */
1837 : 0 : rdp->passed_quiesce = 0; /* need qs for new gp. */
1838 : 247824 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1839 : 768974 : return;
1840 : : }
1841 : 521150 : mask = rdp->grpmask;
1842 [ + + ]: 521150 : if ((rnp->qsmask & mask) == 0) {
1843 : 119100 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1844 : : } else {
1845 : 402050 : rdp->qs_pending = 0;
1846 : :
1847 : : /*
1848 : : * This GP can't end until cpu checks in, so all of our
1849 : : * callbacks can be processed during the next GP.
1850 : : */
1851 : 402050 : rcu_accelerate_cbs(rsp, rnp, rdp);
1852 : :
1853 : 402050 : rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1854 : : }
1855 : : }
1856 : :
1857 : : /*
1858 : : * Check to see if there is a new grace period of which this CPU
1859 : : * is not yet aware, and if so, set up local rcu_data state for it.
1860 : : * Otherwise, see if this CPU has just passed through its first
1861 : : * quiescent state for this grace period, and record that fact if so.
1862 : : */
1863 : : static void
1864 : 0 : rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1865 : : {
1866 : : /* Check for grace-period ends and beginnings. */
1867 : 3758157 : note_gp_changes(rsp, rdp);
1868 : :
1869 : : /*
1870 : : * Does this CPU still need to do its part for current grace period?
1871 : : * If no, return and let the other CPUs do their part as well.
1872 : : */
1873 [ + + ]: 3768321 : if (!rdp->qs_pending)
1874 : : return;
1875 : :
1876 : : /*
1877 : : * Was there a quiescent state since the beginning of the grace
1878 : : * period? If no, then exit and wait for the next call.
1879 : : */
1880 [ + + ]: 1282712 : if (!rdp->passed_quiesce)
1881 : : return;
1882 : :
1883 : : /*
1884 : : * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1885 : : * judge of that).
1886 : : */
1887 : 763453 : rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
1888 : : }
1889 : :
1890 : : #ifdef CONFIG_HOTPLUG_CPU
1891 : :
1892 : : /*
1893 : : * Send the specified CPU's RCU callbacks to the orphanage. The
1894 : : * specified CPU must be offline, and the caller must hold the
1895 : : * ->orphan_lock.
1896 : : */
1897 : : static void
1898 : 156 : rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1899 : : struct rcu_node *rnp, struct rcu_data *rdp)
1900 : : {
1901 : : /* No-CBs CPUs do not have orphanable callbacks. */
1902 : : if (rcu_is_nocb_cpu(rdp->cpu))
1903 : 156 : return;
1904 : :
1905 : : /*
1906 : : * Orphan the callbacks. First adjust the counts. This is safe
1907 : : * because _rcu_barrier() excludes CPU-hotplug operations, so it
1908 : : * cannot be running now. Thus no memory barrier is required.
1909 : : */
1910 [ + + ]: 156 : if (rdp->nxtlist != NULL) {
1911 : 48 : rsp->qlen_lazy += rdp->qlen_lazy;
1912 : 48 : rsp->qlen += rdp->qlen;
1913 : 48 : rdp->n_cbs_orphaned += rdp->qlen;
1914 : 48 : rdp->qlen_lazy = 0;
1915 : 48 : ACCESS_ONCE(rdp->qlen) = 0;
1916 : : }
1917 : :
1918 : : /*
1919 : : * Next, move those callbacks still needing a grace period to
1920 : : * the orphanage, where some other CPU will pick them up.
1921 : : * Some of the callbacks might have gone partway through a grace
1922 : : * period, but that is too bad. They get to start over because we
1923 : : * cannot assume that grace periods are synchronized across CPUs.
1924 : : * We don't bother updating the ->nxttail[] array yet, instead
1925 : : * we just reset the whole thing later on.
1926 : : */
1927 [ + + ]: 156 : if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
1928 : 42 : *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
1929 : 42 : rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
1930 : 42 : *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1931 : : }
1932 : :
1933 : : /*
1934 : : * Then move the ready-to-invoke callbacks to the orphanage,
1935 : : * where some other CPU will pick them up. These will not be
1936 : : * required to pass through another grace period: They are done.
1937 : : */
1938 [ + + ]: 156 : if (rdp->nxtlist != NULL) {
1939 : 6 : *rsp->orphan_donetail = rdp->nxtlist;
1940 : 6 : rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
1941 : : }
1942 : :
1943 : : /* Finally, initialize the rcu_data structure's list to empty. */
1944 : : init_callback_list(rdp);
1945 : : }
1946 : :
1947 : : /*
1948 : : * Adopt the RCU callbacks from the specified rcu_state structure's
1949 : : * orphanage. The caller must hold the ->orphan_lock.
1950 : : */
1951 : 156 : static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
1952 : : {
1953 : : int i;
1954 : 312 : struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
1955 : :
1956 : : /* No-CBs CPUs are handled specially. */
1957 : : if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
1958 : 0 : return;
1959 : :
1960 : : /* Do the accounting first. */
1961 : 156 : rdp->qlen_lazy += rsp->qlen_lazy;
1962 : 156 : rdp->qlen += rsp->qlen;
1963 : 156 : rdp->n_cbs_adopted += rsp->qlen;
1964 : : if (rsp->qlen_lazy != rsp->qlen)
1965 : : rcu_idle_count_callbacks_posted();
1966 : 156 : rsp->qlen_lazy = 0;
1967 : 156 : rsp->qlen = 0;
1968 : :
1969 : : /*
1970 : : * We do not need a memory barrier here because the only way we
1971 : : * can get here if there is an rcu_barrier() in flight is if
1972 : : * we are the task doing the rcu_barrier().
1973 : : */
1974 : :
1975 : : /* First adopt the ready-to-invoke callbacks. */
1976 [ + ]: 156 : if (rsp->orphan_donelist != NULL) {
1977 : 6 : *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
1978 : 162 : *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
1979 [ + + ]: 30 : for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
1980 [ + + ]: 24 : if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1981 : 13 : rdp->nxttail[i] = rsp->orphan_donetail;
1982 : 6 : rsp->orphan_donelist = NULL;
1983 : 6 : rsp->orphan_donetail = &rsp->orphan_donelist;
1984 : : }
1985 : :
1986 : : /* And then adopt the callbacks that still need a grace period. */
1987 [ # # ]: 0 : if (rsp->orphan_nxtlist != NULL) {
1988 : 42 : *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
1989 : 42 : rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
1990 : 42 : rsp->orphan_nxtlist = NULL;
1991 : 42 : rsp->orphan_nxttail = &rsp->orphan_nxtlist;
1992 : : }
1993 : : }
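Both halves of the orphanage dance, rcu_send_cbs_to_orphanage() above and rcu_adopt_orphan_cbs(), move whole chains in O(1) because each list keeps a pointer to its final ->next slot. A self-contained sketch of that splice, with made-up names:

#include <stddef.h>

struct cb { struct cb *next; };
struct cbq { struct cb *head; struct cb **tail; };	/* tail = &last->next, or &head */

static void cbq_init(struct cbq *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

/* Append everything on 'src' to 'dst' and leave 'src' empty. */
static void cbq_splice(struct cbq *dst, struct cbq *src)
{
	if (src->head == NULL)
		return;
	*dst->tail = src->head;	/* chain the donor's list onto ours */
	dst->tail = src->tail;	/* and inherit its tail pointer */
	cbq_init(src);
}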
1994 : :
1995 : : /*
1996 : : * Trace the fact that this CPU is going offline.
1997 : : */
1998 : : static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1999 : : {
2000 : : RCU_TRACE(unsigned long mask);
2001 : : RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
2002 : : RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
2003 : :
2004 : : RCU_TRACE(mask = rdp->grpmask);
2005 : : trace_rcu_grace_period(rsp->name,
2006 : : rnp->gpnum + 1 - !!(rnp->qsmask & mask),
2007 : : TPS("cpuofl"));
2008 : : }
2009 : :
2010 : : /*
2011 : : * The CPU has been completely removed, and some other CPU is reporting
2012 : : * this fact from process context. Do the remainder of the cleanup,
2013 : : * including orphaning the outgoing CPU's RCU callbacks, and also
2014 : : * adopting them. There can only be one CPU hotplug operation at a time,
2015 : : * so no other CPU can be attempting to update rcu_cpu_kthread_task.
2016 : : */
2017 : 0 : static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2018 : : {
2019 : : unsigned long flags;
2020 : : unsigned long mask;
2021 : : int need_report = 0;
2022 : 156 : struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2023 : 156 : struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
2024 : :
2025 : : /* Adjust any no-longer-needed kthreads. */
2026 : : rcu_boost_kthread_setaffinity(rnp, -1);
2027 : :
2028 : : /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
2029 : :
2030 : : /* Exclude any attempts to start a new grace period. */
2031 : 156 : mutex_lock(&rsp->onoff_mutex);
2032 : 156 : raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
2033 : :
2034 : : /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
2035 : 156 : rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
2036 : 156 : rcu_adopt_orphan_cbs(rsp, flags);
2037 : :
2038 : : /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
2039 : 156 : mask = rdp->grpmask; /* rnp->grplo is constant. */
2040 : : do {
2041 : 156 : raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2042 : : smp_mb__after_unlock_lock();
2043 : 156 : rnp->qsmaskinit &= ~mask;
2044 [ + - ]: 156 : if (rnp->qsmaskinit != 0) {
2045 [ - + ]: 156 : if (rnp != rdp->mynode)
2046 : : raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2047 : : break;
2048 : : }
2049 [ # # ]: 0 : if (rnp == rdp->mynode)
2050 : : need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
2051 : : else
2052 : : raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2053 : 0 : mask = rnp->grpmask;
2054 : 0 : rnp = rnp->parent;
2055 [ # # ]: 0 : } while (rnp != NULL);
2056 : :
2057 : : /*
2058 : : * We still hold the leaf rcu_node structure lock here, and
2059 : : * irqs are still disabled. The reason for this subterfuge is
2060 : : * that invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
2061 : : * held leads to deadlock.
2062 : : */
2063 : : raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
2064 : 156 : rnp = rdp->mynode;
2065 : : if (need_report & RCU_OFL_TASKS_NORM_GP)
2066 : : rcu_report_unblock_qs_rnp(rnp, flags);
2067 : : else
2068 : 156 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
2069 : : if (need_report & RCU_OFL_TASKS_EXP_GP)
2070 : : rcu_report_exp_rnp(rsp, rnp, true);
2071 [ + - ][ + - ]: 156 : WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
[ - + ][ # # ]
[ # # ]
2072 : : "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
2073 : : cpu, rdp->qlen, rdp->nxtlist);
2074 : : init_callback_list(rdp);
2075 : : /* Disallow further callbacks on this CPU. */
2076 : 156 : rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2077 : 156 : mutex_unlock(&rsp->onoff_mutex);
2078 : 156 : }
2079 : :
2080 : : #else /* #ifdef CONFIG_HOTPLUG_CPU */
2081 : :
2082 : : static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2083 : : {
2084 : : }
2085 : :
2086 : : static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2087 : : {
2088 : : }
2089 : :
2090 : : #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
2091 : :
2092 : : /*
2093 : : * Invoke any RCU callbacks that have made it to the end of their grace
2094 : : * period. Throttle as specified by rdp->blimit.
2095 : : */
2096 : 0 : static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2097 : : {
2098 : : unsigned long flags;
2099 : : struct rcu_head *next, *list, **tail;
2100 : : long bl, count, count_lazy;
2101 : : int i;
2102 : :
2103 : : /* If no callbacks are ready, just return. */
2104 [ + + ]: 879828 : if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
2105 : : trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
2106 : : trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
2107 : : need_resched(), is_idle_task(current),
2108 : : rcu_is_callbacks_kthread());
2109 : 880053 : return;
2110 : : }
2111 : :
2112 : : /*
2113 : : * Extract the list of ready callbacks, disabling to prevent
2114 : : * races with call_rcu() from interrupt handlers.
2115 : : */
2116 : : local_irq_save(flags);
2117 [ - + ][ # # ]: 880025 : WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
[ - ]
2118 : 879847 : bl = rdp->blimit;
2119 : : trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
2120 : 879847 : list = rdp->nxtlist;
2121 : 879847 : rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
2122 : 879847 : *rdp->nxttail[RCU_DONE_TAIL] = NULL;
2123 : 879847 : tail = rdp->nxttail[RCU_DONE_TAIL];
2124 [ + + ]: 4397031 : for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
2125 [ + + ]: 3517184 : if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
2126 : 1044238 : rdp->nxttail[i] = &rdp->nxtlist;
2127 [ - + ]: 879847 : local_irq_restore(flags);
2128 : :
2129 : : /* Invoke callbacks. */
2130 : : count = count_lazy = 0;
2131 [ + + ]: 11051653 : while (list) {
2132 : 10634508 : next = list->next;
2133 : : prefetch(next);
2134 : : debug_rcu_head_unqueue(list);
2135 [ + + ]: 10648285 : if (__rcu_reclaim(rsp->name, list))
2136 : 564664 : count_lazy++;
2137 : : list = next;
2138 : : /* Stop only if limit reached and CPU has something to do. */
2139 [ + + ][ + + ]: 10648285 : if (++count >= bl &&
2140 [ + + ]: 10565470 : (need_resched() ||
2141 : 1987631 : (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
2142 : : break;
2143 : : }
2144 : :
2145 : : local_irq_save(flags);
2146 : : trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
2147 : : is_idle_task(current),
2148 : : rcu_is_callbacks_kthread());
2149 : :
2150 : : /* Update count, and requeue any remaining callbacks. */
2151 [ + + ]: 879933 : if (list != NULL) {
2152 : 454536 : *tail = rdp->nxtlist;
2153 : 454536 : rdp->nxtlist = list;
2154 [ + + ]: 941856 : for (i = 0; i < RCU_NEXT_SIZE; i++)
2155 [ + + ]: 930682 : if (&rdp->nxtlist == rdp->nxttail[i])
2156 : 487320 : rdp->nxttail[i] = tail;
2157 : : else
2158 : : break;
2159 : : }
2160 : 879933 : smp_mb(); /* List handling before counting for rcu_barrier(). */
2161 : 879480 : rdp->qlen_lazy -= count_lazy;
2162 : 879480 : ACCESS_ONCE(rdp->qlen) -= count;
2163 : 879480 : rdp->n_cbs_invoked += count;
2164 : :
2165 : : /* Reinstate batch limit if we have worked down the excess. */
2166 [ + + ][ + + ]: 879480 : if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
2167 : 33 : rdp->blimit = blimit;
2168 : :
2169 : : /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
2170 [ + + ][ + + ]: 879480 : if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
2171 : 32 : rdp->qlen_last_fqs_check = 0;
2172 : 32 : rdp->n_force_qs_snap = rsp->n_force_qs;
2173 [ + + ]: 879448 : } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
2174 : 18 : rdp->qlen_last_fqs_check = rdp->qlen;
2175 [ - + ][ # # ]: 879480 : WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
[ - ]
2176 : :
2177 [ - + ]: 879275 : local_irq_restore(flags);
2178 : :
2179 : : /* Re-invoke RCU core processing if there are callbacks remaining. */
2180 [ + + ]: 879710 : if (cpu_has_callbacks_ready_to_invoke(rdp))
2181 : 454274 : invoke_rcu_core();
2182 : : }
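rcu_do_batch() detaches the ready callbacks, invokes at most rdp->blimit of them, and requeues whatever is left so that a large backlog cannot monopolize the CPU; the re-raised softirq at the end picks up the remainder. The throttled loop reduced to its essence (locking, statistics, and the sublist bookkeeping omitted; names are illustrative):

struct toy_cb { struct toy_cb *next; void (*func)(struct toy_cb *); };

/* Invoke up to 'blimit' callbacks; return whatever is left to requeue. */
static struct toy_cb *toy_do_batch(struct toy_cb *list, long blimit)
{
	long count = 0;

	while (list != NULL) {
		struct toy_cb *next = list->next;

		list->func(list);
		list = next;
		if (++count >= blimit)
			break;		/* leave the rest for the next pass */
	}
	return list;
}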
2183 : :
2184 : : /*
2185 : : * Check to see if this CPU is in a non-context-switch quiescent state
2186 : : * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
2187 : : * Also schedule RCU core processing.
2188 : : *
2189 : : * This function must be called from hardirq context. It is normally
2190 : : * invoked from the scheduling-clock interrupt. If rcu_pending returns
2191 : : * false, there is no point in invoking rcu_check_callbacks().
2192 : : */
2193 : 0 : void rcu_check_callbacks(int cpu, int user)
2194 : : {
2195 : 2477055 : trace_rcu_utilization(TPS("Start scheduler-tick"));
2196 : : increment_cpu_stall_ticks();
2197 [ + + ][ + + ]: 2591813 : if (user || rcu_is_cpu_rrupt_from_idle()) {
2198 : :
2199 : : /*
2200 : : * Get here if this CPU took its interrupt from user
2201 : : * mode or from the idle loop, and if this is not a
2202 : : * nested interrupt. In this case, the CPU is in
2203 : : * a quiescent state, so note it.
2204 : : *
2205 : : * No memory barrier is required here because both
2206 : : * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
2207 : : * variables that other CPUs neither access nor modify,
2208 : : * at least not while the corresponding CPU is online.
2209 : : */
2210 : :
2211 : : rcu_sched_qs(cpu);
2212 : : rcu_bh_qs(cpu);
2213 : :
2214 [ + + ]: 614178 : } else if (!in_softirq()) {
2215 : :
2216 : : /*
2217 : : * Get here if this CPU did not take its interrupt from
2218 : : * softirq, in other words, if it is not interrupting
2219 : : * a rcu_bh read-side critical section. This is an _bh
2220 : : * critical section, so note it.
2221 : : */
2222 : :
2223 : : rcu_bh_qs(cpu);
2224 : : }
2225 : : rcu_preempt_check_callbacks(cpu);
2226 [ + + ]: 2591813 : if (rcu_pending(cpu))
2227 : 1379737 : invoke_rcu_core();
2228 : 2588141 : trace_rcu_utilization(TPS("End scheduler-tick"));
2229 : 2588141 : }
2230 : :
2231 : : /*
2232 : : * Scan the leaf rcu_node structures, processing dyntick state for any that
2233 : : * have not yet encountered a quiescent state, using the function specified.
2234 : : * Also initiate boosting for any threads blocked on the root rcu_node.
2235 : : *
2236 : : * The caller must have suppressed start of new grace periods.
2237 : : */
2238 : 0 : static void force_qs_rnp(struct rcu_state *rsp,
2239 : : int (*f)(struct rcu_data *rsp, bool *isidle,
2240 : : unsigned long *maxj),
2241 : : bool *isidle, unsigned long *maxj)
2242 : : {
2243 : : unsigned long bit;
2244 : : int cpu;
2245 : : unsigned long flags;
2246 : : unsigned long mask;
2247 : : struct rcu_node *rnp;
2248 : :
2249 [ + + ]: 803520 : rcu_for_each_leaf_node(rsp, rnp) {
2250 : 401760 : cond_resched();
2251 : : mask = 0;
2252 : 401760 : raw_spin_lock_irqsave(&rnp->lock, flags);
2253 : : smp_mb__after_unlock_lock();
2254 [ - + ]: 803520 : if (!rcu_gp_in_progress(rsp)) {
2255 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
2256 : 0 : return;
2257 : : }
2258 [ + + ]: 803520 : if (rnp->qsmask == 0) {
2259 : : rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
2260 : 1551 : continue;
2261 : : }
2262 : 400209 : cpu = rnp->grplo;
2263 : : bit = 1;
2264 [ + + ]: 2401254 : for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2265 [ + + ]: 2001045 : if ((rnp->qsmask & bit) != 0) {
2266 [ + + ]: 1067894 : if ((rnp->qsmaskinit & bit) != 0)
2267 : 1067838 : *isidle = 0;
2268 [ + + ]: 1067894 : if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2269 : 715720 : mask |= bit;
2270 : : }
2271 : : }
2272 [ + + ]: 400209 : if (mask != 0) {
2273 : :
2274 : : /* rcu_report_qs_rnp() releases rnp->lock. */
2275 : 353573 : rcu_report_qs_rnp(mask, rsp, rnp, flags);
2276 : 353573 : continue;
2277 : : }
2278 : 46636 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
2279 : : }
2280 : : rnp = rcu_get_root(rsp);
2281 [ + + ]: 401760 : if (rnp->qsmask == 0) {
2282 : 155093 : raw_spin_lock_irqsave(&rnp->lock, flags);
2283 : : smp_mb__after_unlock_lock();
2284 : : rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
2285 : : }
2286 : : }
2287 : :
2288 : : /*
2289 : : * Force quiescent states on reluctant CPUs, and also detect which
2290 : : * CPUs are in dyntick-idle mode.
2291 : : */
2292 : 0 : static void force_quiescent_state(struct rcu_state *rsp)
2293 : : {
2294 : : unsigned long flags;
2295 : : bool ret;
2296 : : struct rcu_node *rnp;
2297 : : struct rcu_node *rnp_old = NULL;
2298 : :
2299 : : /* Funnel through hierarchy to reduce memory contention. */
2300 : 0 : rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
2301 [ # # ]: 0 : for (; rnp != NULL; rnp = rnp->parent) {
2302 [ # # # # ]: 0 : ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
2303 : 0 : !raw_spin_trylock(&rnp->fqslock);
2304 [ # # ]: 0 : if (rnp_old != NULL)
2305 : : raw_spin_unlock(&rnp_old->fqslock);
2306 [ # # ]: 0 : if (ret) {
2307 : 0 : rsp->n_force_qs_lh++;
2308 : 0 : return;
2309 : : }
2310 : : rnp_old = rnp;
2311 : : }
2312 : : /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
2313 : :
2314 : : /* Reached the root of the rcu_node tree, acquire lock. */
2315 : 0 : raw_spin_lock_irqsave(&rnp_old->lock, flags);
2316 : : smp_mb__after_unlock_lock();
2317 : : raw_spin_unlock(&rnp_old->fqslock);
2318 [ # # ]: 0 : if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
2319 : 0 : rsp->n_force_qs_lh++;
2320 : 0 : raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2321 : 0 : return; /* Someone beat us to it. */
2322 : : }
2323 : 0 : rsp->gp_flags |= RCU_GP_FLAG_FQS;
2324 : 0 : raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2325 : 0 : wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
2326 : : }
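The funnel in force_quiescent_state() keeps memory contention down when many CPUs decide to force quiescent states at once: each caller trylocks ->fqslock level by level and gives up as soon as a trylock fails, so at most one caller per subtree reaches the root. A pthreads sketch of just the funnel; the RCU_GP_FLAG_FQS early-exit check is left out, and 'leaf' is assumed non-NULL:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct funnel_node { struct funnel_node *parent; pthread_mutex_t fqslock; };

/* Return true if this caller won the race and should do the real work. */
static bool funnel_to_root(struct funnel_node *leaf)
{
	struct funnel_node *held = NULL;
	struct funnel_node *n;

	for (n = leaf; n != NULL; n = n->parent) {
		bool lost = pthread_mutex_trylock(&n->fqslock) != 0;

		if (held != NULL)
			pthread_mutex_unlock(&held->fqslock);
		if (lost)
			return false;	/* whoever holds this level covers us */
		held = n;
	}
	pthread_mutex_unlock(&held->fqslock);	/* reached the root */
	return true;
}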
2327 : :
2328 : : /*
2329 : : * This does the RCU core processing work for the specified rcu_state
2330 : : * and rcu_data structures. This may be called only from the CPU to
2331 : : * whom the rdp belongs.
2332 : : */
2333 : : static void
2334 : 0 : __rcu_process_callbacks(struct rcu_state *rsp)
2335 : : {
2336 : : unsigned long flags;
2337 : 7510644 : struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2338 : :
2339 [ - + ][ # # ]: 3755322 : WARN_ON_ONCE(rdp->beenonline == 0);
[ # # ]
2340 : :
2341 : : /* Update RCU state based on any recent quiescent states. */
2342 : 3755322 : rcu_check_quiescent_state(rsp, rdp);
2343 : :
2344 : : /* Does this CPU require a not-yet-started grace period? */
2345 : : local_irq_save(flags);
2346 [ + + ]: 3770177 : if (cpu_needs_another_gp(rsp, rdp)) {
2347 : 264716 : raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2348 : 264823 : rcu_start_gp(rsp);
2349 : 264823 : raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2350 : : } else {
2351 [ - + ]: 3501000 : local_irq_restore(flags);
2352 : : }
2353 : :
2354 : : /* If there are callbacks ready, invoke them. */
2355 [ + + ]: 3765161 : if (cpu_has_callbacks_ready_to_invoke(rdp))
2356 : 879580 : invoke_rcu_callbacks(rsp, rdp);
2357 : :
2358 : : /* Do any needed deferred wakeups of rcuo kthreads. */
2359 : : do_nocb_deferred_wakeup(rdp);
2360 : 3764921 : }
2361 : :
2362 : : /*
2363 : : * Do RCU core processing for the current CPU.
2364 : : */
2365 : 0 : static void rcu_process_callbacks(struct softirq_action *unused)
2366 : : {
2367 : : struct rcu_state *rsp;
2368 : :
2369 [ + ]: 1862348 : if (cpu_is_offline(smp_processor_id()))
2370 : 7223 : return;
2371 : 1876975 : trace_rcu_utilization(TPS("Start RCU core"));
2372 [ + + ]: 5624721 : for_each_rcu_flavor(rsp)
2373 : 3740523 : __rcu_process_callbacks(rsp);
2374 : 1884198 : trace_rcu_utilization(TPS("End RCU core"));
2375 : : }
2376 : :
2377 : : /*
2378 : : * Schedule RCU callback invocation. If the specified type of RCU
2379 : : * does not support RCU priority boosting, just do a direct call,
2380 : : * otherwise wake up the per-CPU kernel kthread. Note that because we
2381 : : * are running on the current CPU with interrupts disabled, the
2382 : : * rcu_cpu_kthread_task cannot disappear out from under us.
2383 : : */
2384 : 0 : static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
2385 : : {
2386 [ + ]: 879664 : if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
2387 : : return;
2388 [ + - ]: 879814 : if (likely(!rsp->boost)) {
2389 : 879814 : rcu_do_batch(rsp, rdp);
2390 : 879239 : return;
2391 : : }
2392 : 0 : invoke_rcu_callbacks_kthread();
2393 : : }
2394 : :
2395 : 0 : static void invoke_rcu_core(void)
2396 : : {
2397 [ + ]: 1826489 : if (cpu_online(smp_processor_id()))
2398 : 1879075 : raise_softirq(RCU_SOFTIRQ);
2399 : 0 : }
2400 : :
2401 : : /*
2402 : : * Handle any core-RCU processing required by a call_rcu() invocation.
2403 : : */
2404 : 0 : static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2405 : : struct rcu_head *head, unsigned long flags)
2406 : : {
2407 : : /*
2408 : : * If called from an extended quiescent state, invoke the RCU
2409 : : * core in order to force a re-evaluation of RCU's idleness.
2410 : : */
2411 [ - + ][ # # ]: 10681828 : if (!rcu_is_watching() && cpu_online(smp_processor_id()))
2412 : 0 : invoke_rcu_core();
2413 : :
2414 : : /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
2415 [ + + ][ + ]: 10677698 : if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
2416 : 21694 : return;
2417 : :
2418 : : /*
2419 : : * Force the grace period if too many callbacks or too long waiting.
2420 : : * Enforce hysteresis, and don't invoke force_quiescent_state()
2421 : : * if some other CPU has recently done so. Also, don't bother
2422 : : * invoking force_quiescent_state() if the newly enqueued callback
2423 : : * is the only one waiting for a grace period to complete.
2424 : : */
2425 [ + + ]: 9500900 : if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
2426 : :
2427 : : /* Are we ignoring a completed grace period? */
2428 : 282 : note_gp_changes(rsp, rdp);
2429 : :
2430 : : /* Start a new grace period if one not already started. */
2431 [ + + ]: 282 : if (!rcu_gp_in_progress(rsp)) {
2432 : : struct rcu_node *rnp_root = rcu_get_root(rsp);
2433 : :
2434 : 109 : raw_spin_lock(&rnp_root->lock);
2435 : : smp_mb__after_unlock_lock();
2436 : 109 : rcu_start_gp(rsp);
2437 : : raw_spin_unlock(&rnp_root->lock);
2438 : : } else {
2439 : : /* Give the grace period a kick. */
2440 : 173 : rdp->blimit = LONG_MAX;
2441 [ - + ][ # # ]: 173 : if (rsp->n_force_qs == rdp->n_force_qs_snap &&
2442 : 0 : *rdp->nxttail[RCU_DONE_TAIL] != head)
2443 : 0 : force_quiescent_state(rsp);
2444 : 173 : rdp->n_force_qs_snap = rsp->n_force_qs;
2445 : 173 : rdp->qlen_last_fqs_check = rdp->qlen;
2446 : : }
2447 : : }
2448 : : }
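The hysteresis mentioned in the comment above amounts to comparing the queue length against a snapshot taken the last time action was needed, so a queue that stays long does not re-trigger the slow path on every call_rcu() invocation. Reduced to a sketch with invented names:

#include <stdbool.h>

struct toy_q { long qlen, qlen_last_check; };

static bool toy_needs_attention(struct toy_q *q, long qhimark)
{
	if (q->qlen <= q->qlen_last_check + qhimark)
		return false;		/* grew less than qhimark since last time */
	q->qlen_last_check = q->qlen;	/* re-arm at the new level */
	return true;
}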
2449 : :
2450 : : /*
2451 : : * RCU callback function to leak a callback.
2452 : : */
2453 : : static void rcu_leak_callback(struct rcu_head *rhp)
2454 : : {
2455 : : }
2456 : :
2457 : : /*
2458 : : * Helper function for call_rcu() and friends. The cpu argument will
2459 : : * normally be -1, indicating "currently running CPU". It may specify
2460 : : * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier()
2461 : : * is expected to specify a CPU.
2462 : : */
2463 : : static void
2464 : 0 : __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
2465 : : struct rcu_state *rsp, int cpu, bool lazy)
2466 : : {
2467 : : unsigned long flags;
2468 : : struct rcu_data *rdp;
2469 : :
2470 [ - + ][ # # ]: 10633858 : WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
[ # # ]
2471 : : if (debug_rcu_head_queue(head)) {
2472 : : /* Probable double call_rcu(), so leak the callback. */
2473 : : ACCESS_ONCE(head->func) = rcu_leak_callback;
2474 : : WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
2475 : : return;
2476 : : }
2477 : 10633858 : head->func = func;
2478 : 10633858 : head->next = NULL;
2479 : :
2480 : : /*
2481 : : * Opportunistically note grace-period endings and beginnings.
2482 : : * Note that we might see a beginning right after we see an
2483 : : * end, but never vice versa, since this CPU has to pass through
2484 : : * a quiescent state betweentimes.
2485 : : */
2486 : : local_irq_save(flags);
2487 : 21317520 : rdp = this_cpu_ptr(rsp->rda);
2488 : :
2489 : : /* Add the callback to our list. */
2490 [ + ][ + ]: 10658760 : if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
2491 : : int offline;
2492 : :
2493 [ - ]: 0 : if (cpu != -1)
2494 : 0 : rdp = per_cpu_ptr(rsp->rda, cpu);
2495 : : offline = !__call_rcu_nocb(rdp, head, lazy, flags);
2496 [ - ][ # # ]: 0 : WARN_ON_ONCE(offline);
2497 : : /* __call_rcu() is illegal on offline CPU; leak the callback. */
2498 [ # # ]: 0 : local_irq_restore(flags);
2499 : : return;
2500 : : }
2501 : 10706583 : ACCESS_ONCE(rdp->qlen)++;
2502 [ + + ]: 10706583 : if (lazy)
2503 : 553482 : rdp->qlen_lazy++;
2504 : : else
2505 : : rcu_idle_count_callbacks_posted();
2506 : 10706583 : smp_mb(); /* Count before adding callback for rcu_barrier(). */
2507 : 10667017 : *rdp->nxttail[RCU_NEXT_TAIL] = head;
2508 : 10667017 : rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
2509 : :
2510 : : if (__is_kfree_rcu_offset((unsigned long)func))
2511 : : trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
2512 : : rdp->qlen_lazy, rdp->qlen);
2513 : : else
2514 : : trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
2515 : :
2516 : : /* Go handle any RCU core processing required. */
2517 : 10667017 : __call_rcu_core(rsp, rdp, head, flags);
2518 [ + + ]: 10694852 : local_irq_restore(flags);
2519 : : }
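The enqueue at the end of __call_rcu() works the same way whether the list is empty or not, because nxttail[RCU_NEXT_TAIL] always points at the final ->next slot (or at ->nxtlist itself). A tiny standalone illustration of that tail-pointer append, with made-up names:

struct qnode { struct qnode *next; };
struct tailq { struct qnode *head; struct qnode **tail; };	/* init: head = NULL, tail = &head */

static void tailq_enqueue(struct tailq *q, struct qnode *n)
{
	n->next = NULL;
	*q->tail = n;		/* stores into q->head when the queue is empty */
	q->tail = &n->next;
}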
2520 : :
2521 : : /*
2522 : : * Queue an RCU-sched callback for invocation after a grace period.
2523 : : */
2524 : 0 : void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2525 : : {
2526 : 10117452 : __call_rcu(head, func, &rcu_sched_state, -1, 0);
2527 : 10134180 : }
2528 : : EXPORT_SYMBOL_GPL(call_rcu_sched);
2529 : :
2530 : : /*
2531 : : * Queue an RCU callback for invocation after a quicker grace period.
2532 : : */
2533 : 0 : void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2534 : : {
2535 : 0 : __call_rcu(head, func, &rcu_bh_state, -1, 0);
2536 : 0 : }
2537 : : EXPORT_SYMBOL_GPL(call_rcu_bh);
2538 : :
2539 : : /*
2540 : : * Because a context switch is a grace period for RCU-sched and RCU-bh,
2541 : : * any blocking grace-period wait automatically implies a grace period
2542 : : * if there is only one CPU online at any point in time during execution
2543 : : * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
2544 : : * occasionally incorrectly indicate that there are multiple CPUs online
2545 : : * when there was in fact only one the whole time, as this just adds
2546 : : * some overhead: RCU still operates correctly.
2547 : : */
2548 : : static inline int rcu_blocking_is_gp(void)
2549 : : {
2550 : : int ret;
2551 : :
2552 : : might_sleep(); /* Check for RCU read-side critical section. */
2553 : 162 : preempt_disable();
2554 : 162 : ret = num_online_cpus() <= 1;
2555 : 162 : preempt_enable();
2556 : : return ret;
2557 : : }
2558 : :
2559 : : /**
2560 : : * synchronize_sched - wait until an rcu-sched grace period has elapsed.
2561 : : *
2562 : : * Control will return to the caller some time after a full rcu-sched
2563 : : * grace period has elapsed, in other words after all currently executing
2564 : : * rcu-sched read-side critical sections have completed. These read-side
2565 : : * critical sections are delimited by rcu_read_lock_sched() and
2566 : : * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
2567 : : * local_irq_disable(), and so on may be used in place of
2568 : : * rcu_read_lock_sched().
2569 : : *
2570 : : * This means that all preempt_disable code sequences, including NMI and
2571 : : * non-threaded hardware-interrupt handlers, in progress on entry will
2572 : : * have completed before this primitive returns. However, this does not
2573 : : * guarantee that softirq handlers will have completed, since in some
2574 : : * kernels, these handlers can run in process context, and can block.
2575 : : *
2576 : : * Note that this guarantee implies further memory-ordering guarantees.
2577 : : * On systems with more than one CPU, when synchronize_sched() returns,
2578 : : * each CPU is guaranteed to have executed a full memory barrier since the
2579 : : * end of its last RCU-sched read-side critical section whose beginning
2580 : : * preceded the call to synchronize_sched(). In addition, each CPU having
2581 : : * an RCU read-side critical section that extends beyond the return from
2582 : : * synchronize_sched() is guaranteed to have executed a full memory barrier
2583 : : * after the beginning of synchronize_sched() and before the beginning of
2584 : : * that RCU read-side critical section. Note that these guarantees include
2585 : : * CPUs that are offline, idle, or executing in user mode, as well as CPUs
2586 : : * that are executing in the kernel.
2587 : : *
2588 : : * Furthermore, if CPU A invoked synchronize_sched(), which returned
2589 : : * to its caller on CPU B, then both CPU A and CPU B are guaranteed
2590 : : * to have executed a full memory barrier during the execution of
2591 : : * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
2592 : : * again only if the system has more than one CPU).
2593 : : *
2594 : : * This primitive provides the guarantees made by the (now removed)
2595 : : * synchronize_kernel() API. In contrast, synchronize_rcu() only
2596 : : * guarantees that rcu_read_lock() sections will have completed.
2597 : : * In "classic RCU", these two guarantees happen to be one and
2598 : : * the same, but can differ in realtime RCU implementations.
2599 : : */
2600 : 0 : void synchronize_sched(void)
2601 : : {
2602 : : rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2603 : : !lock_is_held(&rcu_lock_map) &&
2604 : : !lock_is_held(&rcu_sched_lock_map),
2605 : : "Illegal synchronize_sched() in RCU-sched read-side critical section");
2606 [ + - ]: 162 : if (rcu_blocking_is_gp())
2607 : 162 : return;
2608 [ - + ]: 162 : if (rcu_expedited)
2609 : 0 : synchronize_sched_expedited();
2610 : : else
2611 : 162 : wait_rcu_gp(call_rcu_sched);
2612 : : }
2613 : : EXPORT_SYMBOL_GPL(synchronize_sched);
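As a usage illustration of the guarantee documented above, the classic updater pattern publishes a new version, waits with synchronize_sched(), and only then frees the old one. 'struct foo', 'global_foo', and 'update_foo()' are made-up names; readers are assumed to traverse global_foo under preempt_disable() or rcu_read_lock_sched():

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo { int data; };
static struct foo *global_foo;

static void update_foo(struct foo *newp)
{
	struct foo *oldp = global_foo;

	rcu_assign_pointer(global_foo, newp);	/* publish the new version */
	synchronize_sched();			/* wait out all preempt-disabled readers */
	kfree(oldp);				/* no reader can still reference oldp */
}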
2614 : :
2615 : : /**
2616 : : * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
2617 : : *
2618 : : * Control will return to the caller some time after a full rcu_bh grace
2619 : : * period has elapsed, in other words after all currently executing rcu_bh
2620 : : * read-side critical sections have completed. RCU read-side critical
2621 : : * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
2622 : : * and may be nested.
2623 : : *
2624 : : * See the description of synchronize_sched() for more detailed information
2625 : : * on memory ordering guarantees.
2626 : : */
2627 : 0 : void synchronize_rcu_bh(void)
2628 : : {
2629 : : rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2630 : : !lock_is_held(&rcu_lock_map) &&
2631 : : !lock_is_held(&rcu_sched_lock_map),
2632 : : "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
2633 [ # # ]: 0 : if (rcu_blocking_is_gp())
2634 : 0 : return;
2635 [ # # ]: 0 : if (rcu_expedited)
2636 : : synchronize_rcu_bh_expedited();
2637 : : else
2638 : 0 : wait_rcu_gp(call_rcu_bh);
2639 : : }
2640 : : EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
2641 : :
2642 : 0 : static int synchronize_sched_expedited_cpu_stop(void *data)
2643 : : {
2644 : : /*
2645 : : * There must be a full memory barrier on each affected CPU
2646 : : * between the time that try_stop_cpus() is called and the
2647 : : * time that it returns.
2648 : : *
2649 : : * In the current initial implementation of cpu_stop, the
2650 : : * above condition is already met when the control reaches
2651 : : * this point and the following smp_mb() is not strictly
2652 : : * necessary. Do smp_mb() anyway for documentation and
2653 : : * robustness against future implementation changes.
2654 : : */
2655 : 0 : smp_mb(); /* See above comment block. */
2656 : 0 : return 0;
2657 : : }
2658 : :
2659 : : /**
2660 : : * synchronize_sched_expedited - Brute-force RCU-sched grace period
2661 : : *
2662 : : * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
2663 : : * approach to force the grace period to end quickly. This consumes
2664 : : * significant time on all CPUs and is unfriendly to real-time workloads,
2665 : : * so is thus not recommended for any sort of common-case code. In fact,
2666 : : * and is thus not recommended for any sort of common-case code. In fact,
2667 : : * restructure your code to batch your updates, and then use a single
2668 : : * synchronize_sched() instead.
2669 : : *
2670 : : * Note that it is illegal to call this function while holding any lock
2671 : : * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
2672 : : * to call this function from a CPU-hotplug notifier. Failing to observe
2673 : : * these restrictions will result in deadlock.
2674 : : *
2675 : : * This implementation can be thought of as an application of ticket
2676 : : * locking to RCU, with sync_sched_expedited_started and
2677 : : * sync_sched_expedited_done taking on the roles of the halves
2678 : : * of the ticket-lock word. Each task atomically increments
2679 : : * sync_sched_expedited_started upon entry, snapshotting the old value,
2680 : : * then attempts to stop all the CPUs. If this succeeds, then each
2681 : : * CPU will have executed a context switch, resulting in an RCU-sched
2682 : : * grace period. We are then done, so we use atomic_cmpxchg() to
2683 : : * update sync_sched_expedited_done to match our snapshot -- but
2684 : : * only if someone else has not already advanced past our snapshot.
2685 : : *
2686 : : * On the other hand, if try_stop_cpus() fails, we check the value
2687 : : * of sync_sched_expedited_done. If it has advanced past our
2688 : : * initial snapshot, then someone else must have forced a grace period
2689 : : * some time after we took our snapshot. In this case, our work is
2690 : : * done for us, and we can simply return. Otherwise, we try again,
2691 : : * but keep our initial snapshot for purposes of checking for someone
2692 : : * doing our work for us.
2693 : : *
2694 : : * If we fail too many times in a row, we fall back to synchronize_sched().
2695 : : */
2696 : 0 : void synchronize_sched_expedited(void)
2697 : : {
2698 : : long firstsnap, s, snap;
2699 : : int trycount = 0;
2700 : : struct rcu_state *rsp = &rcu_sched_state;
2701 : :
2702 : : /*
2703 : : * If we are in danger of counter wrap, just do synchronize_sched().
2704 : : * By allowing sync_sched_expedited_started to advance no more than
2705 : : * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
2706 : : * that more than 3.5 billion CPUs would be required to force a
2707 : : * counter wrap on a 32-bit system. Quite a few more CPUs would of
2708 : : * course be required on a 64-bit system.
2709 : : */
2710 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
2711 : : (ulong)atomic_long_read(&rsp->expedited_done) +
2712 : : ULONG_MAX / 8)) {
2713 : 0 : synchronize_sched();
2714 : : atomic_long_inc(&rsp->expedited_wrap);
2715 : : return;
2716 : : }
2717 : :
2718 : : /*
2719 : : * Take a ticket. Note that atomic_inc_return() implies a
2720 : : * full memory barrier.
2721 : : */
2722 : : snap = atomic_long_inc_return(&rsp->expedited_start);
2723 : : firstsnap = snap;
2724 : 0 : get_online_cpus();
2725 [ # # ][ # # ]: 0 : WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
[ # # ]
2726 : :
2727 : : /*
2728 : : * Each pass through the following loop attempts to force a
2729 : : * context switch on each CPU.
2730 : : */
2731 [ # # ]: 0 : while (try_stop_cpus(cpu_online_mask,
2732 : : synchronize_sched_expedited_cpu_stop,
2733 : : NULL) == -EAGAIN) {
2734 : 0 : put_online_cpus();
2735 : : atomic_long_inc(&rsp->expedited_tryfail);
2736 : :
2737 : : /* Check to see if someone else did our work for us. */
2738 : : s = atomic_long_read(&rsp->expedited_done);
2739 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2740 : : /* ensure test happens before caller kfree */
2741 : 0 : smp_mb__before_atomic_inc(); /* ^^^ */
2742 : : atomic_long_inc(&rsp->expedited_workdone1);
2743 : : return;
2744 : : }
2745 : :
2746 : : /* No joy, try again later. Or just synchronize_sched(). */
2747 [ # # ]: 0 : if (trycount++ < 10) {
2748 : 0 : udelay(trycount * num_online_cpus());
2749 : : } else {
2750 : 0 : wait_rcu_gp(call_rcu_sched);
2751 : : atomic_long_inc(&rsp->expedited_normal);
2752 : : return;
2753 : : }
2754 : :
2755 : : /* Recheck to see if someone else did our work for us. */
2756 : : s = atomic_long_read(&rsp->expedited_done);
2757 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2758 : : /* ensure test happens before caller kfree */
2759 : 0 : smp_mb__before_atomic_inc(); /* ^^^ */
2760 : : atomic_long_inc(&rsp->expedited_workdone2);
2761 : : return;
2762 : : }
2763 : :
2764 : : /*
2765 : : * Refetching ->expedited_start allows later
2766 : : * callers to piggyback on our grace period. We retry
2767 : : * after they started, so our grace period works for them,
2768 : : * and they started after our first try, so their grace
2769 : : * period works for us.
2770 : : */
2771 : 0 : get_online_cpus();
2772 : : snap = atomic_long_read(&rsp->expedited_start);
2773 : 0 : smp_mb(); /* ensure read is before try_stop_cpus(). */
2774 : : }
2775 : : atomic_long_inc(&rsp->expedited_stoppedcpus);
2776 : :
2777 : : /*
2778 : : * Everyone up to our most recent fetch is covered by our grace
2779 : : * period. Update the counter, but only if our work is still
2780 : : * relevant -- which it won't be if someone who started later
2781 : : * than we did already did their update.
2782 : : */
2783 : : do {
2784 : : atomic_long_inc(&rsp->expedited_done_tries);
2785 : : s = atomic_long_read(&rsp->expedited_done);
2786 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
2787 : : /* ensure test happens before caller kfree */
2788 : 0 : smp_mb__before_atomic_inc(); /* ^^^ */
2789 : : atomic_long_inc(&rsp->expedited_done_lost);
2790 : : break;
2791 : : }
2792 [ # # ]: 0 : } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
2793 : : atomic_long_inc(&rsp->expedited_done_exit);
2794 : :
2795 : 0 : put_online_cpus();
2796 : : }
2797 : : EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
2798 : :
2799 : : /*
2800 : : * Check to see if there is any immediate RCU-related work to be done
2801 : : * by the current CPU, for the specified type of RCU, returning 1 if so.
2802 : : * The checks are in order of increasing expense: checks that can be
2803 : : * carried out against CPU-local state are performed first. However,
2804 : : * we must check for CPU stalls first, else we might not get a chance.
2805 : : */
2806 : 0 : static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
2807 : : {
2808 : 3650483 : struct rcu_node *rnp = rdp->mynode;
2809 : :
2810 : 3650483 : rdp->n_rcu_pending++;
2811 : :
2812 : : /* Check for CPU stalls, if enabled. */
2813 : 3650483 : check_cpu_stall(rsp, rdp);
2814 : :
2815 : : /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
2816 : : if (rcu_nohz_full_cpu(rsp))
2817 : : return 0;
2818 : :
2819 : : /* Is the RCU core waiting for a quiescent state from this CPU? */
2820 [ + + ][ + + ]: 7382589 : if (rcu_scheduler_fully_active &&
2821 [ + + ]: 995047 : rdp->qs_pending && !rdp->passed_quiesce) {
2822 : 40074 : rdp->n_rp_qs_pending++;
2823 [ + + ][ + ]: 3692032 : } else if (rdp->qs_pending && rdp->passed_quiesce) {
2824 : 970663 : rdp->n_rp_report_qs++;
2825 : 970663 : return 1;
2826 : : }
2827 : :
2828 : : /* Does this CPU have callbacks ready to invoke? */
2829 [ + + ]: 2761443 : if (cpu_has_callbacks_ready_to_invoke(rdp)) {
2830 : 17228 : rdp->n_rp_cb_ready++;
2831 : 17228 : return 1;
2832 : : }
2833 : :
2834 : : /* Has RCU gone idle with this CPU needing another grace period? */
2835 [ + + ]: 2744215 : if (cpu_needs_another_gp(rsp, rdp)) {
2836 : 78005 : rdp->n_rp_cpu_needs_gp++;
2837 : 78005 : return 1;
2838 : : }
2839 : :
2840 : : /* Has another RCU grace period completed? */
2841 [ + + ]: 2689322 : if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
2842 : 258488 : rdp->n_rp_gp_completed++;
2843 : 258488 : return 1;
2844 : : }
2845 : :
2846 : : /* Has a new RCU grace period started? */
2847 [ + + ]: 2430834 : if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
2848 : 76569 : rdp->n_rp_gp_started++;
2849 : 76569 : return 1;
2850 : : }
2851 : :
2852 : : /* Does this CPU need a deferred NOCB wakeup? */
2853 : : if (rcu_nocb_need_deferred_wakeup(rdp)) {
2854 : : rdp->n_rp_nocb_defer_wakeup++;
2855 : : return 1;
2856 : : }
2857 : :
2858 : : /* nothing to do */
2859 : 2354265 : rdp->n_rp_need_nothing++;
2860 : 2354265 : return 0;
2861 : : }
2862 : :
2863 : : /*
2864 : : * Check to see if there is any immediate RCU-related work to be done
2865 : : * by the current CPU, returning 1 if so. This function is part of the
2866 : : * RCU implementation; it is -not- an exported member of the RCU API.
2867 : : */
2868 : 0 : static int rcu_pending(int cpu)
2869 : : {
2870 : : struct rcu_state *rsp;
2871 : :
2872 [ + + ]: 4907131 : for_each_rcu_flavor(rsp)
2873 [ + + ]: 3598258 : if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
2874 : : return 1;
2875 : : return 0;
2876 : : }
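/*
 * Editor's note: in this kernel, rcu_pending() is typically reached from
 * the scheduling-clock interrupt via rcu_check_callbacks(), which raises
 * RCU_SOFTIRQ when work is pending, along the lines of:
 *
 *	if (rcu_pending(cpu))
 *		invoke_rcu_core();
 */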
2877 : :
2878 : : /*
2879 : : * Return true if the specified CPU has any callback. If all_lazy is
2880 : : * non-NULL, store an indication of whether all callbacks are lazy.
2881 : : * (If there are no callbacks, all of them are deemed to be lazy.)
2882 : : */
2883 : 0 : static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
2884 : : {
2885 : : bool al = true;
2886 : : bool hc = false;
2887 : : struct rcu_data *rdp;
2888 : : struct rcu_state *rsp;
2889 : :
2890 [ + + ]: 17329846 : for_each_rcu_flavor(rsp) {
2891 : 12404311 : rdp = per_cpu_ptr(rsp->rda, cpu);
2892 [ + + ]: 12404311 : if (!rdp->nxtlist)
2893 : 9857219 : continue;
2894 : : hc = true;
2895 [ + + ][ + ]: 2547092 : if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
2896 : : al = false;
2897 : : break;
2898 : : }
2899 : : }
2900 [ # # ]: 7472921 : if (all_lazy)
2901 : 0 : *all_lazy = al;
2902 : 0 : return hc;
2903 : : }
2904 : :
2905 : : /*
2906 : : * Helper function for _rcu_barrier() tracing. If tracing is disabled,
2907 : : * the compiler is expected to optimize this away.
2908 : : */
2909 : : static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
2910 : : int cpu, unsigned long done)
2911 : : {
2912 : : trace_rcu_barrier(rsp->name, s, cpu,
2913 : : atomic_read(&rsp->barrier_cpu_count), done);
2914 : : }
2915 : :
2916 : : /*
2917 : : * RCU callback function for _rcu_barrier(). If we are last, wake
2918 : : * up the task executing _rcu_barrier().
2919 : : */
2920 : 0 : static void rcu_barrier_callback(struct rcu_head *rhp)
2921 : : {
2922 : : struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
2923 : 0 : struct rcu_state *rsp = rdp->rsp;
2924 : :
2925 [ # # ]: 0 : if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
2926 : : _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
2927 : 0 : complete(&rsp->barrier_completion);
2928 : : } else {
2929 : : _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
2930 : : }
2931 : 0 : }
2932 : :
2933 : : /*
2934 : : * Called with preemption disabled, and from cross-cpu IRQ context.
2935 : : */
2936 : 0 : static void rcu_barrier_func(void *type)
2937 : : {
2938 : : struct rcu_state *rsp = type;
2939 : 0 : struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2940 : :
2941 : : _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2942 : 0 : atomic_inc(&rsp->barrier_cpu_count);
2943 : 0 : rsp->call(&rdp->barrier_head, rcu_barrier_callback);
2944 : 0 : }
2945 : :
2946 : : /*
2947 : : * Orchestrate the specified type of RCU barrier, waiting for all
2948 : : * RCU callbacks of the specified type to complete.
2949 : : */
2950 : 0 : static void _rcu_barrier(struct rcu_state *rsp)
2951 : : {
2952 : : int cpu;
2953 : : struct rcu_data *rdp;
2954 : 0 : unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
2955 : : unsigned long snap_done;
2956 : :
2957 : : _rcu_barrier_trace(rsp, "Begin", -1, snap);
2958 : :
2959 : : /* Take mutex to serialize concurrent rcu_barrier() requests. */
2960 : 0 : mutex_lock(&rsp->barrier_mutex);
2961 : :
2962 : : /*
2963 : : * Ensure that all prior references, including to ->n_barrier_done,
2964 : : * are ordered before the _rcu_barrier() machinery.
2965 : : */
2966 : 0 : smp_mb(); /* See above block comment. */
2967 : :
2968 : : /*
2969 : : * Recheck ->n_barrier_done to see if others did our work for us.
2970 : : * This means checking ->n_barrier_done for an even-to-odd-to-even
2971 : : * transition. The "if" expression below therefore rounds the old
2972 : : * value up to the next even number and adds two before comparing.
2973 : : */
2974 : 0 : snap_done = rsp->n_barrier_done;
2975 : : _rcu_barrier_trace(rsp, "Check", -1, snap_done);
2976 : :
2977 : : /*
2978 : : * If the value in snap is odd, we needed to wait for the current
2979 : : * rcu_barrier() to complete, then wait for the next one, in other
2980 : : * words, we need the value of snap_done to be three larger than
2981 : : * the value of snap. On the other hand, if the value in snap is
2982 : : * even, we only had to wait for the next rcu_barrier() to complete,
2983 : : * in other words, we need the value of snap_done to be only two
2984 : : * greater than the value of snap. The "(snap + 3) & ~0x1" computes
2985 : : * this for us (thank you, Linus!).
2986 : : */
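/*
 * Worked example (editor's note): if snap is even, say 4, then
 * (4 + 3) & ~0x1 == 6 == snap + 2; if snap is odd, say 5, then
 * (5 + 3) & ~0x1 == 8 == snap + 3 -- the two cases called out above.
 */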
2987 [ # # ]: 0 : if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
2988 : : _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
2989 : 0 : smp_mb(); /* caller's subsequent code after above check. */
2990 : 0 : mutex_unlock(&rsp->barrier_mutex);
2991 : 0 : return;
2992 : : }
2993 : :
2994 : : /*
2995 : : * Increment ->n_barrier_done to avoid duplicate work. Use
2996 : : * ACCESS_ONCE() to prevent the compiler from speculating
2997 : : * the increment to precede the early-exit check.
2998 : : */
2999 : 0 : ACCESS_ONCE(rsp->n_barrier_done)++;
3000 [ # # ][ # # ]: 0 : WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
[ # # ]
3001 : : _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
3002 : 0 : smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
3003 : :
3004 : : /*
3005 : : * Initialize the count to one rather than to zero in order to
3006 : : * avoid a too-soon return to zero in case of a short grace period
3007 : : * (or preemption of this task). Exclude CPU-hotplug operations
3008 : : * to ensure that no offline CPU has callbacks queued.
3009 : : */
3010 : : init_completion(&rsp->barrier_completion);
3011 : 0 : atomic_set(&rsp->barrier_cpu_count, 1);
3012 : 0 : get_online_cpus();
3013 : :
3014 : : /*
3015 : : * Force each CPU with callbacks to register a new callback.
3016 : : * When that callback is invoked, we will know that all of the
3017 : : * corresponding CPU's preceding callbacks have been invoked.
3018 : : */
3019 [ # # ]: 0 : for_each_possible_cpu(cpu) {
3020 [ # # ]: 0 : if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
3021 : 0 : continue;
3022 : 0 : rdp = per_cpu_ptr(rsp->rda, cpu);
3023 : : if (rcu_is_nocb_cpu(cpu)) {
3024 : : _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
3025 : : rsp->n_barrier_done);
3026 : : atomic_inc(&rsp->barrier_cpu_count);
3027 : : __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
3028 : : rsp, cpu, 0);
3029 [ # # ]: 0 : } else if (ACCESS_ONCE(rdp->qlen)) {
3030 : : _rcu_barrier_trace(rsp, "OnlineQ", cpu,
3031 : : rsp->n_barrier_done);
3032 : 0 : smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
3033 : : } else {
3034 : : _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
3035 : : rsp->n_barrier_done);
3036 : : }
3037 : : }
3038 : 0 : put_online_cpus();
3039 : :
3040 : : /*
3041 : : * Now that we have an rcu_barrier_callback() callback on each
3042 : : * CPU, and have thus counted each of them, remove the initial count.
3043 : : */
3044 [ # # ]: 0 : if (atomic_dec_and_test(&rsp->barrier_cpu_count))
3045 : 0 : complete(&rsp->barrier_completion);
3046 : :
3047 : : /* Increment ->n_barrier_done to prevent duplicate work. */
3048 : 0 : smp_mb(); /* Keep increment after above mechanism. */
3049 : 0 : ACCESS_ONCE(rsp->n_barrier_done)++;
3050 [ # # ][ # # ]: 0 : WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
[ # # ]
3051 : : _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
3052 : 0 : smp_mb(); /* Keep increment before caller's subsequent code. */
3053 : :
3054 : : /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
3055 : 0 : wait_for_completion(&rsp->barrier_completion);
3056 : :
3057 : : /* Other rcu_barrier() invocations can now safely proceed. */
3058 : 0 : mutex_unlock(&rsp->barrier_mutex);
3059 : : }
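/*
 * Editor's illustrative sketch, not part of this file: the
 * "initialize the count to one" trick used by _rcu_barrier() above, in
 * self-contained user-space form. post_work() is a hypothetical helper
 * that runs one_done() asynchronously; the initial reference keeps the
 * count from reaching zero before every worker has been enqueued.
 *
 *	#include <stdatomic.h>
 *	#include <semaphore.h>
 *
 *	static atomic_int pending;
 *	static sem_t all_done;
 *	extern void post_work(int worker, void (*cb)(void));
 *
 *	static void one_done(void)		// like rcu_barrier_callback()
 *	{
 *		if (atomic_fetch_sub(&pending, 1) == 1)
 *			sem_post(&all_done);
 *	}
 *
 *	void barrier_like(int nr_workers)	// like _rcu_barrier()
 *	{
 *		sem_init(&all_done, 0, 0);
 *		atomic_store(&pending, 1);		// the initial count of one
 *		for (int i = 0; i < nr_workers; i++) {
 *			atomic_fetch_add(&pending, 1);
 *			post_work(i, one_done);		// will call one_done() later
 *		}
 *		one_done();				// drop the initial count
 *		sem_wait(&all_done);			// all workers have finished
 *	}
 */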
3060 : :
3061 : : /**
3062 : : * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
3063 : : */
3064 : 0 : void rcu_barrier_bh(void)
3065 : : {
3066 : 0 : _rcu_barrier(&rcu_bh_state);
3067 : 0 : }
3068 : : EXPORT_SYMBOL_GPL(rcu_barrier_bh);
3069 : :
3070 : : /**
3071 : : * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
3072 : : */
3073 : 0 : void rcu_barrier_sched(void)
3074 : : {
3075 : 0 : _rcu_barrier(&rcu_sched_state);
3076 : 0 : }
3077 : : EXPORT_SYMBOL_GPL(rcu_barrier_sched);
3078 : :
3079 : : /*
3080 : : * Do boot-time initialization of a CPU's per-CPU RCU data.
3081 : : */
3082 : : static void __init
3083 : 0 : rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3084 : : {
3085 : : unsigned long flags;
3086 : 0 : struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3087 : : struct rcu_node *rnp = rcu_get_root(rsp);
3088 : :
3089 : : /* Set up local state, ensuring consistent view of global state. */
3090 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
3091 : 0 : rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
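/*
 * For example (editor's note), if this CPU's leaf rcu_node covers
 * CPUs 16-31 (->grplo == 16), then CPU 19 gets grpmask 1UL << 3,
 * i.e. its bit in that node's qsmask/qsmaskinit bitmasks.
 */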
3092 : : init_callback_list(rdp);
3093 : 0 : rdp->qlen_lazy = 0;
3094 : 0 : ACCESS_ONCE(rdp->qlen) = 0;
3095 : 0 : rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
3096 [ # # ][ # # ]: 0 : WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
[ # # ]
3097 [ # # ][ # # ]: 0 : WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
[ # # ]
3098 : 0 : rdp->cpu = cpu;
3099 : 0 : rdp->rsp = rsp;
3100 : : rcu_boot_init_nocb_percpu_data(rdp);
3101 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
3102 : 0 : }
3103 : :
3104 : : /*
3105 : : * Initialize a CPU's per-CPU RCU data. Note that only one online or
3106 : : * offline event can be happening at a given time. Note also that we
3107 : : * can accept some slop in the rsp->completed access due to the fact
3108 : : * that this CPU cannot possibly have any RCU callbacks in flight yet.
3109 : : */
3110 : : static void
3111 : 0 : rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
3112 : : {
3113 : : unsigned long flags;
3114 : : unsigned long mask;
3115 : 162 : struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3116 : : struct rcu_node *rnp = rcu_get_root(rsp);
3117 : :
3118 : : /* Exclude new grace periods. */
3119 : 162 : mutex_lock(&rsp->onoff_mutex);
3120 : :
3121 : : /* Set up local state, ensuring consistent view of global state. */
3122 : 162 : raw_spin_lock_irqsave(&rnp->lock, flags);
3123 : 162 : rdp->beenonline = 1; /* We have now been online. */
3124 : 162 : rdp->preemptible = preemptible;
3125 : 162 : rdp->qlen_last_fqs_check = 0;
3126 : 162 : rdp->n_force_qs_snap = rsp->n_force_qs;
3127 : 162 : rdp->blimit = blimit;
3128 : : init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
3129 : 162 : rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
3130 : : rcu_sysidle_init_percpu_data(rdp->dynticks);
3131 : 162 : atomic_set(&rdp->dynticks->dynticks,
3132 : : (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
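/*
 * Editor's note: the "& ~0x1) + 1" above forces ->dynticks to an
 * odd value, which the dynticks code treats as "this CPU is not in
 * an extended quiescent state (idle)".
 */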
3133 : : raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
3134 : :
3135 : : /* Add CPU to rcu_node bitmasks. */
3136 : 162 : rnp = rdp->mynode;
3137 : 162 : mask = rdp->grpmask;
3138 : : do {
3139 : : /* Exclude any attempts to start a new GP on small systems. */
3140 : 162 : raw_spin_lock(&rnp->lock); /* irqs already disabled. */
3141 : 162 : rnp->qsmaskinit |= mask;
3142 : 162 : mask = rnp->grpmask;
3143 [ + - ]: 162 : if (rnp == rdp->mynode) {
3144 : : /*
3145 : : * If there is a grace period in progress, we will
3146 : : * set up to wait for it next time we run the
3147 : : * RCU core code.
3148 : : */
3149 : 162 : rdp->gpnum = rnp->completed;
3150 : 162 : rdp->completed = rnp->completed;
3151 : 162 : rdp->passed_quiesce = 0;
3152 : 162 : rdp->qs_pending = 0;
3153 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
3154 : : }
3155 : : raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
3156 : 162 : rnp = rnp->parent;
3157 [ - + ][ # # ]: 324 : } while (rnp != NULL && !(rnp->qsmaskinit & mask));
3158 [ - + ]: 162 : local_irq_restore(flags);
3159 : :
3160 : 162 : mutex_unlock(&rsp->onoff_mutex);
3161 : 162 : }
3162 : :
3163 : 0 : static void rcu_prepare_cpu(int cpu)
3164 : : {
3165 : : struct rcu_state *rsp;
3166 : :
3167 [ + + ]: 243 : for_each_rcu_flavor(rsp)
3168 : 162 : rcu_init_percpu_data(cpu, rsp,
3169 : 162 : strcmp(rsp->name, "rcu_preempt") == 0);
3170 : 81 : }
3171 : :
3172 : : /*
3173 : : * Handle CPU online/offline notification events.
3174 : : */
3175 : 0 : static int rcu_cpu_notify(struct notifier_block *self,
3176 : : unsigned long action, void *hcpu)
3177 : : {
3178 : 555 : long cpu = (long)hcpu;
3179 : 555 : struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
3180 : : struct rcu_node *rnp = rdp->mynode;
3181 : : struct rcu_state *rsp;
3182 : :
3183 : 555 : trace_rcu_utilization(TPS("Start CPU hotplug"));
3184 : : switch (action) {
3185 : : case CPU_UP_PREPARE:
3186 : : case CPU_UP_PREPARE_FROZEN:
3187 : 81 : rcu_prepare_cpu(cpu);
3188 : : rcu_prepare_kthreads(cpu);
3189 : : break;
3190 : : case CPU_ONLINE:
3191 : : case CPU_DOWN_FAILED:
3192 : : rcu_boost_kthread_setaffinity(rnp, -1);
3193 : : break;
3194 : : case CPU_DOWN_PREPARE:
3195 : : rcu_boost_kthread_setaffinity(rnp, cpu);
3196 : : break;
3197 : : case CPU_DYING:
3198 : : case CPU_DYING_FROZEN:
3199 [ + + ]: 234 : for_each_rcu_flavor(rsp)
3200 : : rcu_cleanup_dying_cpu(rsp);
3201 : : break;
3202 : : case CPU_DEAD:
3203 : : case CPU_DEAD_FROZEN:
3204 : : case CPU_UP_CANCELED:
3205 : : case CPU_UP_CANCELED_FROZEN:
3206 [ + + ]: 234 : for_each_rcu_flavor(rsp)
3207 : 156 : rcu_cleanup_dead_cpu(cpu, rsp);
3208 : : break;
3209 : : default:
3210 : : break;
3211 : : }
3212 : 555 : trace_rcu_utilization(TPS("End CPU hotplug"));
3213 : 555 : return NOTIFY_OK;
3214 : : }
3215 : :
3216 : 0 : static int rcu_pm_notify(struct notifier_block *self,
3217 : : unsigned long action, void *hcpu)
3218 : : {
3219 [ # # # ]: 0 : switch (action) {
3220 : : case PM_HIBERNATION_PREPARE:
3221 : : case PM_SUSPEND_PREPARE:
3222 [ # # ]: 0 : if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
3223 : 0 : rcu_expedited = 1;
3224 : : break;
3225 : : case PM_POST_HIBERNATION:
3226 : : case PM_POST_SUSPEND:
3227 : 0 : rcu_expedited = 0;
3228 : 0 : break;
3229 : : default:
3230 : : break;
3231 : : }
3232 : 0 : return NOTIFY_OK;
3233 : : }
3234 : :
3235 : : /*
3236 : : * Spawn the kthread that handles this RCU flavor's grace periods.
3237 : : */
3238 : 0 : static int __init rcu_spawn_gp_kthread(void)
3239 : : {
3240 : : unsigned long flags;
3241 : : struct rcu_node *rnp;
3242 : : struct rcu_state *rsp;
3243 : : struct task_struct *t;
3244 : :
3245 [ # # ]: 0 : for_each_rcu_flavor(rsp) {
3246 [ # # ]: 0 : t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
3247 [ # # ]: 0 : BUG_ON(IS_ERR(t));
3248 : : rnp = rcu_get_root(rsp);
3249 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
3250 : 0 : rsp->gp_kthread = t;
3251 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
3252 : : rcu_spawn_nocb_kthreads(rsp);
3253 : : }
3254 : 0 : return 0;
3255 : : }
3256 : : early_initcall(rcu_spawn_gp_kthread);
3257 : :
3258 : : /*
3259 : : * This function is invoked towards the end of the scheduler's initialization
3260 : : * process. Before this is called, the idle task might contain
3261 : : * RCU read-side critical sections (during which time, this idle
3262 : : * task is booting the system). After this function is called, the
3263 : : * idle tasks are prohibited from containing RCU read-side critical
3264 : : * sections. This function also enables RCU lockdep checking.
3265 : : */
3266 : 0 : void rcu_scheduler_starting(void)
3267 : : {
3268 [ # # ]: 0 : WARN_ON(num_online_cpus() != 1);
3269 [ # # ]: 0 : WARN_ON(nr_context_switches() > 0);
3270 : 0 : rcu_scheduler_active = 1;
3271 : 0 : }
3272 : :
3273 : : /*
3274 : : * Compute the per-level fanout, either using the exact fanout specified
3275 : : * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
3276 : : */
3277 : : #ifdef CONFIG_RCU_FANOUT_EXACT
3278 : : static void __init rcu_init_levelspread(struct rcu_state *rsp)
3279 : : {
3280 : : int i;
3281 : :
3282 : : rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
3283 : : for (i = rcu_num_lvls - 2; i >= 0; i--)
3284 : : rsp->levelspread[i] = CONFIG_RCU_FANOUT;
3285 : : }
3286 : : #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
3287 : 0 : static void __init rcu_init_levelspread(struct rcu_state *rsp)
3288 : : {
3289 : : int ccur;
3290 : : int cprv;
3291 : : int i;
3292 : :
3293 : 0 : cprv = nr_cpu_ids;
3294 [ # # ]: 0 : for (i = rcu_num_lvls - 1; i >= 0; i--) {
3295 : 0 : ccur = rsp->levelcnt[i];
3296 : 0 : rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
3297 : : cprv = ccur;
3298 : : }
3299 : 0 : }
3300 : : #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
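/*
 * Worked example (editor's note) for the balanced (non-EXACT) case above:
 * with nr_cpu_ids == 256 and levelcnt[] == { 1, 16 } (one root node and
 * 16 leaves), the loop computes levelspread[1] = (256 + 16 - 1) / 16 = 16
 * and then levelspread[0] = (16 + 1 - 1) / 1 = 16, so each leaf handles
 * 16 CPUs and the root handles 16 leaves.
 */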
3301 : :
3302 : : /*
3303 : : * Helper function for rcu_init() that initializes one rcu_state structure.
3304 : : */
3305 : 0 : static void __init rcu_init_one(struct rcu_state *rsp,
3306 : : struct rcu_data __percpu *rda)
3307 : : {
3308 : : static char *buf[] = { "rcu_node_0",
3309 : : "rcu_node_1",
3310 : : "rcu_node_2",
3311 : : "rcu_node_3" }; /* Match MAX_RCU_LVLS */
3312 : : static char *fqs[] = { "rcu_node_fqs_0",
3313 : : "rcu_node_fqs_1",
3314 : : "rcu_node_fqs_2",
3315 : : "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
3316 : : int cpustride = 1;
3317 : : int i;
3318 : : int j;
3319 : : struct rcu_node *rnp;
3320 : :
3321 : : BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
3322 : :
3323 : : /* Silence gcc 4.8 warning about array index out of range. */
3324 [ # # ]: 0 : if (rcu_num_lvls > RCU_NUM_LVLS)
3325 : 0 : panic("rcu_init_one: rcu_num_lvls overflow");
3326 : :
3327 : : /* Initialize the level-tracking arrays. */
3328 : :
3329 [ # # ]: 0 : for (i = 0; i < rcu_num_lvls; i++)
3330 : 0 : rsp->levelcnt[i] = num_rcu_lvl[i];
3331 [ # # ]: 0 : for (i = 1; i < rcu_num_lvls; i++)
3332 : 0 : rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
3333 : 0 : rcu_init_levelspread(rsp);
3334 : :
3335 : : /* Initialize the elements themselves, starting from the leaves. */
3336 : :
3337 [ # # ]: 0 : for (i = rcu_num_lvls - 1; i >= 0; i--) {
3338 : 0 : cpustride *= rsp->levelspread[i];
3339 : 0 : rnp = rsp->level[i];
3340 [ # # ]: 0 : for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
3341 : 0 : raw_spin_lock_init(&rnp->lock);
3342 : : lockdep_set_class_and_name(&rnp->lock,
3343 : : &rcu_node_class[i], buf[i]);
3344 : 0 : raw_spin_lock_init(&rnp->fqslock);
3345 : : lockdep_set_class_and_name(&rnp->fqslock,
3346 : : &rcu_fqs_class[i], fqs[i]);
3347 : 0 : rnp->gpnum = rsp->gpnum;
3348 : 0 : rnp->completed = rsp->completed;
3349 : 0 : rnp->qsmask = 0;
3350 : 0 : rnp->qsmaskinit = 0;
3351 : 0 : rnp->grplo = j * cpustride;
3352 : 0 : rnp->grphi = (j + 1) * cpustride - 1;
3353 [ # # ]: 0 : if (rnp->grphi >= NR_CPUS)
3354 : 0 : rnp->grphi = NR_CPUS - 1;
3355 [ # # ]: 0 : if (i == 0) {
3356 : 0 : rnp->grpnum = 0;
3357 : 0 : rnp->grpmask = 0;
3358 : 0 : rnp->parent = NULL;
3359 : : } else {
3360 : 0 : rnp->grpnum = j % rsp->levelspread[i - 1];
3361 : 0 : rnp->grpmask = 1UL << rnp->grpnum;
3362 : 0 : rnp->parent = rsp->level[i - 1] +
3363 : 0 : j / rsp->levelspread[i - 1];
3364 : : }
3365 : 0 : rnp->level = i;
3366 : 0 : INIT_LIST_HEAD(&rnp->blkd_tasks);
3367 : : rcu_init_one_nocb(rnp);
3368 : : }
3369 : : }
3370 : :
3371 : 0 : rsp->rda = rda;
3372 : 0 : init_waitqueue_head(&rsp->gp_wq);
3373 : : init_irq_work(&rsp->wakeup_work, rsp_wakeup);
3374 : 0 : rnp = rsp->level[rcu_num_lvls - 1];
3375 [ # # ]: 0 : for_each_possible_cpu(i) {
3376 [ # # ]: 0 : while (i > rnp->grphi)
3377 : 0 : rnp++;
3378 : 0 : per_cpu_ptr(rsp->rda, i)->mynode = rnp;
3379 : 0 : rcu_boot_init_percpu_data(i, rsp);
3380 : : }
3381 : 0 : list_add(&rsp->flavors, &rcu_struct_flavors);
3382 : 0 : }
3383 : :
3384 : : /*
3385 : : * Compute the rcu_node tree geometry from kernel parameters. This cannot
3386 : : * replace the definitions in tree.h because those are needed to size
3387 : : * the ->node array in the rcu_state structure.
3388 : : */
3389 : 0 : static void __init rcu_init_geometry(void)
3390 : : {
3391 : : ulong d;
3392 : : int i;
3393 : : int j;
3394 : 0 : int n = nr_cpu_ids;
3395 : : int rcu_capacity[MAX_RCU_LVLS + 1];
3396 : :
3397 : : /*
3398 : : * Initialize any unspecified boot parameters.
3399 : : * The default values of jiffies_till_first_fqs and
3400 : : * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
3401 : : * value, which is a function of HZ, plus one for each
3402 : : * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
3403 : : */
3404 : 0 : d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
3405 [ # # ]: 0 : if (jiffies_till_first_fqs == ULONG_MAX)
3406 : 0 : jiffies_till_first_fqs = d;
3407 [ # # ]: 0 : if (jiffies_till_next_fqs == ULONG_MAX)
3408 : 0 : jiffies_till_next_fqs = d;
3409 : :
3410 : : /* If the compile-time values are accurate, just leave. */
3411 [ # # ][ # # ]: 0 : if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
3412 : : nr_cpu_ids == NR_CPUS)
3413 : 0 : return;
3414 : 0 : pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
3415 : : rcu_fanout_leaf, nr_cpu_ids);
3416 : :
3417 : : /*
3418 : : * Compute the number of CPUs that can be handled by an rcu_node tree
3419 : : * with the given number of levels. Setting rcu_capacity[0] makes
3420 : : * some of the arithmetic easier.
3421 : : */
3422 : 0 : rcu_capacity[0] = 1;
3423 : 0 : rcu_capacity[1] = rcu_fanout_leaf;
3424 [ # # ]: 0 : for (i = 2; i <= MAX_RCU_LVLS; i++)
3425 : 0 : rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
3426 : :
3427 : : /*
3428 : : * The boot-time rcu_fanout_leaf parameter is only permitted
3429 : : * to increase the leaf-level fanout, not decrease it. Of course,
3430 : : * the leaf-level fanout cannot exceed the number of bits in
3431 : : * the rcu_node masks. Finally, the tree must be able to accommodate
3432 : : * the configured number of CPUs. Complain and fall back to the
3433 : : * compile-time values if these limits are exceeded.
3434 : : */
3435 [ # # ]: 0 : if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
3436 [ # # ]: 0 : rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
3437 : 0 : n > rcu_capacity[MAX_RCU_LVLS]) {
3438 : 0 : WARN_ON(1);
3439 : 0 : return;
3440 : : }
3441 : :
3442 : : /* Calculate the number of rcu_nodes at each level of the tree. */
3443 [ # # ]: 0 : for (i = 1; i <= MAX_RCU_LVLS; i++)
3444 [ # # ]: 0 : if (n <= rcu_capacity[i]) {
3445 [ # # ]: 0 : for (j = 0; j <= i; j++)
3446 : 0 : num_rcu_lvl[j] =
3447 : 0 : DIV_ROUND_UP(n, rcu_capacity[i - j]);
3448 : 0 : rcu_num_lvls = i;
3449 [ # # ]: 0 : for (j = i + 1; j <= MAX_RCU_LVLS; j++)
3450 : 0 : num_rcu_lvl[j] = 0;
3451 : : break;
3452 : : }
3453 : :
3454 : : /* Calculate the total number of rcu_node structures. */
3455 : 0 : rcu_num_nodes = 0;
3456 [ # # ]: 0 : for (i = 0; i <= MAX_RCU_LVLS; i++)
3457 : 0 : rcu_num_nodes += num_rcu_lvl[i];
3458 : 0 : rcu_num_nodes -= n;
3459 : : }
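/*
 * Worked example (editor's note), assuming CONFIG_RCU_FANOUT == 64 and
 * a leaf fanout (rcu_fanout_leaf) of 16: rcu_capacity[] begins
 * { 1, 16, 1024, ... }. For nr_cpu_ids == 100 the loop above selects
 * i == 2, giving num_rcu_lvl[] = { 1, 7, 100 }, rcu_num_lvls == 2, and,
 * after the final subtraction of n, rcu_num_nodes == 8 (one root plus
 * seven leaf rcu_node structures).
 */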
3460 : :
3461 : 0 : void __init rcu_init(void)
3462 : : {
3463 : : int cpu;
3464 : :
3465 : 0 : rcu_bootup_announce();
3466 : 0 : rcu_init_geometry();
3467 : 0 : rcu_init_one(&rcu_bh_state, &rcu_bh_data);
3468 : 0 : rcu_init_one(&rcu_sched_state, &rcu_sched_data);
3469 : : __rcu_init_preempt();
3470 : 0 : open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
3471 : :
3472 : : /*
3473 : : * We don't need protection against CPU-hotplug here because
3474 : : * this is called early in boot, before either interrupts
3475 : : * or the scheduler are operational.
3476 : : */
3477 : 0 : cpu_notifier(rcu_cpu_notify, 0);
3478 : 0 : pm_notifier(rcu_pm_notify, 0);
3479 [ # # ]: 0 : for_each_online_cpu(cpu)
3480 : 0 : rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
3481 : 0 : }
3482 : :
3483 : : #include "tree_plugin.h"