Branch data Line data Source code
1 : : /*
2 : : * Read-Copy Update mechanism for mutual exclusion
3 : : *
4 : : * This program is free software; you can redistribute it and/or modify
5 : : * it under the terms of the GNU General Public License as published by
6 : : * the Free Software Foundation; either version 2 of the License, or
7 : : * (at your option) any later version.
8 : : *
9 : : * This program is distributed in the hope that it will be useful,
10 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : : * GNU General Public License for more details.
13 : : *
14 : : * You should have received a copy of the GNU General Public License
15 : : * along with this program; if not, write to the Free Software
16 : : * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 : : *
18 : : * Copyright IBM Corporation, 2008
19 : : *
20 : : * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 : : * Manfred Spraul <manfred@colorfullife.com>
22 : : * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
23 : : *
24 : : * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
25 : : * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
26 : : *
27 : : * For detailed explanation of Read-Copy Update mechanism see -
28 : : * Documentation/RCU
29 : : */
30 : : #include <linux/types.h>
31 : : #include <linux/kernel.h>
32 : : #include <linux/init.h>
33 : : #include <linux/spinlock.h>
34 : : #include <linux/smp.h>
35 : : #include <linux/rcupdate.h>
36 : : #include <linux/interrupt.h>
37 : : #include <linux/sched.h>
38 : : #include <linux/nmi.h>
39 : : #include <linux/atomic.h>
40 : : #include <linux/bitops.h>
41 : : #include <linux/export.h>
42 : : #include <linux/completion.h>
43 : : #include <linux/moduleparam.h>
44 : : #include <linux/module.h>
45 : : #include <linux/percpu.h>
46 : : #include <linux/notifier.h>
47 : : #include <linux/cpu.h>
48 : : #include <linux/mutex.h>
49 : : #include <linux/time.h>
50 : : #include <linux/kernel_stat.h>
51 : : #include <linux/wait.h>
52 : : #include <linux/kthread.h>
53 : : #include <linux/prefetch.h>
54 : : #include <linux/delay.h>
55 : : #include <linux/stop_machine.h>
56 : : #include <linux/random.h>
57 : : #include <linux/ftrace_event.h>
58 : : #include <linux/suspend.h>
59 : :
60 : : #include "tree.h"
61 : : #include <trace/events/rcu.h>
62 : :
63 : : #include "rcu.h"
64 : :
65 : : MODULE_ALIAS("rcutree");
66 : : #ifdef MODULE_PARAM_PREFIX
67 : : #undef MODULE_PARAM_PREFIX
68 : : #endif
69 : : #define MODULE_PARAM_PREFIX "rcutree."
70 : :
71 : : /* Data structures. */
72 : :
73 : : static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
74 : : static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
75 : :
76 : : /*
77 : : * In order to export the rcu_state name to the tracing tools, it
78 : : * needs to be added in the __tracepoint_string section.
79 : : * This requires defining a separate variable tp_<sname>_varname
80 : : * that points to the string being used, which allows
81 : : * the tracing userspace tools to resolve the string
82 : : * address to the matching string.
83 : : */
84 : : #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
85 : : static char sname##_varname[] = #sname; \
86 : : static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
87 : : struct rcu_state sname##_state = { \
88 : : .level = { &sname##_state.node[0] }, \
89 : : .call = cr, \
90 : : .fqs_state = RCU_GP_IDLE, \
91 : : .gpnum = 0UL - 300UL, \
92 : : .completed = 0UL - 300UL, \
93 : : .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
94 : : .orphan_nxttail = &sname##_state.orphan_nxtlist, \
95 : : .orphan_donetail = &sname##_state.orphan_donelist, \
96 : : .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
97 : : .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
98 : : .name = sname##_varname, \
99 : : .abbr = sabbr, \
100 : : }; \
101 : : DEFINE_PER_CPU(struct rcu_data, sname##_data)
102 : :
103 : : RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
104 : : RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
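
The string-export trick described in the comment above is ordinary preprocessor stringification ("#") plus token pasting ("##"). A minimal user-space sketch of the same pattern, with a made-up macro name and without the kernel-only __tracepoint_string section attribute:

#include <stdio.h>

/*
 * Illustrative sketch only: the "#"/"##" pattern RCU_STATE_INITIALIZER()
 * relies on to publish each flavor's name.  DEFINE_NAMED() is hypothetical.
 */
#define DEFINE_NAMED(sname) \
	static char sname##_varname[] = #sname; \
	static const char *tp_##sname##_varname = sname##_varname

DEFINE_NAMED(rcu_sched);	/* defines rcu_sched_varname and tp_rcu_sched_varname */

int main(void)
{
	printf("%s\n", tp_rcu_sched_varname);	/* prints "rcu_sched" */
	return 0;
}
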
105 : :
106 : : static struct rcu_state *rcu_state;
107 : : LIST_HEAD(rcu_struct_flavors);
108 : :
109 : : /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
110 : : static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
111 : : module_param(rcu_fanout_leaf, int, 0444);
112 : : int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
113 : : static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
114 : : NUM_RCU_LVL_0,
115 : : NUM_RCU_LVL_1,
116 : : NUM_RCU_LVL_2,
117 : : NUM_RCU_LVL_3,
118 : : NUM_RCU_LVL_4,
119 : : };
120 : : int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
121 : :
122 : : /*
123 : : * The rcu_scheduler_active variable transitions from zero to one just
124 : : * before the first task is spawned. So when this variable is zero, RCU
125 : : * can assume that there is but one task, allowing RCU to (for example)
126 : : * optimize synchronize_sched() to a simple barrier(). When this variable
127 : : * is one, RCU must actually do all the hard work required to detect real
128 : : * grace periods. This variable is also used to suppress boot-time false
129 : : * positives from lockdep-RCU error checking.
130 : : */
131 : : int rcu_scheduler_active __read_mostly;
132 : : EXPORT_SYMBOL_GPL(rcu_scheduler_active);
133 : :
134 : : /*
135 : : * The rcu_scheduler_fully_active variable transitions from zero to one
136 : : * during the early_initcall() processing, which is after the scheduler
137 : : * is capable of creating new tasks. So RCU processing (for example,
138 : : * creating tasks for RCU priority boosting) must be delayed until after
139 : : * rcu_scheduler_fully_active transitions from zero to one. We also
140 : : * currently delay invocation of any RCU callbacks until after this point.
141 : : *
142 : : * It might later prove better for people registering RCU callbacks during
143 : : * early boot to take responsibility for these callbacks, but one step at
144 : : * a time.
145 : : */
146 : : static int rcu_scheduler_fully_active __read_mostly;
147 : :
148 : : #ifdef CONFIG_RCU_BOOST
149 : :
150 : : /*
151 : : * Control variables for per-CPU and per-rcu_node kthreads. These
152 : : * handle all flavors of RCU.
153 : : */
154 : : static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
155 : : DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
156 : : DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
157 : : DEFINE_PER_CPU(char, rcu_cpu_has_work);
158 : :
159 : : #endif /* #ifdef CONFIG_RCU_BOOST */
160 : :
161 : : static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
162 : : static void invoke_rcu_core(void);
163 : : static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
164 : :
165 : : /*
166 : : * Track the rcutorture test sequence number and the update version
167 : : * number within a given test. The rcutorture_testseq is incremented
168 : : * on every rcutorture module load and unload, so has an odd value
169 : : * when a test is running. The rcutorture_vernum is set to zero
170 : : * when rcutorture starts and is incremented on each rcutorture update.
171 : : * These variables enable correlating rcutorture output with the
172 : : * RCU tracing information.
173 : : */
174 : : unsigned long rcutorture_testseq;
175 : : unsigned long rcutorture_vernum;
176 : :
177 : : /*
178 : : * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
179 : : * permit this function to be invoked without holding the root rcu_node
180 : : * structure's ->lock, but of course results can be subject to change.
181 : : */
182 : : static int rcu_gp_in_progress(struct rcu_state *rsp)
183 : : {
184 : 34780909 : return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
185 : : }
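
A grace period is considered in progress exactly while ->gpnum has run ahead of ->completed. A minimal sketch of the two counters (hypothetical names, single thread, no locking or ACCESS_ONCE()):

#include <assert.h>

struct gp_counters {
	unsigned long gpnum;		/* grace periods started  */
	unsigned long completed;	/* grace periods finished */
};

static int gp_in_progress(const struct gp_counters *c)
{
	return c->completed != c->gpnum;	/* same test as rcu_gp_in_progress() */
}

static void gp_start(struct gp_counters *c) { c->gpnum++; }
static void gp_end(struct gp_counters *c)   { c->completed = c->gpnum; }

int main(void)
{
	struct gp_counters c = { 0, 0 };

	assert(!gp_in_progress(&c));
	gp_start(&c);
	assert(gp_in_progress(&c));
	gp_end(&c);
	assert(!gp_in_progress(&c));
	return 0;
}
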
186 : :
187 : : /*
188 : : * Note a quiescent state. Because we do not need to know
189 : : * how many quiescent states passed, only whether there was at least
190 : : * one since the start of the grace period, this just sets a flag.
191 : : * The caller must have disabled preemption.
192 : : */
193 : 0 : void rcu_sched_qs(int cpu)
194 : : {
195 : 126208637 : struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
196 : :
197 : : if (rdp->passed_quiesce == 0)
198 : : trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
199 : 126208637 : rdp->passed_quiesce = 1;
200 : 0 : }
201 : :
202 : 0 : void rcu_bh_qs(int cpu)
203 : : {
204 : 18938975 : struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
205 : :
206 : : if (rdp->passed_quiesce == 0)
207 : : trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
208 : 7846758 : rdp->passed_quiesce = 1;
209 : 11092217 : }
210 : :
211 : : /*
212 : : * Note a context switch. This is a quiescent state for RCU-sched,
213 : : * and requires special handling for preemptible RCU.
214 : : * The caller must have disabled preemption.
215 : : */
216 : 0 : void rcu_note_context_switch(int cpu)
217 : : {
218 : 118817074 : trace_rcu_utilization(TPS("Start context switch"));
219 : : rcu_sched_qs(cpu);
220 : : rcu_preempt_note_context_switch(cpu);
221 : 118771914 : trace_rcu_utilization(TPS("End context switch"));
222 : 118771914 : }
223 : : EXPORT_SYMBOL_GPL(rcu_note_context_switch);
224 : :
225 : : static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
226 : : .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
227 : : .dynticks = ATOMIC_INIT(1),
228 : : #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
229 : : .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
230 : : .dynticks_idle = ATOMIC_INIT(1),
231 : : #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
232 : : };
233 : :
234 : : static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
235 : : static long qhimark = 10000; /* If this many pending, ignore blimit. */
236 : : static long qlowmark = 100; /* Once only this many pending, use blimit. */
237 : :
238 : : module_param(blimit, long, 0444);
239 : : module_param(qhimark, long, 0444);
240 : : module_param(qlowmark, long, 0444);
241 : :
242 : : static ulong jiffies_till_first_fqs = ULONG_MAX;
243 : : static ulong jiffies_till_next_fqs = ULONG_MAX;
244 : :
245 : : module_param(jiffies_till_first_fqs, ulong, 0644);
246 : : module_param(jiffies_till_next_fqs, ulong, 0644);
247 : :
248 : : static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
249 : : struct rcu_data *rdp);
250 : : static void force_qs_rnp(struct rcu_state *rsp,
251 : : int (*f)(struct rcu_data *rsp, bool *isidle,
252 : : unsigned long *maxj),
253 : : bool *isidle, unsigned long *maxj);
254 : : static void force_quiescent_state(struct rcu_state *rsp);
255 : : static int rcu_pending(int cpu);
256 : :
257 : : /*
258 : : * Return the number of RCU-sched batches processed thus far for debug & stats.
259 : : */
260 : 0 : long rcu_batches_completed_sched(void)
261 : : {
262 : 0 : return rcu_sched_state.completed;
263 : : }
264 : : EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
265 : :
266 : : /*
267 : : * Return the number of RCU BH batches processed thus far for debug & stats.
268 : : */
269 : 0 : long rcu_batches_completed_bh(void)
270 : : {
271 : 0 : return rcu_bh_state.completed;
272 : : }
273 : : EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
274 : :
275 : : /*
276 : : * Force a quiescent state for RCU BH.
277 : : */
278 : 0 : void rcu_bh_force_quiescent_state(void)
279 : : {
280 : 0 : force_quiescent_state(&rcu_bh_state);
281 : 0 : }
282 : : EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
283 : :
284 : : /*
285 : : * Record the number of times rcutorture tests have been initiated and
286 : : * terminated. This information allows the debugfs tracing stats to be
287 : : * correlated to the rcutorture messages, even when the rcutorture module
288 : : * is being repeatedly loaded and unloaded. In other words, we cannot
289 : : * store this state in rcutorture itself.
290 : : */
291 : 0 : void rcutorture_record_test_transition(void)
292 : : {
293 : 0 : rcutorture_testseq++;
294 : 0 : rcutorture_vernum = 0;
295 : 0 : }
296 : : EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
297 : :
298 : : /*
299 : : * Record the number of writer passes through the current rcutorture test.
300 : : * This is also used to correlate debugfs tracing stats with the rcutorture
301 : : * messages.
302 : : */
303 : 0 : void rcutorture_record_progress(unsigned long vernum)
304 : : {
305 : 0 : rcutorture_vernum++;
306 : 0 : }
307 : : EXPORT_SYMBOL_GPL(rcutorture_record_progress);
308 : :
309 : : /*
310 : : * Force a quiescent state for RCU-sched.
311 : : */
312 : 0 : void rcu_sched_force_quiescent_state(void)
313 : : {
314 : 0 : force_quiescent_state(&rcu_sched_state);
315 : 0 : }
316 : : EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
317 : :
318 : : /*
319 : : * Does the CPU have callbacks ready to be invoked?
320 : : */
321 : : static int
322 : : cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
323 : : {
324 [ + + ][ + ]: 25087409 : return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
[ + + ][ + ]
[ + + ][ + + ]
[ + + ][ + + ]
325 : : rdp->nxttail[RCU_DONE_TAIL] != NULL;
326 : : }
327 : :
328 : : /*
329 : : * Does the current CPU require a not-yet-started grace period?
330 : : * The caller must have disabled interrupts to prevent races with
331 : : * normal callback registry.
332 : : */
333 : : static int
334 : 0 : cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
335 : : {
336 : : int i;
337 : :
338 [ + + ]: 18898027 : if (rcu_gp_in_progress(rsp))
339 : : return 0; /* No, a grace period is already in progress. */
340 : : if (rcu_nocb_needs_gp(rsp))
341 : : return 1; /* Yes, a no-CBs CPU needs one. */
342 [ + ]: 17027845 : if (!rdp->nxttail[RCU_NEXT_TAIL])
343 : : return 0; /* No, this is a no-CBs (or offline) CPU. */
344 [ + + ]: 17028639 : if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
345 : : return 1; /* Yes, this CPU has newly registered callbacks. */
346 [ + + ]: 49247572 : for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
347 [ + + ][ + + ]: 33091191 : if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
348 : 784133 : ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
349 : : rdp->nxtcompleted[i]))
350 : : return 1; /* Yes, CBs for future grace period. */
351 : : return 0; /* No grace period needed. */
352 : : }
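
Both functions above walk the per-CPU segmented callback list: a single linked list (->nxtlist) carved into DONE/WAIT/NEXT_READY/NEXT segments by the ->nxttail[] array of tail pointers. A stripped-down user-space sketch of that layout (hypothetical names, one list, no per-CPU state or locking):

#include <stdio.h>

/*
 * seg_tail[i] points at the ->next field of the last callback in segment i,
 * or at &head while that segment (and every earlier one) is empty.
 */
enum { SEG_DONE, SEG_WAIT, SEG_NEXT_READY, SEG_NEXT, NSEGS };

struct cb {
	struct cb *next;
};

struct cb_list {
	struct cb *head;
	struct cb **seg_tail[NSEGS];
};

static void cb_list_init(struct cb_list *l)
{
	int i;

	l->head = NULL;
	for (i = 0; i < NSEGS; i++)
		l->seg_tail[i] = &l->head;	/* every segment empty */
}

/* Queue a new callback; it starts life in the not-yet-assigned segment. */
static void cb_enqueue(struct cb_list *l, struct cb *c)
{
	c->next = NULL;
	*l->seg_tail[SEG_NEXT] = c;
	l->seg_tail[SEG_NEXT] = &c->next;
}

/* Analogue of cpu_has_callbacks_ready_to_invoke(): is SEG_DONE non-empty? */
static int has_done_cbs(struct cb_list *l)
{
	return &l->head != l->seg_tail[SEG_DONE];
}

int main(void)
{
	struct cb_list l;
	struct cb c;
	int i;

	cb_list_init(&l);
	cb_enqueue(&l, &c);
	printf("ready to invoke? %d\n", has_done_cbs(&l));	/* 0 */

	/* Pretend the callback's grace period has ended: everything queued
	 * so far drains into the SEG_DONE segment. */
	for (i = SEG_DONE; i < SEG_NEXT; i++)
		l.seg_tail[i] = l.seg_tail[SEG_NEXT];
	printf("ready to invoke? %d\n", has_done_cbs(&l));	/* 1 */
	return 0;
}
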
353 : :
354 : : /*
355 : : * Return the root node of the specified rcu_state structure.
356 : : */
357 : : static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
358 : : {
359 : : return &rsp->node[0];
360 : : }
361 : :
362 : : /*
363 : : * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
364 : : *
365 : : * If the new value of the ->dynticks_nesting counter now is zero,
366 : : * we really have entered idle, and must do the appropriate accounting.
367 : : * The caller must have disabled interrupts.
368 : : */
369 : 0 : static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
370 : : bool user)
371 : : {
372 : : trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
373 [ + + ][ - + ]: 7837505 : if (!user && !is_idle_task(current)) {
374 : 0 : struct task_struct *idle __maybe_unused =
375 : 0 : idle_task(smp_processor_id());
376 : :
377 : : trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
378 : 0 : ftrace_dump(DUMP_ORIG);
379 [ # # ][ # # ]: 0 : WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
380 : : current->pid, current->comm,
381 : : idle->pid, idle->comm); /* must be idle task! */
382 : : }
383 : : rcu_prepare_for_idle(smp_processor_id());
384 : : /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
385 : 7837505 : smp_mb__before_atomic_inc(); /* See above. */
386 : 7837522 : atomic_inc(&rdtp->dynticks);
387 : 7837496 : smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
388 [ - + ][ # # ]: 7837502 : WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
[ # # ]
389 : :
390 : : /*
391 : : * It is illegal to enter an extended quiescent state while
392 : : * in an RCU read-side critical section.
393 : : */
394 : : rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
395 : : "Illegal idle entry in RCU read-side critical section.");
396 : : rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
397 : : "Illegal idle entry in RCU-bh read-side critical section.");
398 : : rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
399 : : "Illegal idle entry in RCU-sched read-side critical section.");
400 : 7837502 : }
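
The ->dynticks counter manipulated above follows a simple parity protocol: it is odd while the CPU might run RCU read-side critical sections and even while the CPU sits in an extended quiescent state, so every transition is one atomic increment. A hedged user-space sketch with C11 atomics (one thread standing in for one CPU, illustrative names; the kernel brackets the increment with explicit smp_mb() calls, the sketch leans on a seq_cst fetch-add instead):

#include <stdatomic.h>
#include <stdio.h>

static atomic_long dynticks = 1;	/* odd: "CPU" active, RCU must watch it */

static void eqs_enter(void)		/* e.g. entering the idle loop */
{
	/* Full-barrier RMW: prior read-side critical sections stay before it. */
	long v = atomic_fetch_add(&dynticks, 1) + 1;
	printf("enter: dynticks=%ld (even=%d)\n", v, !(v & 0x1));
}

static void eqs_exit(void)		/* e.g. leaving idle, or irq entry */
{
	long v = atomic_fetch_add(&dynticks, 1) + 1;
	printf("exit:  dynticks=%ld (odd=%d)\n", v, (int)(v & 0x1));
}

static int is_watching(void)		/* analogue of __rcu_is_watching() */
{
	return atomic_load(&dynticks) & 0x1;
}

int main(void)
{
	printf("watching? %d\n", is_watching());	/* 1 */
	eqs_enter();
	printf("watching? %d\n", is_watching());	/* 0 */
	eqs_exit();
	printf("watching? %d\n", is_watching());	/* 1 */
	return 0;
}
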
401 : :
402 : : /*
403 : : * Enter an RCU extended quiescent state, which can be either the
404 : : * idle loop or adaptive-tickless usermode execution.
405 : : */
406 : 0 : static void rcu_eqs_enter(bool user)
407 : : {
408 : : long long oldval;
409 : : struct rcu_dynticks *rdtp;
410 : :
411 : 11275724 : rdtp = this_cpu_ptr(&rcu_dynticks);
412 : 5637862 : oldval = rdtp->dynticks_nesting;
413 [ - + ][ # # ]: 5637862 : WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
[ - ]
414 [ + - ]: 5637820 : if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
415 : 5637820 : rdtp->dynticks_nesting = 0;
416 : : else
417 : 0 : rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
418 : 5637820 : rcu_eqs_enter_common(rdtp, oldval, user);
419 : 5637829 : }
420 : :
421 : : /**
422 : : * rcu_idle_enter - inform RCU that current CPU is entering idle
423 : : *
424 : : * Enter idle mode, in other words, -leave- the mode in which RCU
425 : : * read-side critical sections can occur. (Though RCU read-side
426 : : * critical sections can occur in irq handlers in idle, a possibility
427 : : * handled by irq_enter() and irq_exit().)
428 : : *
429 : : * We crowbar the ->dynticks_nesting field to zero to allow for
430 : : * the possibility of usermode upcalls having messed up our count
431 : : * of interrupt nesting level during the prior busy period.
432 : : */
433 : 0 : void rcu_idle_enter(void)
434 : : {
435 : : unsigned long flags;
436 : :
437 : : local_irq_save(flags);
438 : 5637794 : rcu_eqs_enter(false);
439 : 5637783 : rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0);
440 [ + - ]: 5637783 : local_irq_restore(flags);
441 : 5637836 : }
442 : : EXPORT_SYMBOL_GPL(rcu_idle_enter);
443 : :
444 : : #ifdef CONFIG_RCU_USER_QS
445 : : /**
446 : : * rcu_user_enter - inform RCU that we are resuming userspace.
447 : : *
448 : : * Enter RCU idle mode right before resuming userspace. No use of RCU
449 : : * is permitted between this call and rcu_user_exit(). This way the
450 : : * CPU doesn't need to maintain the tick for RCU maintenance purposes
451 : : * when the CPU runs in userspace.
452 : : */
453 : : void rcu_user_enter(void)
454 : : {
455 : : rcu_eqs_enter(1);
456 : : }
457 : : #endif /* CONFIG_RCU_USER_QS */
458 : :
459 : : /**
460 : : * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
461 : : *
462 : : * Exit from an interrupt handler, which might possibly result in entering
463 : : * idle mode, in other words, leaving the mode in which read-side critical
464 : : * sections can occur.
465 : : *
466 : : * This code assumes that the idle loop never does anything that might
467 : : * result in unbalanced calls to irq_enter() and irq_exit(). If your
468 : : * architecture violates this assumption, RCU will give you what you
469 : : * deserve, good and hard. But very infrequently and irreproducibly.
470 : : *
471 : : * Use things like work queues to work around this limitation.
472 : : *
473 : : * You have been warned.
474 : : */
475 : 0 : void rcu_irq_exit(void)
476 : : {
477 : : unsigned long flags;
478 : : long long oldval;
479 : : struct rcu_dynticks *rdtp;
480 : :
481 : : local_irq_save(flags);
482 : 21820500 : rdtp = this_cpu_ptr(&rcu_dynticks);
483 : : oldval = rdtp->dynticks_nesting;
484 : 10910250 : rdtp->dynticks_nesting--;
485 [ - + ][ # # ]: 10910250 : WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
[ - + ]
486 [ + + ]: 10911250 : if (rdtp->dynticks_nesting)
487 : : trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
488 : : else
489 : 2199693 : rcu_eqs_enter_common(rdtp, oldval, true);
490 : : rcu_sysidle_enter(rdtp, 1);
491 [ + - ]: 10911264 : local_irq_restore(flags);
492 : 10911312 : }
493 : :
494 : : /*
495 : : * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
496 : : *
497 : : * If the new value of the ->dynticks_nesting counter was previously zero,
498 : : * we really have exited idle, and must do the appropriate accounting.
499 : : * The caller must have disabled interrupts.
500 : : */
501 : 0 : static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
502 : : int user)
503 : : {
504 : 7837362 : smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
505 : 7837347 : atomic_inc(&rdtp->dynticks);
506 : : /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
507 : 7837098 : smp_mb__after_atomic_inc(); /* See above. */
508 [ - + ][ # # ]: 7837292 : WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
[ - ]
509 : : rcu_cleanup_after_idle(smp_processor_id());
510 : : trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
511 [ + + ][ - + ]: 7837246 : if (!user && !is_idle_task(current)) {
512 : 0 : struct task_struct *idle __maybe_unused =
513 : 0 : idle_task(smp_processor_id());
514 : :
515 : : trace_rcu_dyntick(TPS("Error on exit: not idle task"),
516 : : oldval, rdtp->dynticks_nesting);
517 : 0 : ftrace_dump(DUMP_ORIG);
518 [ # # ][ # # ]: 0 : WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
519 : : current->pid, current->comm,
520 : : idle->pid, idle->comm); /* must be idle task! */
521 : : }
522 : 7837246 : }
523 : :
524 : : /*
525 : : * Exit an RCU extended quiescent state, which can be either the
526 : : * idle loop or adaptive-tickless usermode execution.
527 : : */
528 : 0 : static void rcu_eqs_exit(bool user)
529 : : {
530 : : struct rcu_dynticks *rdtp;
531 : : long long oldval;
532 : :
533 : 11275768 : rdtp = this_cpu_ptr(&rcu_dynticks);
534 : 5637884 : oldval = rdtp->dynticks_nesting;
535 [ - + ][ # # ]: 5637884 : WARN_ON_ONCE(oldval < 0);
[ - + ]
536 [ - + ]: 5637893 : if (oldval & DYNTICK_TASK_NEST_MASK)
537 : 0 : rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
538 : : else
539 : 5637893 : rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
540 : 5637893 : rcu_eqs_exit_common(rdtp, oldval, user);
541 : 5637894 : }
542 : :
543 : : /**
544 : : * rcu_idle_exit - inform RCU that current CPU is leaving idle
545 : : *
546 : : * Exit idle mode, in other words, -enter- the mode in which RCU
547 : : * read-side critical sections can occur.
548 : : *
549 : : * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
550 : : * allow for the possibility of usermode upcalls messing up our count
551 : : * of interrupt nesting level during the busy period that is just
552 : : * now starting.
553 : : */
554 : 0 : void rcu_idle_exit(void)
555 : : {
556 : : unsigned long flags;
557 : :
558 : : local_irq_save(flags);
559 : 5637865 : rcu_eqs_exit(false);
560 : 5637892 : rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0);
561 [ - + ]: 5637892 : local_irq_restore(flags);
562 : 5637897 : }
563 : : EXPORT_SYMBOL_GPL(rcu_idle_exit);
564 : :
565 : : #ifdef CONFIG_RCU_USER_QS
566 : : /**
567 : : * rcu_user_exit - inform RCU that we are exiting userspace.
568 : : *
569 : : * Exit RCU idle mode while entering the kernel because it can
570 : : * run an RCU read-side critical section at any time.
571 : : */
572 : : void rcu_user_exit(void)
573 : : {
574 : : rcu_eqs_exit(1);
575 : : }
576 : : #endif /* CONFIG_RCU_USER_QS */
577 : :
578 : : /**
579 : : * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
580 : : *
581 : : * Enter an interrupt handler, which might possibly result in exiting
582 : : * idle mode, in other words, entering the mode in which read-side critical
583 : : * sections can occur.
584 : : *
585 : : * Note that the Linux kernel is fully capable of entering an interrupt
586 : : * handler that it never exits, for example when doing upcalls to
587 : : * user mode! This code assumes that the idle loop never does upcalls to
588 : : * user mode. If your architecture does do upcalls from the idle loop (or
589 : : * does anything else that results in unbalanced calls to the irq_enter()
590 : : * and irq_exit() functions), RCU will give you what you deserve, good
591 : : * and hard. But very infrequently and irreproducibly.
592 : : *
593 : : * Use things like work queues to work around this limitation.
594 : : *
595 : : * You have been warned.
596 : : */
597 : 0 : void rcu_irq_enter(void)
598 : : {
599 : : unsigned long flags;
600 : : struct rcu_dynticks *rdtp;
601 : : long long oldval;
602 : :
603 : : local_irq_save(flags);
604 : 21793324 : rdtp = this_cpu_ptr(&rcu_dynticks);
605 : 10896662 : oldval = rdtp->dynticks_nesting;
606 : 10896662 : rdtp->dynticks_nesting++;
607 [ - + ][ # # ]: 10896662 : WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
[ - + ]
608 [ + + ]: 10907254 : if (oldval)
609 : : trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
610 : : else
611 : 2199416 : rcu_eqs_exit_common(rdtp, oldval, true);
612 : : rcu_sysidle_exit(rdtp, 1);
613 [ + - ]: 10907228 : local_irq_restore(flags);
614 : 10903311 : }
615 : :
616 : : /**
617 : : * rcu_nmi_enter - inform RCU of entry to NMI context
618 : : *
619 : : * If the CPU was idle with dynamic ticks active, and there is no
620 : : * irq handler running, this updates rdtp->dynticks_nmi to let the
621 : : * RCU grace-period handling know that the CPU is active.
622 : : */
623 : 0 : void rcu_nmi_enter(void)
624 : : {
625 : 0 : struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
626 : :
627 [ # # ][ # # ]: 0 : if (rdtp->dynticks_nmi_nesting == 0 &&
628 : 0 : (atomic_read(&rdtp->dynticks) & 0x1))
629 : 0 : return;
630 : 0 : rdtp->dynticks_nmi_nesting++;
631 : 0 : smp_mb__before_atomic_inc(); /* Force delay from prior write. */
632 : 0 : atomic_inc(&rdtp->dynticks);
633 : : /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
634 : 0 : smp_mb__after_atomic_inc(); /* See above. */
635 [ # # ][ # # ]: 0 : WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
[ # # ]
636 : : }
637 : :
638 : : /**
639 : : * rcu_nmi_exit - inform RCU of exit from NMI context
640 : : *
641 : : * If the CPU was idle with dynamic ticks active, and there is no
642 : : * irq handler running, this updates rdtp->dynticks_nmi to let the
643 : : * RCU grace-period handling know that the CPU is no longer active.
644 : : */
645 : 0 : void rcu_nmi_exit(void)
646 : : {
647 : 0 : struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
648 : :
649 [ # # ][ # # ]: 0 : if (rdtp->dynticks_nmi_nesting == 0 ||
650 : 0 : --rdtp->dynticks_nmi_nesting != 0)
651 : 0 : return;
652 : : /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
653 : 0 : smp_mb__before_atomic_inc(); /* See above. */
654 : 0 : atomic_inc(&rdtp->dynticks);
655 : 0 : smp_mb__after_atomic_inc(); /* Force delay to next write. */
656 [ # # ][ # # ]: 0 : WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
[ # # ]
657 : : }
658 : :
659 : : /**
660 : : * __rcu_is_watching - are RCU read-side critical sections safe?
661 : : *
662 : : * Return true if RCU is watching the running CPU, which means that
663 : : * this CPU can safely enter RCU read-side critical sections. Unlike
664 : : * rcu_is_watching(), the caller of __rcu_is_watching() must have at
665 : : * least disabled preemption.
666 : : */
667 : 0 : bool notrace __rcu_is_watching(void)
668 : : {
669 : 31931304 : return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
670 : : }
671 : :
672 : : /**
673 : : * rcu_is_watching - see if RCU thinks that the current CPU is idle
674 : : *
675 : : * If the current CPU is in its idle loop and is neither in an interrupt
676 : : * nor an NMI handler, return true.
677 : : */
678 : 0 : bool notrace rcu_is_watching(void)
679 : : {
680 : : int ret;
681 : :
682 : 15965663 : preempt_disable();
683 : : ret = __rcu_is_watching();
684 : 15965652 : preempt_enable();
685 : 0 : return ret;
686 : : }
687 : : EXPORT_SYMBOL_GPL(rcu_is_watching);
688 : :
689 : : #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
690 : :
691 : : /*
692 : : * Is the current CPU online? Disable preemption to avoid false positives
693 : : * that could otherwise happen due to the current CPU number being sampled,
694 : : * this task being preempted, its old CPU being taken offline, resuming
695 : : * on some other CPU, then determining that its old CPU is now offline.
696 : : * It is OK to use RCU on an offline processor during initial boot, hence
697 : : * the check for rcu_scheduler_fully_active. Note also that it is OK
698 : : * for a CPU coming online to use RCU for one jiffy prior to marking itself
699 : : * online in the cpu_online_mask. Similarly, it is OK for a CPU going
700 : : * offline to continue to use RCU for one jiffy after marking itself
701 : : * offline in the cpu_online_mask. This leniency is necessary given the
702 : : * non-atomic nature of the online and offline processing, for example,
703 : : * the fact that a CPU enters the scheduler after completing the CPU_DYING
704 : : * notifiers.
705 : : *
706 : : * This is also why RCU internally marks CPUs online during the
707 : : * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
708 : : *
709 : : * Disable checking if in an NMI handler because we cannot safely report
710 : : * errors from NMI handlers anyway.
711 : : */
712 : : bool rcu_lockdep_current_cpu_online(void)
713 : : {
714 : : struct rcu_data *rdp;
715 : : struct rcu_node *rnp;
716 : : bool ret;
717 : :
718 : : if (in_nmi())
719 : : return 1;
720 : : preempt_disable();
721 : : rdp = this_cpu_ptr(&rcu_sched_data);
722 : : rnp = rdp->mynode;
723 : : ret = (rdp->grpmask & rnp->qsmaskinit) ||
724 : : !rcu_scheduler_fully_active;
725 : : preempt_enable();
726 : : return ret;
727 : : }
728 : : EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
729 : :
730 : : #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
731 : :
732 : : /**
733 : : * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
734 : : *
735 : : * If the current CPU is idle or running at a first-level (not nested)
736 : : * interrupt from idle, return true. The caller must have at least
737 : : * disabled preemption.
738 : : */
739 : : static int rcu_is_cpu_rrupt_from_idle(void)
740 : : {
741 : 2415686 : return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
742 : : }
743 : :
744 : : /*
745 : : * Snapshot the specified CPU's dynticks counter so that we can later
746 : : * credit it with an implicit quiescent state. Return 1 if this CPU
747 : : * is in dynticks idle mode, which is an extended quiescent state.
748 : : */
749 : 0 : static int dyntick_save_progress_counter(struct rcu_data *rdp,
750 : : bool *isidle, unsigned long *maxj)
751 : : {
752 : 372722 : rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
753 : : rcu_sysidle_check_cpu(rdp, isidle, maxj);
754 : 186361 : return (rdp->dynticks_snap & 0x1) == 0;
755 : : }
756 : :
757 : : /*
758 : : * Return true if the specified CPU has passed through a quiescent
759 : : * state by virtue of being in or having passed through a dynticks
760 : : * idle state since the last call to dyntick_save_progress_counter()
761 : : * for this same CPU, or by virtue of having been offline.
762 : : */
763 : 0 : static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
764 : : bool *isidle, unsigned long *maxj)
765 : : {
766 : : unsigned int curr;
767 : : unsigned int snap;
768 : :
769 : 112882 : curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
770 : 56441 : snap = (unsigned int)rdp->dynticks_snap;
771 : :
772 : : /*
773 : : * If the CPU passed through or entered a dynticks idle phase with
774 : : * no active irq/NMI handlers, then we can safely pretend that the CPU
775 : : * already acknowledged the request to pass through a quiescent
776 : : * state. Either way, that CPU cannot possibly be in an RCU
777 : : * read-side critical section that started before the beginning
778 : : * of the current RCU grace period.
779 : : */
780 [ + + ][ + + ]: 56441 : if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
781 : : trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
782 : 13429 : rdp->dynticks_fqs++;
783 : 13429 : return 1;
784 : : }
785 : :
786 : : /*
787 : : * Check for the CPU being offline, but only if the grace period
788 : : * is old enough. We don't need to worry about the CPU changing
789 : : * state: If we see it offline even once, it has been through a
790 : : * quiescent state.
791 : : *
792 : : * The reason for insisting that the grace period be at least
793 : : * one jiffy old is that CPUs that are not quite online and that
794 : : * have just gone offline can still execute RCU read-side critical
795 : : * sections.
796 : : */
797 [ + + ]: 43012 : if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
798 : : return 0; /* Grace period is not old enough. */
799 : 29190 : barrier();
800 [ - + ]: 29190 : if (cpu_is_offline(rdp->cpu)) {
801 : : trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
802 : 0 : rdp->offline_fqs++;
803 : 0 : return 1;
804 : : }
805 : :
806 : : /*
807 : : * There is a possibility that a CPU in adaptive-ticks state
808 : : * might run in the kernel with the scheduling-clock tick disabled
809 : : * for an extended time period. Invoke rcu_kick_nohz_cpu() to
810 : : * force the CPU to restart the scheduling-clock tick in this
811 : : * CPU is in this state.
812 : : */
813 : : rcu_kick_nohz_cpu(rdp->cpu);
814 : :
815 : : return 0;
816 : : }
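
The test above boils down to two checks on the snapshot taken earlier by dyntick_save_progress_counter(): either the counter is currently even (the CPU is idle right now), or it has advanced by at least two (the CPU went through idle at least once). A minimal sketch with a hypothetical helper, handling wraparound in the same spirit as UINT_CMP_GE():

#include <stdio.h>

/* Sketch only: has this "CPU" been through an extended quiescent state
 * since its dynticks counter was snapshotted? */
static int dynticks_saw_qs(unsigned int curr, unsigned int snap)
{
	return (curr & 0x1) == 0 ||	/* even: idle right now, or ...       */
	       (int)(curr - snap) >= 2;	/* ... idled at least once since snap */
}

int main(void)
{
	printf("%d\n", dynticks_saw_qs(7, 7));	/* 0: never left the kernel */
	printf("%d\n", dynticks_saw_qs(9, 7));	/* 1: entered and left idle */
	printf("%d\n", dynticks_saw_qs(8, 7));	/* 1: idle right now        */
	return 0;
}
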
817 : :
818 : : static void record_gp_stall_check_time(struct rcu_state *rsp)
819 : : {
820 : 202052 : unsigned long j = ACCESS_ONCE(jiffies);
821 : :
822 : 202052 : rsp->gp_start = j;
823 : 202052 : smp_wmb(); /* Record start time before stall time. */
824 : 202052 : rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
825 : : }
826 : :
827 : : /*
828 : : * Dump stacks of all tasks running on stalled CPUs. This is a fallback
829 : : * for architectures that do not implement trigger_all_cpu_backtrace().
830 : : * The NMI-triggered stack traces are more accurate because they are
831 : : * printed by the target CPU.
832 : : */
833 : : static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
834 : : {
835 : : int cpu;
836 : : unsigned long flags;
837 : : struct rcu_node *rnp;
838 : :
839 : : rcu_for_each_leaf_node(rsp, rnp) {
840 : : raw_spin_lock_irqsave(&rnp->lock, flags);
841 : : if (rnp->qsmask != 0) {
842 : : for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
843 : : if (rnp->qsmask & (1UL << cpu))
844 : : dump_cpu_task(rnp->grplo + cpu);
845 : : }
846 : : raw_spin_unlock_irqrestore(&rnp->lock, flags);
847 : : }
848 : : }
849 : :
850 : 0 : static void print_other_cpu_stall(struct rcu_state *rsp)
851 : : {
852 : : int cpu;
853 : : long delta;
854 : : unsigned long flags;
855 : : int ndetected = 0;
856 : : struct rcu_node *rnp = rcu_get_root(rsp);
857 : : long totqlen = 0;
858 : :
859 : : /* Only let one CPU complain about others per time interval. */
860 : :
861 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
862 : 0 : delta = jiffies - rsp->jiffies_stall;
863 [ # # ][ # # ]: 0 : if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
864 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
865 : 0 : return;
866 : : }
867 : 0 : rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
868 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
869 : :
870 : : /*
871 : : * OK, time to rat on our buddy...
872 : : * See Documentation/RCU/stallwarn.txt for info on how to debug
873 : : * RCU CPU stall warnings.
874 : : */
875 : 0 : pr_err("INFO: %s detected stalls on CPUs/tasks:",
876 : : rsp->name);
877 : : print_cpu_stall_info_begin();
878 [ # # ]: 0 : rcu_for_each_leaf_node(rsp, rnp) {
879 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
880 : : ndetected += rcu_print_task_stall(rnp);
881 [ # # ]: 0 : if (rnp->qsmask != 0) {
882 [ # # ]: 0 : for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
883 [ # # ]: 0 : if (rnp->qsmask & (1UL << cpu)) {
884 : 0 : print_cpu_stall_info(rsp,
885 : : rnp->grplo + cpu);
886 : 0 : ndetected++;
887 : : }
888 : : }
889 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
890 : : }
891 : :
892 : : /*
893 : : * Now rat on any tasks that got kicked up to the root rcu_node
894 : : * due to CPU offlining.
895 : : */
896 : : rnp = rcu_get_root(rsp);
897 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
898 : : ndetected += rcu_print_task_stall(rnp);
899 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
900 : :
901 : : print_cpu_stall_info_end();
902 [ # # ]: 0 : for_each_possible_cpu(cpu)
903 : 0 : totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
904 : 0 : pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
905 : : smp_processor_id(), (long)(jiffies - rsp->gp_start),
906 : : rsp->gpnum, rsp->completed, totqlen);
907 [ # # ]: 0 : if (ndetected == 0)
908 : 0 : pr_err("INFO: Stall ended before state dump start\n");
909 : : else if (!trigger_all_cpu_backtrace())
910 : : rcu_dump_cpu_stacks(rsp);
911 : :
912 : : /* Complain about tasks blocking the grace period. */
913 : :
914 : : rcu_print_detail_task_stall(rsp);
915 : :
916 : 0 : force_quiescent_state(rsp); /* Kick them all. */
917 : : }
918 : :
919 : : /*
920 : : * This function really isn't for public consumption, but RCU is special in
921 : : * that context switches can allow the state machine to make progress.
922 : : */
923 : : extern void resched_cpu(int cpu);
924 : :
925 : 0 : static void print_cpu_stall(struct rcu_state *rsp)
926 : : {
927 : : int cpu;
928 : : unsigned long flags;
929 : : struct rcu_node *rnp = rcu_get_root(rsp);
930 : : long totqlen = 0;
931 : :
932 : : /*
933 : : * OK, time to rat on ourselves...
934 : : * See Documentation/RCU/stallwarn.txt for info on how to debug
935 : : * RCU CPU stall warnings.
936 : : */
937 : 0 : pr_err("INFO: %s self-detected stall on CPU", rsp->name);
938 : : print_cpu_stall_info_begin();
939 : 0 : print_cpu_stall_info(rsp, smp_processor_id());
940 : : print_cpu_stall_info_end();
941 [ # # ]: 0 : for_each_possible_cpu(cpu)
942 : 0 : totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
943 : 0 : pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
944 : : jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
945 : : if (!trigger_all_cpu_backtrace())
946 : : dump_stack();
947 : :
948 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
949 [ # # ]: 0 : if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
950 : 0 : rsp->jiffies_stall = jiffies +
951 : 0 : 3 * rcu_jiffies_till_stall_check() + 3;
952 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
953 : :
954 : : /*
955 : : * Attempt to revive the RCU machinery by forcing a context switch.
956 : : *
957 : : * A context switch would normally allow the RCU state machine to make
958 : : * progress and it could be we're stuck in kernel space without context
959 : : * switches for an entirely unreasonable amount of time.
960 : : */
961 : 0 : resched_cpu(smp_processor_id());
962 : 0 : }
963 : :
964 : 14993191 : static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
965 : : {
966 : : unsigned long completed;
967 : : unsigned long gpnum;
968 : : unsigned long gps;
969 : : unsigned long j;
970 : : unsigned long js;
971 : : struct rcu_node *rnp;
972 : :
973 [ + + ][ + + ]: 14993191 : if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
974 : : return;
975 : 785708 : j = ACCESS_ONCE(jiffies);
976 : :
977 : : /*
978 : : * Lots of memory barriers to reject false positives.
979 : : *
980 : : * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall,
981 : : * then rsp->gp_start, and finally rsp->completed. These values
982 : : * are updated in the opposite order with memory barriers (or
983 : : * equivalent) during grace-period initialization and cleanup.
984 : : * Now, a false positive can occur if we get a new value of
985 : : * rsp->gp_start and an old value of rsp->jiffies_stall. But given
986 : : * the memory barriers, the only way that this can happen is if one
987 : : * grace period ends and another starts between these two fetches.
988 : : * Detect this by comparing rsp->completed with the previous fetch
989 : : * from rsp->gpnum.
990 : : *
991 : : * Given this check, comparisons of jiffies, rsp->jiffies_stall,
992 : : * and rsp->gp_start suffice to forestall false positives.
993 : : */
994 : 785708 : gpnum = ACCESS_ONCE(rsp->gpnum);
995 : 785708 : smp_rmb(); /* Pick up ->gpnum first... */
996 : 785282 : js = ACCESS_ONCE(rsp->jiffies_stall);
997 : 785282 : smp_rmb(); /* ...then ->jiffies_stall before the rest... */
998 : 785380 : gps = ACCESS_ONCE(rsp->gp_start);
999 : 785380 : smp_rmb(); /* ...and finally ->gp_start before ->completed. */
1000 : 785397 : completed = ACCESS_ONCE(rsp->completed);
1001 [ + ][ - + ]: 785397 : if (ULONG_CMP_GE(completed, gpnum) ||
1002 [ # # ]: 0 : ULONG_CMP_LT(j, js) ||
1003 : 0 : ULONG_CMP_GE(gps, js))
1004 : : return; /* No stall or GP completed since entering function. */
1005 : 0 : rnp = rdp->mynode;
1006 [ # # ][ # # ]: 0 : if (rcu_gp_in_progress(rsp) &&
1007 : 0 : (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) {
1008 : :
1009 : : /* We haven't checked in, so go dump stack. */
1010 : 0 : print_cpu_stall(rsp);
1011 : :
1012 [ # # ][ # # ]: 0 : } else if (rcu_gp_in_progress(rsp) &&
1013 : 0 : ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
1014 : :
1015 : : /* They had a few time units to dump stack, so complain. */
1016 : 0 : print_other_cpu_stall(rsp);
1017 : : }
1018 : : }
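
The comparisons in the stall checks above (and in cpu_needs_another_gp() earlier) go through ULONG_CMP_GE()/ULONG_CMP_LT() because jiffies and the grace-period counters are free-running and may wrap. A sketch of the underlying idea, using illustrative macro names and the usual signed-difference trick:

#include <stdio.h>

/* Wraparound-tolerant ordering of free-running counters, in the spirit of
 * ULONG_CMP_GE() and ULONG_CMP_LT().  Macro names here are made up. */
#define SEQ_GE(a, b)	((long)((a) - (b)) >= 0)
#define SEQ_LT(a, b)	((long)((a) - (b)) < 0)

int main(void)
{
	unsigned long before_wrap = (unsigned long)-2;	/* close to ULONG_MAX  */
	unsigned long after_wrap = 3;			/* counter has wrapped */

	printf("%d\n", SEQ_GE(after_wrap, before_wrap));	/* 1: "later"          */
	printf("%d\n", SEQ_LT(before_wrap, after_wrap));	/* 1: "earlier"        */
	printf("%d\n", after_wrap > before_wrap);		/* 0: naive test fails */
	return 0;
}
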
1019 : :
1020 : : /**
1021 : : * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
1022 : : *
1023 : : * Set the stall-warning timeout way off into the future, thus preventing
1024 : : * any RCU CPU stall-warning messages from appearing in the current set of
1025 : : * RCU grace periods.
1026 : : *
1027 : : * The caller must disable hard irqs.
1028 : : */
1029 : 0 : void rcu_cpu_stall_reset(void)
1030 : : {
1031 : : struct rcu_state *rsp;
1032 : :
1033 [ # # ]: 0 : for_each_rcu_flavor(rsp)
1034 : 0 : rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
1035 : 0 : }
1036 : :
1037 : : /*
1038 : : * Initialize the specified rcu_data structure's callback list to empty.
1039 : : */
1040 : : static void init_callback_list(struct rcu_data *rdp)
1041 : : {
1042 : : int i;
1043 : :
1044 : : if (init_nocb_callback_list(rdp))
1045 : : return;
1046 : 0 : rdp->nxtlist = NULL;
1047 [ # # ][ # # ]: 0 : for (i = 0; i < RCU_NEXT_SIZE; i++)
[ # # ][ # # ]
1048 : 0 : rdp->nxttail[i] = &rdp->nxtlist;
1049 : : }
1050 : :
1051 : : /*
1052 : : * Determine the value that ->completed will have at the end of the
1053 : : * next subsequent grace period. This is used to tag callbacks so that
1054 : : * a CPU can invoke callbacks in a timely fashion even if that CPU has
1055 : : * been dyntick-idle for an extended period with callbacks under the
1056 : : * influence of RCU_FAST_NO_HZ.
1057 : : *
1058 : : * The caller must hold rnp->lock with interrupts disabled.
1059 : : */
1060 : : static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1061 : : struct rcu_node *rnp)
1062 : : {
1063 : : /*
1064 : : * If RCU is idle, we just wait for the next grace period.
1065 : : * But we can only be sure that RCU is idle if we are looking
1066 : : * at the root rcu_node structure -- otherwise, a new grace
1067 : : * period might have started, but just not yet gotten around
1068 : : * to initializing the current non-root rcu_node structure.
1069 : : */
1070 [ + - ][ + + ]: 5060814 : if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
[ + - ][ + + ]
[ + + ][ + ]
1071 : 2463136 : return rnp->completed + 1;
1072 : :
1073 : : /*
1074 : : * Otherwise, wait for a possible partial grace period and
1075 : : * then the subsequent full grace period.
1076 : : */
1077 : 1175288 : return rnp->completed + 2;
1078 : : }
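
Under the rule spelled out above, the ->completed value that newly queued callbacks must wait for depends only on the counters and on whether we are looking at the root rcu_node. A hedged sketch (hypothetical function, no locking):

#include <stdio.h>

/*
 * Sketch of the rcu_cbs_completed() rule: callbacks tagged while looking at
 * an idle root wait only for the next grace period (+1); otherwise they also
 * allow for a possibly in-flight partial grace period (+2).
 */
static unsigned long cbs_completed(unsigned long gpnum, unsigned long completed,
				   int looking_at_root)
{
	if (looking_at_root && gpnum == completed)
		return completed + 1;	/* RCU provably idle */
	return completed + 2;		/* a partial GP may already be running */
}

int main(void)
{
	printf("%lu\n", cbs_completed(4, 4, 1));	/* 5: root idle        */
	printf("%lu\n", cbs_completed(5, 4, 0));	/* 6: wait out partial */
	return 0;
}
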
1079 : :
1080 : : /*
1081 : : * Trace-event helper function for rcu_start_future_gp() and
1082 : : * rcu_nocb_wait_gp().
1083 : : */
1084 : : static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1085 : : unsigned long c, const char *s)
1086 : : {
1087 : : trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
1088 : : rnp->completed, c, rnp->level,
1089 : : rnp->grplo, rnp->grphi, s);
1090 : : }
1091 : :
1092 : : /*
1093 : : * Start some future grace period, as needed to handle newly arrived
1094 : : * callbacks. The required future grace periods are recorded in each
1095 : : * rcu_node structure's ->need_future_gp field.
1096 : : *
1097 : : * The caller must hold the specified rcu_node structure's ->lock.
1098 : : */
1099 : : static unsigned long __maybe_unused
1100 : 0 : rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1101 : : {
1102 : : unsigned long c;
1103 : : int i;
1104 : 1212701 : struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1105 : :
1106 : : /*
1107 : : * Pick up grace-period number for new callbacks. If this
1108 : : * grace period is already marked as needed, return to the caller.
1109 : : */
1110 : : c = rcu_cbs_completed(rdp->rsp, rnp);
1111 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1112 [ + + ]: 1212701 : if (rnp->need_future_gp[c & 0x1]) {
1113 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1114 : : return c;
1115 : : }
1116 : :
1117 : : /*
1118 : : * If either this rcu_node structure or the root rcu_node structure
1119 : : * believe that a grace period is in progress, then we must wait
1120 : : * for the one following, which is in "c". Because our request
1121 : : * will be noticed at the end of the current grace period, we don't
1122 : : * need to explicitly start one.
1123 : : */
1124 [ + + ][ - + ]: 202052 : if (rnp->gpnum != rnp->completed ||
1125 : 321 : ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
1126 : 201731 : rnp->need_future_gp[c & 0x1]++;
1127 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1128 : 201731 : return c;
1129 : : }
1130 : :
1131 : : /*
1132 : : * There might be no grace period in progress. If we don't already
1133 : : * hold it, acquire the root rcu_node structure's lock in order to
1134 : : * start one (if needed).
1135 : : */
1136 [ - + ]: 321 : if (rnp != rnp_root)
1137 : 0 : raw_spin_lock(&rnp_root->lock);
1138 : :
1139 : : /*
1140 : : * Get a new grace-period number. If there really is no grace
1141 : : * period in progress, it will be smaller than the one we obtained
1142 : : * earlier. Adjust callbacks as needed. Note that even no-CBs
1143 : : * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
1144 : : */
1145 : 1213022 : c = rcu_cbs_completed(rdp->rsp, rnp_root);
1146 [ + + ]: 1284 : for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
1147 [ - + ]: 963 : if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
1148 : 0 : rdp->nxtcompleted[i] = c;
1149 : :
1150 : : /*
1151 : : * If the need for the required grace period is already
1152 : : * recorded, trace and leave.
1153 : : */
1154 [ + - ]: 321 : if (rnp_root->need_future_gp[c & 0x1]) {
1155 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
1156 : : goto unlock_out;
1157 : : }
1158 : :
1159 : : /* Record the need for the future grace period. */
1160 : 321 : rnp_root->need_future_gp[c & 0x1]++;
1161 : :
1162 : : /* If a grace period is not already in progress, start one. */
1163 [ + - ]: 321 : if (rnp_root->gpnum != rnp_root->completed) {
1164 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1165 : : } else {
1166 : : trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1167 : 321 : rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1168 : : }
1169 : : unlock_out:
1170 [ - + ]: 321 : if (rnp != rnp_root)
1171 : : raw_spin_unlock(&rnp_root->lock);
1172 : 321 : return c;
1173 : : }
1174 : :
1175 : : /*
1176 : : * Clean up any old requests for the just-ended grace period. Also return
1177 : : * whether any additional grace periods have been requested. Also invoke
1178 : : * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
1179 : : * waiting for this grace period to complete.
1180 : : */
1181 : : static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1182 : : {
1183 : 202052 : int c = rnp->completed;
1184 : : int needmore;
1185 : 202052 : struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1186 : :
1187 : : rcu_nocb_gp_cleanup(rsp, rnp);
1188 : 202052 : rnp->need_future_gp[c & 0x1] = 0;
1189 : : needmore = rnp->need_future_gp[(c + 1) & 0x1];
1190 : : trace_rcu_future_gp(rnp, rdp, c,
1191 : : needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1192 : : return needmore;
1193 : : }
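
Since at most two future grace periods can be outstanding, the ->need_future_gp[] bookkeeping used by rcu_start_future_gp() and rcu_future_gp_cleanup() reduces to a two-slot array indexed by the low bit of the awaited ->completed value. A stripped-down sketch (hypothetical names, no rcu_node tree or locking):

#include <stdio.h>

static int need_future_gp[2];

static void request_future_gp(unsigned long c)	/* c: completed value waited for */
{
	need_future_gp[c & 0x1]++;
}

static int future_gp_cleanup(unsigned long c)	/* c: grace period that just ended */
{
	need_future_gp[c & 0x1] = 0;		/* those requests are satisfied */
	return need_future_gp[(c + 1) & 0x1];	/* is more work already queued? */
}

int main(void)
{
	request_future_gp(5);			/* wait for completed == 5 */
	request_future_gp(6);			/* and one waiter for 6    */
	printf("%d\n", future_gp_cleanup(5));	/* 1: GP 6 still wanted    */
	printf("%d\n", future_gp_cleanup(6));	/* 0: nothing pending      */
	return 0;
}
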
1194 : :
1195 : : /*
1196 : : * If there is room, assign a ->completed number to any callbacks on
1197 : : * this CPU that have not already been assigned. Also accelerate any
1198 : : * callbacks that were previously assigned a ->completed number that has
1199 : : * since proven to be too conservative, which can happen if callbacks get
1200 : : * assigned a ->completed number while RCU is idle, but with reference to
1201 : : * a non-root rcu_node structure. This function is idempotent, so it does
1202 : : * not hurt to call it repeatedly.
1203 : : *
1204 : : * The caller must hold rnp->lock with interrupts disabled.
1205 : : */
1206 : 0 : static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1207 : : struct rcu_data *rdp)
1208 : : {
1209 : : unsigned long c;
1210 : : int i;
1211 : :
1212 : : /* If the CPU has no callbacks, nothing to do. */
1213 [ + - ][ + ]: 1422390 : if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1214 : : return;
1215 : :
1216 : : /*
1217 : : * Starting from the sublist containing the callbacks most
1218 : : * recently assigned a ->completed number and working down, find the
1219 : : * first sublist that is not assignable to an upcoming grace period.
1220 : : * Such a sublist has something in it (first two tests) and has
1221 : : * a ->completed number assigned that will complete sooner than
1222 : : * the ->completed number for newly arrived callbacks (last test).
1223 : : *
1224 : : * The key point is that any later sublist can be assigned the
1225 : : * same ->completed number as the newly arrived callbacks, which
1226 : : * means that the callbacks in any of these later sublist can be
1227 : : * grouped into a single sublist, whether or not they have already
1228 : : * been assigned a ->completed number.
1229 : : */
1230 : : c = rcu_cbs_completed(rsp, rnp);
1231 [ + + ]: 3100887 : for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
1232 [ + + ][ + + ]: 2425402 : if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
1233 : 1238453 : !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
1234 : : break;
1235 : :
1236 : : /*
1237 : : * If there is no sublist for unassigned callbacks, leave.
1238 : : * At the same time, advance "i" one sublist, so that "i" will
1239 : : * index into the sublist where all the remaining callbacks should
1240 : : * be grouped into.
1241 : : */
1242 [ + - ]: 1212701 : if (++i >= RCU_NEXT_TAIL)
1243 : : return;
1244 : :
1245 : : /*
1246 : : * Assign all subsequent callbacks' ->completed number to the next
1247 : : * full grace period and group them all in the sublist initially
1248 : : * indexed by "i".
1249 : : */
1250 [ + + ]: 4313588 : for (; i <= RCU_NEXT_TAIL; i++) {
1251 : 3100887 : rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
1252 : 3100887 : rdp->nxtcompleted[i] = c;
1253 : : }
1254 : : /* Record any needed additional grace periods. */
1255 : 1212701 : rcu_start_future_gp(rnp, rdp);
1256 : :
1257 : : /* Trace depending on how much we were able to accelerate. */
1258 : : if (!*rdp->nxttail[RCU_WAIT_TAIL])
1259 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1260 : : else
1261 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
1262 : : }
1263 : :
1264 : : /*
1265 : : * Move any callbacks whose grace period has completed to the
1266 : : * RCU_DONE_TAIL sublist, then compact the remaining sublists and
1267 : : * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1268 : : * sublist. This function is idempotent, so it does not hurt to
1269 : : * invoke it repeatedly. As long as it is not invoked -too- often...
1270 : : *
1271 : : * The caller must hold rnp->lock with interrupts disabled.
1272 : : */
1273 : 0 : static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1274 : : struct rcu_data *rdp)
1275 : : {
1276 : : int i, j;
1277 : :
1278 : : /* If the CPU has no callbacks, nothing to do. */
1279 [ + - ][ + ]: 940429 : if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1280 : 0 : return;
1281 : :
1282 : : /*
1283 : : * Find all callbacks whose ->completed numbers indicate that they
1284 : : * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
1285 : : */
1286 [ + + ]: 1803548 : for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
1287 [ + + ]: 1053549 : if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
1288 : : break;
1289 : 313729 : rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
1290 : : }
1291 : : /* Clean up any sublist tail pointers that were misordered above. */
1292 [ + + ]: 1063728 : for (j = RCU_WAIT_TAIL; j < i; j++)
1293 : 313729 : rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
1294 : :
1295 : : /* Copy down callbacks to fill in empty sublists. */
1296 [ + + ]: 1032239 : for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
1297 [ + + ]: 756381 : if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
1298 : : break;
1299 : 282240 : rdp->nxttail[j] = rdp->nxttail[i];
1300 : 282240 : rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
1301 : : }
1302 : :
1303 : : /* Classify any remaining callbacks. */
1304 : 749999 : rcu_accelerate_cbs(rsp, rnp, rdp);
1305 : : }
1306 : :
1307 : : /*
1308 : : * Update CPU-local rcu_data state to record the beginnings and ends of
1309 : : * grace periods. The caller must hold the ->lock of the leaf rcu_node
1310 : : * structure corresponding to the current CPU, and must have irqs disabled.
1311 : : */
1312 : 0 : static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1313 : : {
1314 : : /* Handle the ends of any preceding grace periods first. */
1315 [ + + ]: 696237 : if (rdp->completed == rnp->completed) {
1316 : :
1317 : : /* No grace period end, so just accelerate recent callbacks. */
1318 : 293678 : rcu_accelerate_cbs(rsp, rnp, rdp);
1319 : :
1320 : : } else {
1321 : :
1322 : : /* Advance callbacks. */
1323 : 402559 : rcu_advance_cbs(rsp, rnp, rdp);
1324 : :
1325 : : /* Remember that we saw this grace-period completion. */
1326 : 402559 : rdp->completed = rnp->completed;
1327 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1328 : : }
1329 : :
1330 [ + + ]: 1392474 : if (rdp->gpnum != rnp->gpnum) {
1331 : : /*
1332 : : * If the current grace period is waiting for this CPU,
1333 : : * set up to detect a quiescent state, otherwise don't
1334 : : * go looking for one.
1335 : : */
1336 : 402101 : rdp->gpnum = rnp->gpnum;
1337 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1338 : 402101 : rdp->passed_quiesce = 0;
1339 : 402101 : rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1340 : : zero_cpu_stall_ticks(rdp);
1341 : : }
1342 : 696237 : }
1343 : :
1344 : 0 : static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1345 : : {
1346 : : unsigned long flags;
1347 : : struct rcu_node *rnp;
1348 : :
1349 : : local_irq_save(flags);
1350 : 3798900 : rnp = rdp->mynode;
1351 [ + + ][ + + ]: 3798900 : if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
1352 [ + + ]: 295078 : rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
1353 : 295430 : !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1354 [ + + ]: 3506415 : local_irq_restore(flags);
1355 : 3800544 : return;
1356 : : }
1357 : 292133 : __note_gp_changes(rsp, rnp, rdp);
1358 : 292133 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1359 : : }
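
note_gp_changes() runs on hot paths, so it first compares the cheap-to-read counters without the lock and only then tries the lock, deferring to a later call if it is busy. A pthreads sketch of that check-then-trylock pattern (illustrative names; real code would use ACCESS_ONCE()/atomics for the unlocked reads):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long node_gpnum, node_completed;	/* "rnp" side */
static unsigned long my_gpnum, my_completed;		/* "rdp" side */

static void note_changes(void)
{
	if ((my_gpnum == node_gpnum && my_completed == node_completed) ||
	    pthread_mutex_trylock(&node_lock) != 0)
		return;			/* nothing new, or lock busy: catch up later */
	my_gpnum = node_gpnum;		/* catch up under the lock */
	my_completed = node_completed;
	pthread_mutex_unlock(&node_lock);
}

int main(void)
{
	node_gpnum = 1;			/* a new grace period has begun */
	note_changes();
	printf("caught up to gp %lu\n", my_gpnum);
	return 0;
}
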
1360 : :
1361 : : /*
1362 : : * Initialize a new grace period. Return 0 if no grace period required.
1363 : : */
1364 : 0 : static int rcu_gp_init(struct rcu_state *rsp)
1365 : : {
1366 : : struct rcu_data *rdp;
1367 : 202052 : struct rcu_node *rnp = rcu_get_root(rsp);
1368 : :
1369 : : rcu_bind_gp_kthread();
1370 : 202052 : raw_spin_lock_irq(&rnp->lock);
1371 [ - + ]: 202052 : if (rsp->gp_flags == 0) {
1372 : : /* Spurious wakeup, tell caller to go back to sleep. */
1373 : : raw_spin_unlock_irq(&rnp->lock);
1374 : 0 : return 0;
1375 : : }
1376 : 202052 : rsp->gp_flags = 0; /* Clear all flags: New grace period. */
1377 : :
1378 [ - + ][ # # ]: 202052 : if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
[ # # ][ - + ]
1379 : : /*
1380 : : * Grace period already in progress, don't start another.
1381 : : * Not supposed to be able to happen.
1382 : : */
1383 : : raw_spin_unlock_irq(&rnp->lock);
1384 : 0 : return 0;
1385 : : }
1386 : :
1387 : : /* Advance to a new grace period and initialize state. */
1388 : : record_gp_stall_check_time(rsp);
1389 : 202052 : smp_wmb(); /* Record GP times before starting GP. */
1390 : 202052 : rsp->gpnum++;
1391 : : trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
1392 : : raw_spin_unlock_irq(&rnp->lock);
1393 : :
1394 : : /* Exclude any concurrent CPU-hotplug operations. */
1395 : 202052 : mutex_lock(&rsp->onoff_mutex);
1396 : :
1397 : : /*
1398 : : * Set the quiescent-state-needed bits in all the rcu_node
1399 : : * structures for all currently online CPUs in breadth-first order,
1400 : : * starting from the root rcu_node structure, relying on the layout
1401 : : * of the tree within the rsp->node[] array. Note that other CPUs
1402 : : * will access only the leaves of the hierarchy, thus seeing that no
1403 : : * grace period is in progress, at least until the corresponding
1404 : : * leaf node has been initialized. In addition, we have excluded
1405 : : * CPU-hotplug operations.
1406 : : *
1407 : : * The grace period cannot complete until the initialization
1408 : : * process finishes, because this kthread handles both.
1409 : : */
1410 [ + + ]: 606156 : rcu_for_each_node_breadth_first(rsp, rnp) {
1411 : 202052 : raw_spin_lock_irq(&rnp->lock);
1412 : 404104 : rdp = this_cpu_ptr(rsp->rda);
1413 : 202052 : rcu_preempt_check_blocked_tasks(rnp);
1414 : 202052 : rnp->qsmask = rnp->qsmaskinit;
1415 : 202052 : ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
1416 [ - + ][ # # ]: 202052 : WARN_ON_ONCE(rnp->completed != rsp->completed);
[ # # ]
1417 : 202052 : ACCESS_ONCE(rnp->completed) = rsp->completed;
1418 [ + - ]: 202052 : if (rnp == rdp->mynode)
1419 : 202052 : __note_gp_changes(rsp, rnp, rdp);
1420 : : rcu_preempt_boost_start_gp(rnp);
1421 : : trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1422 : : rnp->level, rnp->grplo,
1423 : : rnp->grphi, rnp->qsmask);
1424 : : raw_spin_unlock_irq(&rnp->lock);
1425 : : #ifdef CONFIG_PROVE_RCU_DELAY
1426 : : if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
1427 : : system_state == SYSTEM_RUNNING)
1428 : : udelay(200);
1429 : : #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
1430 : 202052 : cond_resched();
1431 : : }
1432 : :
1433 : 202052 : mutex_unlock(&rsp->onoff_mutex);
1434 : 202052 : return 1;
1435 : : }
1436 : :
1437 : : /*
1438 : : * Do one round of quiescent-state forcing.
1439 : : */
1440 : 0 : static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1441 : : {
1442 : : int fqs_state = fqs_state_in;
1443 : 243099 : bool isidle = false;
1444 : : unsigned long maxj;
1445 : : struct rcu_node *rnp = rcu_get_root(rsp);
1446 : :
1447 : 243099 : rsp->n_force_qs++;
1448 [ + + ]: 243099 : if (fqs_state == RCU_SAVE_DYNTICK) {
1449 : : /* Collect dyntick-idle snapshots. */
1450 : : if (is_sysidle_rcu_state(rsp)) {
1451 : : isidle = 1;
1452 : : maxj = jiffies - ULONG_MAX / 4;
1453 : : }
1454 : 186281 : force_qs_rnp(rsp, dyntick_save_progress_counter,
1455 : : &isidle, &maxj);
1456 : : rcu_sysidle_report_gp(rsp, isidle, maxj);
1457 : : fqs_state = RCU_FORCE_QS;
1458 : : } else {
1459 : : /* Handle dyntick-idle and offline CPUs. */
1460 : : isidle = 0;
1461 : 56818 : force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1462 : : }
1463 : : /* Clear flag to prevent immediate re-entry. */
1464 [ - + ]: 243099 : if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1465 : 0 : raw_spin_lock_irq(&rnp->lock);
1466 : 0 : rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
1467 : : raw_spin_unlock_irq(&rnp->lock);
1468 : : }
1469 : 243099 : return fqs_state;
1470 : : }
1471 : :
1472 : : /*
1473 : : * Clean up after the old grace period.
1474 : : */
1475 : 0 : static void rcu_gp_cleanup(struct rcu_state *rsp)
1476 : : {
1477 : : unsigned long gp_duration;
1478 : : int nocb = 0;
1479 : : struct rcu_data *rdp;
1480 : : struct rcu_node *rnp = rcu_get_root(rsp);
1481 : :
1482 : 202052 : raw_spin_lock_irq(&rnp->lock);
1483 : 202052 : gp_duration = jiffies - rsp->gp_start;
1484 [ + + ]: 202052 : if (gp_duration > rsp->gp_max)
1485 : 2 : rsp->gp_max = gp_duration;
1486 : :
1487 : : /*
1488 : : * We know the grace period is complete, but to everyone else
1489 : : * it appears to still be ongoing. However, to everyone else it
1490 : : * also looks like there is nothing they can do to advance the
1491 : : * grace period. It is therefore
1492 : : * safe for us to drop the lock in order to mark the grace
1493 : : * period as completed in all of the rcu_node structures.
1494 : : */
1495 : : raw_spin_unlock_irq(&rnp->lock);
1496 : :
1497 : : /*
1498 : : * Propagate new ->completed value to rcu_node structures so
1499 : : * that other CPUs don't have to wait until the start of the next
1500 : : * grace period to process their callbacks. This also avoids
1501 : : * some nasty RCU grace-period initialization races by forcing
1502 : : * the end of the current grace period to be completely recorded in
1503 : : * all of the rcu_node structures before the beginning of the next
1504 : : * grace period is recorded in any of the rcu_node structures.
1505 : : */
1506 [ + + ]: 404104 : rcu_for_each_node_breadth_first(rsp, rnp) {
1507 : 202052 : raw_spin_lock_irq(&rnp->lock);
1508 : 202052 : ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1509 : 404104 : rdp = this_cpu_ptr(rsp->rda);
1510 [ + - ]: 202052 : if (rnp == rdp->mynode)
1511 : 202052 : __note_gp_changes(rsp, rnp, rdp);
1512 : : nocb += rcu_future_gp_cleanup(rsp, rnp);
1513 : : raw_spin_unlock_irq(&rnp->lock);
1514 : 202052 : cond_resched();
1515 : : }
1516 : : rnp = rcu_get_root(rsp);
1517 : 202052 : raw_spin_lock_irq(&rnp->lock);
1518 : : rcu_nocb_gp_set(rnp, nocb);
1519 : :
1520 : 202052 : rsp->completed = rsp->gpnum; /* Declare grace period done. */
1521 : : trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1522 : 202052 : rsp->fqs_state = RCU_GP_IDLE;
1523 : 404104 : rdp = this_cpu_ptr(rsp->rda);
1524 : 202052 : rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
1525 [ + + ]: 202052 : if (cpu_needs_another_gp(rsp, rdp)) {
1526 : 106589 : rsp->gp_flags = RCU_GP_FLAG_INIT;
1527 : : trace_rcu_grace_period(rsp->name,
1528 : : ACCESS_ONCE(rsp->gpnum),
1529 : : TPS("newreq"));
1530 : : }
1531 : : raw_spin_unlock_irq(&rnp->lock);
1532 : 202052 : }
1533 : :
1534 : : /*
1535 : : * Body of kthread that handles grace periods.
1536 : : */
1537 : 0 : static int __noreturn rcu_gp_kthread(void *arg)
1538 : : {
1539 : : int fqs_state;
1540 : : int gf;
1541 : : unsigned long j;
1542 : : int ret;
1543 : : struct rcu_state *rsp = arg;
1544 : : struct rcu_node *rnp = rcu_get_root(rsp);
1545 : :
1546 : : for (;;) {
1547 : :
1548 : : /* Handle grace-period start. */
1549 : : for (;;) {
1550 : : trace_rcu_grace_period(rsp->name,
1551 : : ACCESS_ONCE(rsp->gpnum),
1552 : : TPS("reqwait"));
1553 [ + + ][ + + ]: 297514 : wait_event_interruptible(rsp->gp_wq,
[ + - ]
1554 : : ACCESS_ONCE(rsp->gp_flags) &
1555 : : RCU_GP_FLAG_INIT);
1556 [ - + ]: 202052 : if (rcu_gp_init(rsp))
1557 : : break;
1558 : 0 : cond_resched();
1559 : 0 : flush_signals(current);
1560 : : trace_rcu_grace_period(rsp->name,
1561 : : ACCESS_ONCE(rsp->gpnum),
1562 : : TPS("reqwaitsig"));
1563 : 202052 : }
1564 : :
1565 : : /* Handle quiescent-state forcing. */
1566 : : fqs_state = RCU_SAVE_DYNTICK;
1567 : 202052 : j = jiffies_till_first_fqs;
1568 [ - + ]: 202052 : if (j > HZ) {
1569 : : j = HZ;
1570 : 445151 : jiffies_till_first_fqs = HZ;
1571 : : }
1572 : : ret = 0;
1573 : : for (;;) {
1574 [ + - ]: 445151 : if (!ret)
1575 : 445151 : rsp->jiffies_force_qs = jiffies + j;
1576 : : trace_rcu_grace_period(rsp->name,
1577 : : ACCESS_ONCE(rsp->gpnum),
1578 : : TPS("fqswait"));
1579 [ + - ][ + + ]: 864293 : ret = wait_event_interruptible_timeout(rsp->gp_wq,
[ - + ][ + + ]
[ + - ][ + + ]
[ + + ][ + + ]
[ + - ]
1580 : : ((gf = ACCESS_ONCE(rsp->gp_flags)) &
1581 : : RCU_GP_FLAG_FQS) ||
1582 : : (!ACCESS_ONCE(rnp->qsmask) &&
1583 : : !rcu_preempt_blocked_readers_cgp(rnp)),
1584 : : j);
1585 : : /* If grace period done, leave loop. */
1586 [ + + ]: 445151 : if (!ACCESS_ONCE(rnp->qsmask) &&
1587 : : !rcu_preempt_blocked_readers_cgp(rnp))
1588 : : break;
1589 : : /* If time for quiescent-state forcing, do it. */
1590 [ - + ][ # # ]: 243099 : if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
1591 : 0 : (gf & RCU_GP_FLAG_FQS)) {
1592 : : trace_rcu_grace_period(rsp->name,
1593 : : ACCESS_ONCE(rsp->gpnum),
1594 : : TPS("fqsstart"));
1595 : 243099 : fqs_state = rcu_gp_fqs(rsp, fqs_state);
1596 : : trace_rcu_grace_period(rsp->name,
1597 : : ACCESS_ONCE(rsp->gpnum),
1598 : : TPS("fqsend"));
1599 : 243099 : cond_resched();
1600 : : } else {
1601 : : /* Deal with stray signal. */
1602 : 0 : cond_resched();
1603 : 0 : flush_signals(current);
1604 : : trace_rcu_grace_period(rsp->name,
1605 : : ACCESS_ONCE(rsp->gpnum),
1606 : : TPS("fqswaitsig"));
1607 : : }
1608 : 243099 : j = jiffies_till_next_fqs;
1609 [ - + ]: 243099 : if (j > HZ) {
1610 : : j = HZ;
1611 : 0 : jiffies_till_next_fqs = HZ;
1612 [ + - ]: 243099 : } else if (j < 1) {
1613 : : j = 1;
1614 : 0 : jiffies_till_next_fqs = 1;
1615 : : }
1616 : : }
1617 : :
1618 : : /* Handle grace-period end. */
1619 : 202052 : rcu_gp_cleanup(rsp);
1620 : 202052 : }
1621 : : }
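The kthread above alternates between two waits: sleep until a grace period is requested, then poll with a timeout until every CPU has reported, forcing stragglers each time the timeout expires. The following is a rough user-space sketch of that two-phase loop using pthread condition variables; struct gp_ctl and gp_thread are hypothetical names, and it is a structural analogy only, not the kernel's locking discipline.

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

struct gp_ctl {
	pthread_mutex_t lock;
	pthread_cond_t wq;
	bool gp_requested;      /* Set by callers that need a grace period. */
	int outstanding;        /* Entities that have not yet reported in;
				 * decremented (under lock) by reporters. */
};

void *gp_thread(void *arg)
{
	struct gp_ctl *c = arg;
	struct timespec deadline;

	pthread_mutex_lock(&c->lock);
	for (;;) {
		/* Phase 1: wait until someone requests a grace period. */
		while (!c->gp_requested)
			pthread_cond_wait(&c->wq, &c->lock);
		c->gp_requested = false;
		/* ...initialize the new grace period (cf. rcu_gp_init())... */

		/* Phase 2: timed waits, forcing progress on each timeout. */
		while (c->outstanding > 0) {
			clock_gettime(CLOCK_REALTIME, &deadline);
			deadline.tv_sec += 1;   /* jiffies_till_*_fqs analog. */
			pthread_cond_timedwait(&c->wq, &c->lock, &deadline);
			/* ...on timeout, force quiescent states (cf. rcu_gp_fqs())... */
		}
		/* ...mark the grace period complete (cf. rcu_gp_cleanup())... */
	}
	return NULL;
}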
1622 : :
1623 : 0 : static void rsp_wakeup(struct irq_work *work)
1624 : : {
1625 : : struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1626 : :
1627 : : /* Wake up rcu_gp_kthread() to start the grace period. */
1628 : 334162 : wake_up(&rsp->gp_wq);
1629 : 334162 : }
1630 : :
1631 : : /*
1632 : : * Start a new RCU grace period if warranted, re-initializing the hierarchy
1633 : : * in preparation for detecting the next grace period. The caller must hold
1634 : : * the root node's ->lock and hard irqs must be disabled.
1635 : : *
1636 : : * Note that it is legal for a dying CPU (which is marked as offline) to
1637 : : * invoke this function. This can happen when the dying CPU reports its
1638 : : * quiescent state.
1639 : : */
1640 : : static void
1641 : 336139 : rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1642 : : struct rcu_data *rdp)
1643 : : {
1644 [ + - ][ + + ]: 336139 : if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
1645 : : /*
1646 : : * Either we have not yet spawned the grace-period
1647 : : * task, this CPU does not need another grace period,
1648 : : * or a grace period is already in progress.
1649 : : * Either way, don't start a new grace period.
1650 : : */
1651 : 0 : return;
1652 : : }
1653 : 335619 : rsp->gp_flags = RCU_GP_FLAG_INIT;
1654 : : trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
1655 : : TPS("newreq"));
1656 : :
1657 : : /*
1658 : : * We can't do wakeups while holding the rnp->lock, as that
1659 : : * could cause deadlocks with the rq->lock. Defer
1660 : : * the wakeup to interrupt context. And don't bother waking
1661 : : * up the running kthread.
1662 : : */
1663 [ + + ]: 335619 : if (current != rsp->gp_kthread)
1664 : 335615 : irq_work_queue(&rsp->wakeup_work);
1665 : : }
1666 : :
1667 : : /*
1668 : : * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
1669 : : * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
1670 : : * is invoked indirectly from rcu_advance_cbs(), which would result in
1671 : : * endless recursion -- or would do so if it wasn't for the self-deadlock
1672 : : * that is encountered beforehand.
1673 : : */
1674 : : static void
1675 : 0 : rcu_start_gp(struct rcu_state *rsp)
1676 : : {
1677 : 671636 : struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1678 : 335818 : struct rcu_node *rnp = rcu_get_root(rsp);
1679 : :
1680 : : /*
1681 : : * If there is no grace period in progress right now, any
1682 : : * callbacks we have up to this point will be satisfied by the
1683 : : * next grace period. Also, advancing the callbacks reduces the
1684 : : * probability of false positives from cpu_needs_another_gp()
1685 : : * resulting in pointless grace periods. So, advance callbacks
1686 : : * then start the grace period!
1687 : : */
1688 : 335818 : rcu_advance_cbs(rsp, rnp, rdp);
1689 : 335818 : rcu_start_gp_advanced(rsp, rnp, rdp);
1690 : 335818 : }
1691 : :
1692 : : /*
1693 : : * Report a full set of quiescent states to the specified rcu_state
1694 : : * data structure. This involves cleaning up after the prior grace
1695 : : * period and letting rcu_start_gp() start up the next grace period
1696 : : * if one is needed. Note that the caller must hold rnp->lock, which
1697 : : * is released before return.
1698 : : */
1699 : 0 : static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1700 : : __releases(rcu_get_root(rsp)->lock)
1701 : : {
1702 [ - + ][ # # ]: 202052 : WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
[ # # ]
1703 : 202052 : raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1704 : 202052 : wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
1705 : 202052 : }
1706 : :
1707 : : /*
1708 : : * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1709 : : * Allows quiescent states for a group of CPUs to be reported at one go
1710 : : * to the specified rcu_node structure, though all the CPUs in the group
1711 : : * must be represented by the same rcu_node structure (which need not be
1712 : : * a leaf rcu_node structure, though it often will be). That structure's
1713 : : * lock must be held upon entry, and it is released before return.
1714 : : */
1715 : : static void
1716 : 404105 : rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1717 : : struct rcu_node *rnp, unsigned long flags)
1718 : : __releases(rnp->lock)
1719 : : {
1720 : : struct rcu_node *rnp_c;
1721 : :
1722 : : /* Walk up the rcu_node hierarchy. */
1723 : : for (;;) {
1724 [ - + ]: 404105 : if (!(rnp->qsmask & mask)) {
1725 : :
1726 : : /* Our bit has already been cleared, so done. */
1727 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1728 : 0 : return;
1729 : : }
1730 : 404105 : rnp->qsmask &= ~mask;
1731 : : trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1732 : : mask, rnp->qsmask, rnp->level,
1733 : : rnp->grplo, rnp->grphi,
1734 : : !!rnp->gp_tasks);
1735 [ + + ]: 404105 : if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
1736 : :
1737 : : /* Other bits still set at this level, so done. */
1738 : 202053 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1739 : 202053 : return;
1740 : : }
1741 : 202052 : mask = rnp->grpmask;
1742 [ - + ]: 202052 : if (rnp->parent == NULL) {
1743 : :
1744 : : /* No more levels. Exit loop holding root lock. */
1745 : :
1746 : : break;
1747 : : }
1748 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1749 : : rnp_c = rnp;
1750 : 0 : rnp = rnp->parent;
1751 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
1752 [ # # ][ # # ]: 0 : WARN_ON_ONCE(rnp_c->qsmask);
[ # # ]
1753 : : }
1754 : :
1755 : : /*
1756 : : * Get here if we are the last CPU to pass through a quiescent
1757 : : * state for this grace period. Invoke rcu_report_qs_rsp()
1758 : : * to clean up and start the next grace period if one is needed.
1759 : : */
1760 : 202052 : rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
1761 : : }
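Stripped of locking, tracing, and the blocked-readers check, the walk above is just bit-clearing up a tree: clear this node's bit, and if that empties the node, clear its own bit in the parent, stopping as soon as some level still has bits set. A self-contained sketch of that propagation follows; struct node and report_up() are hypothetical stand-ins, not kernel types.

#include <stdio.h>

struct node {
	struct node *parent;
	unsigned long grpmask;  /* This node's bit in its parent's qsmask. */
	unsigned long qsmask;   /* Children/CPUs still to report this GP. */
};

/* Clear @mask in @np and propagate upward; return 1 when the root empties. */
int report_up(struct node *np, unsigned long mask)
{
	for (;;) {
		np->qsmask &= ~mask;
		if (np->qsmask != 0)
			return 0;       /* Siblings still outstanding. */
		if (np->parent == NULL)
			return 1;       /* Root cleared: everyone reported. */
		mask = np->grpmask;
		np = np->parent;
	}
}

int main(void)
{
	struct node root = { .parent = NULL, .qsmask = 0x3 };
	struct node leaf0 = { .parent = &root, .grpmask = 0x1, .qsmask = 0x1 };
	struct node leaf1 = { .parent = &root, .grpmask = 0x2, .qsmask = 0x1 };

	printf("%d\n", report_up(&leaf0, 0x1)); /* 0: leaf1 still pending. */
	printf("%d\n", report_up(&leaf1, 0x1)); /* 1: root mask now empty. */
	return 0;
}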
1762 : :
1763 : : /*
1764 : : * Record a quiescent state for the specified CPU to that CPU's rcu_data
1765 : : * structure. This must be either called from the specified CPU, or
1766 : : * called when the specified CPU is known to be offline (and when it is
1767 : : * also known that no other CPU is concurrently trying to help the offline
1768 : : * CPU). The lastcomp argument is used to make sure we are still in the
1769 : : * grace period of interest. We don't want to end the current grace period
1770 : : * based on quiescent states detected in an earlier grace period!
1771 : : */
1772 : : static void
1773 : 430339 : rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1774 : : {
1775 : : unsigned long flags;
1776 : : unsigned long mask;
1777 : : struct rcu_node *rnp;
1778 : :
1779 : 430339 : rnp = rdp->mynode;
1780 : 430339 : raw_spin_lock_irqsave(&rnp->lock, flags);
1781 [ + - ][ + + ]: 430399 : if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
[ + + ]
1782 : 430194 : rnp->completed == rnp->gpnum) {
1783 : :
1784 : : /*
1785 : : * The grace period in which this quiescent state was
1786 : : * recorded has ended, so don't report it upwards.
1787 : : * We will instead need a new quiescent state that lies
1788 : : * within the current grace period.
1789 : : */
1790 : 0 : rdp->passed_quiesce = 0; /* need qs for new gp. */
1791 : 51350 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1792 : 430399 : return;
1793 : : }
1794 : 379049 : mask = rdp->grpmask;
1795 [ + + ]: 379049 : if ((rnp->qsmask & mask) == 0) {
1796 : 336 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
1797 : : } else {
1798 : 378713 : rdp->qs_pending = 0;
1799 : :
1800 : : /*
1801 : : * This GP can't end until cpu checks in, so all of our
1802 : : * callbacks can be processed during the next GP.
1803 : : */
1804 : 378713 : rcu_accelerate_cbs(rsp, rnp, rdp);
1805 : :
1806 : 378713 : rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1807 : : }
1808 : : }
1809 : :
1810 : : /*
1811 : : * Check to see if there is a new grace period of which this CPU
1812 : : * is not yet aware, and if so, set up local rcu_data state for it.
1813 : : * Otherwise, see if this CPU has just passed through its first
1814 : : * quiescent state for this grace period, and record that fact if so.
1815 : : */
1816 : : static void
1817 : 0 : rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1818 : : {
1819 : : /* Check for grace-period ends and beginnings. */
1820 : 3797574 : note_gp_changes(rsp, rdp);
1821 : :
1822 : : /*
1823 : : * Does this CPU still need to do its part for current grace period?
1824 : : * If no, return and let the other CPUs do their part as well.
1825 : : */
1826 [ + + ]: 3798581 : if (!rdp->qs_pending)
1827 : : return;
1828 : :
1829 : : /*
1830 : : * Was there a quiescent state since the beginning of the grace
1831 : : * period? If no, then exit and wait for the next call.
1832 : : */
1833 [ + + ]: 772627 : if (!rdp->passed_quiesce)
1834 : : return;
1835 : :
1836 : : /*
1837 : : * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1838 : : * judge of that).
1839 : : */
1840 : 430339 : rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
1841 : : }
1842 : :
1843 : : #ifdef CONFIG_HOTPLUG_CPU
1844 : :
1845 : : /*
1846 : : * Send the specified CPU's RCU callbacks to the orphanage. The
1847 : : * specified CPU must be offline, and the caller must hold the
1848 : : * ->orphan_lock.
1849 : : */
1850 : : static void
1851 : 0 : rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1852 : : struct rcu_node *rnp, struct rcu_data *rdp)
1853 : : {
1854 : : /* No-CBs CPUs do not have orphanable callbacks. */
1855 : : if (rcu_is_nocb_cpu(rdp->cpu))
1856 : 0 : return;
1857 : :
1858 : : /*
1859 : : * Orphan the callbacks. First adjust the counts. This is safe
1860 : : * because _rcu_barrier() excludes CPU-hotplug operations, so it
1861 : : * cannot be running now. Thus no memory barrier is required.
1862 : : */
1863 [ # # ]: 0 : if (rdp->nxtlist != NULL) {
1864 : 0 : rsp->qlen_lazy += rdp->qlen_lazy;
1865 : 0 : rsp->qlen += rdp->qlen;
1866 : 0 : rdp->n_cbs_orphaned += rdp->qlen;
1867 : 0 : rdp->qlen_lazy = 0;
1868 : 0 : ACCESS_ONCE(rdp->qlen) = 0;
1869 : : }
1870 : :
1871 : : /*
1872 : : * Next, move those callbacks still needing a grace period to
1873 : : * the orphanage, where some other CPU will pick them up.
1874 : : * Some of the callbacks might have gone partway through a grace
1875 : : * period, but that is too bad. They get to start over because we
1876 : : * cannot assume that grace periods are synchronized across CPUs.
1877 : : * We don't bother updating the ->nxttail[] array yet, instead
1878 : : * we just reset the whole thing later on.
1879 : : */
1880 [ # # ]: 0 : if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
1881 : 0 : *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
1882 : 0 : rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
1883 : 0 : *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1884 : : }
1885 : :
1886 : : /*
1887 : : * Then move the ready-to-invoke callbacks to the orphanage,
1888 : : * where some other CPU will pick them up. These will not be
1889 : : * required to pass through another grace period: They are done.
1890 : : */
1891 [ # # ]: 0 : if (rdp->nxtlist != NULL) {
1892 : 0 : *rsp->orphan_donetail = rdp->nxtlist;
1893 : 0 : rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
1894 : : }
1895 : :
1896 : : /* Finally, initialize the rcu_data structure's list to empty. */
1897 : : init_callback_list(rdp);
1898 : : }
1899 : :
1900 : : /*
1901 : : * Adopt the RCU callbacks from the specified rcu_state structure's
1902 : : * orphanage. The caller must hold the ->orphan_lock.
1903 : : */
1904 : 0 : static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1905 : : {
1906 : : int i;
1907 : 0 : struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
1908 : :
1909 : : /* No-CBs CPUs are handled specially. */
1910 : : if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
1911 : 0 : return;
1912 : :
1913 : : /* Do the accounting first. */
1914 : 0 : rdp->qlen_lazy += rsp->qlen_lazy;
1915 : 0 : rdp->qlen += rsp->qlen;
1916 : 0 : rdp->n_cbs_adopted += rsp->qlen;
1917 : : if (rsp->qlen_lazy != rsp->qlen)
1918 : : rcu_idle_count_callbacks_posted();
1919 : 0 : rsp->qlen_lazy = 0;
1920 : 0 : rsp->qlen = 0;
1921 : :
1922 : : /*
1923 : : * We do not need a memory barrier here because the only way we
1924 : : * can get here if there is an rcu_barrier() in flight is if
1925 : : * we are the task doing the rcu_barrier().
1926 : : */
1927 : :
1928 : : /* First adopt the ready-to-invoke callbacks. */
1929 [ # # ]: 0 : if (rsp->orphan_donelist != NULL) {
1930 : 0 : *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
1931 : 0 : *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
1932 [ # # ]: 0 : for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
1933 [ # # ]: 0 : if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1934 : 0 : rdp->nxttail[i] = rsp->orphan_donetail;
1935 : 0 : rsp->orphan_donelist = NULL;
1936 : 0 : rsp->orphan_donetail = &rsp->orphan_donelist;
1937 : : }
1938 : :
1939 : : /* And then adopt the callbacks that still need a grace period. */
1940 [ # # ]: 0 : if (rsp->orphan_nxtlist != NULL) {
1941 : 0 : *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
1942 : 0 : rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
1943 : 0 : rsp->orphan_nxtlist = NULL;
1944 : 0 : rsp->orphan_nxttail = &rsp->orphan_nxtlist;
1945 : : }
1946 : : }
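Both functions above splice singly-linked callback lists by manipulating tail pointers (pointers to the final ->next field) rather than walking the lists. A minimal sketch of that O(1) splice follows; struct cb_list and the *_sketch names are hypothetical stand-ins for the ->nxtlist/->nxttail[] and orphan-list fields.

#include <stddef.h>

struct rcu_head_sketch {
	struct rcu_head_sketch *next;
};

struct cb_list {
	struct rcu_head_sketch *head;   /* First callback, or NULL if empty. */
	struct rcu_head_sketch **tail;  /* &head when empty, else &last->next. */
};

void cb_list_init(struct cb_list *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

/* Move every callback from @src onto the end of @dst without walking either list. */
void cb_list_splice(struct cb_list *dst, struct cb_list *src)
{
	if (src->head == NULL)
		return;                 /* Nothing to move. */
	*dst->tail = src->head;         /* Old destination tail now points at source. */
	dst->tail = src->tail;          /* Destination inherits the source's tail. */
	cb_list_init(src);              /* Source list is now empty. */
}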
1947 : :
1948 : : /*
1949 : : * Trace the fact that this CPU is going offline.
1950 : : */
1951 : : static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1952 : : {
1953 : : RCU_TRACE(unsigned long mask);
1954 : : RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
1955 : : RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
1956 : :
1957 : : RCU_TRACE(mask = rdp->grpmask);
1958 : : trace_rcu_grace_period(rsp->name,
1959 : : rnp->gpnum + 1 - !!(rnp->qsmask & mask),
1960 : : TPS("cpuofl"));
1961 : : }
1962 : :
1963 : : /*
1964 : : * The CPU has been completely removed, and some other CPU is reporting
1965 : : * this fact from process context. Do the remainder of the cleanup,
1966 : : * including orphaning the outgoing CPU's RCU callbacks, and also
1967 : : * adopting them. There can only be one CPU hotplug operation at a time,
1968 : : * so no other CPU can be attempting to update rcu_cpu_kthread_task.
1969 : : */
1970 : 0 : static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1971 : : {
1972 : : unsigned long flags;
1973 : : unsigned long mask;
1974 : : int need_report = 0;
1975 : 0 : struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1976 : 0 : struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
1977 : :
1978 : : /* Adjust any no-longer-needed kthreads. */
1979 : : rcu_boost_kthread_setaffinity(rnp, -1);
1980 : :
1981 : : /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
1982 : :
1983 : : /* Exclude any attempts to start a new grace period. */
1984 : 0 : mutex_lock(&rsp->onoff_mutex);
1985 : 0 : raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
1986 : :
1987 : : /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
1988 : 0 : rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
1989 : 0 : rcu_adopt_orphan_cbs(rsp);
1990 : :
1991 : : /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1992 : 0 : mask = rdp->grpmask; /* rnp->grplo is constant. */
1993 : : do {
1994 : 0 : raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1995 : 0 : rnp->qsmaskinit &= ~mask;
1996 [ # # ]: 0 : if (rnp->qsmaskinit != 0) {
1997 [ # # ]: 0 : if (rnp != rdp->mynode)
1998 : : raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1999 : : break;
2000 : : }
2001 [ # # ]: 0 : if (rnp == rdp->mynode)
2002 : : need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
2003 : : else
2004 : : raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2005 : 0 : mask = rnp->grpmask;
2006 : 0 : rnp = rnp->parent;
2007 [ # # ]: 0 : } while (rnp != NULL);
2008 : :
2009 : : /*
2010 : : * We still hold the leaf rcu_node structure lock here, and
2011 : : * irqs are still disabled. The reason for this subterfuge is
2012 : : * that invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
2013 : : * held leads to deadlock.
2014 : : */
2015 : : raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
2016 : 0 : rnp = rdp->mynode;
2017 : : if (need_report & RCU_OFL_TASKS_NORM_GP)
2018 : : rcu_report_unblock_qs_rnp(rnp, flags);
2019 : : else
2020 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
2021 : : if (need_report & RCU_OFL_TASKS_EXP_GP)
2022 : : rcu_report_exp_rnp(rsp, rnp, true);
2023 [ # # ][ # # ]: 0 : WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
[ # # ][ # # ]
[ # # ]
2024 : : "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
2025 : : cpu, rdp->qlen, rdp->nxtlist);
2026 : : init_callback_list(rdp);
2027 : : /* Disallow further callbacks on this CPU. */
2028 : 0 : rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2029 : 0 : mutex_unlock(&rsp->onoff_mutex);
2030 : 0 : }
2031 : :
2032 : : #else /* #ifdef CONFIG_HOTPLUG_CPU */
2033 : :
2034 : : static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2035 : : {
2036 : : }
2037 : :
2038 : : static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2039 : : {
2040 : : }
2041 : :
2042 : : #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
2043 : :
2044 : : /*
2045 : : * Invoke any RCU callbacks that have made it to the end of their grace
2046 : : * period. Throttle as specified by rdp->blimit.
2047 : : */
2048 : 0 : static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2049 : : {
2050 : : unsigned long flags;
2051 : : struct rcu_head *next, *list, **tail;
2052 : : long bl, count, count_lazy;
2053 : : int i;
2054 : :
2055 : : /* If no callbacks are ready, just return. */
2056 [ + ]: 1453532 : if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
2057 : : trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
2058 : : trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
2059 : : need_resched(), is_idle_task(current),
2060 : : rcu_is_callbacks_kthread());
2061 : 1453477 : return;
2062 : : }
2063 : :
2064 : : /*
2065 : : * Extract the list of ready callbacks, disabling to prevent
2066 : : * races with call_rcu() from interrupt handlers.
2067 : : */
2068 : : local_irq_save(flags);
2069 [ - + ][ # # ]: 1453567 : WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
[ - + ]
2070 : 1453580 : bl = rdp->blimit;
2071 : : trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
2072 : 1453580 : list = rdp->nxtlist;
2073 : 1453580 : rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
2074 : 1453580 : *rdp->nxttail[RCU_DONE_TAIL] = NULL;
2075 : 1453580 : tail = rdp->nxttail[RCU_DONE_TAIL];
2076 [ + + ]: 7267346 : for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
2077 [ + + ]: 5813766 : if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
2078 : 1751532 : rdp->nxttail[i] = &rdp->nxtlist;
2079 [ - + ]: 1453580 : local_irq_restore(flags);
2080 : :
2081 : : /* Invoke callbacks. */
2082 : : count = count_lazy = 0;
2083 [ + + ]: 16149809 : while (list) {
2084 : 15893881 : next = list->next;
2085 : : prefetch(next);
2086 : : debug_rcu_head_unqueue(list);
2087 [ + + ]: 15909122 : if (__rcu_reclaim(rsp->name, list))
2088 : 697586 : count_lazy++;
2089 : : list = next;
2090 : : /* Stop only if limit reached and CPU has something to do. */
2091 [ + + ][ + + ]: 15909122 : if (++count >= bl &&
2092 [ + + ]: 15826225 : (need_resched() ||
2093 : 1872436 : (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
2094 : : break;
2095 : : }
2096 : :
2097 : : local_irq_save(flags);
2098 : : trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
2099 : : is_idle_task(current),
2100 : : rcu_is_callbacks_kthread());
2101 : :
2102 : : /* Update count, and requeue any remaining callbacks. */
2103 [ + + ]: 1453361 : if (list != NULL) {
2104 : 1188446 : *tail = rdp->nxtlist;
2105 : 1188446 : rdp->nxtlist = list;
2106 [ + + ]: 2552793 : for (i = 0; i < RCU_NEXT_SIZE; i++)
2107 [ + + ]: 2497271 : if (&rdp->nxtlist == rdp->nxttail[i])
2108 : 1364347 : rdp->nxttail[i] = tail;
2109 : : else
2110 : : break;
2111 : : }
2112 : 1453361 : smp_mb(); /* List handling before counting for rcu_barrier(). */
2113 : 1453169 : rdp->qlen_lazy -= count_lazy;
2114 : 1453169 : ACCESS_ONCE(rdp->qlen) -= count;
2115 : 1453169 : rdp->n_cbs_invoked += count;
2116 : :
2117 : : /* Reinstate batch limit if we have worked down the excess. */
2118 [ + + ][ + + ]: 1453169 : if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
2119 : 26 : rdp->blimit = blimit;
2120 : :
2121 : : /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
2122 [ + + ][ + + ]: 1453169 : if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
2123 : 26 : rdp->qlen_last_fqs_check = 0;
2124 : 26 : rdp->n_force_qs_snap = rsp->n_force_qs;
2125 [ + + ]: 1453143 : } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
2126 : 26 : rdp->qlen_last_fqs_check = rdp->qlen;
2127 [ - + ][ # # ]: 1453169 : WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
[ - ]
2128 : :
2129 [ - + ]: 1453139 : local_irq_restore(flags);
2130 : :
2131 : : /* Re-invoke RCU core processing if there are callbacks remaining. */
2132 [ + + ]: 1453473 : if (cpu_has_callbacks_ready_to_invoke(rdp))
2133 : 1188508 : invoke_rcu_core();
2134 : : }
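The batch logic above reduces to: detach the ready callbacks, invoke at most blimit of them, and requeue whatever is left for a later pass. Below is a single-threaded sketch of that budgeted drain; struct cb_queue is hypothetical, there is no interrupt masking, and it assumes nothing is enqueued while the batch runs.

#include <stddef.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

struct cb_queue {
	struct cb *head;
	struct cb **tail;       /* &head when the queue is empty. */
	long blimit;            /* Callback budget per batch. */
};

void cb_enqueue(struct cb_queue *q, struct cb *c)
{
	c->next = NULL;
	*q->tail = c;
	q->tail = &c->next;
}

/* Invoke up to q->blimit callbacks; requeue the rest. Returns the count. */
long cb_do_batch(struct cb_queue *q)
{
	struct cb *list = q->head;
	long count = 0;

	q->head = NULL;
	q->tail = &q->head;
	while (list) {
		struct cb *next = list->next;

		list->func(list);
		list = next;
		if (++count >= q->blimit)
			break;          /* Budget spent: stop rather than starve others. */
	}
	if (list) {                     /* Requeue unprocessed callbacks (queue is
					 * otherwise empty in this sketch). */
		struct cb *last = list;

		while (last->next)
			last = last->next;
		q->head = list;
		q->tail = &last->next;
	}
	return count;
}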
2135 : :
2136 : : /*
2137 : : * Check to see if this CPU is in a non-context-switch quiescent state
2138 : : * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
2139 : : * Also schedule RCU core processing.
2140 : : *
2141 : : * This function must be called from hardirq context. It is normally
2142 : : * invoked from the scheduling-clock interrupt. If rcu_pending returns
2143 : : * false, there is no point in invoking rcu_check_callbacks().
2144 : : */
2145 : 0 : void rcu_check_callbacks(int cpu, int user)
2146 : : {
2147 : 7849242 : trace_rcu_utilization(TPS("Start scheduler-tick"));
2148 : : increment_cpu_stall_ticks();
2149 [ + + ][ + + ]: 7852563 : if (user || rcu_is_cpu_rrupt_from_idle()) {
2150 : :
2151 : : /*
2152 : : * Get here if this CPU took its interrupt from user
2153 : : * mode or from the idle loop, and if this is not a
2154 : : * nested interrupt. In this case, the CPU is in
2155 : : * a quiescent state, so note it.
2156 : : *
2157 : : * No memory barrier is required here because both
2158 : : * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
2159 : : * variables that other CPUs neither access nor modify,
2160 : : * at least not while the corresponding CPU is online.
2161 : : */
2162 : :
2163 : : rcu_sched_qs(cpu);
2164 : : rcu_bh_qs(cpu);
2165 : :
2166 [ + + ]: 415840 : } else if (!in_softirq()) {
2167 : :
2168 : : /*
2169 : : * Get here if this CPU did not take its interrupt from
2170 : : * softirq, in other words, if it is not interrupting
2171 : : * an rcu_bh read-side critical section. This is an _bh
2172 : : * critical section, so note it.
2173 : : */
2174 : :
2175 : : rcu_bh_qs(cpu);
2176 : : }
2177 : : rcu_preempt_check_callbacks(cpu);
2178 [ + + ]: 7852563 : if (rcu_pending(cpu))
2179 : 710971 : invoke_rcu_core();
2180 : 7853338 : trace_rcu_utilization(TPS("End scheduler-tick"));
2181 : 7853338 : }
2182 : :
2183 : : /*
2184 : : * Scan the leaf rcu_node structures, processing dyntick state for any that
2185 : : * have not yet encountered a quiescent state, using the function specified.
2186 : : * Also initiate boosting for any threads blocked on the root rcu_node.
2187 : : *
2188 : : * The caller must have suppressed start of new grace periods.
2189 : : */
2190 : 0 : static void force_qs_rnp(struct rcu_state *rsp,
2191 : : int (*f)(struct rcu_data *rsp, bool *isidle,
2192 : : unsigned long *maxj),
2193 : : bool *isidle, unsigned long *maxj)
2194 : : {
2195 : : unsigned long bit;
2196 : : int cpu;
2197 : : unsigned long flags;
2198 : : unsigned long mask;
2199 : : struct rcu_node *rnp;
2200 : :
2201 [ + + ]: 486198 : rcu_for_each_leaf_node(rsp, rnp) {
2202 : 243099 : cond_resched();
2203 : : mask = 0;
2204 : 243099 : raw_spin_lock_irqsave(&rnp->lock, flags);
2205 [ - + ]: 486198 : if (!rcu_gp_in_progress(rsp)) {
2206 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
2207 : 0 : return;
2208 : : }
2209 [ + + ]: 486198 : if (rnp->qsmask == 0) {
2210 : : rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
2211 : 519 : continue;
2212 : : }
2213 : 242580 : cpu = rnp->grplo;
2214 : : bit = 1;
2215 [ + + ]: 1455480 : for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2216 [ + + ]: 1212900 : if ((rnp->qsmask & bit) != 0) {
2217 [ + - ]: 242802 : if ((rnp->qsmaskinit & bit) != 0)
2218 : 242802 : *isidle = 0;
2219 [ + + ]: 242802 : if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2220 : 25392 : mask |= bit;
2221 : : }
2222 : : }
2223 [ + + ]: 242580 : if (mask != 0) {
2224 : :
2225 : : /* rcu_report_qs_rnp() releases rnp->lock. */
2226 : 25392 : rcu_report_qs_rnp(mask, rsp, rnp, flags);
2227 : 25392 : continue;
2228 : : }
2229 : 217188 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
2230 : : }
2231 : : rnp = rcu_get_root(rsp);
2232 [ + + ]: 243099 : if (rnp->qsmask == 0) {
2233 : 25911 : raw_spin_lock_irqsave(&rnp->lock, flags);
2234 : : rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
2235 : : }
2236 : : }
2237 : :
2238 : : /*
2239 : : * Force quiescent states on reluctant CPUs, and also detect which
2240 : : * CPUs are in dyntick-idle mode.
2241 : : */
2242 : 0 : static void force_quiescent_state(struct rcu_state *rsp)
2243 : : {
2244 : : unsigned long flags;
2245 : : bool ret;
2246 : : struct rcu_node *rnp;
2247 : : struct rcu_node *rnp_old = NULL;
2248 : :
2249 : : /* Funnel through hierarchy to reduce memory contention. */
2250 : 0 : rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
2251 [ # # ]: 0 : for (; rnp != NULL; rnp = rnp->parent) {
2252 [ # # # # ]: 0 : ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
2253 : 0 : !raw_spin_trylock(&rnp->fqslock);
2254 [ # # ]: 0 : if (rnp_old != NULL)
2255 : : raw_spin_unlock(&rnp_old->fqslock);
2256 [ # # ]: 0 : if (ret) {
2257 : 0 : rsp->n_force_qs_lh++;
2258 : 0 : return;
2259 : : }
2260 : : rnp_old = rnp;
2261 : : }
2262 : : /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
2263 : :
2264 : : /* Reached the root of the rcu_node tree, acquire lock. */
2265 : 0 : raw_spin_lock_irqsave(&rnp_old->lock, flags);
2266 : : raw_spin_unlock(&rnp_old->fqslock);
2267 [ # # ]: 0 : if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
2268 : 0 : rsp->n_force_qs_lh++;
2269 : 0 : raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2270 : 0 : return; /* Someone beat us to it. */
2271 : : }
2272 : 0 : rsp->gp_flags |= RCU_GP_FLAG_FQS;
2273 : 0 : raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2274 : 0 : wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
2275 : : }
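The leaf-to-root walk above is "funnel locking": each caller trylocks its way toward the root, dropping the previous level as it goes, so most contending callers bail out early and only one reaches the root. A user-space sketch of just the funnel follows; struct fnode is hypothetical, and the kernel version additionally gives up as soon as it sees RCU_GP_FLAG_FQS already set.

#include <pthread.h>
#include <stdbool.h>

struct fnode {
	struct fnode *parent;           /* NULL at the root. */
	pthread_mutex_t fqslock;
};

/*
 * Funnel from @leaf toward the root.  Returns true with the root's lock
 * held (caller must unlock it); returns false if a trylock failed, meaning
 * another caller closer to the root will do the work for us.
 */
bool funnel_to_root(struct fnode *leaf, struct fnode **root_out)
{
	struct fnode *held = NULL;
	struct fnode *np;

	for (np = leaf; np != NULL; np = np->parent) {
		bool failed = pthread_mutex_trylock(&np->fqslock) != 0;

		if (held)
			pthread_mutex_unlock(&held->fqslock);
		if (failed)
			return false;   /* Lost the race at this level: give up. */
		held = np;
	}
	*root_out = held;               /* held is now the root node. */
	return true;
}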
2276 : :
2277 : : /*
2278 : : * This does the RCU core processing work for the specified rcu_state
2279 : : * and rcu_data structures. This may be called only from the CPU to
2280 : : * which the rdp belongs.
2281 : : */
2282 : : static void
2283 : 0 : __rcu_process_callbacks(struct rcu_state *rsp)
2284 : : {
2285 : : unsigned long flags;
2286 : 7596202 : struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2287 : :
2288 [ - + ][ # # ]: 3798101 : WARN_ON_ONCE(rdp->beenonline == 0);
[ # # ]
2289 : :
2290 : : /* Update RCU state based on any recent quiescent states. */
2291 : 3798101 : rcu_check_quiescent_state(rsp, rdp);
2292 : :
2293 : : /* Does this CPU require a not-yet-started grace period? */
2294 : : local_irq_save(flags);
2295 [ + + ]: 3799471 : if (cpu_needs_another_gp(rsp, rdp)) {
2296 : 335276 : raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2297 : 335379 : rcu_start_gp(rsp);
2298 : 335379 : raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2299 : : } else {
2300 [ - + ]: 3463506 : local_irq_restore(flags);
2301 : : }
2302 : :
2303 : : /* If there are callbacks ready, invoke them. */
2304 [ + + ]: 3799402 : if (cpu_has_callbacks_ready_to_invoke(rdp))
2305 : 1453526 : invoke_rcu_callbacks(rsp, rdp);
2306 : 3799229 : }
2307 : :
2308 : : /*
2309 : : * Do RCU core processing for the current CPU.
2310 : : */
2311 : 0 : static void rcu_process_callbacks(struct softirq_action *unused)
2312 : : {
2313 : : struct rcu_state *rsp;
2314 : :
2315 [ + + ]: 1899362 : if (cpu_is_offline(smp_processor_id()))
2316 : 673 : return;
2317 : 1898865 : trace_rcu_utilization(TPS("Start RCU core"));
2318 [ + + ]: 5696962 : for_each_rcu_flavor(rsp)
2319 : 3797424 : __rcu_process_callbacks(rsp);
2320 : 1899538 : trace_rcu_utilization(TPS("End RCU core"));
2321 : : }
2322 : :
2323 : : /*
2324 : : * Schedule RCU callback invocation. If the specified type of RCU
2325 : : * does not support RCU priority boosting, just do a direct call,
2326 : : * otherwise wake up the per-CPU kernel kthread. Note that because we
2327 : : * are running on the current CPU with interrupts disabled, the
2328 : : * rcu_cpu_kthread_task cannot disappear out from under us.
2329 : : */
2330 : 0 : static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
2331 : : {
2332 [ + ]: 1453526 : if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
2333 : : return;
2334 [ + - ]: 1453533 : if (likely(!rsp->boost)) {
2335 : 1453533 : rcu_do_batch(rsp, rdp);
2336 : 1453337 : return;
2337 : : }
2338 : 0 : invoke_rcu_callbacks_kthread();
2339 : : }
2340 : :
2341 : 0 : static void invoke_rcu_core(void)
2342 : : {
2343 [ + ]: 1899767 : if (cpu_online(smp_processor_id()))
2344 : 1899772 : raise_softirq(RCU_SOFTIRQ);
2345 : 186 : }
2346 : :
2347 : : /*
2348 : : * Handle any core-RCU processing required by a call_rcu() invocation.
2349 : : */
2350 : 0 : static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2351 : : struct rcu_head *head, unsigned long flags)
2352 : : {
2353 : : /*
2354 : : * If called from an extended quiescent state, invoke the RCU
2355 : : * core in order to force a re-evaluation of RCU's idleness.
2356 : : */
2357 [ - + ][ # # ]: 15965671 : if (!rcu_is_watching() && cpu_online(smp_processor_id()))
2358 : 0 : invoke_rcu_core();
2359 : :
2360 : : /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
2361 [ + + ][ + + ]: 15965543 : if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
2362 : 0 : return;
2363 : :
2364 : : /*
2365 : : * Force the grace period if too many callbacks or too long waiting.
2366 : : * Enforce hysteresis, and don't invoke force_quiescent_state()
2367 : : * if some other CPU has recently done so. Also, don't bother
2368 : : * invoking force_quiescent_state() if the newly enqueued callback
2369 : : * is the only one waiting for a grace period to complete.
2370 : : */
2371 [ + + ]: 14701213 : if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
2372 : :
2373 : : /* Are we ignoring a completed grace period? */
2374 : 593 : note_gp_changes(rsp, rdp);
2375 : :
2376 : : /* Start a new grace period if one not already started. */
2377 [ + + ]: 593 : if (!rcu_gp_in_progress(rsp)) {
2378 : : struct rcu_node *rnp_root = rcu_get_root(rsp);
2379 : :
2380 : 439 : raw_spin_lock(&rnp_root->lock);
2381 : 439 : rcu_start_gp(rsp);
2382 : : raw_spin_unlock(&rnp_root->lock);
2383 : : } else {
2384 : : /* Give the grace period a kick. */
2385 : 154 : rdp->blimit = LONG_MAX;
2386 [ - + ][ # # ]: 154 : if (rsp->n_force_qs == rdp->n_force_qs_snap &&
2387 : 0 : *rdp->nxttail[RCU_DONE_TAIL] != head)
2388 : 0 : force_quiescent_state(rsp);
2389 : 154 : rdp->n_force_qs_snap = rsp->n_force_qs;
2390 : 154 : rdp->qlen_last_fqs_check = rdp->qlen;
2391 : : }
2392 : : }
2393 : : }
2394 : :
2395 : : /*
2396 : : * RCU callback function to leak a callback.
2397 : : */
2398 : : static void rcu_leak_callback(struct rcu_head *rhp)
2399 : : {
2400 : : }
2401 : :
2402 : : /*
2403 : : * Helper function for call_rcu() and friends. The cpu argument will
2404 : : * normally be -1, indicating "currently running CPU". It may specify
2405 : : * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier()
2406 : : * is expected to specify a CPU.
2407 : : */
2408 : : static void
2409 : 0 : __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
2410 : : struct rcu_state *rsp, int cpu, bool lazy)
2411 : : {
2412 : : unsigned long flags;
2413 : : struct rcu_data *rdp;
2414 : :
2415 [ - + ][ # # ]: 15965133 : WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
[ # # ]
2416 : : if (debug_rcu_head_queue(head)) {
2417 : : /* Probable double call_rcu(), so leak the callback. */
2418 : : ACCESS_ONCE(head->func) = rcu_leak_callback;
2419 : : WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
2420 : : return;
2421 : : }
2422 : 15965133 : head->func = func;
2423 : 15965133 : head->next = NULL;
2424 : :
2425 : : /*
2426 : : * Opportunistically note grace-period endings and beginnings.
2427 : : * Note that we might see a beginning right after we see an
2428 : : * end, but never vice versa, since this CPU has to pass through
2429 : : * a quiescent state betweentimes.
2430 : : */
2431 : : local_irq_save(flags);
2432 : 31930120 : rdp = this_cpu_ptr(rsp->rda);
2433 : :
2434 : : /* Add the callback to our list. */
2435 [ + + ][ + + ]: 15965060 : if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
2436 : : int offline;
2437 : :
2438 [ - + ]: 137 : if (cpu != -1)
2439 : 0 : rdp = per_cpu_ptr(rsp->rda, cpu);
2440 : : offline = !__call_rcu_nocb(rdp, head, lazy);
2441 [ - ][ # # ]: 137 : WARN_ON_ONCE(offline);
2442 : : /* _call_rcu() is illegal on offline CPU; leak the callback. */
2443 [ # # ]: 0 : local_irq_restore(flags);
2444 : : return;
2445 : : }
2446 : 15964923 : ACCESS_ONCE(rdp->qlen)++;
2447 [ + + ]: 15964923 : if (lazy)
2448 : 698327 : rdp->qlen_lazy++;
2449 : : else
2450 : : rcu_idle_count_callbacks_posted();
2451 : 15964923 : smp_mb(); /* Count before adding callback for rcu_barrier(). */
2452 : 15965678 : *rdp->nxttail[RCU_NEXT_TAIL] = head;
2453 : 15965678 : rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
2454 : :
2455 : : if (__is_kfree_rcu_offset((unsigned long)func))
2456 : : trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
2457 : : rdp->qlen_lazy, rdp->qlen);
2458 : : else
2459 : : trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
2460 : :
2461 : : /* Go handle any RCU core processing required. */
2462 : 15965678 : __call_rcu_core(rsp, rdp, head, flags);
2463 [ + + ]: 15965565 : local_irq_restore(flags);
2464 : : }
2465 : :
2466 : : /*
2467 : : * Queue an RCU-sched callback for invocation after a grace period.
2468 : : */
2469 : 0 : void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2470 : : {
2471 : 15267213 : __call_rcu(head, func, &rcu_sched_state, -1, 0);
2472 : 15267421 : }
2473 : : EXPORT_SYMBOL_GPL(call_rcu_sched);
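A typical caller embeds the rcu_head in its own structure and frees that structure from the callback once the grace period has elapsed. A minimal sketch of that usage follows; struct foo, foo_reclaim() and foo_release() are hypothetical, while call_rcu_sched() and the helpers used are the real APIs.

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;    /* Embedded so the callback can find us. */
};

static void foo_reclaim(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);              /* Safe: a full sched grace period has elapsed. */
}

static void foo_release(struct foo *fp)
{
	/* Unpublish fp first (e.g. list_del_rcu()), then defer the free: */
	call_rcu_sched(&fp->rcu, foo_reclaim);
}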
2474 : :
2475 : : /*
2476 : : * Queue an RCU callback for invocation after a quicker grace period.
2477 : : */
2478 : 0 : void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2479 : : {
2480 : 0 : __call_rcu(head, func, &rcu_bh_state, -1, 0);
2481 : 0 : }
2482 : : EXPORT_SYMBOL_GPL(call_rcu_bh);
2483 : :
2484 : : /*
2485 : : * Because a context switch is a grace period for RCU-sched and RCU-bh,
2486 : : * any blocking grace-period wait automatically implies a grace period
2487 : : * if there is only one CPU online at any point in time during execution
2488 : : * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
2489 : : * occasionally incorrectly indicate that there are multiple CPUs online
2490 : : * when there was in fact only one the whole time, as this just adds
2491 : : * some overhead: RCU still operates correctly.
2492 : : */
2493 : : static inline int rcu_blocking_is_gp(void)
2494 : : {
2495 : : int ret;
2496 : :
2497 : : might_sleep(); /* Check for RCU read-side critical section. */
2498 : 865 : preempt_disable();
2499 : 865 : ret = num_online_cpus() <= 1;
2500 : 865 : preempt_enable();
2501 : : return ret;
2502 : : }
2503 : :
2504 : : /**
2505 : : * synchronize_sched - wait until an rcu-sched grace period has elapsed.
2506 : : *
2507 : : * Control will return to the caller some time after a full rcu-sched
2508 : : * grace period has elapsed, in other words after all currently executing
2509 : : * rcu-sched read-side critical sections have completed. These read-side
2510 : : * critical sections are delimited by rcu_read_lock_sched() and
2511 : : * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
2512 : : * local_irq_disable(), and so on may be used in place of
2513 : : * rcu_read_lock_sched().
2514 : : *
2515 : : * This means that all preempt_disable code sequences, including NMI and
2516 : : * non-threaded hardware-interrupt handlers, in progress on entry will
2517 : : * have completed before this primitive returns. However, this does not
2518 : : * guarantee that softirq handlers will have completed, since in some
2519 : : * kernels, these handlers can run in process context, and can block.
2520 : : *
2521 : : * Note that this guarantee implies further memory-ordering guarantees.
2522 : : * On systems with more than one CPU, when synchronize_sched() returns,
2523 : : * each CPU is guaranteed to have executed a full memory barrier since the
2524 : : * end of its last RCU-sched read-side critical section whose beginning
2525 : : * preceded the call to synchronize_sched(). In addition, each CPU having
2526 : : * an RCU read-side critical section that extends beyond the return from
2527 : : * synchronize_sched() is guaranteed to have executed a full memory barrier
2528 : : * after the beginning of synchronize_sched() and before the beginning of
2529 : : * that RCU read-side critical section. Note that these guarantees include
2530 : : * CPUs that are offline, idle, or executing in user mode, as well as CPUs
2531 : : * that are executing in the kernel.
2532 : : *
2533 : : * Furthermore, if CPU A invoked synchronize_sched(), which returned
2534 : : * to its caller on CPU B, then both CPU A and CPU B are guaranteed
2535 : : * to have executed a full memory barrier during the execution of
2536 : : * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
2537 : : * again only if the system has more than one CPU).
2538 : : *
2539 : : * This primitive provides the guarantees made by the (now removed)
2540 : : * synchronize_kernel() API. In contrast, synchronize_rcu() only
2541 : : * guarantees that rcu_read_lock() sections will have completed.
2542 : : * In "classic RCU", these two guarantees happen to be one and
2543 : : * the same, but can differ in realtime RCU implementations.
2544 : : */
2545 : 0 : void synchronize_sched(void)
2546 : : {
2547 : : rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2548 : : !lock_is_held(&rcu_lock_map) &&
2549 : : !lock_is_held(&rcu_sched_lock_map),
2550 : : "Illegal synchronize_sched() in RCU-sched read-side critical section");
2551 [ + - ]: 865 : if (rcu_blocking_is_gp())
2552 : 865 : return;
2553 [ - + ]: 865 : if (rcu_expedited)
2554 : 0 : synchronize_sched_expedited();
2555 : : else
2556 : 865 : wait_rcu_gp(call_rcu_sched);
2557 : : }
2558 : : EXPORT_SYMBOL_GPL(synchronize_sched);
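The usual updater pattern is: publish the replacement pointer, call synchronize_sched() to wait out any readers that might still hold the old pointer under preempt_disable()/rcu_read_lock_sched(), then free the old version. A sketch of that pattern follows, reusing the hypothetical struct foo above; foo_mutex, global_foo and foo_update() are likewise illustrative names.

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static DEFINE_MUTEX(foo_mutex);         /* Serializes updaters. */
static struct foo __rcu *global_foo;

static void foo_update(struct foo *new_fp)
{
	struct foo *old_fp;

	mutex_lock(&foo_mutex);
	old_fp = rcu_dereference_protected(global_foo,
					   lockdep_is_held(&foo_mutex));
	rcu_assign_pointer(global_foo, new_fp); /* Publish the new version. */
	mutex_unlock(&foo_mutex);
	synchronize_sched();    /* Pre-existing sched readers are now done. */
	kfree(old_fp);          /* No reader can still reference old_fp. */
}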
2559 : :
2560 : : /**
2561 : : * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
2562 : : *
2563 : : * Control will return to the caller some time after a full rcu_bh grace
2564 : : * period has elapsed, in other words after all currently executing rcu_bh
2565 : : * read-side critical sections have completed. RCU read-side critical
2566 : : * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
2567 : : * and may be nested.
2568 : : *
2569 : : * See the description of synchronize_sched() for more detailed information
2570 : : * on memory ordering guarantees.
2571 : : */
2572 : 0 : void synchronize_rcu_bh(void)
2573 : : {
2574 : : rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2575 : : !lock_is_held(&rcu_lock_map) &&
2576 : : !lock_is_held(&rcu_sched_lock_map),
2577 : : "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
2578 [ # # ]: 0 : if (rcu_blocking_is_gp())
2579 : 0 : return;
2580 [ # # ]: 0 : if (rcu_expedited)
2581 : : synchronize_rcu_bh_expedited();
2582 : : else
2583 : 0 : wait_rcu_gp(call_rcu_bh);
2584 : : }
2585 : : EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
2586 : :
2587 : 0 : static int synchronize_sched_expedited_cpu_stop(void *data)
2588 : : {
2589 : : /*
2590 : : * There must be a full memory barrier on each affected CPU
2591 : : * between the time that try_stop_cpus() is called and the
2592 : : * time that it returns.
2593 : : *
2594 : : * In the current initial implementation of cpu_stop, the
2595 : : * above condition is already met when the control reaches
2596 : : * this point and the following smp_mb() is not strictly
2597 : : * necessary. Do smp_mb() anyway for documentation and
2598 : : * robustness against future implementation changes.
2599 : : */
2600 : 0 : smp_mb(); /* See above comment block. */
2601 : 0 : return 0;
2602 : : }
2603 : :
2604 : : /**
2605 : : * synchronize_sched_expedited - Brute-force RCU-sched grace period
2606 : : *
2607 : : * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
2608 : : * approach to force the grace period to end quickly. This consumes
2609 : : * significant time on all CPUs and is unfriendly to real-time workloads,
2610 : : * so is thus not recommended for any sort of common-case code. In fact,
2611 : : * if you are using synchronize_sched_expedited() in a loop, please
2612 : : * restructure your code to batch your updates, and then use a single
2613 : : * synchronize_sched() instead.
2614 : : *
2615 : : * Note that it is illegal to call this function while holding any lock
2616 : : * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
2617 : : * to call this function from a CPU-hotplug notifier. Failing to observe
2618 : : * these restriction will result in deadlock.
2619 : : *
2620 : : * This implementation can be thought of as an application of ticket
2621 : : * locking to RCU, with sync_sched_expedited_started and
2622 : : * sync_sched_expedited_done taking on the roles of the halves
2623 : : * of the ticket-lock word. Each task atomically increments
2624 : : * sync_sched_expedited_started upon entry, snapshotting the old value,
2625 : : * then attempts to stop all the CPUs. If this succeeds, then each
2626 : : * CPU will have executed a context switch, resulting in an RCU-sched
2627 : : * grace period. We are then done, so we use atomic_cmpxchg() to
2628 : : * update sync_sched_expedited_done to match our snapshot -- but
2629 : : * only if someone else has not already advanced past our snapshot.
2630 : : *
2631 : : * On the other hand, if try_stop_cpus() fails, we check the value
2632 : : * of sync_sched_expedited_done. If it has advanced past our
2633 : : * initial snapshot, then someone else must have forced a grace period
2634 : : * some time after we took our snapshot. In this case, our work is
2635 : : * done for us, and we can simply return. Otherwise, we try again,
2636 : : * but keep our initial snapshot for purposes of checking for someone
2637 : : * doing our work for us.
2638 : : *
2639 : : * If we fail too many times in a row, we fall back to synchronize_sched().
2640 : : */
2641 : 0 : void synchronize_sched_expedited(void)
2642 : : {
2643 : : long firstsnap, s, snap;
2644 : : int trycount = 0;
2645 : : struct rcu_state *rsp = &rcu_sched_state;
2646 : :
2647 : : /*
2648 : : * If we are in danger of counter wrap, just do synchronize_sched().
2649 : : * By allowing sync_sched_expedited_started to advance no more than
2650 : : * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
2651 : : * that more than 3.5 billion CPUs would be required to force a
2652 : : * counter wrap on a 32-bit system. Quite a few more CPUs would of
2653 : : * course be required on a 64-bit system.
2654 : : */
2655 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
2656 : : (ulong)atomic_long_read(&rsp->expedited_done) +
2657 : : ULONG_MAX / 8)) {
2658 : 0 : synchronize_sched();
2659 : : atomic_long_inc(&rsp->expedited_wrap);
2660 : : return;
2661 : : }
2662 : :
2663 : : /*
2664 : : * Take a ticket. Note that atomic_inc_return() implies a
2665 : : * full memory barrier.
2666 : : */
2667 : : snap = atomic_long_inc_return(&rsp->expedited_start);
2668 : : firstsnap = snap;
2669 : 0 : get_online_cpus();
2670 [ # # ][ # # ]: 0 : WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
[ # # ]
2671 : :
2672 : : /*
2673 : : * Each pass through the following loop attempts to force a
2674 : : * context switch on each CPU.
2675 : : */
2676 [ # # ]: 0 : while (try_stop_cpus(cpu_online_mask,
2677 : : synchronize_sched_expedited_cpu_stop,
2678 : : NULL) == -EAGAIN) {
2679 : 0 : put_online_cpus();
2680 : : atomic_long_inc(&rsp->expedited_tryfail);
2681 : :
2682 : : /* Check to see if someone else did our work for us. */
2683 : : s = atomic_long_read(&rsp->expedited_done);
2684 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2685 : : /* ensure test happens before caller kfree */
2686 : 0 : smp_mb__before_atomic_inc(); /* ^^^ */
2687 : : atomic_long_inc(&rsp->expedited_workdone1);
2688 : : return;
2689 : : }
2690 : :
2691 : : /* No joy, try again later. Or just synchronize_sched(). */
2692 [ # # ]: 0 : if (trycount++ < 10) {
2693 : 0 : udelay(trycount * num_online_cpus());
2694 : : } else {
2695 : 0 : wait_rcu_gp(call_rcu_sched);
2696 : : atomic_long_inc(&rsp->expedited_normal);
2697 : : return;
2698 : : }
2699 : :
2700 : : /* Recheck to see if someone else did our work for us. */
2701 : : s = atomic_long_read(&rsp->expedited_done);
2702 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2703 : : /* ensure test happens before caller kfree */
2704 : 0 : smp_mb__before_atomic_inc(); /* ^^^ */
2705 : : atomic_long_inc(&rsp->expedited_workdone2);
2706 : : return;
2707 : : }
2708 : :
2709 : : /*
2710 : : * Refetching sync_sched_expedited_started allows later
2711 : : * callers to piggyback on our grace period. We retry
2712 : : * after they started, so our grace period works for them,
2713 : : * and they started after our first try, so their grace
2714 : : * period works for us.
2715 : : */
2716 : 0 : get_online_cpus();
2717 : : snap = atomic_long_read(&rsp->expedited_start);
2718 : 0 : smp_mb(); /* ensure read is before try_stop_cpus(). */
2719 : : }
2720 : : atomic_long_inc(&rsp->expedited_stoppedcpus);
2721 : :
2722 : : /*
2723 : : * Everyone up to our most recent fetch is covered by our grace
2724 : : * period. Update the counter, but only if our work is still
2725 : : * relevant -- which it won't be if someone who started later
2726 : : * than we did already did their update.
2727 : : */
2728 : : do {
2729 : : atomic_long_inc(&rsp->expedited_done_tries);
2730 : : s = atomic_long_read(&rsp->expedited_done);
2731 [ # # ]: 0 : if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
2732 : : /* ensure test happens before caller kfree */
2733 : 0 : smp_mb__before_atomic_inc(); /* ^^^ */
2734 : : atomic_long_inc(&rsp->expedited_done_lost);
2735 : : break;
2736 : : }
2737 [ # # ]: 0 : } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
2738 : : atomic_long_inc(&rsp->expedited_done_exit);
2739 : :
2740 : 0 : put_online_cpus();
2741 : : }
2742 : : EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
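The ticket scheme described in the comment above boils down to wraparound-safe counter comparisons: snapshot ->expedited_start on entry, and return early whenever ->expedited_done has caught up to that snapshot, because a later caller's forced grace period then covers us as well. The following standalone sketch shows just that check with plain counters and hypothetical names; the kernel uses atomic_long_t counters and the ULONG_CMP_GE() macro, whose definition ticket_ge() mirrors.

#include <limits.h>
#include <stdbool.h>

static unsigned long expedited_start;   /* Ticket handed to each caller. */
static unsigned long expedited_done;    /* Newest ticket already serviced. */

/* Wraparound-safe "a is not older than b", mirroring ULONG_CMP_GE(). */
static bool ticket_ge(unsigned long a, unsigned long b)
{
	return ULONG_MAX / 2 >= a - b;
}

static bool someone_did_our_work(unsigned long firstsnap)
{
	/* If done has reached our ticket, a concurrent caller's grace
	 * period already covers every update that preceded our snapshot. */
	return ticket_ge(expedited_done, firstsnap);
}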
2743 : :
2744 : : /*
2745 : : * Check to see if there is any immediate RCU-related work to be done
2746 : : * by the current CPU, for the specified type of RCU, returning 1 if so.
2747 : : * The checks are in order of increasing expense: checks that can be
2748 : : * carried out against CPU-local state are performed first. However,
2749 : : * we must check for CPU stalls first, else we might not get a chance.
2750 : : */
2751 : 0 : static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
2752 : : {
2753 : 14988960 : struct rcu_node *rnp = rdp->mynode;
2754 : :
2755 : 14988960 : rdp->n_rcu_pending++;
2756 : :
2757 : : /* Check for CPU stalls, if enabled. */
2758 : 14988960 : check_cpu_stall(rsp, rdp);
2759 : :
2760 : : /* Is the RCU core waiting for a quiescent state from this CPU? */
2761 [ + + ][ + + ]: 29981454 : if (rcu_scheduler_fully_active &&
2762 [ + + ]: 453027 : rdp->qs_pending && !rdp->passed_quiesce) {
2763 : 43446 : rdp->n_rp_qs_pending++;
2764 [ + + ][ + ]: 14949048 : } else if (rdp->qs_pending && rdp->passed_quiesce) {
2765 : 409593 : rdp->n_rp_report_qs++;
2766 : 409593 : return 1;
2767 : : }
2768 : :
2769 : : /* Does this CPU have callbacks ready to invoke? */
2770 [ + + ]: 14582901 : if (cpu_has_callbacks_ready_to_invoke(rdp)) {
2771 : 23735 : rdp->n_rp_cb_ready++;
2772 : 23735 : return 1;
2773 : : }
2774 : :
2775 : : /* Has RCU gone idle with this CPU needing another grace period? */
2776 [ + + ]: 14559166 : if (cpu_needs_another_gp(rsp, rdp)) {
2777 : 91365 : rdp->n_rp_cpu_needs_gp++;
2778 : 91365 : return 1;
2779 : : }
2780 : :
2781 : : /* Has another RCU grace period completed? */
2782 [ + + ]: 14467514 : if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
2783 : 108564 : rdp->n_rp_gp_completed++;
2784 : 108564 : return 1;
2785 : : }
2786 : :
2787 : : /* Has a new RCU grace period started? */
2788 [ + + ]: 14358950 : if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
2789 : 78136 : rdp->n_rp_gp_started++;
2790 : 78136 : return 1;
2791 : : }
2792 : :
2793 : : /* nothing to do */
2794 : 14280814 : rdp->n_rp_need_nothing++;
2795 : 14280814 : return 0;
2796 : : }
2797 : :
2798 : : /*
2799 : : * Check to see if there is any immediate RCU-related work to be done
2800 : : * by the current CPU, returning 1 if so. This function is part of the
2801 : : * RCU implementation; it is -not- an exported member of the RCU API.
2802 : : */
2803 : 0 : static int rcu_pending(int cpu)
2804 : : {
2805 : : struct rcu_state *rsp;
2806 : :
2807 [ + + ]: 22136178 : for_each_rcu_flavor(rsp)
2808 [ + + ]: 14986997 : if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
2809 : : return 1;
2810 : : return 0;
2811 : : }
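For context, rcu_pending() is consulted from the scheduler-tick path, where a positive answer raises RCU_SOFTIRQ so that rcu_process_callbacks() runs. A condensed sketch of such a call site follows; the quiescent-state bookkeeping that the real rcu_check_callbacks() does for user and idle ticks is omitted.

static void scheduler_tick_rcu_sketch(int cpu)
{
        if (rcu_pending(cpu))
                invoke_rcu_core();      /* raise RCU_SOFTIRQ for rcu_process_callbacks() */
}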
2812 : :
2813 : : /*
2814 : : * Return true if the specified CPU has any callback. If all_lazy is
2815 : : * non-NULL, store an indication of whether all callbacks are lazy.
2816 : : * (If there are no callbacks, all of them are deemed to be lazy.)
2817 : : */
2818 : 0 : static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
2819 : : {
2820 : : bool al = true;
2821 : : bool hc = false;
2822 : : struct rcu_data *rdp;
2823 : : struct rcu_state *rsp;
2824 : :
2825 [ + + ]: 13356681 : for_each_rcu_flavor(rsp) {
2826 : 9761843 : rdp = per_cpu_ptr(rsp->rda, cpu);
2827 [ + + ]: 9761843 : if (!rdp->nxtlist)
2828 : 7190056 : continue;
2829 : : hc = true;
2830 [ - + ][ # # ]: 2571787 : if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
2831 : : al = false;
2832 : : break;
2833 : : }
2834 : : }
2835 [ # # ]: 6166660 : if (all_lazy)
2836 : 0 : *all_lazy = al;
2837 : 0 : return hc;
2838 : : }
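The all_lazy output is what lets the idle-entry path decide how long it may defer the next wakeup: if only lazy (kfree-style) callbacks are queued, the CPU can sleep much longer. Below is a loose sketch of such a caller with illustrative deferral values; in this kernel the analogous decision is made by rcu_needs_cpu() in tree_plugin.h.

static void rcu_idle_delay_sketch(int cpu, unsigned long *delay_jiffies)
{
        bool all_lazy;

        if (!rcu_cpu_has_callbacks(cpu, &all_lazy)) {
                *delay_jiffies = ULONG_MAX;     /* nothing queued: no RCU-imposed wakeup */
                return;
        }
        if (all_lazy)
                *delay_jiffies = 6 * HZ;        /* only lazy callbacks: defer a long time */
        else
                *delay_jiffies = 4;             /* real work pending: recheck soon */
}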
2839 : :
2840 : : /*
2841 : : * Helper function for _rcu_barrier() tracing. If tracing is disabled,
2842 : : * the compiler is expected to optimize this away.
2843 : : */
2844 : : static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
2845 : : int cpu, unsigned long done)
2846 : : {
2847 : : trace_rcu_barrier(rsp->name, s, cpu,
2848 : : atomic_read(&rsp->barrier_cpu_count), done);
2849 : : }
2850 : :
2851 : : /*
2852 : : * RCU callback function for _rcu_barrier(). If we are last, wake
2853 : : * up the task executing _rcu_barrier().
2854 : : */
2855 : 0 : static void rcu_barrier_callback(struct rcu_head *rhp)
2856 : : {
2857 : : struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
2858 : 0 : struct rcu_state *rsp = rdp->rsp;
2859 : :
2860 [ # # ]: 0 : if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
2861 : : _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
2862 : 0 : complete(&rsp->barrier_completion);
2863 : : } else {
2864 : : _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
2865 : : }
2866 : 0 : }
2867 : :
2868 : : /*
2869 : : * Called with preemption disabled, and from cross-cpu IRQ context.
2870 : : */
2871 : 0 : static void rcu_barrier_func(void *type)
2872 : : {
2873 : : struct rcu_state *rsp = type;
2874 : 0 : struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2875 : :
2876 : : _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2877 : 0 : atomic_inc(&rsp->barrier_cpu_count);
2878 : 0 : rsp->call(&rdp->barrier_head, rcu_barrier_callback);
2879 : 0 : }
2880 : :
2881 : : /*
2882 : : * Orchestrate the specified type of RCU barrier, waiting for all
2883 : : * RCU callbacks of the specified type to complete.
2884 : : */
2885 : 0 : static void _rcu_barrier(struct rcu_state *rsp)
2886 : : {
2887 : : int cpu;
2888 : : struct rcu_data *rdp;
2889 : 0 : unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
2890 : : unsigned long snap_done;
2891 : :
2892 : : _rcu_barrier_trace(rsp, "Begin", -1, snap);
2893 : :
2894 : : /* Take mutex to serialize concurrent rcu_barrier() requests. */
2895 : 0 : mutex_lock(&rsp->barrier_mutex);
2896 : :
2897 : : /*
2898 : : * Ensure that all prior references, including to ->n_barrier_done,
2899 : : * are ordered before the _rcu_barrier() machinery.
2900 : : */
2901 : 0 : smp_mb(); /* See above block comment. */
2902 : :
2903 : : /*
2904 : : * Recheck ->n_barrier_done to see if others did our work for us.
2905 : : * This means checking ->n_barrier_done for an even-to-odd-to-even
2906 : : * transition. The "if" expression below therefore rounds the old
2907 : : * value up to the next even number and adds two before comparing.
2908 : : */
2909 : 0 : snap_done = rsp->n_barrier_done;
2910 : : _rcu_barrier_trace(rsp, "Check", -1, snap_done);
2911 : :
2912 : : /*
2913 : : * If the value in snap is odd, we needed to wait for the current
2914 : : * rcu_barrier() to complete, then wait for the next one, in other
2915 : : * words, we need the value of snap_done to be three larger than
2916 : : * the value of snap. On the other hand, if the value in snap is
2917 : : * even, we only had to wait for the next rcu_barrier() to complete,
2918 : : * in other words, we need the value of snap_done to be only two
2919 : : * greater than the value of snap. The "(snap + 3) & ~0x1" computes
2920 : : * this for us (thank you, Linus!).
2921 : : */
2922 [ # # ]: 0 : if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
2923 : : _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
2924 : 0 : smp_mb(); /* caller's subsequent code after above check. */
2925 : 0 : mutex_unlock(&rsp->barrier_mutex);
2926 : 0 : return;
2927 : : }
2928 : :
2929 : : /*
2930 : : * Increment ->n_barrier_done to avoid duplicate work. Use
2931 : : * ACCESS_ONCE() to prevent the compiler from speculating
2932 : : * the increment to precede the early-exit check.
2933 : : */
2934 : 0 : ACCESS_ONCE(rsp->n_barrier_done)++;
2935 [ # # ][ # # ]: 0 : WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
[ # # ]
2936 : : _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
2937 : 0 : smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
2938 : :
2939 : : /*
2940 : : * Initialize the count to one rather than to zero in order to
2941 : : * avoid a too-soon return to zero in case of a short grace period
2942 : : * (or preemption of this task). Exclude CPU-hotplug operations
2943 : : * to ensure that no offline CPU has callbacks queued.
2944 : : */
2945 : : init_completion(&rsp->barrier_completion);
2946 : 0 : atomic_set(&rsp->barrier_cpu_count, 1);
2947 : 0 : get_online_cpus();
2948 : :
2949 : : /*
2950 : : * Force each CPU with callbacks to register a new callback.
2951 : : * When that callback is invoked, we will know that all of the
2952 : : * corresponding CPU's preceding callbacks have been invoked.
2953 : : */
2954 [ # # ]: 0 : for_each_possible_cpu(cpu) {
2955 [ # # ]: 0 : if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
2956 : 0 : continue;
2957 : 0 : rdp = per_cpu_ptr(rsp->rda, cpu);
2958 : : if (rcu_is_nocb_cpu(cpu)) {
2959 : : _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
2960 : : rsp->n_barrier_done);
2961 : : atomic_inc(&rsp->barrier_cpu_count);
2962 : : __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
2963 : : rsp, cpu, 0);
2964 [ # # ]: 0 : } else if (ACCESS_ONCE(rdp->qlen)) {
2965 : : _rcu_barrier_trace(rsp, "OnlineQ", cpu,
2966 : : rsp->n_barrier_done);
2967 : 0 : smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
2968 : : } else {
2969 : : _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
2970 : : rsp->n_barrier_done);
2971 : : }
2972 : : }
2973 : 0 : put_online_cpus();
2974 : :
2975 : : /*
2976 : : * Now that we have an rcu_barrier_callback() callback on each
2977 : : * CPU, and thus each CPU has been counted, remove the initial count.
2978 : : */
2979 [ # # ]: 0 : if (atomic_dec_and_test(&rsp->barrier_cpu_count))
2980 : 0 : complete(&rsp->barrier_completion);
2981 : :
2982 : : /* Increment ->n_barrier_done to prevent duplicate work. */
2983 : 0 : smp_mb(); /* Keep increment after above mechanism. */
2984 : 0 : ACCESS_ONCE(rsp->n_barrier_done)++;
2985 [ # # ][ # # ]: 0 : WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
[ # # ]
2986 : : _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
2987 : 0 : smp_mb(); /* Keep increment before caller's subsequent code. */
2988 : :
2989 : : /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
2990 : 0 : wait_for_completion(&rsp->barrier_completion);
2991 : :
2992 : : /* Other rcu_barrier() invocations can now safely proceed. */
2993 : 0 : mutex_unlock(&rsp->barrier_mutex);
2994 : : }
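The early-exit threshold used above can be checked by hand: for an even snapshot, (snap + 3) & ~0x1 evaluates to snap + 2 (wait for one complete barrier), and for an odd snapshot to snap + 3 (finish the barrier already in flight, then a full one). A stand-alone check of that arithmetic, not kernel code:

#include <assert.h>

static unsigned long barrier_done_target(unsigned long snap)
{
        return (snap + 3) & ~0x1UL;
}

int main(void)
{
        assert(barrier_done_target(4) == 6);    /* even snapshot: snap + 2 */
        assert(barrier_done_target(5) == 8);    /* odd snapshot:  snap + 3 */
        return 0;
}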
2995 : :
2996 : : /**
2997 : : * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
2998 : : */
2999 : 0 : void rcu_barrier_bh(void)
3000 : : {
3001 : 0 : _rcu_barrier(&rcu_bh_state);
3002 : 0 : }
3003 : : EXPORT_SYMBOL_GPL(rcu_barrier_bh);
3004 : :
3005 : : /**
3006 : : * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
3007 : : */
3008 : 0 : void rcu_barrier_sched(void)
3009 : : {
3010 : 0 : _rcu_barrier(&rcu_sched_state);
3011 : 0 : }
3012 : : EXPORT_SYMBOL_GPL(rcu_barrier_sched);
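A typical caller-side use of these barriers is a module tear-down path: every callback queued with call_rcu_sched() must have run before the module's callback code and data may vanish. The names below (my_table, my_cache, remove_all_entries) are illustrative only.

static void __exit my_exit(void)
{
        remove_all_entries(&my_table);  /* each removal queues a call_rcu_sched() callback */
        rcu_barrier_sched();            /* wait for every queued callback to be invoked */
        kmem_cache_destroy(my_cache);   /* safe: no callback can still touch the cache */
}
module_exit(my_exit);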
3013 : :
3014 : : /*
3015 : : * Do boot-time initialization of a CPU's per-CPU RCU data.
3016 : : */
3017 : : static void __init
3018 : 0 : rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3019 : : {
3020 : : unsigned long flags;
3021 : 0 : struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3022 : : struct rcu_node *rnp = rcu_get_root(rsp);
3023 : :
3024 : : /* Set up local state, ensuring consistent view of global state. */
3025 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
3026 : 0 : rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
3027 : : init_callback_list(rdp);
3028 : 0 : rdp->qlen_lazy = 0;
3029 : 0 : ACCESS_ONCE(rdp->qlen) = 0;
3030 : 0 : rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
3031 [ # # ][ # # ]: 0 : WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
[ # # ]
3032 [ # # ][ # # ]: 0 : WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
[ # # ]
3033 : 0 : rdp->cpu = cpu;
3034 : 0 : rdp->rsp = rsp;
3035 : : rcu_boot_init_nocb_percpu_data(rdp);
3036 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
3037 : 0 : }
3038 : :
3039 : : /*
3040 : : * Initialize a CPU's per-CPU RCU data. Note that only one online or
3041 : : * offline event can be happening at a given time. Note also that we
3042 : : * can accept some slop in the rsp->completed access due to the fact
3043 : : * that this CPU cannot possibly have any RCU callbacks in flight yet.
3044 : : */
3045 : : static void
3046 : 0 : rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
3047 : : {
3048 : : unsigned long flags;
3049 : : unsigned long mask;
3050 : 0 : struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3051 : : struct rcu_node *rnp = rcu_get_root(rsp);
3052 : :
3053 : : /* Exclude new grace periods. */
3054 : 0 : mutex_lock(&rsp->onoff_mutex);
3055 : :
3056 : : /* Set up local state, ensuring consistent view of global state. */
3057 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
3058 : 0 : rdp->beenonline = 1; /* We have now been online. */
3059 : 0 : rdp->preemptible = preemptible;
3060 : 0 : rdp->qlen_last_fqs_check = 0;
3061 : 0 : rdp->n_force_qs_snap = rsp->n_force_qs;
3062 : 0 : rdp->blimit = blimit;
3063 : : init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
3064 : 0 : rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
3065 : : rcu_sysidle_init_percpu_data(rdp->dynticks);
3066 : 0 : atomic_set(&rdp->dynticks->dynticks,
3067 : : (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
3068 : : raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
3069 : :
3070 : : /* Add CPU to rcu_node bitmasks. */
3071 : 0 : rnp = rdp->mynode;
3072 : 0 : mask = rdp->grpmask;
3073 : : do {
3074 : : /* Exclude any attempts to start a new GP on small systems. */
3075 : 0 : raw_spin_lock(&rnp->lock); /* irqs already disabled. */
3076 : 0 : rnp->qsmaskinit |= mask;
3077 : 0 : mask = rnp->grpmask;
3078 [ # # ]: 0 : if (rnp == rdp->mynode) {
3079 : : /*
3080 : : * If there is a grace period in progress, we will
3081 : : * set up to wait for it next time we run the
3082 : : * RCU core code.
3083 : : */
3084 : 0 : rdp->gpnum = rnp->completed;
3085 : 0 : rdp->completed = rnp->completed;
3086 : 0 : rdp->passed_quiesce = 0;
3087 : 0 : rdp->qs_pending = 0;
3088 : : trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
3089 : : }
3090 : : raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
3091 : 0 : rnp = rnp->parent;
3092 [ # # ][ # # ]: 0 : } while (rnp != NULL && !(rnp->qsmaskinit & mask));
3093 [ # # ]: 0 : local_irq_restore(flags);
3094 : :
3095 : 0 : mutex_unlock(&rsp->onoff_mutex);
3096 : 0 : }
3097 : :
3098 : 0 : static void rcu_prepare_cpu(int cpu)
3099 : : {
3100 : : struct rcu_state *rsp;
3101 : :
3102 [ # # ]: 0 : for_each_rcu_flavor(rsp)
3103 : 0 : rcu_init_percpu_data(cpu, rsp,
3104 : 0 : strcmp(rsp->name, "rcu_preempt") == 0);
3105 : 0 : }
3106 : :
3107 : : /*
3108 : : * Handle CPU online/offline notification events.
3109 : : */
3110 : 0 : static int rcu_cpu_notify(struct notifier_block *self,
3111 : : unsigned long action, void *hcpu)
3112 : : {
3113 : 0 : long cpu = (long)hcpu;
3114 : 0 : struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
3115 : : struct rcu_node *rnp = rdp->mynode;
3116 : : struct rcu_state *rsp;
3117 : :
3118 : 0 : trace_rcu_utilization(TPS("Start CPU hotplug"));
3119 : : switch (action) {
3120 : : case CPU_UP_PREPARE:
3121 : : case CPU_UP_PREPARE_FROZEN:
3122 : 0 : rcu_prepare_cpu(cpu);
3123 : : rcu_prepare_kthreads(cpu);
3124 : : break;
3125 : : case CPU_ONLINE:
3126 : : case CPU_DOWN_FAILED:
3127 : : rcu_boost_kthread_setaffinity(rnp, -1);
3128 : : break;
3129 : : case CPU_DOWN_PREPARE:
3130 : : rcu_boost_kthread_setaffinity(rnp, cpu);
3131 : : break;
3132 : : case CPU_DYING:
3133 : : case CPU_DYING_FROZEN:
3134 [ # # ]: 0 : for_each_rcu_flavor(rsp)
3135 : : rcu_cleanup_dying_cpu(rsp);
3136 : : break;
3137 : : case CPU_DEAD:
3138 : : case CPU_DEAD_FROZEN:
3139 : : case CPU_UP_CANCELED:
3140 : : case CPU_UP_CANCELED_FROZEN:
3141 [ # # ]: 0 : for_each_rcu_flavor(rsp)
3142 : 0 : rcu_cleanup_dead_cpu(cpu, rsp);
3143 : : break;
3144 : : default:
3145 : : break;
3146 : : }
3147 : 0 : trace_rcu_utilization(TPS("End CPU hotplug"));
3148 : 0 : return NOTIFY_OK;
3149 : : }
3150 : :
3151 : 0 : static int rcu_pm_notify(struct notifier_block *self,
3152 : : unsigned long action, void *hcpu)
3153 : : {
3154 [ # # # ]: 0 : switch (action) {
3155 : : case PM_HIBERNATION_PREPARE:
3156 : : case PM_SUSPEND_PREPARE:
3157 [ # # ]: 0 : if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
3158 : 0 : rcu_expedited = 1;
3159 : : break;
3160 : : case PM_POST_HIBERNATION:
3161 : : case PM_POST_SUSPEND:
3162 : 0 : rcu_expedited = 0;
3163 : 0 : break;
3164 : : default:
3165 : : break;
3166 : : }
3167 : 0 : return NOTIFY_OK;
3168 : : }
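rcu_pm_notify() follows the standard PM-notifier pattern: a callback keyed on the suspend/hibernate action codes, registered through pm_notifier(), which wraps a static notifier_block. A minimal sketch of the same pattern, with an illustrative callback body and names:

#include <linux/suspend.h>
#include <linux/init.h>

static int my_pm_notify(struct notifier_block *self,
                        unsigned long action, void *unused)
{
        switch (action) {
        case PM_SUSPEND_PREPARE:
                /* quiesce before the system suspends */
                break;
        case PM_POST_SUSPEND:
                /* resume normal operation */
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}

static int __init my_pm_notify_init(void)
{
        pm_notifier(my_pm_notify, 0);   /* registers a static notifier_block */
        return 0;
}
subsys_initcall(my_pm_notify_init);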
3169 : :
3170 : : /*
3171 : : * Spawn the kthread that handles this RCU flavor's grace periods.
3172 : : */
3173 : 0 : static int __init rcu_spawn_gp_kthread(void)
3174 : : {
3175 : : unsigned long flags;
3176 : : struct rcu_node *rnp;
3177 : : struct rcu_state *rsp;
3178 : : struct task_struct *t;
3179 : :
3180 [ # # ]: 0 : for_each_rcu_flavor(rsp) {
3181 [ # # ]: 0 : t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
3182 [ # # ]: 0 : BUG_ON(IS_ERR(t));
3183 : : rnp = rcu_get_root(rsp);
3184 : 0 : raw_spin_lock_irqsave(&rnp->lock, flags);
3185 : 0 : rsp->gp_kthread = t;
3186 : 0 : raw_spin_unlock_irqrestore(&rnp->lock, flags);
3187 : : rcu_spawn_nocb_kthreads(rsp);
3188 : : }
3189 : 0 : return 0;
3190 : : }
3191 : : early_initcall(rcu_spawn_gp_kthread);
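The spawn pattern above (kthread_run() from an early_initcall()) is a common way to start one worker thread per subsystem at boot. A minimal sketch with illustrative names, not tied to any RCU internals:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/err.h>

static int my_worker_fn(void *unused)
{
        while (!kthread_should_stop())
                schedule_timeout_interruptible(HZ);     /* placeholder work loop */
        return 0;
}

static int __init my_spawn_worker(void)
{
        struct task_struct *t = kthread_run(my_worker_fn, NULL, "my_worker");

        BUG_ON(IS_ERR(t));      /* mirrors the error handling used above */
        return 0;
}
early_initcall(my_spawn_worker);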
3192 : :
3193 : : /*
3194 : : * This function is invoked towards the end of the scheduler's initialization
3195 : : * process. Before this is called, the idle task might contain
3196 : : * RCU read-side critical sections (during which time, this idle
3197 : : * task is booting the system). After this function is called, the
3198 : : * idle tasks are prohibited from containing RCU read-side critical
3199 : : * sections. This function also enables RCU lockdep checking.
3200 : : */
3201 : 0 : void rcu_scheduler_starting(void)
3202 : : {
3203 [ # # ]: 0 : WARN_ON(num_online_cpus() != 1);
3204 [ # # ]: 0 : WARN_ON(nr_context_switches() > 0);
3205 : 0 : rcu_scheduler_active = 1;
3206 : 0 : }
3207 : :
3208 : : /*
3209 : : * Compute the per-level fanout, either using the exact fanout specified
3210 : : * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
3211 : : */
3212 : : #ifdef CONFIG_RCU_FANOUT_EXACT
3213 : : static void __init rcu_init_levelspread(struct rcu_state *rsp)
3214 : : {
3215 : : int i;
3216 : :
3217 : : for (i = rcu_num_lvls - 1; i > 0; i--)
3218 : : rsp->levelspread[i] = CONFIG_RCU_FANOUT;
3219 : : rsp->levelspread[0] = rcu_fanout_leaf;
3220 : : }
3221 : : #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
3222 : 0 : static void __init rcu_init_levelspread(struct rcu_state *rsp)
3223 : : {
3224 : : int ccur;
3225 : : int cprv;
3226 : : int i;
3227 : :
3228 : 0 : cprv = nr_cpu_ids;
3229 [ # # ]: 0 : for (i = rcu_num_lvls - 1; i >= 0; i--) {
3230 : 0 : ccur = rsp->levelcnt[i];
3231 : 0 : rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
3232 : : cprv = ccur;
3233 : : }
3234 : 0 : }
3235 : : #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
3236 : :
3237 : : /*
3238 : : * Helper function for rcu_init() that initializes one rcu_state structure.
3239 : : */
3240 : 0 : static void __init rcu_init_one(struct rcu_state *rsp,
3241 : : struct rcu_data __percpu *rda)
3242 : : {
3243 : : static char *buf[] = { "rcu_node_0",
3244 : : "rcu_node_1",
3245 : : "rcu_node_2",
3246 : : "rcu_node_3" }; /* Match MAX_RCU_LVLS */
3247 : : static char *fqs[] = { "rcu_node_fqs_0",
3248 : : "rcu_node_fqs_1",
3249 : : "rcu_node_fqs_2",
3250 : : "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
3251 : : int cpustride = 1;
3252 : : int i;
3253 : : int j;
3254 : : struct rcu_node *rnp;
3255 : :
3256 : : BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
3257 : :
3258 : : /* Silence gcc 4.8 warning about array index out of range. */
3259 [ # # ]: 0 : if (rcu_num_lvls > RCU_NUM_LVLS)
3260 : 0 : panic("rcu_init_one: rcu_num_lvls overflow");
3261 : :
3262 : : /* Initialize the level-tracking arrays. */
3263 : :
3264 [ # # ]: 0 : for (i = 0; i < rcu_num_lvls; i++)
3265 : 0 : rsp->levelcnt[i] = num_rcu_lvl[i];
3266 [ # # ]: 0 : for (i = 1; i < rcu_num_lvls; i++)
3267 : 0 : rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
3268 : 0 : rcu_init_levelspread(rsp);
3269 : :
3270 : : /* Initialize the elements themselves, starting from the leaves. */
3271 : :
3272 [ # # ]: 0 : for (i = rcu_num_lvls - 1; i >= 0; i--) {
3273 : 0 : cpustride *= rsp->levelspread[i];
3274 : 0 : rnp = rsp->level[i];
3275 [ # # ]: 0 : for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
3276 : 0 : raw_spin_lock_init(&rnp->lock);
3277 : : lockdep_set_class_and_name(&rnp->lock,
3278 : : &rcu_node_class[i], buf[i]);
3279 : 0 : raw_spin_lock_init(&rnp->fqslock);
3280 : : lockdep_set_class_and_name(&rnp->fqslock,
3281 : : &rcu_fqs_class[i], fqs[i]);
3282 : 0 : rnp->gpnum = rsp->gpnum;
3283 : 0 : rnp->completed = rsp->completed;
3284 : 0 : rnp->qsmask = 0;
3285 : 0 : rnp->qsmaskinit = 0;
3286 : 0 : rnp->grplo = j * cpustride;
3287 : 0 : rnp->grphi = (j + 1) * cpustride - 1;
3288 [ # # ]: 0 : if (rnp->grphi >= NR_CPUS)
3289 : 0 : rnp->grphi = NR_CPUS - 1;
3290 [ # # ]: 0 : if (i == 0) {
3291 : 0 : rnp->grpnum = 0;
3292 : 0 : rnp->grpmask = 0;
3293 : 0 : rnp->parent = NULL;
3294 : : } else {
3295 : 0 : rnp->grpnum = j % rsp->levelspread[i - 1];
3296 : 0 : rnp->grpmask = 1UL << rnp->grpnum;
3297 : 0 : rnp->parent = rsp->level[i - 1] +
3298 : 0 : j / rsp->levelspread[i - 1];
3299 : : }
3300 : 0 : rnp->level = i;
3301 : 0 : INIT_LIST_HEAD(&rnp->blkd_tasks);
3302 : : rcu_init_one_nocb(rnp);
3303 : : }
3304 : : }
3305 : :
3306 : 0 : rsp->rda = rda;
3307 : 0 : init_waitqueue_head(&rsp->gp_wq);
3308 : : init_irq_work(&rsp->wakeup_work, rsp_wakeup);
3309 : 0 : rnp = rsp->level[rcu_num_lvls - 1];
3310 [ # # ]: 0 : for_each_possible_cpu(i) {
3311 [ # # ]: 0 : while (i > rnp->grphi)
3312 : 0 : rnp++;
3313 : 0 : per_cpu_ptr(rsp->rda, i)->mynode = rnp;
3314 : 0 : rcu_boot_init_percpu_data(i, rsp);
3315 : : }
3316 : 0 : list_add(&rsp->flavors, &rcu_struct_flavors);
3317 : 0 : }
3318 : :
3319 : : /*
3320 : : * Compute the rcu_node tree geometry from kernel parameters. This cannot
3321 : : * replace the definitions in tree.h because those are needed to size
3322 : : * the ->node array in the rcu_state structure.
3323 : : */
3324 : 0 : static void __init rcu_init_geometry(void)
3325 : : {
3326 : : ulong d;
3327 : : int i;
3328 : : int j;
3329 : 0 : int n = nr_cpu_ids;
3330 : : int rcu_capacity[MAX_RCU_LVLS + 1];
3331 : :
3332 : : /*
3333 : : * Initialize any unspecified boot parameters.
3334 : : * The default values of jiffies_till_first_fqs and
3335 : : * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
3336 : : * value, which is a function of HZ, plus one for each
3337 : : * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
3338 : : */
3339 : 0 : d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
3340 [ # # ]: 0 : if (jiffies_till_first_fqs == ULONG_MAX)
3341 : 0 : jiffies_till_first_fqs = d;
3342 [ # # ]: 0 : if (jiffies_till_next_fqs == ULONG_MAX)
3343 : 0 : jiffies_till_next_fqs = d;
3344 : :
3345 : : /* If the compile-time values are accurate, just leave. */
3346 [ # # ][ # # ]: 0 : if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
3347 : : nr_cpu_ids == NR_CPUS)
3348 : 0 : return;
3349 : :
3350 : : /*
3351 : : * Compute the number of nodes that can be handled by an rcu_node tree
3352 : : * with the given number of levels. Setting rcu_capacity[0] makes
3353 : : * some of the arithmetic easier.
3354 : : */
3355 : 0 : rcu_capacity[0] = 1;
3356 : 0 : rcu_capacity[1] = rcu_fanout_leaf;
3357 [ # # ]: 0 : for (i = 2; i <= MAX_RCU_LVLS; i++)
3358 : 0 : rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
3359 : :
3360 : : /*
3361 : : * The boot-time rcu_fanout_leaf parameter is only permitted
3362 : : * to increase the leaf-level fanout, not decrease it. Of course,
3363 : : * the leaf-level fanout cannot exceed the number of bits in
3364 : : * the rcu_node masks. Finally, the tree must be able to accommodate
3365 : : * the configured number of CPUs. Complain and fall back to the
3366 : : * compile-time values if these limits are exceeded.
3367 : : */
3368 [ # # ]: 0 : if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
3369 [ # # ]: 0 : rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
3370 : 0 : n > rcu_capacity[MAX_RCU_LVLS]) {
3371 : 0 : WARN_ON(1);
3372 : 0 : return;
3373 : : }
3374 : :
3375 : : /* Calculate the number of rcu_nodes at each level of the tree. */
3376 [ # # ]: 0 : for (i = 1; i <= MAX_RCU_LVLS; i++)
3377 [ # # ]: 0 : if (n <= rcu_capacity[i]) {
3378 [ # # ]: 0 : for (j = 0; j <= i; j++)
3379 : 0 : num_rcu_lvl[j] =
3380 : 0 : DIV_ROUND_UP(n, rcu_capacity[i - j]);
3381 : 0 : rcu_num_lvls = i;
3382 [ # # ]: 0 : for (j = i + 1; j <= MAX_RCU_LVLS; j++)
3383 : 0 : num_rcu_lvl[j] = 0;
3384 : : break;
3385 : : }
3386 : :
3387 : : /* Calculate the total number of rcu_node structures. */
3388 : 0 : rcu_num_nodes = 0;
3389 [ # # ]: 0 : for (i = 0; i <= MAX_RCU_LVLS; i++)
3390 : 0 : rcu_num_nodes += num_rcu_lvl[i];
3391 : 0 : rcu_num_nodes -= n;
3392 : : }
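To make the geometry calculation concrete, here is a stand-alone program that mirrors the loop structure above for one illustrative configuration (rcu_fanout_leaf = 16, CONFIG_RCU_FANOUT = 64, 100 possible CPUs); it is a sketch of the arithmetic, not the kernel code. For that configuration it reports a two-level tree with 8 rcu_node structures (one root plus seven leaves).

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))
#define MAX_LVLS                4

int main(void)
{
        long cap[MAX_LVLS + 1] = { 1, 16 };     /* cap[1] = rcu_fanout_leaf */
        int lvl[MAX_LVLS + 1] = { 0 };
        int fanout = 64, n = 100, nlvls = 0, nodes = 0, i, j;

        for (i = 2; i <= MAX_LVLS; i++)
                cap[i] = cap[i - 1] * fanout;   /* capacity of an i-level tree */
        for (i = 1; i <= MAX_LVLS; i++)
                if (n <= cap[i]) {
                        for (j = 0; j <= i; j++)
                                lvl[j] = DIV_ROUND_UP(n, cap[i - j]);
                        nlvls = i;
                        break;
                }
        for (i = 0; i <= nlvls; i++)
                nodes += lvl[i];
        nodes -= n;     /* the leaf "level" counts CPUs, not rcu_node structures */
        printf("levels=%d nodes=%d\n", nlvls, nodes);   /* prints: levels=2 nodes=8 */
        return 0;
}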
3393 : :
3394 : 0 : void __init rcu_init(void)
3395 : : {
3396 : : int cpu;
3397 : :
3398 : 0 : rcu_bootup_announce();
3399 : 0 : rcu_init_geometry();
3400 : 0 : rcu_init_one(&rcu_bh_state, &rcu_bh_data);
3401 : 0 : rcu_init_one(&rcu_sched_state, &rcu_sched_data);
3402 : : __rcu_init_preempt();
3403 : 0 : open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
3404 : :
3405 : : /*
3406 : : * We don't need protection against CPU-hotplug here because
3407 : : * this is called early in boot, before either interrupts
3408 : : * or the scheduler are operational.
3409 : : */
3410 : 0 : cpu_notifier(rcu_cpu_notify, 0);
3411 : 0 : pm_notifier(rcu_pm_notify, 0);
3412 [ # # ]: 0 : for_each_online_cpu(cpu)
3413 : 0 : rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
3414 : 0 : }
3415 : :
3416 : : #include "tree_plugin.h"