Branch data Line data Source code
1 : : /*
2 : : * Functions to sequence FLUSH and FUA writes.
3 : : *
4 : : * Copyright (C) 2011 Max Planck Institute for Gravitational Physics
5 : : * Copyright (C) 2011 Tejun Heo <tj@kernel.org>
6 : : *
7 : : * This file is released under the GPLv2.
8 : : *
9 : : * REQ_{FLUSH|FUA} requests are decomposed into sequences consisting of three
10 : : * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
11 : : * properties and hardware capability.
12 : : *
13 : : * If a request doesn't have data, only REQ_FLUSH makes sense, which
14 : : * indicates a simple flush request. If there is data, REQ_FLUSH indicates
15 : : * that the device cache should be flushed before the data is written, and
16 : : * REQ_FUA means that the data must be on non-volatile media on request
17 : : * completion.
18 : : *
19 : : * If the device doesn't have a writeback cache, FLUSH and FUA don't make any
20 : : * difference. The requests are either completed immediately if there's no
21 : : * data or executed as normal requests otherwise.
22 : : *
23 : : * If the device has a writeback cache and supports FUA, REQ_FLUSH is
24 : : * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
25 : : *
26 : : * If the device has a writeback cache and doesn't support FUA, REQ_FLUSH is
27 : : * translated to PREFLUSH and REQ_FUA to POSTFLUSH.
28 : : *
29 : : * The actual execution of flush is double buffered. Whenever a request
30 : : * needs to execute PRE or POSTFLUSH, it queues at
31 : : * q->flush_queue[q->flush_pending_idx]. Once certain criteria are met, a
32 : : * flush is issued and the pending_idx is toggled. When the flush
33 : : * completes, all the requests which were pending proceed to the next
34 : : * step. This allows arbitrary merging of different types of FLUSH/FUA
35 : : * requests.
36 : : *
37 : : * Currently, the following conditions are used to determine when to issue
38 : : * a flush.
39 : : *
40 : : * C1. At any given time, only one flush shall be in progress. This makes
41 : : * double buffering sufficient.
42 : : *
43 : : * C2. Flush is deferred if any request is executing DATA of its sequence.
44 : : * This avoids issuing separate POSTFLUSHes for requests which shared
45 : : * PREFLUSH.
46 : : *
47 : : * C3. The second condition is ignored if there is a request which has
48 : : * waited longer than FLUSH_PENDING_TIMEOUT. This is to avoid
49 : : * starvation in the unlikely case where there is a continuous stream of
50 : : * FUA (without FLUSH) requests.
51 : : *
52 : : * For devices which support FUA, it isn't clear whether C2 (and thus C3)
53 : : * is beneficial.
54 : : *
55 : : * Note that a sequenced FLUSH/FUA request with DATA is completed twice:
56 : : * once while executing DATA and again after the whole sequence is
57 : : * complete. The first completion updates the contained bio but doesn't
58 : : * finish it so that the bio submitter is notified only after the whole
59 : : * sequence is complete. This is implemented by testing REQ_FLUSH_SEQ in
60 : : * req_bio_endio().
61 : : *
62 : : * The above peculiarity requires that each FLUSH/FUA request has only one
63 : : * bio attached to it, which is guaranteed as they aren't allowed to be
64 : : * merged in the usual way.
65 : : */
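
As an illustration of the policy decomposition described above, here is a minimal userspace sketch. The F_*/STEP_* constants and flush_policy() are simplified stand-ins for the kernel's REQ_{FLUSH,FUA} flags, the REQ_FSEQ_* bits and blk_flush_policy() further down in this file; the sketch only reproduces the decision table, not the kernel implementation.

#include <stdio.h>

/* Simplified stand-ins for the kernel's REQ_FLUSH / REQ_FUA flags. */
#define F_FLUSH		(1u << 0)
#define F_FUA		(1u << 1)

/* Simplified stand-ins for the REQ_FSEQ_* sequence bits. */
#define STEP_PREFLUSH	(1u << 0)
#define STEP_DATA	(1u << 1)
#define STEP_POSTFLUSH	(1u << 2)

/*
 * Decide which of PREFLUSH, DATA and POSTFLUSH a request needs, given the
 * queue capabilities (queue_flags), the request flags (rq_flags) and
 * whether the request carries data.
 */
static unsigned int flush_policy(unsigned int queue_flags,
				 unsigned int rq_flags, int has_data)
{
	unsigned int policy = 0;

	if (has_data)
		policy |= STEP_DATA;

	/* FLUSH/FUA only matter if the device has a writeback cache. */
	if (queue_flags & F_FLUSH) {
		if (rq_flags & F_FLUSH)
			policy |= STEP_PREFLUSH;
		/* Without native FUA support, FUA is emulated by a POSTFLUSH. */
		if (!(queue_flags & F_FUA) && (rq_flags & F_FUA))
			policy |= STEP_POSTFLUSH;
	}
	return policy;
}

int main(void)
{
	/* Writeback cache without FUA: a FLUSH+FUA write needs all three steps. */
	printf("%#x\n", flush_policy(F_FLUSH, F_FLUSH | F_FUA, 1));		/* 0x7 */
	/* Writeback cache with FUA: FUA is passed down, no POSTFLUSH. */
	printf("%#x\n", flush_policy(F_FLUSH | F_FUA, F_FLUSH | F_FUA, 1));	/* 0x3 */
	/* No writeback cache: the request becomes a plain data write. */
	printf("%#x\n", flush_policy(0, F_FLUSH | F_FUA, 1));			/* 0x2 */
	return 0;
}
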
66 : :
67 : : #include <linux/kernel.h>
68 : : #include <linux/module.h>
69 : : #include <linux/bio.h>
70 : : #include <linux/blkdev.h>
71 : : #include <linux/gfp.h>
72 : : #include <linux/blk-mq.h>
73 : :
74 : : #include "blk.h"
75 : : #include "blk-mq.h"
76 : :
77 : : /* FLUSH/FUA sequences */
78 : : enum {
79 : : REQ_FSEQ_PREFLUSH = (1 << 0), /* pre-flushing in progress */
80 : : REQ_FSEQ_DATA = (1 << 1), /* data write in progress */
81 : : REQ_FSEQ_POSTFLUSH = (1 << 2), /* post-flushing in progress */
82 : : REQ_FSEQ_DONE = (1 << 3),
83 : :
84 : : REQ_FSEQ_ACTIONS = REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
85 : : REQ_FSEQ_POSTFLUSH,
86 : :
87 : : /*
88 : : * If flush has been pending longer than the following timeout,
89 : : * it's issued even if flush_data requests are still in flight.
90 : : */
91 : : FLUSH_PENDING_TIMEOUT = 5 * HZ,
92 : : };
93 : :
94 : : static bool blk_kick_flush(struct request_queue *q);
95 : :
96 : 0 : static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
97 : : {
98 : : unsigned int policy = 0;
99 : :
100 [ # # ]: 0 : if (blk_rq_sectors(rq))
101 : : policy |= REQ_FSEQ_DATA;
102 : :
103 [ # # ]: 0 : if (fflags & REQ_FLUSH) {
104 [ # # ]: 0 : if (rq->cmd_flags & REQ_FLUSH)
105 : 0 : policy |= REQ_FSEQ_PREFLUSH;
106 [ # # ][ # # ]: 0 : if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
107 : 0 : policy |= REQ_FSEQ_POSTFLUSH;
108 : : }
109 : 0 : return policy;
110 : : }
111 : :
112 : 0 : static unsigned int blk_flush_cur_seq(struct request *rq)
113 : : {
114 : 0 : return 1 << ffz(rq->flush.seq);
115 : : }
116 : :
117 : 0 : static void blk_flush_restore_request(struct request *rq)
118 : : {
119 : : /*
120 : : * After flush data completion, @rq->bio is %NULL but we need to
121 : : * complete the bio again. @rq->biotail is guaranteed to equal the
122 : : * original @rq->bio. Restore it.
123 : : */
124 : 0 : rq->bio = rq->biotail;
125 : :
126 : : /* make @rq a normal request */
127 : 0 : rq->cmd_flags &= ~REQ_FLUSH_SEQ;
128 : 0 : rq->end_io = rq->flush.saved_end_io;
129 : :
130 : : blk_clear_rq_complete(rq);
131 : 0 : }
132 : :
133 : 0 : static void mq_flush_run(struct work_struct *work)
134 : : {
135 : : struct request *rq;
136 : :
137 : 0 : rq = container_of(work, struct request, mq_flush_work);
138 : :
139 : 0 : memset(&rq->csd, 0, sizeof(rq->csd));
140 : 0 : blk_mq_insert_request(rq, false, true, false);
141 : 0 : }
142 : :
143 : 0 : static bool blk_flush_queue_rq(struct request *rq, bool add_front)
144 : : {
145 [ # # ]: 0 : if (rq->q->mq_ops) {
146 : 0 : INIT_WORK(&rq->mq_flush_work, mq_flush_run);
147 : 0 : kblockd_schedule_work(rq->q, &rq->mq_flush_work);
148 : 0 : return false;
149 : : } else {
150 [ # # ]: 0 : if (add_front)
151 : 0 : list_add(&rq->queuelist, &rq->q->queue_head);
152 : : else
153 : 0 : list_add_tail(&rq->queuelist, &rq->q->queue_head);
154 : : return true;
155 : : }
156 : : }
157 : :
158 : : /**
159 : : * blk_flush_complete_seq - complete flush sequence
160 : : * @rq: FLUSH/FUA request being sequenced
161 : : * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
162 : : * @error: %0 on success, negative errno on failure
163 : : *
164 : : * @rq just completed the @seq part of its flush sequence. Record the
165 : : * completion and trigger the next step.
166 : : *
167 : : * CONTEXT:
168 : : * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
169 : : *
170 : : * RETURNS:
171 : : * %true if requests were added to the dispatch queue, %false otherwise.
172 : : */
173 : 0 : static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
174 : : int error)
175 : : {
176 : 0 : struct request_queue *q = rq->q;
177 : 0 : struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
178 : : bool queued = false, kicked;
179 : :
180 [ # # ]: 0 : BUG_ON(rq->flush.seq & seq);
181 : 0 : rq->flush.seq |= seq;
182 : :
183 [ # # ]: 0 : if (likely(!error))
184 : 0 : seq = blk_flush_cur_seq(rq);
185 : : else
186 : : seq = REQ_FSEQ_DONE;
187 : :
188 [ # # # # ]: 0 : switch (seq) {
189 : : case REQ_FSEQ_PREFLUSH:
190 : : case REQ_FSEQ_POSTFLUSH:
191 : : /* queue for flush */
192 [ # # ]: 0 : if (list_empty(pending))
193 : 0 : q->flush_pending_since = jiffies;
194 : 0 : list_move_tail(&rq->flush.list, pending);
195 : : break;
196 : :
197 : : case REQ_FSEQ_DATA:
198 : 0 : list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
199 : 0 : queued = blk_flush_queue_rq(rq, true);
200 : 0 : break;
201 : :
202 : : case REQ_FSEQ_DONE:
203 : : /*
204 : : * @rq was previously adjusted by blk_insert_flush() for
205 : : * flush sequencing and may already have gone through the
206 : : * flush data request completion path. Restore @rq for
207 : : * normal completion and end it.
208 : : */
209 [ # # ]: 0 : BUG_ON(!list_empty(&rq->queuelist));
210 : 0 : list_del_init(&rq->flush.list);
211 : 0 : blk_flush_restore_request(rq);
212 [ # # ]: 0 : if (q->mq_ops)
213 : : blk_mq_end_io(rq, error);
214 : : else
215 : 0 : __blk_end_request_all(rq, error);
216 : : break;
217 : :
218 : : default:
219 : 0 : BUG();
220 : : }
221 : :
222 : 0 : kicked = blk_kick_flush(q);
223 : 0 : return kicked | queued;
224 : : }
225 : :
226 : 0 : static void flush_end_io(struct request *flush_rq, int error)
227 : : {
228 : 0 : struct request_queue *q = flush_rq->q;
229 : : struct list_head *running;
230 : : bool queued = false;
231 : 0 : struct request *rq, *n;
232 : : unsigned long flags = 0;
233 : :
234 [ # # ]: 0 : if (q->mq_ops)
235 : 0 : spin_lock_irqsave(&q->mq_flush_lock, flags);
236 : :
237 : 0 : running = &q->flush_queue[q->flush_running_idx];
238 [ # # ]: 0 : BUG_ON(q->flush_pending_idx == q->flush_running_idx);
239 : :
240 : : /* account completion of the flush request */
241 : 0 : q->flush_running_idx ^= 1;
242 : :
243 [ # # ]: 0 : if (!q->mq_ops)
244 : 0 : elv_completed_request(q, flush_rq);
245 : :
246 : : /* and push the waiting requests to the next stage */
247 [ # # ]: 0 : list_for_each_entry_safe(rq, n, running, flush.list) {
248 : 0 : unsigned int seq = blk_flush_cur_seq(rq);
249 : :
250 [ # # ]: 0 : BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
251 : 0 : queued |= blk_flush_complete_seq(rq, seq, error);
252 : : }
253 : :
254 : : /*
255 : : * Kick the queue to avoid a stall in two cases:
256 : : * 1. Moving a request silently to an empty queue_head may stall the
257 : : * queue.
258 : : * 2. When a flush request is running on a non-queueable queue, the
259 : : * queue is held. Restart the queue after the flush request finishes
260 : : * to avoid a stall.
261 : : * This function is called from request completion path and calling
262 : : * directly into request_fn may confuse the driver. Always use
263 : : * kblockd.
264 : : */
265 [ # # ][ # # ]: 0 : if (queued || q->flush_queue_delayed) {
266 [ # # ]: 0 : WARN_ON(q->mq_ops);
267 : 0 : blk_run_queue_async(q);
268 : : }
269 : 0 : q->flush_queue_delayed = 0;
270 [ # # ]: 0 : if (q->mq_ops)
271 : : spin_unlock_irqrestore(&q->mq_flush_lock, flags);
272 : 0 : }
273 : :
274 : : /**
275 : : * blk_kick_flush - consider issuing flush request
276 : : * @q: request_queue being kicked
277 : : *
278 : : * Flush related states of @q have changed, consider issuing flush request.
279 : : * Please read the comment at the top of this file for more info.
280 : : *
281 : : * CONTEXT:
282 : : * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
283 : : *
284 : : * RETURNS:
285 : : * %true if flush was issued, %false otherwise.
286 : : */
287 : 0 : static bool blk_kick_flush(struct request_queue *q)
288 : : {
289 : 0 : struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
290 : : struct request *first_rq =
291 : 0 : list_first_entry(pending, struct request, flush.list);
292 : :
293 : : /* C1 described at the top of this file */
294 [ # # ][ # # ]: 0 : if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
295 : : return false;
296 : :
297 : : /* C2 and C3 */
298 [ # # ]: 0 : if (!list_empty(&q->flush_data_in_flight) &&
299 [ # # ]: 0 : time_before(jiffies,
300 : : q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
301 : : return false;
302 : :
303 : : /*
304 : : * Issue flush and toggle pending_idx. This makes pending_idx
305 : : * different from running_idx, which means flush is in flight.
306 : : */
307 : 0 : q->flush_pending_idx ^= 1;
308 : :
309 [ # # ]: 0 : if (q->mq_ops) {
310 : 0 : struct blk_mq_ctx *ctx = first_rq->mq_ctx;
311 : 0 : struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
312 : :
313 : 0 : blk_mq_rq_init(hctx, q->flush_rq);
314 : 0 : q->flush_rq->mq_ctx = ctx;
315 : :
316 : : /*
317 : : * Reuse the tag value from the first waiting request.
318 : : * With blk-mq, the tag is generated during request
319 : : * allocation and drivers can rely on it being inside
320 : : * the range they asked for.
321 : : */
322 : 0 : q->flush_rq->tag = first_rq->tag;
323 : : } else {
324 : 0 : blk_rq_init(q, q->flush_rq);
325 : : }
326 : :
327 : 0 : q->flush_rq->cmd_type = REQ_TYPE_FS;
328 : 0 : q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
329 : 0 : q->flush_rq->rq_disk = first_rq->rq_disk;
330 : 0 : q->flush_rq->end_io = flush_end_io;
331 : :
332 : 0 : return blk_flush_queue_rq(q->flush_rq, false);
333 : : }
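
The pending/running bookkeeping that blk_kick_flush() and flush_end_io() maintain can be summed up by the toy sketch below. The toy_* names are hypothetical and only mirror the roles of q->flush_pending_idx and q->flush_running_idx; condition C1 from the header comment corresponds to the two indices being equal before a new flush may be issued.

#include <stdio.h>

/* Illustrative counterpart of q->flush_pending_idx / q->flush_running_idx. */
struct toy_flush_state {
	unsigned int pending_idx;	/* list new PRE/POSTFLUSH waiters queue on */
	unsigned int running_idx;	/* list served by the flush in flight */
};

/* A flush is in flight exactly when the two indices differ (condition C1). */
static int toy_flush_in_flight(const struct toy_flush_state *s)
{
	return s->pending_idx != s->running_idx;
}

/* What blk_kick_flush() does: issue a flush, newcomers queue on the other list. */
static void toy_issue_flush(struct toy_flush_state *s)
{
	s->pending_idx ^= 1;
}

/* What flush_end_io() does: the flush finished, the indices balance again. */
static void toy_flush_done(struct toy_flush_state *s)
{
	s->running_idx ^= 1;
}

int main(void)
{
	struct toy_flush_state s = { 0, 0 };

	toy_issue_flush(&s);
	printf("in flight: %d\n", toy_flush_in_flight(&s));	/* 1: C1 blocks a second flush */
	toy_flush_done(&s);
	printf("in flight: %d\n", toy_flush_in_flight(&s));	/* 0: the next flush may be issued */
	return 0;
}
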
334 : :
335 : 0 : static void flush_data_end_io(struct request *rq, int error)
336 : : {
337 : 0 : struct request_queue *q = rq->q;
338 : :
339 : : /*
340 : : * After populating an empty queue, kick it to avoid stall. Read
341 : : * the comment in flush_end_io().
342 : : */
343 [ # # ]: 0 : if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
344 : 0 : blk_run_queue_async(q);
345 : 0 : }
346 : :
347 : 0 : static void mq_flush_data_end_io(struct request *rq, int error)
348 : : {
349 : 0 : struct request_queue *q = rq->q;
350 : : struct blk_mq_hw_ctx *hctx;
351 : : struct blk_mq_ctx *ctx;
352 : : unsigned long flags;
353 : :
354 : 0 : ctx = rq->mq_ctx;
355 : 0 : hctx = q->mq_ops->map_queue(q, ctx->cpu);
356 : :
357 : : /*
358 : : * After populating an empty queue, kick it to avoid stall. Read
359 : : * the comment in flush_end_io().
360 : : */
361 : 0 : spin_lock_irqsave(&q->mq_flush_lock, flags);
362 [ # # ]: 0 : if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
363 : 0 : blk_mq_run_hw_queue(hctx, true);
364 : : spin_unlock_irqrestore(&q->mq_flush_lock, flags);
365 : 0 : }
366 : :
367 : : /**
368 : : * blk_insert_flush - insert a new FLUSH/FUA request
369 : : * @rq: request to insert
370 : : *
371 : : * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions,
372 : : * or from __blk_mq_run_hw_queue() to dispatch a request.
373 : : * @rq is being submitted. Analyze what needs to be done and put it on the
374 : : * right queue.
375 : : *
376 : : * CONTEXT:
377 : : * spin_lock_irq(q->queue_lock) in !mq case
378 : : */
379 : 0 : void blk_insert_flush(struct request *rq)
380 : : {
381 : 0 : struct request_queue *q = rq->q;
382 : 0 : unsigned int fflags = q->flush_flags; /* may change, cache */
383 : 0 : unsigned int policy = blk_flush_policy(fflags, rq);
384 : :
385 : : /*
386 : : * @policy now records what operations need to be done. Adjust
387 : : * REQ_FLUSH and FUA for the driver.
388 : : */
389 : 0 : rq->cmd_flags &= ~REQ_FLUSH;
390 [ # # ]: 0 : if (!(fflags & REQ_FUA))
391 : 0 : rq->cmd_flags &= ~REQ_FUA;
392 : :
393 : : /*
394 : : * An empty flush handed down from a stacking driver may
395 : : * translate into nothing if the underlying device does not
396 : : * advertise a write-back cache. In this case, simply
397 : : * complete the request.
398 : : */
399 [ # # ]: 0 : if (!policy) {
400 [ # # ]: 0 : if (q->mq_ops)
401 : : blk_mq_end_io(rq, 0);
402 : : else
403 : 0 : __blk_end_bidi_request(rq, 0, 0, 0);
404 : : return;
405 : : }
406 : :
407 [ # # ]: 0 : BUG_ON(rq->bio != rq->biotail); /* assumes zero or single bio rq */
408 : :
409 : : /*
410 : : * If there's data but flush is not necessary, the request can be
411 : : * processed directly without going through flush machinery. Queue
412 : : * for normal execution.
413 : : */
414 [ # # ]: 0 : if ((policy & REQ_FSEQ_DATA) &&
415 : : !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
416 [ # # ]: 0 : if (q->mq_ops) {
417 : 0 : blk_mq_insert_request(rq, false, false, true);
418 : : } else
419 : 0 : list_add_tail(&rq->queuelist, &q->queue_head);
420 : : return;
421 : : }
422 : :
423 : : /*
424 : : * @rq should go through flush machinery. Mark it part of flush
425 : : * sequence and submit for further processing.
426 : : */
427 : 0 : memset(&rq->flush, 0, sizeof(rq->flush));
428 : 0 : INIT_LIST_HEAD(&rq->flush.list);
429 : 0 : rq->cmd_flags |= REQ_FLUSH_SEQ;
430 : 0 : rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
431 [ # # ]: 0 : if (q->mq_ops) {
432 : 0 : rq->end_io = mq_flush_data_end_io;
433 : :
434 : : spin_lock_irq(&q->mq_flush_lock);
435 : 0 : blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
436 : : spin_unlock_irq(&q->mq_flush_lock);
437 : : return;
438 : : }
439 : 0 : rq->end_io = flush_data_end_io;
440 : :
441 : 0 : blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
442 : : }
443 : :
444 : : /**
445 : : * blk_abort_flushes - @q is being aborted, abort flush requests
446 : : * @q: request_queue being aborted
447 : : *
448 : : * To be called from elv_abort_queue(). @q is being aborted. Prepare all
449 : : * FLUSH/FUA requests for abortion.
450 : : *
451 : : * CONTEXT:
452 : : * spin_lock_irq(q->queue_lock)
453 : : */
454 : 0 : void blk_abort_flushes(struct request_queue *q)
455 : : {
456 : : struct request *rq, *n;
457 : : int i;
458 : :
459 : : /*
460 : : * Requests in flight for data are already owned by the dispatch
461 : : * queue or the device driver. Just restore for normal completion.
462 : : */
463 [ # # ]: 0 : list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
464 : : list_del_init(&rq->flush.list);
465 : 0 : blk_flush_restore_request(rq);
466 : : }
467 : :
468 : : /*
469 : : * We need to give away requests on flush queues. Restore for
470 : : * normal completion and put them on the dispatch queue.
471 : : */
472 [ # # ]: 0 : for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
473 [ # # ]: 0 : list_for_each_entry_safe(rq, n, &q->flush_queue[i],
474 : : flush.list) {
475 : : list_del_init(&rq->flush.list);
476 : 0 : blk_flush_restore_request(rq);
477 : 0 : list_add_tail(&rq->queuelist, &q->queue_head);
478 : : }
479 : : }
480 : 0 : }
481 : :
482 : : /**
483 : : * blkdev_issue_flush - queue a flush
484 : : * @bdev: blockdev to issue flush for
485 : : * @gfp_mask: memory allocation flags (for bio_alloc)
486 : : * @error_sector: optional location to store the sector of a failed flush
487 : : *
488 : : * Description:
489 : : * Issue a flush for the block device in question. Caller can supply
490 : : * room for storing the error offset in case of a flush error, if they
491 : : * wish to. If WAIT flag is not passed then caller may check only what
492 : : * wish to. The call blocks until the flush has completed, so the
493 : : * result is known on return.
494 : 0 : int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
495 : : sector_t *error_sector)
496 : : {
497 : : struct request_queue *q;
498 : : struct bio *bio;
499 : : int ret = 0;
500 : :
501 [ + + ]: 2056 : if (bdev->bd_disk == NULL)
502 : : return -ENXIO;
503 : :
504 : : q = bdev_get_queue(bdev);
505 [ + ]: 2048 : if (!q)
506 : : return -ENXIO;
507 : :
508 : : /*
509 : : * some block devices may not have their queue correctly set up here
510 : : * (e.g. loop device without a backing file) and so issuing a flush
511 : : * here will panic. Ensure there is a request function before issuing
512 : : * the flush.
513 : : */
514 [ + + ]: 2050 : if (!q->make_request_fn)
515 : : return -ENXIO;
516 : :
517 : : bio = bio_alloc(gfp_mask, 0);
518 : 2058 : bio->bi_bdev = bdev;
519 : :
520 : 2058 : ret = submit_bio_wait(WRITE_FLUSH, bio);
521 : :
522 : : /*
523 : : * The driver must store the error location in ->bi_iter.bi_sector, if
524 : : * it supports it. For non-stacked drivers, this should be
525 : : * copied from blk_rq_pos(rq).
526 : : */
527 [ - + ]: 2066 : if (error_sector)
528 : 0 : *error_sector = bio->bi_iter.bi_sector;
529 : :
530 : 2066 : bio_put(bio);
531 : 2066 : return ret;
532 : : }
533 : : EXPORT_SYMBOL(blkdev_issue_flush);
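
A hypothetical caller sketch, for illustration only: a filesystem or stacking driver that needs the device's volatile write cache emptied would typically call the helper exported above and propagate its result. example_sync_device() is not part of this file; it assumes <linux/blkdev.h> and the three-argument blkdev_issue_flush() signature used here.

#include <linux/blkdev.h>

/* Hypothetical caller, illustrating the calling convention only. */
static int example_sync_device(struct block_device *bdev)
{
	/*
	 * NULL error_sector: this caller does not care where a failed
	 * flush was detected.  GFP_KERNEL is fine from process context.
	 */
	return blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
}
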
534 : :
535 : 0 : void blk_mq_init_flush(struct request_queue *q)
536 : : {
537 : 0 : spin_lock_init(&q->mq_flush_lock);
538 : 0 : }