LCOV - coverage.info - fs/btrfs/scrub.c

LCOV - code coverage report

Current view:	top level - fs/btrfs - scrub.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	0	1362	0.0 %
Date:	2014-04-16	Functions:	0	58	0.0 %
		Branches:	0	860	0.0 %

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :  * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
       3                 :            :  *
       4                 :            :  * This program is free software; you can redistribute it and/or
       5                 :            :  * modify it under the terms of the GNU General Public
       6                 :            :  * License v2 as published by the Free Software Foundation.
       7                 :            :  *
       8                 :            :  * This program is distributed in the hope that it will be useful,
       9                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11                 :            :  * General Public License for more details.
      12                 :            :  *
      13                 :            :  * You should have received a copy of the GNU General Public
      14                 :            :  * License along with this program; if not, write to the
      15                 :            :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16                 :            :  * Boston, MA 02111-1307, USA.
      17                 :            :  */
      18                 :            : 
      19                 :            : #include <linux/blkdev.h>
      20                 :            : #include <linux/ratelimit.h>
      21                 :            : #include "ctree.h"
      22                 :            : #include "volumes.h"
      23                 :            : #include "disk-io.h"
      24                 :            : #include "ordered-data.h"
      25                 :            : #include "transaction.h"
      26                 :            : #include "backref.h"
      27                 :            : #include "extent_io.h"
      28                 :            : #include "dev-replace.h"
      29                 :            : #include "check-integrity.h"
      30                 :            : #include "rcu-string.h"
      31                 :            : #include "raid56.h"
      32                 :            : 
      33                 :            : /*
      34                 :            :  * This is only the first step towards a full-featured scrub. It reads all
      35                 :            :  * extents and super blocks and verifies the checksums. In case a bad checksum
      36                 :            :  * is found or the extent cannot be read, good data will be written back if
      37                 :            :  * any can be found.
      38                 :            :  *
      39                 :            :  * Future enhancements:
      40                 :            :  *  - In case an unrepairable extent is encountered, track which files are
      41                 :            :  *    affected and report them
      42                 :            :  *  - track and record media errors, throw out bad devices
      43                 :            :  *  - add a mode to also read unallocated space
      44                 :            :  */
      45                 :            : 
      46                 :            : struct scrub_block;
      47                 :            : struct scrub_ctx;
      48                 :            : 
      49                 :            : /*
      50                 :            :  * the following three values only influence the performance.
      51                 :            :  * The last one configures the number of parallel and outstanding I/O
      52                 :            :  * operations. The first two values configure an upper limit for the number
      53                 :            :  * of (dynamically allocated) pages that are added to a bio.
      54                 :            :  */
      55                 :            : #define SCRUB_PAGES_PER_RD_BIO  32      /* 128k per bio */
      56                 :            : #define SCRUB_PAGES_PER_WR_BIO  32      /* 128k per bio */
      57                 :            : #define SCRUB_BIOS_PER_SCTX     64      /* 8MB per device in flight */
      58                 :            : 
      59                 :            : /*
      60                 :            :  * the following value times PAGE_SIZE needs to be large enough to match the
      61                 :            :  * largest node/leaf/sector size that shall be supported.
      62                 :            :  * Values larger than BTRFS_STRIPE_LEN are not supported.
      63                 :            :  */
      64                 :            : #define SCRUB_MAX_PAGES_PER_BLOCK       16      /* 64k per node/leaf/sector */
      65                 :            : 
      66                 :            : struct scrub_page {
      67                 :            :         struct scrub_block      *sblock;
      68                 :            :         struct page             *page;
      69                 :            :         struct btrfs_device     *dev;
      70                 :            :         u64                     flags;  /* extent flags */
      71                 :            :         u64                     generation;
      72                 :            :         u64                     logical;
      73                 :            :         u64                     physical;
      74                 :            :         u64                     physical_for_dev_replace;
      75                 :            :         atomic_t                ref_count;
      76                 :            :         struct {
      77                 :            :                 unsigned int    mirror_num:8;
      78                 :            :                 unsigned int    have_csum:1;
      79                 :            :                 unsigned int    io_error:1;
      80                 :            :         };
      81                 :            :         u8                      csum[BTRFS_CSUM_SIZE];
      82                 :            : };
      83                 :            : 
      84                 :            : struct scrub_bio {
      85                 :            :         int                     index;
      86                 :            :         struct scrub_ctx        *sctx;
      87                 :            :         struct btrfs_device     *dev;
      88                 :            :         struct bio              *bio;
      89                 :            :         int                     err;
      90                 :            :         u64                     logical;
      91                 :            :         u64                     physical;
      92                 :            : #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
      93                 :            :         struct scrub_page       *pagev[SCRUB_PAGES_PER_WR_BIO];
      94                 :            : #else
      95                 :            :         struct scrub_page       *pagev[SCRUB_PAGES_PER_RD_BIO];
      96                 :            : #endif
      97                 :            :         int                     page_count;
      98                 :            :         int                     next_free;
      99                 :            :         struct btrfs_work       work;
     100                 :            : };
     101                 :            : 
     102                 :            : struct scrub_block {
     103                 :            :         struct scrub_page       *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
     104                 :            :         int                     page_count;
     105                 :            :         atomic_t                outstanding_pages;
     106                 :            :         atomic_t                ref_count; /* free mem on transition to zero */
     107                 :            :         struct scrub_ctx        *sctx;
     108                 :            :         struct {
     109                 :            :                 unsigned int    header_error:1;
     110                 :            :                 unsigned int    checksum_error:1;
     111                 :            :                 unsigned int    no_io_error_seen:1;
     112                 :            :                 unsigned int    generation_error:1; /* also sets header_error */
     113                 :            :         };
     114                 :            : };
     115                 :            : 
     116                 :            : struct scrub_wr_ctx {
     117                 :            :         struct scrub_bio *wr_curr_bio;
     118                 :            :         struct btrfs_device *tgtdev;
     119                 :            :         int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
     120                 :            :         atomic_t flush_all_writes;
     121                 :            :         struct mutex wr_lock;
     122                 :            : };
     123                 :            : 
     124                 :            : struct scrub_ctx {
     125                 :            :         struct scrub_bio        *bios[SCRUB_BIOS_PER_SCTX];
     126                 :            :         struct btrfs_root       *dev_root;
     127                 :            :         int                     first_free;
     128                 :            :         int                     curr;
     129                 :            :         atomic_t                bios_in_flight;
     130                 :            :         atomic_t                workers_pending;
     131                 :            :         spinlock_t              list_lock;
     132                 :            :         wait_queue_head_t       list_wait;
     133                 :            :         u16                     csum_size;
     134                 :            :         struct list_head        csum_list;
     135                 :            :         atomic_t                cancel_req;
     136                 :            :         int                     readonly;
     137                 :            :         int                     pages_per_rd_bio;
     138                 :            :         u32                     sectorsize;
     139                 :            :         u32                     nodesize;
     140                 :            :         u32                     leafsize;
     141                 :            : 
     142                 :            :         int                     is_dev_replace;
     143                 :            :         struct scrub_wr_ctx     wr_ctx;
     144                 :            : 
     145                 :            :         /*
     146                 :            :          * statistics
     147                 :            :          */
     148                 :            :         struct btrfs_scrub_progress stat;
     149                 :            :         spinlock_t              stat_lock;
     150                 :            : };
     151                 :            : 
     152                 :            : struct scrub_fixup_nodatasum {
     153                 :            :         struct scrub_ctx        *sctx;
     154                 :            :         struct btrfs_device     *dev;
     155                 :            :         u64                     logical;
     156                 :            :         struct btrfs_root       *root;
     157                 :            :         struct btrfs_work       work;
     158                 :            :         int                     mirror_num;
     159                 :            : };
     160                 :            : 
     161                 :            : struct scrub_nocow_inode {
     162                 :            :         u64                     inum;
     163                 :            :         u64                     offset;
     164                 :            :         u64                     root;
     165                 :            :         struct list_head        list;
     166                 :            : };
     167                 :            : 
     168                 :            : struct scrub_copy_nocow_ctx {
     169                 :            :         struct scrub_ctx        *sctx;
     170                 :            :         u64                     logical;
     171                 :            :         u64                     len;
     172                 :            :         int                     mirror_num;
     173                 :            :         u64                     physical_for_dev_replace;
     174                 :            :         struct list_head        inodes;
     175                 :            :         struct btrfs_work       work;
     176                 :            : };
     177                 :            : 
     178                 :            : struct scrub_warning {
     179                 :            :         struct btrfs_path       *path;
     180                 :            :         u64                     extent_item_size;
     181                 :            :         char                    *scratch_buf;
     182                 :            :         char                    *msg_buf;
     183                 :            :         const char              *errstr;
     184                 :            :         sector_t                sector;
     185                 :            :         u64                     logical;
     186                 :            :         struct btrfs_device     *dev;
     187                 :            :         int                     msg_bufsize;
     188                 :            :         int                     scratch_bufsize;
     189                 :            : };
     190                 :            : 
     191                 :            : 
     192                 :            : static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
     193                 :            : static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
     194                 :            : static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
     195                 :            : static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
     196                 :            : static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
     197                 :            : static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
     198                 :            :                                      struct btrfs_fs_info *fs_info,
     199                 :            :                                      struct scrub_block *original_sblock,
     200                 :            :                                      u64 length, u64 logical,
     201                 :            :                                      struct scrub_block *sblocks_for_recheck);
     202                 :            : static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
     203                 :            :                                 struct scrub_block *sblock, int is_metadata,
     204                 :            :                                 int have_csum, u8 *csum, u64 generation,
     205                 :            :                                 u16 csum_size);
     206                 :            : static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
     207                 :            :                                          struct scrub_block *sblock,
     208                 :            :                                          int is_metadata, int have_csum,
     209                 :            :                                          const u8 *csum, u64 generation,
     210                 :            :                                          u16 csum_size);
     211                 :            : static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
     212                 :            :                                              struct scrub_block *sblock_good,
     213                 :            :                                              int force_write);
     214                 :            : static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
     215                 :            :                                             struct scrub_block *sblock_good,
     216                 :            :                                             int page_num, int force_write);
     217                 :            : static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
     218                 :            : static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
     219                 :            :                                            int page_num);
     220                 :            : static int scrub_checksum_data(struct scrub_block *sblock);
     221                 :            : static int scrub_checksum_tree_block(struct scrub_block *sblock);
     222                 :            : static int scrub_checksum_super(struct scrub_block *sblock);
     223                 :            : static void scrub_block_get(struct scrub_block *sblock);
     224                 :            : static void scrub_block_put(struct scrub_block *sblock);
     225                 :            : static void scrub_page_get(struct scrub_page *spage);
     226                 :            : static void scrub_page_put(struct scrub_page *spage);
     227                 :            : static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
     228                 :            :                                     struct scrub_page *spage);
     229                 :            : static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
     230                 :            :                        u64 physical, struct btrfs_device *dev, u64 flags,
     231                 :            :                        u64 gen, int mirror_num, u8 *csum, int force,
     232                 :            :                        u64 physical_for_dev_replace);
     233                 :            : static void scrub_bio_end_io(struct bio *bio, int err);
     234                 :            : static void scrub_bio_end_io_worker(struct btrfs_work *work);
     235                 :            : static void scrub_block_complete(struct scrub_block *sblock);
     236                 :            : static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
     237                 :            :                                u64 extent_logical, u64 extent_len,
     238                 :            :                                u64 *extent_physical,
     239                 :            :                                struct btrfs_device **extent_dev,
     240                 :            :                                int *extent_mirror_num);
     241                 :            : static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
     242                 :            :                               struct scrub_wr_ctx *wr_ctx,
     243                 :            :                               struct btrfs_fs_info *fs_info,
     244                 :            :                               struct btrfs_device *dev,
     245                 :            :                               int is_dev_replace);
     246                 :            : static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
     247                 :            : static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
     248                 :            :                                     struct scrub_page *spage);
     249                 :            : static void scrub_wr_submit(struct scrub_ctx *sctx);
     250                 :            : static void scrub_wr_bio_end_io(struct bio *bio, int err);
     251                 :            : static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
     252                 :            : static int write_page_nocow(struct scrub_ctx *sctx,
     253                 :            :                             u64 physical_for_dev_replace, struct page *page);
     254                 :            : static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
     255                 :            :                                       struct scrub_copy_nocow_ctx *ctx);
     256                 :            : static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
     257                 :            :                             int mirror_num, u64 physical_for_dev_replace);
     258                 :            : static void copy_nocow_pages_worker(struct btrfs_work *work);
     259                 :            : static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
     260                 :            : static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
     261                 :            : 
     262                 :            : 
     263                 :            : static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
     264                 :            : {
     265                 :          0 :         atomic_inc(&sctx->bios_in_flight);
     266                 :            : }
     267                 :            : 
     268                 :          0 : static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
     269                 :            : {
     270                 :          0 :         atomic_dec(&sctx->bios_in_flight);
     271                 :          0 :         wake_up(&sctx->list_wait);
     272                 :          0 : }
     273                 :            : 
     274                 :          0 : static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
     275                 :            : {
     276         [ #  # ]:          0 :         while (atomic_read(&fs_info->scrub_pause_req)) {
     277                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
     278 [ #  # ][ #  # ]:          0 :                 wait_event(fs_info->scrub_pause_wait,
     279                 :            :                    atomic_read(&fs_info->scrub_pause_req) == 0);
     280                 :          0 :                 mutex_lock(&fs_info->scrub_lock);
     281                 :            :         }
     282                 :          0 : }
     283                 :            : 
     284                 :          0 : static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
     285                 :            : {
     286                 :          0 :         atomic_inc(&fs_info->scrubs_paused);
     287                 :          0 :         wake_up(&fs_info->scrub_pause_wait);
     288                 :            : 
     289                 :          0 :         mutex_lock(&fs_info->scrub_lock);
     290                 :          0 :         __scrub_blocked_if_needed(fs_info);
     291                 :            :         atomic_dec(&fs_info->scrubs_paused);
     292                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
     293                 :            : 
     294                 :          0 :         wake_up(&fs_info->scrub_pause_wait);
     295                 :          0 : }
     296                 :            : 
     297                 :            : /*
     298                 :            :  * used for workers that require transaction commits (i.e., for the
     299                 :            :  * NOCOW case)
     300                 :            :  */
     301                 :          0 : static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
     302                 :            : {
     303                 :          0 :         struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
     304                 :            : 
     305                 :            :         /*
     306                 :            :          * increment scrubs_running to prevent cancel requests from
     307                 :            :          * completing as long as a worker is running. we must also
     308                 :            :          * increment scrubs_paused to prevent deadlocking on pause
     309                 :            :          * requests used for transaction commits (as the worker uses a
     310                 :            :          * transaction context). it is safe to regard the worker
     311                 :            :          * as paused for all practical purposes. effectively, we only
     312                 :            :          * avoid cancellation requests from completing.
     313                 :            :          */
     314                 :          0 :         mutex_lock(&fs_info->scrub_lock);
     315                 :          0 :         atomic_inc(&fs_info->scrubs_running);
     316                 :          0 :         atomic_inc(&fs_info->scrubs_paused);
     317                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
     318                 :          0 :         atomic_inc(&sctx->workers_pending);
     319                 :          0 : }
     320                 :            : 
     321                 :            : /* used for workers that require transaction commits */
     322                 :          0 : static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
     323                 :            : {
     324                 :          0 :         struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
     325                 :            : 
     326                 :            :         /*
     327                 :            :          * see scrub_pending_trans_workers_inc() for why we're pretending
     328                 :            :          * to be paused in the scrub counters
     329                 :            :          */
     330                 :          0 :         mutex_lock(&fs_info->scrub_lock);
     331                 :          0 :         atomic_dec(&fs_info->scrubs_running);
     332                 :          0 :         atomic_dec(&fs_info->scrubs_paused);
     333                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
     334                 :          0 :         atomic_dec(&sctx->workers_pending);
     335                 :          0 :         wake_up(&fs_info->scrub_pause_wait);
     336                 :          0 :         wake_up(&sctx->list_wait);
     337                 :          0 : }
     338                 :            : 
     339                 :          0 : static void scrub_free_csums(struct scrub_ctx *sctx)
     340                 :            : {
     341         [ #  # ]:          0 :         while (!list_empty(&sctx->csum_list)) {
     342                 :            :                 struct btrfs_ordered_sum *sum;
     343                 :          0 :                 sum = list_first_entry(&sctx->csum_list,
     344                 :            :                                        struct btrfs_ordered_sum, list);
     345                 :            :                 list_del(&sum->list);
     346                 :          0 :                 kfree(sum);
     347                 :            :         }
     348                 :          0 : }
     349                 :            : 
     350                 :          0 : static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
     351                 :            : {
     352                 :            :         int i;
     353                 :            : 
     354         [ #  # ]:          0 :         if (!sctx)
     355                 :          0 :                 return;
     356                 :            : 
     357                 :          0 :         scrub_free_wr_ctx(&sctx->wr_ctx);
     358                 :            : 
     359                 :            :         /* this can happen when scrub is cancelled */
     360         [ #  # ]:          0 :         if (sctx->curr != -1) {
     361                 :          0 :                 struct scrub_bio *sbio = sctx->bios[sctx->curr];
     362                 :            : 
     363         [ #  # ]:          0 :                 for (i = 0; i < sbio->page_count; i++) {
     364         [ #  # ]:          0 :                         WARN_ON(!sbio->pagev[i]->page);
     365                 :          0 :                         scrub_block_put(sbio->pagev[i]->sblock);
     366                 :            :                 }
     367                 :          0 :                 bio_put(sbio->bio);
     368                 :            :         }
     369                 :            : 
     370         [ #  # ]:          0 :         for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
     371                 :          0 :                 struct scrub_bio *sbio = sctx->bios[i];
     372                 :            : 
     373         [ #  # ]:          0 :                 if (!sbio)
     374                 :            :                         break;
     375                 :          0 :                 kfree(sbio);
     376                 :            :         }
     377                 :            : 
     378                 :          0 :         scrub_free_csums(sctx);
     379                 :          0 :         kfree(sctx);
     380                 :            : }
     381                 :            : /*
                         :            :  * Allocate and initialise a scrub context (struct scrub_ctx) for @dev.
                         :            :  *
                         :            :  * @dev:            device whose dev_root supplies fs_info and the
                         :            :  *                  nodesize/leafsize/sectorsize copied into the context
                         :            :  * @is_dev_replace: stored in the context and forwarded to
                         :            :  *                  scrub_setup_wr_ctx()
                         :            :  *
                         :            :  * The context and its SCRUB_BIOS_PER_SCTX scrub_bio entries are allocated
                         :            :  * zeroed with GFP_NOFS. The bios are linked into a free list through
                         :            :  * next_free (index of the next entry, -1 terminates the list) that
                         :            :  * starts at first_free == 0.
                         :            :  *
                         :            :  * Returns the new context, ERR_PTR(-ENOMEM) when an allocation fails, or
                         :            :  * ERR_PTR(ret) with the error reported by scrub_setup_wr_ctx(). On every
                         :            :  * failure path the partially built context is handed to scrub_free_ctx().
                         :            :  */
     382                 :            : static noinline_for_stack
     383                 :          0 : struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
     384                 :            : {
     385                 :            :         struct scrub_ctx *sctx;
     386                 :            :         int             i;
     387                 :          0 :         struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
     388                 :            :         int pages_per_rd_bio;
     389                 :            :         int ret;
     390                 :            : 
     391                 :            :         /*
     392                 :            :          * the setting of pages_per_rd_bio is correct for scrub but might
     393                 :            :          * be wrong for the dev_replace code where we might read from
     394                 :            :          * different devices in the initial huge bios. However, that
     395                 :            :          * code is able to correctly handle the case when adding a page
     396                 :            :          * to a bio fails.
                         :            :          *
                         :            :          * With a block device attached the value is capped by
                         :            :          * bio_get_nr_vecs(); without one SCRUB_PAGES_PER_RD_BIO is used
                         :            :          * as is.
     397                 :            :          */
     398         [ #  # ]:          0 :         if (dev->bdev)
     399                 :          0 :                 pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
     400                 :            :                                          bio_get_nr_vecs(dev->bdev));
     401                 :            :         else
     402                 :            :                 pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
     403                 :            :         sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
     404         [ #  # ]:          0 :         if (!sctx)
     405                 :            :                 goto nomem;
     406                 :          0 :         sctx->is_dev_replace = is_dev_replace;
     407                 :          0 :         sctx->pages_per_rd_bio = pages_per_rd_bio;
     408                 :          0 :         sctx->curr = -1;
     409                 :          0 :         sctx->dev_root = dev->dev_root;
     410         [ #  # ]:          0 :         for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
     411                 :            :                 struct scrub_bio *sbio;
     412                 :            : 
     413                 :            :                 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
     414         [ #  # ]:          0 :                 if (!sbio)
     415                 :            :                         goto nomem;
     416                 :          0 :                 sctx->bios[i] = sbio;
     417                 :            : 
     418                 :          0 :                 sbio->index = i;
     419                 :          0 :                 sbio->sctx = sctx;
     420                 :          0 :                 sbio->page_count = 0;
     421                 :          0 :                 sbio->work.func = scrub_bio_end_io_worker;
     422                 :            : /* chain this entry to its successor; the last entry ends the list */
     423         [ #  # ]:          0 :                 if (i != SCRUB_BIOS_PER_SCTX - 1)
     424                 :          0 :                         sctx->bios[i]->next_free = i + 1;
     425                 :            :                 else
     426                 :          0 :                         sctx->bios[i]->next_free = -1;
     427                 :            :         }
     428                 :          0 :         sctx->first_free = 0;
     429                 :          0 :         sctx->nodesize = dev->dev_root->nodesize;
     430                 :          0 :         sctx->leafsize = dev->dev_root->leafsize;
     431                 :          0 :         sctx->sectorsize = dev->dev_root->sectorsize;
     432                 :          0 :         atomic_set(&sctx->bios_in_flight, 0);
     433                 :          0 :         atomic_set(&sctx->workers_pending, 0);
     434                 :          0 :         atomic_set(&sctx->cancel_req, 0);
     435                 :          0 :         sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
     436                 :          0 :         INIT_LIST_HEAD(&sctx->csum_list);
     437                 :            : 
     438                 :          0 :         spin_lock_init(&sctx->list_lock);
     439                 :          0 :         spin_lock_init(&sctx->stat_lock);
     440                 :          0 :         init_waitqueue_head(&sctx->list_wait);
     441                 :            : 
     442                 :          0 :         ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
     443                 :            :                                  fs_info->dev_replace.tgtdev, is_dev_replace);
     444         [ #  # ]:          0 :         if (ret) {
     445                 :          0 :                 scrub_free_ctx(sctx);
     446                 :            :                 return ERR_PTR(ret);
     447                 :            :         }
     448                 :            :         return sctx;
     449                 :            : /*
                         :            :  * NOTE(review): sctx is still NULL here when the first kzalloc() failed.
                         :            :  * This relies on scrub_free_ctx() accepting NULL; its opening lines are
                         :            :  * not visible from here -- confirm.
                         :            :  */
     450                 :            : nomem:
     451                 :          0 :         scrub_free_ctx(sctx);
     452                 :            :         return ERR_PTR(-ENOMEM);
     453                 :            : }
     454                 :            : /*
                         :            :  * Print one warning per path name of inode @inum in the root @root.
                         :            :  *
                         :            :  * Passed as the per-inode callback to iterate_extent_inodes() by
                         :            :  * scrub_print_warning().
                         :            :  *
                         :            :  * @inum:     inode number to resolve
                         :            :  * @offset:   offset printed in the message and used to bound "length"
                         :            :  * @root:     objectid of the root that is looked up for the inode
                         :            :  * @warn_ctx: struct scrub_warning supplying errstr, logical, sector, dev
                         :            :  *            and the btrfs_path to work with
                         :            :  *
                         :            :  * If any lookup step fails, a single "path resolving failed" warning
                         :            :  * carrying the error code is printed instead. Returns 0 in both cases.
                         :            :  */
     455                 :          0 : static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
     456                 :            :                                      void *warn_ctx)
     457                 :            : {
     458                 :            :         u64 isize;
     459                 :            :         u32 nlink;
     460                 :            :         int ret;
     461                 :            :         int i;
     462                 :            :         struct extent_buffer *eb;
     463                 :            :         struct btrfs_inode_item *inode_item;
     464                 :            :         struct scrub_warning *swarn = warn_ctx;
     465                 :          0 :         struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
     466                 :            :         struct inode_fs_paths *ipath = NULL;
     467                 :            :         struct btrfs_root *local_root;
     468                 :            :         struct btrfs_key root_key;
     469                 :            : 
     470                 :          0 :         root_key.objectid = root;
     471                 :          0 :         root_key.type = BTRFS_ROOT_ITEM_KEY;
     472                 :          0 :         root_key.offset = (u64)-1;
     473                 :            :         local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
     474         [ #  # ]:          0 :         if (IS_ERR(local_root)) {
     475                 :            :                 ret = PTR_ERR(local_root);
     476                 :          0 :                 goto err;
     477                 :            :         }
     478                 :            : 
     479                 :          0 :         ret = inode_item_info(inum, 0, local_root, swarn->path);
     480         [ #  # ]:          0 :         if (ret) {
     481                 :          0 :                 btrfs_release_path(swarn->path);
     482                 :          0 :                 goto err;
     483                 :            :         }
     484                 :            : /* read size and link count from the inode item, then drop the path */
     485                 :          0 :         eb = swarn->path->nodes[0];
     486                 :          0 :         inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
     487                 :            :                                         struct btrfs_inode_item);
     488                 :            :         isize = btrfs_inode_size(eb, inode_item);
     489                 :            :         nlink = btrfs_inode_nlink(eb, inode_item);
     490                 :          0 :         btrfs_release_path(swarn->path);
     491                 :            : 
     492                 :          0 :         ipath = init_ipath(4096, local_root, swarn->path);
     493         [ #  # ]:          0 :         if (IS_ERR(ipath)) {
     494                 :            :                 ret = PTR_ERR(ipath);
     495                 :            :                 ipath = NULL;
     496                 :          0 :                 goto err;
     497                 :            :         }
     498                 :          0 :         ret = paths_from_inode(inum, ipath);
     499                 :            : 
     500         [ #  # ]:          0 :         if (ret < 0)
     501                 :            :                 goto err;
     502                 :            : 
     503                 :            :         /*
     504                 :            :          * we deliberately ignore the bit ipath might have been too small to
     505                 :            :          * hold all of the paths here
                         :            :          *
                         :            :          * NOTE(review): "length" below is min(isize - offset, PAGE_SIZE)
                         :            :          * in u64 arithmetic, so an offset beyond isize would wrap and
                         :            :          * print PAGE_SIZE -- confirm callers never pass such an offset.
     506                 :            :          */
     507         [ #  # ]:          0 :         for (i = 0; i < ipath->fspath->elem_cnt; ++i)
     508                 :          0 :                 printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
     509                 :            :                         "%s, sector %llu, root %llu, inode %llu, offset %llu, "
     510                 :            :                         "length %llu, links %u (path: %s)\n", swarn->errstr,
     511                 :            :                         swarn->logical, rcu_str_deref(swarn->dev->name),
     512                 :            :                         (unsigned long long)swarn->sector, root, inum, offset,
     513                 :            :                         min(isize - offset, (u64)PAGE_SIZE), nlink,
     514                 :            :                         (char *)(unsigned long)ipath->fspath->val[i]);
     515                 :            : 
     516                 :          0 :         free_ipath(ipath);
     517                 :          0 :         return 0;
     518                 :            : /*
                         :            :  * ipath is NULL when we get here before init_ipath() succeeded.
                         :            :  * NOTE(review): assumes free_ipath() accepts NULL -- confirm.
                         :            :  */
     519                 :            : err:
     520                 :          0 :         printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
     521                 :            :                 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
     522                 :            :                 "resolving failed with ret=%d\n", swarn->errstr,
     523                 :            :                 swarn->logical, rcu_str_deref(swarn->dev->name),
     524                 :            :                 (unsigned long long)swarn->sector, root, inum, offset, ret);
     525                 :            : 
     526                 :          0 :         free_ipath(ipath);
     527                 :          0 :         return 0;
     528                 :            : }
     529                 :            : /*
                         :            :  * Log warnings describing what the first page of @sblock belongs to.
                         :            :  *
                         :            :  * @errstr: short error description placed at the start of each message
                         :            :  * @sblock: block in error; dev, physical and logical are taken from
                         :            :  *          pagev[0], and the reported sector is physical >> 9
                         :            :  *
                         :            :  * The extent containing the logical address is looked up with
                         :            :  * extent_from_logical(). For a tree block one message is printed per
                         :            :  * iteration of tree_backref_for_extent(); otherwise
                         :            :  * iterate_extent_inodes() is run with scrub_print_warning_inode() as the
                         :            :  * callback and its return value is ignored. Nothing is printed when an
                         :            :  * allocation or the extent lookup fails.
                         :            :  */
     530                 :          0 : static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
     531                 :            : {
     532                 :            :         struct btrfs_device *dev;
     533                 :            :         struct btrfs_fs_info *fs_info;
     534                 :            :         struct btrfs_path *path;
     535                 :            :         struct btrfs_key found_key;
     536                 :            :         struct extent_buffer *eb;
     537                 :            :         struct btrfs_extent_item *ei;
     538                 :            :         struct scrub_warning swarn;
     539                 :          0 :         unsigned long ptr = 0;
     540                 :            :         u64 extent_item_pos;
     541                 :          0 :         u64 flags = 0;
     542                 :            :         u64 ref_root;
     543                 :            :         u32 item_size;
     544                 :            :         u8 ref_level;
     545                 :            :         const int bufsize = 4096;
     546                 :            :         int ret;
     547                 :            : /* only warns: pagev[0] is dereferenced below even if the check fires */
     548         [ #  # ]:          0 :         WARN_ON(sblock->page_count < 1);
     549                 :          0 :         dev = sblock->pagev[0]->dev;
     550                 :          0 :         fs_info = sblock->sctx->dev_root->fs_info;
     551                 :            : 
     552                 :          0 :         path = btrfs_alloc_path();
     553                 :            : /* the three allocations are checked together further down */
     554                 :          0 :         swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
     555                 :          0 :         swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
     556                 :          0 :         swarn.sector = (sblock->pagev[0]->physical) >> 9;
     557                 :          0 :         swarn.logical = sblock->pagev[0]->logical;
     558                 :          0 :         swarn.errstr = errstr;
     559                 :          0 :         swarn.dev = NULL;
     560                 :          0 :         swarn.msg_bufsize = bufsize;
     561                 :          0 :         swarn.scratch_bufsize = bufsize;
     562                 :            : 
     563 [ #  # ][ #  # ]:          0 :         if (!path || !swarn.scratch_buf || !swarn.msg_buf)
                 [ #  # ]
     564                 :            :                 goto out;
     565                 :            : 
     566                 :          0 :         ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
     567                 :            :                                   &flags);
     568         [ #  # ]:          0 :         if (ret < 0)
     569                 :            :                 goto out;
     570                 :            : 
     571                 :          0 :         extent_item_pos = swarn.logical - found_key.objectid;
     572                 :          0 :         swarn.extent_item_size = found_key.offset;
     573                 :            : 
     574                 :          0 :         eb = path->nodes[0];
     575                 :          0 :         ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
     576                 :          0 :         item_size = btrfs_item_size_nr(eb, path->slots[0]);
     577                 :            : /*
                         :            :  * NOTE(review): the loop below ends only when tree_backref_for_extent()
                         :            :  * returns 1, and ref_level is tested for "node"/"leaf" even when
                         :            :  * ret < 0. Whether the helper can keep returning an error, or leave
                         :            :  * ref_level unset on error, is not visible from here -- confirm.
                         :            :  */
     578         [ #  # ]:          0 :         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
     579                 :            :                 do {
     580                 :          0 :                         ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
     581                 :            :                                                         &ref_root, &ref_level);
     582 [ #  # ][ #  # ]:          0 :                         printk_in_rcu(KERN_WARNING
                 [ #  # ]
     583                 :            :                                 "BTRFS: %s at logical %llu on dev %s, "
     584                 :            :                                 "sector %llu: metadata %s (level %d) in tree "
     585                 :            :                                 "%llu\n", errstr, swarn.logical,
     586                 :            :                                 rcu_str_deref(dev->name),
     587                 :            :                                 (unsigned long long)swarn.sector,
     588                 :            :                                 ref_level ? "node" : "leaf",
     589                 :            :                                 ret < 0 ? -1 : ref_level,
     590                 :            :                                 ret < 0 ? -1 : ref_root);
     591         [ #  # ]:          0 :                 } while (ret != 1);
     592                 :          0 :                 btrfs_release_path(path);
     593                 :            :         } else {
     594                 :          0 :                 btrfs_release_path(path);
     595                 :          0 :                 swarn.path = path;
     596                 :          0 :                 swarn.dev = dev;
     597                 :          0 :                 iterate_extent_inodes(fs_info, found_key.objectid,
     598                 :            :                                         extent_item_pos, 1,
     599                 :            :                                         scrub_print_warning_inode, &swarn);
     600                 :            :         }
     601                 :            : /* common exit: release the path and both buffers, allocated or not */
     602                 :            : out:
     603                 :          0 :         btrfs_free_path(path);
     604                 :          0 :         kfree(swarn.scratch_buf);
     605                 :          0 :         kfree(swarn.msg_buf);
     606                 :          0 : }
     607                 :            : /*
                         :            :  * Try to repair the page of inode @inum that covers @offset.
                         :            :  *
                         :            :  * Passed as the per-inode callback to iterate_inodes_from_logical() by
                         :            :  * scrub_fixup_nodatasum().
                         :            :  *
                         :            :  * @inum:      inode number, loaded with btrfs_iget()
                         :            :  * @offset:    offset into the inode's mapping; the page index is
                         :            :  *             offset >> PAGE_CACHE_SHIFT
                         :            :  * @root:      objectid of the root that contains the inode
                         :            :  * @fixup_ctx: struct scrub_fixup_nodatasum; root, logical and mirror_num
                         :            :  *             are read from it
                         :            :  *
                         :            :  * Returns 1 when the page was corrected, a negative errno from a
                         :            :  * failing step, or -EIO when no step failed but nothing was corrected.
                         :            :  */
     608                 :          0 : static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
     609                 :            : {
     610                 :            :         struct page *page = NULL;
     611                 :            :         unsigned long index;
     612                 :            :         struct scrub_fixup_nodatasum *fixup = fixup_ctx;
     613                 :            :         int ret;
     614                 :            :         int corrected = 0;
     615                 :            :         struct btrfs_key key;
     616                 :            :         struct inode *inode = NULL;
     617                 :            :         struct btrfs_fs_info *fs_info;
     618                 :          0 :         u64 end = offset + PAGE_SIZE - 1;
     619                 :            :         struct btrfs_root *local_root;
     620                 :            :         int srcu_index;
     621                 :            : 
     622                 :          0 :         key.objectid = root;
     623                 :          0 :         key.type = BTRFS_ROOT_ITEM_KEY;
     624                 :          0 :         key.offset = (u64)-1;
     625                 :            : /* subvol_srcu is held across the root lookup and btrfs_iget() */
     626                 :          0 :         fs_info = fixup->root->fs_info;
     627                 :          0 :         srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
     628                 :            : 
     629                 :            :         local_root = btrfs_read_fs_root_no_name(fs_info, &key);
     630         [ #  # ]:          0 :         if (IS_ERR(local_root)) {
     631                 :            :                 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
     632                 :          0 :                 return PTR_ERR(local_root);
     633                 :            :         }
     634                 :            : 
     635                 :          0 :         key.type = BTRFS_INODE_ITEM_KEY;
     636                 :          0 :         key.objectid = inum;
     637                 :          0 :         key.offset = 0;
     638                 :          0 :         inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
     639                 :            :         srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
     640         [ #  # ]:          0 :         if (IS_ERR(inode))
     641                 :          0 :                 return PTR_ERR(inode);
     642                 :            : 
     643                 :          0 :         index = offset >> PAGE_CACHE_SHIFT;
     644                 :            : 
     645                 :          0 :         page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
     646         [ #  # ]:          0 :         if (!page) {
     647                 :            :                 ret = -ENOMEM;
     648                 :            :                 goto out;
     649                 :            :         }
     650                 :            : /*
                         :            :  * NOTE(review): unlock_page() is only called after repair_io_failure().
                         :            :  * If find_or_create_page() hands back a locked page, the "goto out"
                         :            :  * paths below (dirty page, set_extent_bits() failure) drop the page
                         :            :  * reference while the page is still locked -- confirm.
                         :            :  */
     651         [ #  # ]:          0 :         if (PageUptodate(page)) {
     652         [ #  # ]:          0 :                 if (PageDirty(page)) {
     653                 :            :                         /*
     654                 :            :                          * we need to write the data to the defective sector.
     655                 :            :                          * the data that was in that sector is not in memory,
     656                 :            :                          * because the page was modified. we must not write the
     657                 :            :                          * modified page to that sector.
     658                 :            :                          *
     659                 :            :                          * TODO: what could be done here: wait for the delalloc
     660                 :            :                          *       runner to write out that page (might involve
     661                 :            :                          *       COW) and see whether the sector is still
     662                 :            :                          *       referenced afterwards.
     663                 :            :                          *
     664                 :            :                          * For the meantime, we'll treat this error as
     665                 :            :                          * uncorrectable, although there is a chance that a
     666                 :            :                          * later scrub will find the bad sector again and that
     667                 :            :                          * there's no dirty page in memory, then.
     668                 :            :                          */
     669                 :            :                         ret = -EIO;
     670                 :            :                         goto out;
     671                 :            :                 }
     672                 :          0 :                 fs_info = BTRFS_I(inode)->root->fs_info;
     673                 :          0 :                 ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
     674                 :            :                                         fixup->logical, page,
     675                 :            :                                         fixup->mirror_num);
     676                 :          0 :                 unlock_page(page);
     677                 :          0 :                 corrected = !ret;
     678                 :            :         } else {
     679                 :            :                 /*
     680                 :            :                  * we need to get good data first. the general readpage path
     681                 :            :                  * will call repair_io_failure for us, we just have to make
     682                 :            :                  * sure we read the bad mirror.
     683                 :            :                  */
     684                 :          0 :                 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
     685                 :            :                                         EXTENT_DAMAGED, GFP_NOFS);
     686         [ #  # ]:          0 :                 if (ret) {
     687                 :            :                         /* set_extent_bits should give proper error */
     688         [ #  # ]:          0 :                         WARN_ON(ret > 0);
     689         [ #  # ]:          0 :                         if (ret > 0)
     690                 :            :                                 ret = -EFAULT;
     691                 :            :                         goto out;
     692                 :            :                 }
     693                 :            : 
     694                 :          0 :                 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
     695                 :            :                                                 btrfs_get_extent,
     696                 :            :                                                 fixup->mirror_num);
     697                 :            :                 wait_on_page_locked(page);
     698                 :            : /*
                         :            :  * The page counts as corrected when test_range_bit() no longer reports
                         :            :  * EXTENT_DAMAGED for the range; otherwise the bit is cleared here.
                         :            :  */
     699                 :          0 :                 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
     700                 :            :                                                 end, EXTENT_DAMAGED, 0, NULL);
     701         [ #  # ]:          0 :                 if (!corrected)
     702                 :          0 :                         clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
     703                 :            :                                                 EXTENT_DAMAGED, GFP_NOFS);
     704                 :            :         }
     705                 :            : /* drop the page and inode references taken above, then map the result */
     706                 :            : out:
     707         [ #  # ]:          0 :         if (page)
     708                 :          0 :                 put_page(page);
     709         [ #  # ]:          0 :         if (inode)
     710                 :          0 :                 iput(inode);
     711                 :            : 
     712         [ #  # ]:          0 :         if (ret < 0)
     713                 :            :                 return ret;
     714                 :            : 
     715         [ #  # ]:          0 :         if (ret == 0 && corrected) {
     716                 :            :                 /*
     717                 :            :                  * we only need to call readpage for one of the inodes belonging
     718                 :            :                  * to this extent. so make iterate_extent_inodes stop
     719                 :            :                  */
     720                 :            :                 return 1;
     721                 :            :         }
     722                 :            : 
     723                 :          0 :         return -EIO;
     724                 :            : }
     725                 :            : /*
                         :            :  * Work function for a struct scrub_fixup_nodatasum request.
                         :            :  *
                         :            :  * @work: the work member embedded in the request; the request itself is
                         :            :  *        freed before returning
                         :            :  *
                         :            :  * Joins a transaction on fixup->root and runs scrub_fixup_readpage() via
                         :            :  * iterate_inodes_from_logical() for fixup->logical. A non-negative result
                         :            :  * bumps stat.corrected_errors. A failure to allocate the path, to join
                         :            :  * the transaction, or a negative result from the iteration bumps
                         :            :  * stat.uncorrectable_errors and the dev_replace
                         :            :  * num_uncorrectable_read_errors counter, and logs a rate-limited error.
                         :            :  * scrub_pending_trans_workers_dec() is called on every path.
                         :            :  */
     726                 :          0 : static void scrub_fixup_nodatasum(struct btrfs_work *work)
     727                 :            : {
     728                 :            :         int ret;
     729                 :            :         struct scrub_fixup_nodatasum *fixup;
     730                 :            :         struct scrub_ctx *sctx;
     731                 :            :         struct btrfs_trans_handle *trans = NULL;
     732                 :            :         struct btrfs_path *path;
     733                 :            :         int uncorrectable = 0;
     734                 :            : 
     735                 :          0 :         fixup = container_of(work, struct scrub_fixup_nodatasum, work);
     736                 :          0 :         sctx = fixup->sctx;
     737                 :            : 
     738                 :          0 :         path = btrfs_alloc_path();
     739         [ #  # ]:          0 :         if (!path) {
     740                 :            :                 spin_lock(&sctx->stat_lock);
     741                 :          0 :                 ++sctx->stat.malloc_errors;
     742                 :            :                 spin_unlock(&sctx->stat_lock);
     743                 :            :                 uncorrectable = 1;
     744                 :          0 :                 goto out;
     745                 :            :         }
     746                 :            : 
     747                 :          0 :         trans = btrfs_join_transaction(fixup->root);
     748         [ #  # ]:          0 :         if (IS_ERR(trans)) {
     749                 :            :                 uncorrectable = 1;
     750                 :            :                 goto out;
     751                 :            :         }
     752                 :            : 
     753                 :            :         /*
     754                 :            :          * the idea is to trigger a regular read through the standard path. we
     755                 :            :          * read a page from the (failed) logical address by specifying the
     756                 :            :          * corresponding copynum of the failed sector. thus, that readpage is
     757                 :            :          * expected to fail.
     758                 :            :          * that is the point where on-the-fly error correction will kick in
     759                 :            :          * (once it's finished) and rewrite the failed sector if a good copy
     760                 :            :          * can be found.
     761                 :            :          */
     762                 :          0 :         ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
     763                 :            :                                                 path, scrub_fixup_readpage,
     764                 :            :                                                 fixup);
     765         [ #  # ]:          0 :         if (ret < 0) {
     766                 :            :                 uncorrectable = 1;
     767                 :            :                 goto out;
     768                 :            :         }
     769         [ #  # ]:          0 :         WARN_ON(ret != 1);
     770                 :            : /*
                         :            :  * NOTE(review): a return of 0 trips the WARN_ON above but is still
                         :            :  * counted as a corrected error below -- confirm that is intended.
                         :            :  */
     771                 :            :         spin_lock(&sctx->stat_lock);
     772                 :          0 :         ++sctx->stat.corrected_errors;
     773                 :            :         spin_unlock(&sctx->stat_lock);
     774                 :            : /* trans is NULL (path allocation failed) or an ERR_PTR on early exits */
     775                 :            : out:
     776 [ #  # ][ #  # ]:          0 :         if (trans && !IS_ERR(trans))
     777                 :          0 :                 btrfs_end_transaction(trans, fixup->root);
     778         [ #  # ]:          0 :         if (uncorrectable) {
     779                 :            :                 spin_lock(&sctx->stat_lock);
     780                 :          0 :                 ++sctx->stat.uncorrectable_errors;
     781                 :            :                 spin_unlock(&sctx->stat_lock);
     782                 :          0 :                 btrfs_dev_replace_stats_inc(
     783                 :          0 :                         &sctx->dev_root->fs_info->dev_replace.
     784                 :            :                         num_uncorrectable_read_errors);
     785         [ #  # ]:          0 :                 printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
     786                 :            :                     "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
     787                 :            :                         fixup->logical, rcu_str_deref(fixup->dev->name));
     788                 :            :         }
     789                 :            : 
     790                 :          0 :         btrfs_free_path(path);
     791                 :          0 :         kfree(fixup);
     792                 :            : 
     793                 :          0 :         scrub_pending_trans_workers_dec(sctx);
     794                 :          0 : }
     795                 :            : 
     796                 :            : /*
     797                 :            :  * scrub_handle_errored_block gets called when either verification of the
     798                 :            :  * pages failed or the bio failed to read, e.g. with EIO. In the latter
     799                 :            :  * case, this function handles all pages in the bio, even though only one
     800                 :            :  * may be bad.
     801                 :            :  * The goal of this function is to repair the errored block by using the
     802                 :            :  * contents of one of the mirrors.
     803                 :            :  */
     804                 :          0 : static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
     805                 :            : {
     806                 :          0 :         struct scrub_ctx *sctx = sblock_to_check->sctx;
     807                 :            :         struct btrfs_device *dev;
     808                 :            :         struct btrfs_fs_info *fs_info;
     809                 :            :         u64 length;
     810                 :            :         u64 logical;
     811                 :            :         u64 generation;
     812                 :            :         unsigned int failed_mirror_index;
     813                 :            :         unsigned int is_metadata;
     814                 :            :         unsigned int have_csum;
     815                 :            :         u8 *csum;
     816                 :            :         struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
     817                 :            :         struct scrub_block *sblock_bad;
     818                 :            :         int ret;
     819                 :            :         int mirror_index;
     820                 :            :         int page_num;
     821                 :            :         int success;
     822                 :            :         static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
     823                 :            :                                       DEFAULT_RATELIMIT_BURST);
     824                 :            : 
     825         [ #  # ]:          0 :         BUG_ON(sblock_to_check->page_count < 1);
     826                 :          0 :         fs_info = sctx->dev_root->fs_info;
     827         [ #  # ]:          0 :         if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
     828                 :            :                 /*
     829                 :            :                  * if we find an error in a super block, we just report it.
     830                 :            :                  * They will get written with the next transaction commit
     831                 :            :                  * anyway
     832                 :            :                  */
     833                 :            :                 spin_lock(&sctx->stat_lock);
     834                 :          0 :                 ++sctx->stat.super_errors;
     835                 :            :                 spin_unlock(&sctx->stat_lock);
     836                 :          0 :                 return 0;
     837                 :            :         }
     838                 :          0 :         length = sblock_to_check->page_count * PAGE_SIZE;
     839                 :          0 :         logical = sblock_to_check->pagev[0]->logical;
     840                 :          0 :         generation = sblock_to_check->pagev[0]->generation;
     841         [ #  # ]:          0 :         BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
     842                 :          0 :         failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
     843                 :          0 :         is_metadata = !(sblock_to_check->pagev[0]->flags &
     844                 :            :                         BTRFS_EXTENT_FLAG_DATA);
     845                 :          0 :         have_csum = sblock_to_check->pagev[0]->have_csum;
     846                 :          0 :         csum = sblock_to_check->pagev[0]->csum;
     847                 :          0 :         dev = sblock_to_check->pagev[0]->dev;
     848                 :            : 
     849 [ #  # ][ #  # ]:          0 :         if (sctx->is_dev_replace && !is_metadata && !have_csum) {
     850                 :            :                 sblocks_for_recheck = NULL;
     851                 :            :                 goto nodatasum_case;
     852                 :            :         }
     853                 :            : 
     854                 :            :         /*
     855                 :            :          * read all mirrors one after the other. This includes
     856                 :            :          * re-reading the extent or metadata block that failed (that was
     857                 :            :          * the cause that this fixup code is called) another time,
     858                 :            :          * page by page this time in order to know which pages
     859                 :            :          * caused I/O errors and which ones are good (for all mirrors).
     860                 :            :          * It is the goal to handle the situation when more than one
     861                 :            :          * mirror contains I/O errors, but the errors do not
     862                 :            :          * overlap, i.e. the data can be repaired by selecting the
     863                 :            :          * pages from those mirrors without I/O error on the
     864                 :            :          * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
     865                 :            :          * would be that mirror #1 has an I/O error on the first page,
     866                 :            :          * the second page is good, and mirror #2 has an I/O error on
     867                 :            :          * the second page, but the first page is good.
     868                 :            :          * Then the first page of the first mirror can be repaired by
     869                 :            :          * taking the first page of the second mirror, and the
     870                 :            :          * second page of the second mirror can be repaired by
     871                 :            :          * copying the contents of the 2nd page of the 1st mirror.
     872                 :            :          * One more note: if the pages of one mirror contain I/O
     873                 :            :          * errors, the checksum cannot be verified. In order to get
     874                 :            :          * the best data for repairing, the first attempt is to find
     875                 :            :          * a mirror without I/O errors and with a validated checksum.
     876                 :            :          * Only if this is not possible, the pages are picked from
     877                 :            :          * mirrors with I/O errors without considering the checksum.
     878                 :            :          * If the latter is the case, at the end, the checksum of the
     879                 :            :          * repaired area is verified in order to correctly maintain
     880                 :            :          * the statistics.
     881                 :            :          */
     882                 :            : 
     883                 :            :         sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
     884                 :            :                                      sizeof(*sblocks_for_recheck),
     885                 :            :                                      GFP_NOFS);
     886         [ #  # ]:          0 :         if (!sblocks_for_recheck) {
     887                 :            :                 spin_lock(&sctx->stat_lock);
     888                 :          0 :                 sctx->stat.malloc_errors++;
     889                 :          0 :                 sctx->stat.read_errors++;
     890                 :          0 :                 sctx->stat.uncorrectable_errors++;
     891                 :            :                 spin_unlock(&sctx->stat_lock);
     892                 :          0 :                 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
     893                 :          0 :                 goto out;
     894                 :            :         }
     895                 :            : 
     896                 :            :         /* setup the context, map the logical blocks and alloc the pages */
     897                 :          0 :         ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
     898                 :            :                                         logical, sblocks_for_recheck);
     899         [ #  # ]:          0 :         if (ret) {
     900                 :            :                 spin_lock(&sctx->stat_lock);
     901                 :          0 :                 sctx->stat.read_errors++;
     902                 :          0 :                 sctx->stat.uncorrectable_errors++;
     903                 :            :                 spin_unlock(&sctx->stat_lock);
     904                 :          0 :                 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
     905                 :          0 :                 goto out;
     906                 :            :         }
     907         [ #  # ]:          0 :         BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
     908                 :          0 :         sblock_bad = sblocks_for_recheck + failed_mirror_index;
     909                 :            : 
     910                 :            :         /* build and submit the bios for the failed mirror, check checksums */
     911                 :          0 :         scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
     912                 :            :                             csum, generation, sctx->csum_size);
     913                 :            : 
     914         [ #  # ]:          0 :         if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
     915                 :            :             sblock_bad->no_io_error_seen) {
     916                 :            :                 /*
     917                 :            :                  * the error disappeared after reading page by page, or
     918                 :            :                  * the area was part of a huge bio and other parts of the
     919                 :            :                  * bio caused I/O errors, or the block layer merged several
     920                 :            :                  * read requests into one and the error is caused by a
     921                 :            :                  * different bio (usually one of the two latter cases is
     922                 :            :                  * the cause)
     923                 :            :                  */
     924                 :            :                 spin_lock(&sctx->stat_lock);
     925                 :          0 :                 sctx->stat.unverified_errors++;
     926                 :            :                 spin_unlock(&sctx->stat_lock);
     927                 :            : 
     928         [ #  # ]:          0 :                 if (sctx->is_dev_replace)
     929                 :          0 :                         scrub_write_block_to_dev_replace(sblock_bad);
     930                 :            :                 goto out;
     931                 :            :         }
     932                 :            : 
     933         [ #  # ]:          0 :         if (!sblock_bad->no_io_error_seen) {
     934                 :            :                 spin_lock(&sctx->stat_lock);
     935                 :          0 :                 sctx->stat.read_errors++;
     936                 :            :                 spin_unlock(&sctx->stat_lock);
     937         [ #  # ]:          0 :                 if (__ratelimit(&_rs))
     938                 :          0 :                         scrub_print_warning("i/o error", sblock_to_check);
     939                 :          0 :                 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
     940         [ #  # ]:          0 :         } else if (sblock_bad->checksum_error) {
     941                 :            :                 spin_lock(&sctx->stat_lock);
     942                 :          0 :                 sctx->stat.csum_errors++;
     943                 :            :                 spin_unlock(&sctx->stat_lock);
     944         [ #  # ]:          0 :                 if (__ratelimit(&_rs))
     945                 :          0 :                         scrub_print_warning("checksum error", sblock_to_check);
     946                 :          0 :                 btrfs_dev_stat_inc_and_print(dev,
     947                 :            :                                              BTRFS_DEV_STAT_CORRUPTION_ERRS);
     948         [ #  # ]:          0 :         } else if (sblock_bad->header_error) {
     949                 :            :                 spin_lock(&sctx->stat_lock);
     950                 :          0 :                 sctx->stat.verify_errors++;
     951                 :            :                 spin_unlock(&sctx->stat_lock);
     952         [ #  # ]:          0 :                 if (__ratelimit(&_rs))
     953                 :          0 :                         scrub_print_warning("checksum/header error",
     954                 :            :                                             sblock_to_check);
     955         [ #  # ]:          0 :                 if (sblock_bad->generation_error)
     956                 :          0 :                         btrfs_dev_stat_inc_and_print(dev,
     957                 :            :                                 BTRFS_DEV_STAT_GENERATION_ERRS);
     958                 :            :                 else
     959                 :          0 :                         btrfs_dev_stat_inc_and_print(dev,
     960                 :            :                                 BTRFS_DEV_STAT_CORRUPTION_ERRS);
     961                 :            :         }
     962                 :            : 
     963         [ #  # ]:          0 :         if (sctx->readonly) {
     964                 :            :                 ASSERT(!sctx->is_dev_replace);
     965                 :            :                 goto out;
     966                 :            :         }
     967                 :            : 
     968         [ #  # ]:          0 :         if (!is_metadata && !have_csum) {
     969                 :            :                 struct scrub_fixup_nodatasum *fixup_nodatasum;
     970                 :            : 
     971                 :            : nodatasum_case:
     972         [ #  # ]:          0 :                 WARN_ON(sctx->is_dev_replace);
     973                 :            : 
     974                 :            :                 /*
     975                 :            :                  * !is_metadata and !have_csum, this means that the data
     976                 :            :                  * might not be COW'ed, that it might be modified
     977                 :            :                  * concurrently. The general strategy to work on the
     978                 :            :                  * commit root does not help in the case when COW is not
     979                 :            :                  * used.
     980                 :            :                  */
     981                 :            :                 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
     982         [ #  # ]:          0 :                 if (!fixup_nodatasum)
     983                 :            :                         goto did_not_correct_error;
     984                 :          0 :                 fixup_nodatasum->sctx = sctx;
     985                 :          0 :                 fixup_nodatasum->dev = dev;
     986                 :          0 :                 fixup_nodatasum->logical = logical;
     987                 :          0 :                 fixup_nodatasum->root = fs_info->extent_root;
     988                 :          0 :                 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
     989                 :          0 :                 scrub_pending_trans_workers_inc(sctx);
     990                 :          0 :                 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
     991                 :          0 :                 btrfs_queue_worker(&fs_info->scrub_workers,
     992                 :            :                                    &fixup_nodatasum->work);
     993                 :          0 :                 goto out;
     994                 :            :         }
     995                 :            : 
     996                 :            :         /*
     997                 :            :          * now build and submit the bios for the other mirrors, check
     998                 :            :          * checksums.
     999                 :            :          * First try to pick the mirror which is completely without I/O
    1000                 :            :          * errors and also does not have a checksum error.
    1001                 :            :          * If one is found, and if a checksum is present, the full block
    1002                 :            :          * that is known to contain an error is rewritten. Afterwards
    1003                 :            :          * the block is known to be corrected.
    1004                 :            :          * If a mirror is found which is completely correct, and no
    1005                 :            :          * checksum is present, only those pages are rewritten that had
    1006                 :            :          * an I/O error in the block to be repaired, since it cannot be
    1007                 :            :          * determined, which copy of the other pages is better (and it
    1008                 :            :          * could happen otherwise that a correct page would be
    1009                 :            :          * overwritten by a bad one).
    1010                 :            :          */
    1011         [ #  # ]:          0 :         for (mirror_index = 0;
    1012         [ #  # ]:          0 :              mirror_index < BTRFS_MAX_MIRRORS &&
    1013                 :          0 :              sblocks_for_recheck[mirror_index].page_count > 0;
    1014                 :          0 :              mirror_index++) {
    1015                 :            :                 struct scrub_block *sblock_other;
    1016                 :            : 
    1017         [ #  # ]:          0 :                 if (mirror_index == failed_mirror_index)
    1018                 :          0 :                         continue;
    1019                 :            :                 sblock_other = sblocks_for_recheck + mirror_index;
    1020                 :            : 
    1021                 :            :                 /* build and submit the bios, check checksums */
    1022                 :          0 :                 scrub_recheck_block(fs_info, sblock_other, is_metadata,
    1023                 :            :                                     have_csum, csum, generation,
    1024                 :            :                                     sctx->csum_size);
    1025                 :            : 
    1026         [ #  # ]:          0 :                 if (!sblock_other->header_error &&
    1027                 :          0 :                     !sblock_other->checksum_error &&
    1028                 :            :                     sblock_other->no_io_error_seen) {
    1029         [ #  # ]:          0 :                         if (sctx->is_dev_replace) {
    1030                 :          0 :                                 scrub_write_block_to_dev_replace(sblock_other);
    1031                 :            :                         } else {
    1032                 :          0 :                                 int force_write = is_metadata || have_csum;
    1033                 :            : 
    1034                 :          0 :                                 ret = scrub_repair_block_from_good_copy(
    1035                 :            :                                                 sblock_bad, sblock_other,
    1036                 :            :                                                 force_write);
    1037                 :            :                         }
    1038         [ #  # ]:          0 :                         if (0 == ret)
    1039                 :            :                                 goto corrected_error;
    1040                 :            :                 }
    1041                 :            :         }
    1042                 :            : 
    1043                 :            :         /*
    1044                 :            :          * for dev_replace, pick good pages and write to the target device.
    1045                 :            :          */
    1046         [ #  # ]:          0 :         if (sctx->is_dev_replace) {
    1047                 :            :                 success = 1;
    1048         [ #  # ]:          0 :                 for (page_num = 0; page_num < sblock_bad->page_count;
    1049                 :          0 :                      page_num++) {
    1050                 :            :                         int sub_success;
    1051                 :            : 
    1052                 :            :                         sub_success = 0;
    1053         [ #  # ]:          0 :                         for (mirror_index = 0;
    1054         [ #  # ]:          0 :                              mirror_index < BTRFS_MAX_MIRRORS &&
    1055                 :          0 :                              sblocks_for_recheck[mirror_index].page_count > 0;
    1056                 :          0 :                              mirror_index++) {
    1057                 :            :                                 struct scrub_block *sblock_other =
    1058                 :            :                                         sblocks_for_recheck + mirror_index;
    1059                 :          0 :                                 struct scrub_page *page_other =
    1060                 :            :                                         sblock_other->pagev[page_num];
    1061                 :            : 
    1062         [ #  # ]:          0 :                                 if (!page_other->io_error) {
    1063                 :          0 :                                         ret = scrub_write_page_to_dev_replace(
    1064                 :            :                                                         sblock_other, page_num);
    1065         [ #  # ]:          0 :                                         if (ret == 0) {
    1066                 :            :                                                 /* succeeded for this page */
    1067                 :            :                                                 sub_success = 1;
    1068                 :            :                                                 break;
    1069                 :            :                                         } else {
    1070                 :          0 :                                                 btrfs_dev_replace_stats_inc(
    1071                 :          0 :                                                         &sctx->dev_root->
    1072                 :            :                                                         fs_info->dev_replace.
    1073                 :            :                                                         num_write_errors);
    1074                 :            :                                         }
    1075                 :            :                                 }
    1076                 :            :                         }
    1077                 :            : 
    1078         [ #  # ]:          0 :                         if (!sub_success) {
    1079                 :            :                                 /*
    1080                 :            :                                  * did not find a mirror to fetch the page
    1081                 :            :                                  * from. scrub_write_page_to_dev_replace()
    1082                 :            :                                  * handles this case (page->io_error), by
    1083                 :            :                                  * filling the block with zeros before
    1084                 :            :                                  * submitting the write request
    1085                 :            :                                  */
    1086                 :            :                                 success = 0;
    1087                 :          0 :                                 ret = scrub_write_page_to_dev_replace(
    1088                 :            :                                                 sblock_bad, page_num);
    1089         [ #  # ]:          0 :                                 if (ret)
    1090                 :          0 :                                         btrfs_dev_replace_stats_inc(
    1091                 :          0 :                                                 &sctx->dev_root->fs_info->
    1092                 :            :                                                 dev_replace.num_write_errors);
    1093                 :            :                         }
    1094                 :            :                 }
    1095                 :            : 
    1096                 :            :                 goto out;
    1097                 :            :         }
    1098                 :            : 
    1099                 :            :         /*
    1100                 :            :          * for regular scrub, repair those pages that are errored.
    1101                 :            :          * In case of I/O errors in the area that is supposed to be
    1102                 :            :          * repaired, continue by picking good copies of those pages.
    1103                 :            :          * Select the good pages from mirrors to rewrite bad pages from
    1104                 :            :          * the area to fix. Afterwards verify the checksum of the block
    1105                 :            :          * that is supposed to be repaired. This verification step is
    1106                 :            :          * only done for the purpose of statistic counting and for the
    1107                 :            :          * final scrub report, whether errors remain.
    1108                 :            :          * A perfect algorithm could make use of the checksum and try
    1109                 :            :          * all possible combinations of pages from the different mirrors
    1110                 :            :          * until the checksum verification succeeds. For example, when
    1111                 :            :          * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
    1112                 :            :          * of mirror #2 is readable but the final checksum test fails,
    1113                 :            :          * then the 2nd page of mirror #3 could be tried, whether now
    1114                 :            :          * the final checksum succeeds. But this would be a rare
    1115                 :            :          * exception and is therefore not implemented. At least it is
    1116                 :            :          * avoided that the good copy is overwritten.
    1117                 :            :          * A more useful improvement would be to pick the sectors
    1118                 :            :          * without I/O error based on sector sizes (512 bytes on legacy
    1119                 :            :          * disks) instead of on PAGE_SIZE. Then maybe 512 byte of one
    1120                 :            :          * mirror could be repaired by taking 512 byte of a different
    1121                 :            :          * mirror, even if other 512 byte sectors in the same PAGE_SIZE
    1122                 :            :          * area are unreadable.
    1123                 :            :          */
    1124                 :            : 
    1125                 :            :         /* can only fix I/O errors from here on */
    1126         [ #  # ]:          0 :         if (sblock_bad->no_io_error_seen)
    1127                 :            :                 goto did_not_correct_error;
    1128                 :            : 
    1129                 :            :         success = 1;
    1130         [ #  # ]:          0 :         for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
    1131                 :          0 :                 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
    1132                 :            : 
    1133         [ #  # ]:          0 :                 if (!page_bad->io_error)
    1134                 :          0 :                         continue;
    1135                 :            : 
    1136         [ #  # ]:          0 :                 for (mirror_index = 0;
    1137         [ #  # ]:          0 :                      mirror_index < BTRFS_MAX_MIRRORS &&
    1138                 :          0 :                      sblocks_for_recheck[mirror_index].page_count > 0;
    1139                 :          0 :                      mirror_index++) {
    1140                 :            :                         struct scrub_block *sblock_other = sblocks_for_recheck +
    1141                 :            :                                                            mirror_index;
    1142                 :          0 :                         struct scrub_page *page_other = sblock_other->pagev[
    1143                 :            :                                                         page_num];
    1144                 :            : 
    1145         [ #  # ]:          0 :                         if (!page_other->io_error) {
    1146                 :          0 :                                 ret = scrub_repair_page_from_good_copy(
    1147                 :            :                                         sblock_bad, sblock_other, page_num, 0);
    1148         [ #  # ]:          0 :                                 if (0 == ret) {
    1149                 :          0 :                                         page_bad->io_error = 0;
    1150                 :          0 :                                         break; /* succeeded for this page */
    1151                 :            :                                 }
    1152                 :            :                         }
    1153                 :            :                 }
    1154                 :            : 
    1155         [ #  # ]:          0 :                 if (page_bad->io_error) {
    1156                 :            :                         /* did not find a mirror to copy the page from */
    1157                 :            :                         success = 0;
    1158                 :            :                 }
    1159                 :            :         }
    1160                 :            : 
    1161         [ #  # ]:          0 :         if (success) {
    1162         [ #  # ]:          0 :                 if (is_metadata || have_csum) {
    1163                 :            :                         /*
    1164                 :            :                          * need to verify the checksum now that all
    1165                 :            :                          * sectors on disk are repaired (the write
    1166                 :            :                          * request for data to be repaired is on its way).
    1167                 :            :                          * Just be lazy and use scrub_recheck_block()
    1168                 :            :                          * which re-reads the data before the checksum
    1169                 :            :                          * is verified, but most likely the data comes out
    1170                 :            :                          * of the page cache.
    1171                 :            :                          */
    1172                 :          0 :                         scrub_recheck_block(fs_info, sblock_bad,
    1173                 :            :                                             is_metadata, have_csum, csum,
    1174                 :            :                                             generation, sctx->csum_size);
    1175         [ #  # ]:          0 :                         if (!sblock_bad->header_error &&
    1176                 :          0 :                             !sblock_bad->checksum_error &&
    1177                 :            :                             sblock_bad->no_io_error_seen)
    1178                 :            :                                 goto corrected_error;
    1179                 :            :                         else
    1180                 :            :                                 goto did_not_correct_error;
    1181                 :            :                 } else {
    1182                 :            : corrected_error:
    1183                 :            :                         spin_lock(&sctx->stat_lock);
    1184                 :          0 :                         sctx->stat.corrected_errors++;
    1185                 :            :                         spin_unlock(&sctx->stat_lock);
    1186         [ #  # ]:          0 :                         printk_ratelimited_in_rcu(KERN_ERR
    1187                 :            :                                 "BTRFS: fixed up error at logical %llu on dev %s\n",
    1188                 :            :                                 logical, rcu_str_deref(dev->name));
    1189                 :            :                 }
    1190                 :            :         } else {
    1191                 :            : did_not_correct_error:
    1192                 :            :                 spin_lock(&sctx->stat_lock);
    1193                 :          0 :                 sctx->stat.uncorrectable_errors++;
    1194                 :            :                 spin_unlock(&sctx->stat_lock);
    1195         [ #  # ]:          0 :                 printk_ratelimited_in_rcu(KERN_ERR
    1196                 :            :                         "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
    1197                 :            :                         logical, rcu_str_deref(dev->name));
    1198                 :            :         }
    1199                 :            : 
    1200                 :            : out:
    1201         [ #  # ]:          0 :         if (sblocks_for_recheck) {
    1202         [ #  # ]:          0 :                 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
    1203                 :          0 :                      mirror_index++) {
    1204                 :          0 :                         struct scrub_block *sblock = sblocks_for_recheck +
    1205                 :            :                                                      mirror_index;
    1206                 :            :                         int page_index;
    1207                 :            : 
    1208         [ #  # ]:          0 :                         for (page_index = 0; page_index < sblock->page_count;
    1209                 :          0 :                              page_index++) {
    1210                 :          0 :                                 sblock->pagev[page_index]->sblock = NULL;
    1211                 :          0 :                                 scrub_page_put(sblock->pagev[page_index]);
    1212                 :            :                         }
    1213                 :            :                 }
    1214                 :          0 :                 kfree(sblocks_for_recheck);
    1215                 :            :         }
    1216                 :            : 
    1217                 :            :         return 0;
    1218                 :            : }
    1219                 :            : 
    1220                 :          0 : static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
    1221                 :            :                                      struct btrfs_fs_info *fs_info,
    1222                 :            :                                      struct scrub_block *original_sblock,
    1223                 :            :                                      u64 length, u64 logical,
    1224                 :            :                                      struct scrub_block *sblocks_for_recheck)
    1225                 :            : {
    1226                 :            :         int page_index;
    1227                 :            :         int mirror_index;
    1228                 :            :         int ret;
    1229                 :            : 
    1230                 :            :         /*
    1231                 :            :          * note: the two members ref_count and outstanding_pages
    1232                 :            :          * are not used (and not set) in the blocks that are used for
    1233                 :            :          * the recheck procedure
    1234                 :            :          */
    1235                 :            : 
    1236                 :            :         page_index = 0;
    1237         [ #  # ]:          0 :         while (length > 0) {
    1238                 :          0 :                 u64 sublen = min_t(u64, length, PAGE_SIZE);
    1239                 :          0 :                 u64 mapped_length = sublen;
    1240                 :          0 :                 struct btrfs_bio *bbio = NULL;
    1241                 :            : 
    1242                 :            :                 /*
    1243                 :            :                  * with a length of PAGE_SIZE, each returned stripe
    1244                 :            :                  * represents one mirror
    1245                 :            :                  */
    1246                 :          0 :                 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
    1247                 :            :                                       &mapped_length, &bbio, 0);
    1248 [ #  # ][ #  # ]:          0 :                 if (ret || !bbio || mapped_length < sublen) {
                 [ #  # ]
    1249                 :          0 :                         kfree(bbio);
    1250                 :          0 :                         return -EIO;
    1251                 :            :                 }
    1252                 :            : 
    1253         [ #  # ]:          0 :                 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
    1254         [ #  # ]:          0 :                 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
    1255                 :          0 :                      mirror_index++) {
    1256                 :            :                         struct scrub_block *sblock;
    1257                 :            :                         struct scrub_page *page;
    1258                 :            : 
    1259         [ #  # ]:          0 :                         if (mirror_index >= BTRFS_MAX_MIRRORS)
    1260                 :          0 :                                 continue;
    1261                 :            : 
    1262                 :          0 :                         sblock = sblocks_for_recheck + mirror_index;
    1263                 :          0 :                         sblock->sctx = sctx;
    1264                 :            :                         page = kzalloc(sizeof(*page), GFP_NOFS);
    1265         [ #  # ]:          0 :                         if (!page) {
    1266                 :            : leave_nomem:
    1267                 :            :                                 spin_lock(&sctx->stat_lock);
    1268                 :          0 :                                 sctx->stat.malloc_errors++;
    1269                 :            :                                 spin_unlock(&sctx->stat_lock);
    1270                 :          0 :                                 kfree(bbio);
    1271                 :          0 :                                 return -ENOMEM;
    1272                 :            :                         }
    1273                 :            :                         scrub_page_get(page);
    1274                 :          0 :                         sblock->pagev[page_index] = page;
    1275                 :          0 :                         page->logical = logical;
    1276                 :          0 :                         page->physical = bbio->stripes[mirror_index].physical;
    1277         [ #  # ]:          0 :                         BUG_ON(page_index >= original_sblock->page_count);
    1278                 :          0 :                         page->physical_for_dev_replace =
    1279                 :          0 :                                 original_sblock->pagev[page_index]->
    1280                 :            :                                 physical_for_dev_replace;
    1281                 :            :                         /* for missing devices, dev->bdev is NULL */
    1282                 :          0 :                         page->dev = bbio->stripes[mirror_index].dev;
    1283                 :          0 :                         page->mirror_num = mirror_index + 1;
    1284                 :          0 :                         sblock->page_count++;
    1285                 :          0 :                         page->page = alloc_page(GFP_NOFS);
    1286         [ #  # ]:          0 :                         if (!page->page)
    1287                 :            :                                 goto leave_nomem;
    1288                 :            :                 }
    1289                 :          0 :                 kfree(bbio);
    1290                 :          0 :                 length -= sublen;
    1291                 :          0 :                 logical += sublen;
    1292                 :          0 :                 page_index++;
    1293                 :            :         }
    1294                 :            : 
    1295                 :            :         return 0;
    1296                 :            : }
    1297                 :            : 
    1298                 :            : /*
    1299                 :            :  * this function will check the on disk data for checksum errors, header
    1300                 :            :  * errors and read I/O errors. If any I/O errors happen, the exact pages
    1301                 :            :  * which are errored are marked as being bad. The goal is to enable scrub
    1302                 :            :  * to take those pages that are not errored from all the mirrors so that
    1303                 :            :  * the pages that are errored in the just handled mirror can be repaired.
    1304                 :            :  */
    1305                 :          0 : static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
    1306                 :            :                                 struct scrub_block *sblock, int is_metadata,
    1307                 :            :                                 int have_csum, u8 *csum, u64 generation,
    1308                 :            :                                 u16 csum_size)
    1309                 :            : {
    1310                 :            :         int page_num;
    1311                 :            : 
    1312                 :          0 :         sblock->no_io_error_seen = 1;
    1313                 :          0 :         sblock->header_error = 0;
    1314                 :          0 :         sblock->checksum_error = 0;
    1315                 :            : 
    1316         [ #  # ]:          0 :         for (page_num = 0; page_num < sblock->page_count; page_num++) {
    1317                 :            :                 struct bio *bio;
    1318                 :          0 :                 struct scrub_page *page = sblock->pagev[page_num];
    1319                 :            : 
    1320         [ #  # ]:          0 :                 if (page->dev->bdev == NULL) {
    1321                 :          0 :                         page->io_error = 1;
    1322                 :          0 :                         sblock->no_io_error_seen = 0;
    1323                 :          0 :                         continue;
    1324                 :            :                 }
    1325                 :            : 
    1326         [ #  # ]:          0 :                 WARN_ON(!page->page);
    1327                 :          0 :                 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
    1328         [ #  # ]:          0 :                 if (!bio) {
    1329                 :          0 :                         page->io_error = 1;
    1330                 :          0 :                         sblock->no_io_error_seen = 0;
    1331                 :          0 :                         continue;
    1332                 :            :                 }
    1333                 :          0 :                 bio->bi_bdev = page->dev->bdev;
    1334                 :          0 :                 bio->bi_iter.bi_sector = page->physical >> 9;
    1335                 :            : 
    1336                 :          0 :                 bio_add_page(bio, page->page, PAGE_SIZE, 0);
    1337         [ #  # ]:          0 :                 if (btrfsic_submit_bio_wait(READ, bio))
    1338                 :          0 :                         sblock->no_io_error_seen = 0;
    1339                 :            : 
    1340                 :          0 :                 bio_put(bio);
    1341                 :            :         }
    1342                 :            : 
    1343         [ #  # ]:          0 :         if (sblock->no_io_error_seen)
    1344                 :          0 :                 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
    1345                 :            :                                              have_csum, csum, generation,
    1346                 :            :                                              csum_size);
    1347                 :            : 
    1348                 :          0 :         return;
    1349                 :            : }
    1350                 :            : 
    1351                 :          0 : static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
    1352                 :            :                                          struct scrub_block *sblock,
    1353                 :            :                                          int is_metadata, int have_csum,
    1354                 :            :                                          const u8 *csum, u64 generation,
    1355                 :            :                                          u16 csum_size)
    1356                 :            : {
    1357                 :            :         int page_num;
    1358                 :            :         u8 calculated_csum[BTRFS_CSUM_SIZE];
    1359                 :            :         u32 crc = ~(u32)0;
    1360                 :            :         void *mapped_buffer;
    1361                 :            : 
    1362         [ #  # ]:          0 :         WARN_ON(!sblock->pagev[0]->page);
    1363         [ #  # ]:          0 :         if (is_metadata) {
    1364                 :            :                 struct btrfs_header *h;
    1365                 :            : 
    1366                 :          0 :                 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
    1367                 :            :                 h = (struct btrfs_header *)mapped_buffer;
    1368                 :            : 
    1369 [ #  # ][ #  # ]:          0 :                 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
    1370         [ #  # ]:          0 :                     memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
    1371                 :          0 :                     memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
    1372                 :            :                            BTRFS_UUID_SIZE)) {
    1373                 :          0 :                         sblock->header_error = 1;
    1374         [ #  # ]:          0 :                 } else if (generation != btrfs_stack_header_generation(h)) {
    1375                 :          0 :                         sblock->header_error = 1;
    1376                 :          0 :                         sblock->generation_error = 1;
    1377                 :            :                 }
    1378                 :          0 :                 csum = h->csum;
    1379                 :            :         } else {
    1380         [ #  # ]:          0 :                 if (!have_csum)
    1381                 :          0 :                         return;
    1382                 :            : 
    1383                 :          0 :                 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
    1384                 :            :         }
    1385                 :            : 
    1386                 :            :         for (page_num = 0;;) {
    1387         [ #  # ]:          0 :                 if (page_num == 0 && is_metadata)
    1388                 :          0 :                         crc = btrfs_csum_data(
    1389                 :            :                                 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
    1390                 :            :                                 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
    1391                 :            :                 else
    1392                 :          0 :                         crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
    1393                 :            : 
    1394                 :          0 :                 kunmap_atomic(mapped_buffer);
    1395                 :          0 :                 page_num++;
    1396         [ #  # ]:          0 :                 if (page_num >= sblock->page_count)
    1397                 :            :                         break;
    1398         [ #  # ]:          0 :                 WARN_ON(!sblock->pagev[page_num]->page);
    1399                 :            : 
    1400                 :          0 :                 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
    1401                 :          0 :         }
    1402                 :            : 
    1403                 :          0 :         btrfs_csum_final(crc, calculated_csum);
    1404         [ #  # ]:          0 :         if (memcmp(calculated_csum, csum, csum_size))
    1405                 :          0 :                 sblock->checksum_error = 1;
    1406                 :            : }
    1407                 :            : 
    1408                 :          0 : static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
    1409                 :            :                                              struct scrub_block *sblock_good,
    1410                 :            :                                              int force_write)
    1411                 :            : {
    1412                 :            :         int page_num;
    1413                 :            :         int ret = 0;
    1414                 :            : 
    1415         [ #  # ]:          0 :         for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
    1416                 :            :                 int ret_sub;
    1417                 :            : 
    1418                 :          0 :                 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
    1419                 :            :                                                            sblock_good,
    1420                 :            :                                                            page_num,
    1421                 :            :                                                            force_write);
    1422         [ #  # ]:          0 :                 if (ret_sub)
    1423                 :            :                         ret = ret_sub;
    1424                 :            :         }
    1425                 :            : 
    1426                 :          0 :         return ret;
    1427                 :            : }
    1428                 :            : 
    1429                 :          0 : static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
    1430                 :            :                                             struct scrub_block *sblock_good,
    1431                 :            :                                             int page_num, int force_write)
    1432                 :            : {
    1433                 :          0 :         struct scrub_page *page_bad = sblock_bad->pagev[page_num];
    1434                 :          0 :         struct scrub_page *page_good = sblock_good->pagev[page_num];
    1435                 :            : 
    1436         [ #  # ]:          0 :         BUG_ON(page_bad->page == NULL);
    1437         [ #  # ]:          0 :         BUG_ON(page_good->page == NULL);
    1438 [ #  # ][ #  # ]:          0 :         if (force_write || sblock_bad->header_error ||
    1439         [ #  # ]:          0 :             sblock_bad->checksum_error || page_bad->io_error) {
    1440                 :            :                 struct bio *bio;
    1441                 :            :                 int ret;
    1442                 :            : 
    1443         [ #  # ]:          0 :                 if (!page_bad->dev->bdev) {
    1444         [ #  # ]:          0 :                         printk_ratelimited(KERN_WARNING "BTRFS: "
    1445                 :            :                                 "scrub_repair_page_from_good_copy(bdev == NULL) "
    1446                 :            :                                 "is unexpected!\n");
    1447                 :            :                         return -EIO;
    1448                 :            :                 }
    1449                 :            : 
    1450                 :          0 :                 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
    1451         [ #  # ]:          0 :                 if (!bio)
    1452                 :            :                         return -EIO;
    1453                 :          0 :                 bio->bi_bdev = page_bad->dev->bdev;
    1454                 :          0 :                 bio->bi_iter.bi_sector = page_bad->physical >> 9;
    1455                 :            : 
    1456                 :          0 :                 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
    1457         [ #  # ]:          0 :                 if (PAGE_SIZE != ret) {
    1458                 :          0 :                         bio_put(bio);
    1459                 :          0 :                         return -EIO;
    1460                 :            :                 }
    1461                 :            : 
    1462         [ #  # ]:          0 :                 if (btrfsic_submit_bio_wait(WRITE, bio)) {
    1463                 :          0 :                         btrfs_dev_stat_inc_and_print(page_bad->dev,
    1464                 :            :                                 BTRFS_DEV_STAT_WRITE_ERRS);
    1465                 :          0 :                         btrfs_dev_replace_stats_inc(
    1466                 :          0 :                                 &sblock_bad->sctx->dev_root->fs_info->
    1467                 :            :                                 dev_replace.num_write_errors);
    1468                 :          0 :                         bio_put(bio);
    1469                 :          0 :                         return -EIO;
    1470                 :            :                 }
    1471                 :          0 :                 bio_put(bio);
    1472                 :            :         }
    1473                 :            : 
    1474                 :            :         return 0;
    1475                 :            : }
    1476                 :            : 
    1477                 :          0 : static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
    1478                 :            : {
    1479                 :            :         int page_num;
    1480                 :            : 
    1481         [ #  # ]:          0 :         for (page_num = 0; page_num < sblock->page_count; page_num++) {
    1482                 :            :                 int ret;
    1483                 :            : 
    1484                 :          0 :                 ret = scrub_write_page_to_dev_replace(sblock, page_num);
    1485         [ #  # ]:          0 :                 if (ret)
    1486                 :          0 :                         btrfs_dev_replace_stats_inc(
    1487                 :          0 :                                 &sblock->sctx->dev_root->fs_info->dev_replace.
    1488                 :            :                                 num_write_errors);
    1489                 :            :         }
    1490                 :          0 : }
    1491                 :            : 
    1492                 :          0 : static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
    1493                 :            :                                            int page_num)
    1494                 :            : {
    1495                 :          0 :         struct scrub_page *spage = sblock->pagev[page_num];
    1496                 :            : 
    1497         [ #  # ]:          0 :         BUG_ON(spage->page == NULL);
    1498         [ #  # ]:          0 :         if (spage->io_error) {
    1499                 :          0 :                 void *mapped_buffer = kmap_atomic(spage->page);
    1500                 :            : 
    1501                 :          0 :                 memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
    1502                 :          0 :                 flush_dcache_page(spage->page);
    1503                 :          0 :                 kunmap_atomic(mapped_buffer);
    1504                 :            :         }
    1505                 :          0 :         return scrub_add_page_to_wr_bio(sblock->sctx, spage);
    1506                 :            : }
    1507                 :            : 
    1508                 :          0 : static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
    1509                 :            :                                     struct scrub_page *spage)
    1510                 :            : {
    1511                 :            :         struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
    1512                 :            :         struct scrub_bio *sbio;
    1513                 :            :         int ret;
    1514                 :            : 
    1515                 :          0 :         mutex_lock(&wr_ctx->wr_lock);
    1516                 :            : again:
    1517         [ #  # ]:          0 :         if (!wr_ctx->wr_curr_bio) {
    1518                 :          0 :                 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
    1519                 :            :                                               GFP_NOFS);
    1520         [ #  # ]:          0 :                 if (!wr_ctx->wr_curr_bio) {
    1521                 :          0 :                         mutex_unlock(&wr_ctx->wr_lock);
    1522                 :          0 :                         return -ENOMEM;
    1523                 :            :                 }
    1524                 :          0 :                 wr_ctx->wr_curr_bio->sctx = sctx;
    1525                 :          0 :                 wr_ctx->wr_curr_bio->page_count = 0;
    1526                 :            :         }
    1527                 :          0 :         sbio = wr_ctx->wr_curr_bio;
    1528         [ #  # ]:          0 :         if (sbio->page_count == 0) {
    1529                 :            :                 struct bio *bio;
    1530                 :            : 
    1531                 :          0 :                 sbio->physical = spage->physical_for_dev_replace;
    1532                 :          0 :                 sbio->logical = spage->logical;
    1533                 :          0 :                 sbio->dev = wr_ctx->tgtdev;
    1534                 :          0 :                 bio = sbio->bio;
    1535         [ #  # ]:          0 :                 if (!bio) {
    1536                 :          0 :                         bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
    1537         [ #  # ]:          0 :                         if (!bio) {
    1538                 :          0 :                                 mutex_unlock(&wr_ctx->wr_lock);
    1539                 :          0 :                                 return -ENOMEM;
    1540                 :            :                         }
    1541                 :          0 :                         sbio->bio = bio;
    1542                 :            :                 }
    1543                 :            : 
    1544                 :          0 :                 bio->bi_private = sbio;
    1545                 :          0 :                 bio->bi_end_io = scrub_wr_bio_end_io;
    1546                 :          0 :                 bio->bi_bdev = sbio->dev->bdev;
    1547                 :          0 :                 bio->bi_iter.bi_sector = sbio->physical >> 9;
    1548                 :          0 :                 sbio->err = 0;
    1549         [ #  # ]:          0 :         } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
    1550         [ #  # ]:          0 :                    spage->physical_for_dev_replace ||
    1551                 :          0 :                    sbio->logical + sbio->page_count * PAGE_SIZE !=
    1552                 :          0 :                    spage->logical) {
    1553                 :          0 :                 scrub_wr_submit(sctx);
    1554                 :          0 :                 goto again;
    1555                 :            :         }
    1556                 :            : 
    1557                 :          0 :         ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
    1558         [ #  # ]:          0 :         if (ret != PAGE_SIZE) {
    1559         [ #  # ]:          0 :                 if (sbio->page_count < 1) {
    1560                 :          0 :                         bio_put(sbio->bio);
    1561                 :          0 :                         sbio->bio = NULL;
    1562                 :          0 :                         mutex_unlock(&wr_ctx->wr_lock);
    1563                 :          0 :                         return -EIO;
    1564                 :            :                 }
    1565                 :          0 :                 scrub_wr_submit(sctx);
    1566                 :          0 :                 goto again;
    1567                 :            :         }
    1568                 :            : 
    1569                 :          0 :         sbio->pagev[sbio->page_count] = spage;
    1570                 :            :         scrub_page_get(spage);
    1571                 :          0 :         sbio->page_count++;
    1572         [ #  # ]:          0 :         if (sbio->page_count == wr_ctx->pages_per_wr_bio)
    1573                 :          0 :                 scrub_wr_submit(sctx);
    1574                 :          0 :         mutex_unlock(&wr_ctx->wr_lock);
    1575                 :            : 
    1576                 :          0 :         return 0;
    1577                 :            : }
    1578                 :            : 
    1579                 :          0 : static void scrub_wr_submit(struct scrub_ctx *sctx)
    1580                 :            : {
    1581                 :            :         struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
    1582                 :            :         struct scrub_bio *sbio;
    1583                 :            : 
    1584         [ #  # ]:          0 :         if (!wr_ctx->wr_curr_bio)
    1585                 :          0 :                 return;
    1586                 :            : 
    1587                 :            :         sbio = wr_ctx->wr_curr_bio;
    1588                 :          0 :         wr_ctx->wr_curr_bio = NULL;
    1589         [ #  # ]:          0 :         WARN_ON(!sbio->bio->bi_bdev);
    1590                 :            :         scrub_pending_bio_inc(sctx);
    1591                 :            :         /* process all writes in a single worker thread. Then the block layer
    1592                 :            :          * orders the requests before sending them to the driver which
    1593                 :            :          * doubled the write performance on spinning disks when measured
    1594                 :            :          * with Linux 3.5 */
    1595                 :          0 :         btrfsic_submit_bio(WRITE, sbio->bio);
    1596                 :            : }
    1597                 :            : 
    1598                 :          0 : static void scrub_wr_bio_end_io(struct bio *bio, int err)
    1599                 :            : {
    1600                 :          0 :         struct scrub_bio *sbio = bio->bi_private;
    1601                 :          0 :         struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
    1602                 :            : 
    1603                 :          0 :         sbio->err = err;
    1604                 :          0 :         sbio->bio = bio;
    1605                 :            : 
    1606                 :          0 :         sbio->work.func = scrub_wr_bio_end_io_worker;
    1607                 :          0 :         btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
    1608                 :          0 : }
    1609                 :            : 
    1610                 :          0 : static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
    1611                 :            : {
    1612                 :          0 :         struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
    1613                 :          0 :         struct scrub_ctx *sctx = sbio->sctx;
    1614                 :            :         int i;
    1615                 :            : 
    1616         [ #  # ]:          0 :         WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
    1617         [ #  # ]:          0 :         if (sbio->err) {
    1618                 :            :                 struct btrfs_dev_replace *dev_replace =
    1619                 :          0 :                         &sbio->sctx->dev_root->fs_info->dev_replace;
    1620                 :            : 
    1621         [ #  # ]:          0 :                 for (i = 0; i < sbio->page_count; i++) {
    1622                 :          0 :                         struct scrub_page *spage = sbio->pagev[i];
    1623                 :            : 
    1624                 :          0 :                         spage->io_error = 1;
    1625                 :          0 :                         btrfs_dev_replace_stats_inc(&dev_replace->
    1626                 :            :                                                     num_write_errors);
    1627                 :            :                 }
    1628                 :            :         }
    1629                 :            : 
    1630         [ #  # ]:          0 :         for (i = 0; i < sbio->page_count; i++)
    1631                 :          0 :                 scrub_page_put(sbio->pagev[i]);
    1632                 :            : 
    1633                 :          0 :         bio_put(sbio->bio);
    1634                 :          0 :         kfree(sbio);
    1635                 :          0 :         scrub_pending_bio_dec(sctx);
    1636                 :          0 : }
    1637                 :            : 
    1638                 :          0 : static int scrub_checksum(struct scrub_block *sblock)
    1639                 :            : {
    1640                 :            :         u64 flags;
    1641                 :            :         int ret;
    1642                 :            : 
    1643         [ #  # ]:          0 :         WARN_ON(sblock->page_count < 1);
    1644                 :          0 :         flags = sblock->pagev[0]->flags;
    1645                 :            :         ret = 0;
    1646         [ #  # ]:          0 :         if (flags & BTRFS_EXTENT_FLAG_DATA)
    1647                 :          0 :                 ret = scrub_checksum_data(sblock);
    1648         [ #  # ]:          0 :         else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
    1649                 :          0 :                 ret = scrub_checksum_tree_block(sblock);
    1650         [ #  # ]:          0 :         else if (flags & BTRFS_EXTENT_FLAG_SUPER)
    1651                 :          0 :                 (void)scrub_checksum_super(sblock);
    1652                 :            :         else
    1653                 :          0 :                 WARN_ON(1);
    1654         [ #  # ]:          0 :         if (ret)
    1655                 :          0 :                 scrub_handle_errored_block(sblock);
    1656                 :            : 
    1657                 :          0 :         return ret;
    1658                 :            : }
    1659                 :            : 
    1660                 :          0 : static int scrub_checksum_data(struct scrub_block *sblock)
    1661                 :            : {
    1662                 :          0 :         struct scrub_ctx *sctx = sblock->sctx;
    1663                 :            :         u8 csum[BTRFS_CSUM_SIZE];
    1664                 :            :         u8 *on_disk_csum;
    1665                 :            :         struct page *page;
    1666                 :            :         void *buffer;
    1667                 :            :         u32 crc = ~(u32)0;
    1668                 :            :         int fail = 0;
    1669                 :            :         u64 len;
    1670                 :            :         int index;
    1671                 :            : 
    1672         [ #  # ]:          0 :         BUG_ON(sblock->page_count < 1);
    1673         [ #  # ]:          0 :         if (!sblock->pagev[0]->have_csum)
    1674                 :            :                 return 0;
    1675                 :            : 
    1676                 :          0 :         on_disk_csum = sblock->pagev[0]->csum;
    1677                 :          0 :         page = sblock->pagev[0]->page;
    1678                 :          0 :         buffer = kmap_atomic(page);
    1679                 :            : 
    1680                 :          0 :         len = sctx->sectorsize;
    1681                 :            :         index = 0;
    1682                 :            :         for (;;) {
    1683                 :          0 :                 u64 l = min_t(u64, len, PAGE_SIZE);
    1684                 :            : 
    1685                 :          0 :                 crc = btrfs_csum_data(buffer, crc, l);
    1686                 :          0 :                 kunmap_atomic(buffer);
    1687                 :          0 :                 len -= l;
    1688         [ #  # ]:          0 :                 if (len == 0)
    1689                 :            :                         break;
    1690                 :          0 :                 index++;
    1691         [ #  # ]:          0 :                 BUG_ON(index >= sblock->page_count);
    1692         [ #  # ]:          0 :                 BUG_ON(!sblock->pagev[index]->page);
    1693                 :            :                 page = sblock->pagev[index]->page;
    1694                 :          0 :                 buffer = kmap_atomic(page);
    1695                 :          0 :         }
    1696                 :            : 
    1697                 :          0 :         btrfs_csum_final(crc, csum);
    1698         [ #  # ]:          0 :         if (memcmp(csum, on_disk_csum, sctx->csum_size))
    1699                 :            :                 fail = 1;
    1700                 :            : 
    1701                 :          0 :         return fail;
    1702                 :            : }
    1703                 :            : 
    1704                 :          0 : static int scrub_checksum_tree_block(struct scrub_block *sblock)
    1705                 :            : {
    1706                 :          0 :         struct scrub_ctx *sctx = sblock->sctx;
    1707                 :            :         struct btrfs_header *h;
    1708                 :          0 :         struct btrfs_root *root = sctx->dev_root;
    1709                 :          0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1710                 :            :         u8 calculated_csum[BTRFS_CSUM_SIZE];
    1711                 :            :         u8 on_disk_csum[BTRFS_CSUM_SIZE];
    1712                 :            :         struct page *page;
    1713                 :            :         void *mapped_buffer;
    1714                 :            :         u64 mapped_size;
    1715                 :            :         void *p;
    1716                 :            :         u32 crc = ~(u32)0;
    1717                 :            :         int fail = 0;
    1718                 :            :         int crc_fail = 0;
    1719                 :            :         u64 len;
    1720                 :            :         int index;
    1721                 :            : 
    1722         [ #  # ]:          0 :         BUG_ON(sblock->page_count < 1);
    1723                 :          0 :         page = sblock->pagev[0]->page;
    1724                 :          0 :         mapped_buffer = kmap_atomic(page);
    1725                 :            :         h = (struct btrfs_header *)mapped_buffer;
    1726                 :          0 :         memcpy(on_disk_csum, h->csum, sctx->csum_size);
    1727                 :            : 
    1728                 :            :         /*
    1729                 :            :          * we don't use the getter functions here, as we
    1730                 :            :          * a) don't have an extent buffer and
    1731                 :            :          * b) the page is already kmapped
    1732                 :            :          */
    1733                 :            : 
    1734         [ #  # ]:          0 :         if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
    1735                 :            :                 ++fail;
    1736                 :            : 
    1737         [ #  # ]:          0 :         if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
    1738                 :          0 :                 ++fail;
    1739                 :            : 
    1740         [ #  # ]:          0 :         if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
    1741                 :          0 :                 ++fail;
    1742                 :            : 
    1743         [ #  # ]:          0 :         if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
    1744                 :            :                    BTRFS_UUID_SIZE))
    1745                 :          0 :                 ++fail;
    1746                 :            : 
    1747         [ #  # ]:          0 :         WARN_ON(sctx->nodesize != sctx->leafsize);
    1748                 :          0 :         len = sctx->nodesize - BTRFS_CSUM_SIZE;
    1749                 :            :         mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
    1750                 :          0 :         p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
    1751                 :            :         index = 0;
    1752                 :            :         for (;;) {
    1753                 :          0 :                 u64 l = min_t(u64, len, mapped_size);
    1754                 :            : 
    1755                 :          0 :                 crc = btrfs_csum_data(p, crc, l);
    1756                 :          0 :                 kunmap_atomic(mapped_buffer);
    1757                 :          0 :                 len -= l;
    1758         [ #  # ]:          0 :                 if (len == 0)
    1759                 :            :                         break;
    1760                 :          0 :                 index++;
    1761         [ #  # ]:          0 :                 BUG_ON(index >= sblock->page_count);
    1762         [ #  # ]:          0 :                 BUG_ON(!sblock->pagev[index]->page);
    1763                 :            :                 page = sblock->pagev[index]->page;
    1764                 :          0 :                 mapped_buffer = kmap_atomic(page);
    1765                 :            :                 mapped_size = PAGE_SIZE;
    1766                 :            :                 p = mapped_buffer;
    1767                 :          0 :         }
    1768                 :            : 
    1769                 :          0 :         btrfs_csum_final(crc, calculated_csum);
    1770         [ #  # ]:          0 :         if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
    1771                 :            :                 ++crc_fail;
    1772                 :            : 
    1773                 :          0 :         return fail || crc_fail;
    1774                 :            : }
    1775                 :            : 
    1776                 :          0 : static int scrub_checksum_super(struct scrub_block *sblock)
    1777                 :            : {
    1778                 :            :         struct btrfs_super_block *s;
    1779                 :          0 :         struct scrub_ctx *sctx = sblock->sctx;
    1780                 :          0 :         struct btrfs_root *root = sctx->dev_root;
    1781                 :          0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1782                 :            :         u8 calculated_csum[BTRFS_CSUM_SIZE];
    1783                 :            :         u8 on_disk_csum[BTRFS_CSUM_SIZE];
    1784                 :            :         struct page *page;
    1785                 :            :         void *mapped_buffer;
    1786                 :            :         u64 mapped_size;
    1787                 :            :         void *p;
    1788                 :            :         u32 crc = ~(u32)0;
    1789                 :            :         int fail_gen = 0;
    1790                 :            :         int fail_cor = 0;
    1791                 :            :         u64 len;
    1792                 :            :         int index;
    1793                 :            : 
    1794         [ #  # ]:          0 :         BUG_ON(sblock->page_count < 1);
    1795                 :          0 :         page = sblock->pagev[0]->page;
    1796                 :          0 :         mapped_buffer = kmap_atomic(page);
    1797                 :            :         s = (struct btrfs_super_block *)mapped_buffer;
    1798                 :          0 :         memcpy(on_disk_csum, s->csum, sctx->csum_size);
    1799                 :            : 
    1800         [ #  # ]:          0 :         if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
    1801                 :            :                 ++fail_cor;
    1802                 :            : 
    1803         [ #  # ]:          0 :         if (sblock->pagev[0]->generation != btrfs_super_generation(s))
    1804                 :            :                 ++fail_gen;
    1805                 :            : 
    1806         [ #  # ]:          0 :         if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
    1807                 :          0 :                 ++fail_cor;
    1808                 :            : 
    1809                 :            :         len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
    1810                 :            :         mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
    1811                 :          0 :         p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
    1812                 :            :         index = 0;
    1813                 :            :         for (;;) {
    1814                 :            :                 u64 l = min_t(u64, len, mapped_size);
    1815                 :            : 
    1816                 :          0 :                 crc = btrfs_csum_data(p, crc, l);
    1817                 :          0 :                 kunmap_atomic(mapped_buffer);
    1818                 :            :                 len -= l;
    1819                 :            :                 if (len == 0)
    1820                 :            :                         break;
    1821                 :            :                 index++;
    1822                 :            :                 BUG_ON(index >= sblock->page_count);
    1823                 :            :                 BUG_ON(!sblock->pagev[index]->page);
    1824                 :            :                 page = sblock->pagev[index]->page;
    1825                 :            :                 mapped_buffer = kmap_atomic(page);
    1826                 :            :                 mapped_size = PAGE_SIZE;
    1827                 :            :                 p = mapped_buffer;
    1828                 :            :         }
    1829                 :            : 
    1830                 :          0 :         btrfs_csum_final(crc, calculated_csum);
    1831         [ #  # ]:          0 :         if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
    1832                 :          0 :                 ++fail_cor;
    1833                 :            : 
    1834         [ #  # ]:          0 :         if (fail_cor + fail_gen) {
    1835                 :            :                 /*
    1836                 :            :                  * if we find an error in a super block, we just report it.
    1837                 :            :                  * They will get written with the next transaction commit
    1838                 :            :                  * anyway
    1839                 :            :                  */
    1840                 :            :                 spin_lock(&sctx->stat_lock);
    1841                 :          0 :                 ++sctx->stat.super_errors;
    1842                 :            :                 spin_unlock(&sctx->stat_lock);
    1843         [ #  # ]:          0 :                 if (fail_cor)
    1844                 :          0 :                         btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
    1845                 :            :                                 BTRFS_DEV_STAT_CORRUPTION_ERRS);
    1846                 :            :                 else
    1847                 :          0 :                         btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
    1848                 :            :                                 BTRFS_DEV_STAT_GENERATION_ERRS);
    1849                 :            :         }
    1850                 :            : 
    1851                 :          0 :         return fail_cor + fail_gen;
    1852                 :            : }
    1853                 :            : 
    1854                 :            : static void scrub_block_get(struct scrub_block *sblock)
    1855                 :            : {
    1856                 :          0 :         atomic_inc(&sblock->ref_count);
    1857                 :            : }
    1858                 :            : 
    1859                 :          0 : static void scrub_block_put(struct scrub_block *sblock)
    1860                 :            : {
    1861         [ #  # ]:          0 :         if (atomic_dec_and_test(&sblock->ref_count)) {
    1862                 :            :                 int i;
    1863                 :            : 
    1864         [ #  # ]:          0 :                 for (i = 0; i < sblock->page_count; i++)
    1865                 :          0 :                         scrub_page_put(sblock->pagev[i]);
    1866                 :          0 :                 kfree(sblock);
    1867                 :            :         }
    1868                 :          0 : }
    1869                 :            : 
    1870                 :            : static void scrub_page_get(struct scrub_page *spage)
    1871                 :            : {
    1872                 :          0 :         atomic_inc(&spage->ref_count);
    1873                 :            : }
    1874                 :            : 
    1875                 :          0 : static void scrub_page_put(struct scrub_page *spage)
    1876                 :            : {
    1877         [ #  # ]:          0 :         if (atomic_dec_and_test(&spage->ref_count)) {
    1878         [ #  # ]:          0 :                 if (spage->page)
    1879                 :          0 :                         __free_page(spage->page);
    1880                 :          0 :                 kfree(spage);
    1881                 :            :         }
    1882                 :          0 : }
    1883                 :            : 
    1884                 :          0 : static void scrub_submit(struct scrub_ctx *sctx)
    1885                 :            : {
    1886                 :            :         struct scrub_bio *sbio;
    1887                 :            : 
    1888         [ #  # ]:          0 :         if (sctx->curr == -1)
    1889                 :          0 :                 return;
    1890                 :            : 
    1891                 :          0 :         sbio = sctx->bios[sctx->curr];
    1892                 :          0 :         sctx->curr = -1;
    1893                 :            :         scrub_pending_bio_inc(sctx);
    1894                 :            : 
    1895         [ #  # ]:          0 :         if (!sbio->bio->bi_bdev) {
    1896                 :            :                 /*
    1897                 :            :                  * this case should not happen. If btrfs_map_block() is
    1898                 :            :                  * wrong, it could happen for dev-replace operations on
    1899                 :            :                  * missing devices when no mirrors are available, but in
    1900                 :            :                  * this case it should already fail the mount.
    1901                 :            :                  * This case is handled correctly (but _very_ slowly).
    1902                 :            :                  */
    1903         [ #  # ]:          0 :                 printk_ratelimited(KERN_WARNING
    1904                 :            :                         "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
    1905                 :          0 :                 bio_endio(sbio->bio, -EIO);
    1906                 :            :         } else {
    1907                 :          0 :                 btrfsic_submit_bio(READ, sbio->bio);
    1908                 :            :         }
    1909                 :            : }
    1910                 :            : 
    1911                 :          0 : static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
    1912                 :            :                                     struct scrub_page *spage)
    1913                 :            : {
    1914                 :          0 :         struct scrub_block *sblock = spage->sblock;
    1915                 :            :         struct scrub_bio *sbio;
    1916                 :            :         int ret;
    1917                 :            : 
    1918                 :            : again:
    1919                 :            :         /*
    1920                 :            :          * grab a fresh bio or wait for one to become available
    1921                 :            :          */
    1922         [ #  # ]:          0 :         while (sctx->curr == -1) {
    1923                 :            :                 spin_lock(&sctx->list_lock);
    1924                 :          0 :                 sctx->curr = sctx->first_free;
    1925         [ #  # ]:          0 :                 if (sctx->curr != -1) {
    1926                 :          0 :                         sctx->first_free = sctx->bios[sctx->curr]->next_free;
    1927                 :          0 :                         sctx->bios[sctx->curr]->next_free = -1;
    1928                 :          0 :                         sctx->bios[sctx->curr]->page_count = 0;
    1929                 :            :                         spin_unlock(&sctx->list_lock);
    1930                 :            :                 } else {
    1931                 :            :                         spin_unlock(&sctx->list_lock);
    1932 [ #  # ][ #  # ]:          0 :                         wait_event(sctx->list_wait, sctx->first_free != -1);
    1933                 :            :                 }
    1934                 :            :         }
    1935                 :          0 :         sbio = sctx->bios[sctx->curr];
    1936         [ #  # ]:          0 :         if (sbio->page_count == 0) {
    1937                 :            :                 struct bio *bio;
    1938                 :            : 
    1939                 :          0 :                 sbio->physical = spage->physical;
    1940                 :          0 :                 sbio->logical = spage->logical;
    1941                 :          0 :                 sbio->dev = spage->dev;
    1942                 :          0 :                 bio = sbio->bio;
    1943         [ #  # ]:          0 :                 if (!bio) {
    1944                 :          0 :                         bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
    1945         [ #  # ]:          0 :                         if (!bio)
    1946                 :            :                                 return -ENOMEM;
    1947                 :          0 :                         sbio->bio = bio;
    1948                 :            :                 }
    1949                 :            : 
    1950                 :          0 :                 bio->bi_private = sbio;
    1951                 :          0 :                 bio->bi_end_io = scrub_bio_end_io;
    1952                 :          0 :                 bio->bi_bdev = sbio->dev->bdev;
    1953                 :          0 :                 bio->bi_iter.bi_sector = sbio->physical >> 9;
    1954                 :          0 :                 sbio->err = 0;
    1955         [ #  # ]:          0 :         } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
    1956         [ #  # ]:          0 :                    spage->physical ||
    1957                 :          0 :                    sbio->logical + sbio->page_count * PAGE_SIZE !=
    1958         [ #  # ]:          0 :                    spage->logical ||
    1959                 :          0 :                    sbio->dev != spage->dev) {
    1960                 :          0 :                 scrub_submit(sctx);
    1961                 :          0 :                 goto again;
    1962                 :            :         }
    1963                 :            : 
    1964                 :          0 :         sbio->pagev[sbio->page_count] = spage;
    1965                 :          0 :         ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
    1966         [ #  # ]:          0 :         if (ret != PAGE_SIZE) {
    1967         [ #  # ]:          0 :                 if (sbio->page_count < 1) {
    1968                 :          0 :                         bio_put(sbio->bio);
    1969                 :          0 :                         sbio->bio = NULL;
    1970                 :          0 :                         return -EIO;
    1971                 :            :                 }
    1972                 :          0 :                 scrub_submit(sctx);
    1973                 :          0 :                 goto again;
    1974                 :            :         }
    1975                 :            : 
    1976                 :            :         scrub_block_get(sblock); /* one for the page added to the bio */
    1977                 :          0 :         atomic_inc(&sblock->outstanding_pages);
    1978                 :          0 :         sbio->page_count++;
    1979         [ #  # ]:          0 :         if (sbio->page_count == sctx->pages_per_rd_bio)
    1980                 :          0 :                 scrub_submit(sctx);
    1981                 :            : 
    1982                 :            :         return 0;
    1983                 :            : }
    1984                 :            : 
    1985                 :          0 : static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
    1986                 :            :                        u64 physical, struct btrfs_device *dev, u64 flags,
    1987                 :            :                        u64 gen, int mirror_num, u8 *csum, int force,
    1988                 :            :                        u64 physical_for_dev_replace)
    1989                 :            : {
    1990                 :            :         struct scrub_block *sblock;
    1991                 :            :         int index;
    1992                 :            : 
    1993                 :            :         sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
    1994         [ #  # ]:          0 :         if (!sblock) {
    1995                 :            :                 spin_lock(&sctx->stat_lock);
    1996                 :          0 :                 sctx->stat.malloc_errors++;
    1997                 :            :                 spin_unlock(&sctx->stat_lock);
    1998                 :          0 :                 return -ENOMEM;
    1999                 :            :         }
    2000                 :            : 
    2001                 :            :         /* one ref inside this function, plus one for each page added to
    2002                 :            :          * a bio later on */
    2003                 :          0 :         atomic_set(&sblock->ref_count, 1);
    2004                 :          0 :         sblock->sctx = sctx;
    2005                 :          0 :         sblock->no_io_error_seen = 1;
    2006                 :            : 
    2007         [ #  # ]:          0 :         for (index = 0; len > 0; index++) {
    2008                 :            :                 struct scrub_page *spage;
    2009                 :          0 :                 u64 l = min_t(u64, len, PAGE_SIZE);
    2010                 :            : 
    2011                 :            :                 spage = kzalloc(sizeof(*spage), GFP_NOFS);
    2012         [ #  # ]:          0 :                 if (!spage) {
    2013                 :            : leave_nomem:
    2014                 :            :                         spin_lock(&sctx->stat_lock);
    2015                 :          0 :                         sctx->stat.malloc_errors++;
    2016                 :            :                         spin_unlock(&sctx->stat_lock);
    2017                 :          0 :                         scrub_block_put(sblock);
    2018                 :          0 :                         return -ENOMEM;
    2019                 :            :                 }
    2020         [ #  # ]:          0 :                 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
    2021                 :            :                 scrub_page_get(spage);
    2022                 :          0 :                 sblock->pagev[index] = spage;
    2023                 :          0 :                 spage->sblock = sblock;
    2024                 :          0 :                 spage->dev = dev;
    2025                 :          0 :                 spage->flags = flags;
    2026                 :          0 :                 spage->generation = gen;
    2027                 :          0 :                 spage->logical = logical;
    2028                 :          0 :                 spage->physical = physical;
    2029                 :          0 :                 spage->physical_for_dev_replace = physical_for_dev_replace;
    2030                 :          0 :                 spage->mirror_num = mirror_num;
    2031         [ #  # ]:          0 :                 if (csum) {
    2032                 :          0 :                         spage->have_csum = 1;
    2033                 :          0 :                         memcpy(spage->csum, csum, sctx->csum_size);
    2034                 :            :                 } else {
    2035                 :          0 :                         spage->have_csum = 0;
    2036                 :            :                 }
    2037                 :          0 :                 sblock->page_count++;
    2038                 :          0 :                 spage->page = alloc_page(GFP_NOFS);
    2039         [ #  # ]:          0 :                 if (!spage->page)
    2040                 :            :                         goto leave_nomem;
    2041                 :          0 :                 len -= l;
    2042                 :          0 :                 logical += l;
    2043                 :          0 :                 physical += l;
    2044                 :          0 :                 physical_for_dev_replace += l;
    2045                 :            :         }
    2046                 :            : 
    2047         [ #  # ]:          0 :         WARN_ON(sblock->page_count == 0);
    2048         [ #  # ]:          0 :         for (index = 0; index < sblock->page_count; index++) {
    2049                 :          0 :                 struct scrub_page *spage = sblock->pagev[index];
    2050                 :            :                 int ret;
    2051                 :            : 
    2052                 :          0 :                 ret = scrub_add_page_to_rd_bio(sctx, spage);
    2053         [ #  # ]:          0 :                 if (ret) {
    2054                 :          0 :                         scrub_block_put(sblock);
    2055                 :          0 :                         return ret;
    2056                 :            :                 }
    2057                 :            :         }
    2058                 :            : 
    2059         [ #  # ]:          0 :         if (force)
    2060                 :          0 :                 scrub_submit(sctx);
    2061                 :            : 
    2062                 :            :         /* last one frees, either here or in bio completion for last page */
    2063                 :          0 :         scrub_block_put(sblock);
    2064                 :          0 :         return 0;
    2065                 :            : }
    2066                 :            : 
    2067                 :          0 : static void scrub_bio_end_io(struct bio *bio, int err)
    2068                 :            : {
    2069                 :          0 :         struct scrub_bio *sbio = bio->bi_private;
    2070                 :          0 :         struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
    2071                 :            : 
    2072                 :          0 :         sbio->err = err;
    2073                 :          0 :         sbio->bio = bio;
    2074                 :            : 
    2075                 :          0 :         btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
    2076                 :          0 : }
    2077                 :            : 
    2078                 :          0 : static void scrub_bio_end_io_worker(struct btrfs_work *work)
    2079                 :            : {
    2080                 :            :         struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
    2081                 :          0 :         struct scrub_ctx *sctx = sbio->sctx;
    2082                 :            :         int i;
    2083                 :            : 
    2084         [ #  # ]:          0 :         BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
    2085         [ #  # ]:          0 :         if (sbio->err) {
    2086         [ #  # ]:          0 :                 for (i = 0; i < sbio->page_count; i++) {
    2087                 :          0 :                         struct scrub_page *spage = sbio->pagev[i];
    2088                 :            : 
    2089                 :          0 :                         spage->io_error = 1;
    2090                 :          0 :                         spage->sblock->no_io_error_seen = 0;
    2091                 :            :                 }
    2092                 :            :         }
    2093                 :            : 
    2094                 :            :         /* now complete the scrub_block items that have all pages completed */
    2095         [ #  # ]:          0 :         for (i = 0; i < sbio->page_count; i++) {
    2096                 :          0 :                 struct scrub_page *spage = sbio->pagev[i];
    2097                 :          0 :                 struct scrub_block *sblock = spage->sblock;
    2098                 :            : 
    2099         [ #  # ]:          0 :                 if (atomic_dec_and_test(&sblock->outstanding_pages))
    2100                 :          0 :                         scrub_block_complete(sblock);
    2101                 :          0 :                 scrub_block_put(sblock);
    2102                 :            :         }
    2103                 :            : 
    2104                 :          0 :         bio_put(sbio->bio);
    2105                 :          0 :         sbio->bio = NULL;
    2106                 :            :         spin_lock(&sctx->list_lock);
    2107                 :          0 :         sbio->next_free = sctx->first_free;
    2108                 :          0 :         sctx->first_free = sbio->index;
    2109                 :            :         spin_unlock(&sctx->list_lock);
    2110                 :            : 
    2111 [ #  # ][ #  # ]:          0 :         if (sctx->is_dev_replace &&
    2112                 :          0 :             atomic_read(&sctx->wr_ctx.flush_all_writes)) {
    2113                 :          0 :                 mutex_lock(&sctx->wr_ctx.wr_lock);
    2114                 :          0 :                 scrub_wr_submit(sctx);
    2115                 :          0 :                 mutex_unlock(&sctx->wr_ctx.wr_lock);
    2116                 :            :         }
    2117                 :            : 
    2118                 :          0 :         scrub_pending_bio_dec(sctx);
    2119                 :          0 : }
    2120                 :            : 
    2121                 :          0 : static void scrub_block_complete(struct scrub_block *sblock)
    2122                 :            : {
    2123         [ #  # ]:          0 :         if (!sblock->no_io_error_seen) {
    2124                 :          0 :                 scrub_handle_errored_block(sblock);
    2125                 :            :         } else {
    2126                 :            :                 /*
    2127                 :            :                  * if the block has a checksum error, it is written via the
    2128                 :            :                  * repair mechanism in the dev replace case; otherwise it is
    2129                 :            :                  * written here in the dev replace case.
    2130                 :            :                  */
    2131 [ #  # ][ #  # ]:          0 :                 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
    2132                 :          0 :                         scrub_write_block_to_dev_replace(sblock);
    2133                 :            :         }
    2134                 :          0 : }
    2135                 :            : 
    2136                 :          0 : static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
    2137                 :            :                            u8 *csum)
    2138                 :            : {
    2139                 :            :         struct btrfs_ordered_sum *sum = NULL;
    2140                 :            :         unsigned long index;
    2141                 :            :         unsigned long num_sectors;
    2142                 :            : 
    2143         [ #  # ]:          0 :         while (!list_empty(&sctx->csum_list)) {
    2144                 :          0 :                 sum = list_first_entry(&sctx->csum_list,
    2145                 :            :                                        struct btrfs_ordered_sum, list);
    2146         [ #  # ]:          0 :                 if (sum->bytenr > logical)
    2147                 :            :                         return 0;
    2148         [ #  # ]:          0 :                 if (sum->bytenr + sum->len > logical)
    2149                 :            :                         break;
    2150                 :            : 
    2151                 :          0 :                 ++sctx->stat.csum_discards;
    2152                 :            :                 list_del(&sum->list);
    2153                 :          0 :                 kfree(sum);
    2154                 :            :                 sum = NULL;
    2155                 :            :         }
    2156         [ #  # ]:          0 :         if (!sum)
    2157                 :            :                 return 0;
    2158                 :            : 
    2159                 :          0 :         index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
    2160                 :          0 :         num_sectors = sum->len / sctx->sectorsize;
    2161                 :          0 :         memcpy(csum, sum->sums + index, sctx->csum_size);
    2162         [ #  # ]:          0 :         if (index == num_sectors - 1) {
    2163                 :            :                 list_del(&sum->list);
    2164                 :          0 :                 kfree(sum);
    2165                 :            :         }
    2166                 :            :         return 1;
    2167                 :            : }
    2168                 :            : 
    2169                 :            : /* scrub extent tries to collect up to 64 kB for each bio */
    2170                 :          0 : static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
    2171                 :            :                         u64 physical, struct btrfs_device *dev, u64 flags,
    2172                 :            :                         u64 gen, int mirror_num, u64 physical_for_dev_replace)
    2173                 :            : {
    2174                 :            :         int ret;
    2175                 :            :         u8 csum[BTRFS_CSUM_SIZE];
    2176                 :            :         u32 blocksize;
    2177                 :            : 
    2178         [ #  # ]:          0 :         if (flags & BTRFS_EXTENT_FLAG_DATA) {
    2179                 :          0 :                 blocksize = sctx->sectorsize;
    2180                 :            :                 spin_lock(&sctx->stat_lock);
    2181                 :          0 :                 sctx->stat.data_extents_scrubbed++;
    2182                 :          0 :                 sctx->stat.data_bytes_scrubbed += len;
    2183                 :            :                 spin_unlock(&sctx->stat_lock);
    2184         [ #  # ]:          0 :         } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
    2185         [ #  # ]:          0 :                 WARN_ON(sctx->nodesize != sctx->leafsize);
    2186                 :          0 :                 blocksize = sctx->nodesize;
    2187                 :            :                 spin_lock(&sctx->stat_lock);
    2188                 :          0 :                 sctx->stat.tree_extents_scrubbed++;
    2189                 :          0 :                 sctx->stat.tree_bytes_scrubbed += len;
    2190                 :            :                 spin_unlock(&sctx->stat_lock);
    2191                 :            :         } else {
    2192                 :          0 :                 blocksize = sctx->sectorsize;
    2193                 :          0 :                 WARN_ON(1);
    2194                 :            :         }
    2195                 :            : 
    2196         [ #  # ]:          0 :         while (len) {
    2197                 :          0 :                 u64 l = min_t(u64, len, blocksize);
    2198                 :            :                 int have_csum = 0;
    2199                 :            : 
    2200         [ #  # ]:          0 :                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
    2201                 :            :                         /* push csums to sbio */
    2202                 :          0 :                         have_csum = scrub_find_csum(sctx, logical, l, csum);
    2203         [ #  # ]:          0 :                         if (have_csum == 0)
    2204                 :          0 :                                 ++sctx->stat.no_csum;
    2205 [ #  # ][ #  # ]:          0 :                         if (sctx->is_dev_replace && !have_csum) {
    2206                 :          0 :                                 ret = copy_nocow_pages(sctx, logical, l,
    2207                 :            :                                                        mirror_num,
    2208                 :            :                                                       physical_for_dev_replace);
    2209                 :          0 :                                 goto behind_scrub_pages;
    2210                 :            :                         }
    2211                 :            :                 }
    2212         [ #  # ]:          0 :                 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
    2213                 :            :                                   mirror_num, have_csum ? csum : NULL, 0,
    2214                 :            :                                   physical_for_dev_replace);
    2215                 :            : behind_scrub_pages:
    2216         [ #  # ]:          0 :                 if (ret)
    2217                 :            :                         return ret;
    2218                 :          0 :                 len -= l;
    2219                 :          0 :                 logical += l;
    2220                 :          0 :                 physical += l;
    2221                 :          0 :                 physical_for_dev_replace += l;
    2222                 :            :         }
    2223                 :            :         return 0;
    2224                 :            : }
    2225                 :            : 
    2226                 :          0 : static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
    2227                 :            :                                            struct map_lookup *map,
    2228                 :            :                                            struct btrfs_device *scrub_dev,
    2229                 :            :                                            int num, u64 base, u64 length,
    2230                 :            :                                            int is_dev_replace)
    2231                 :            : {
    2232                 :            :         struct btrfs_path *path;
    2233                 :          0 :         struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
    2234                 :          0 :         struct btrfs_root *root = fs_info->extent_root;
    2235                 :          0 :         struct btrfs_root *csum_root = fs_info->csum_root;
    2236                 :            :         struct btrfs_extent_item *extent;
    2237                 :            :         struct blk_plug plug;
    2238                 :            :         u64 flags;
    2239                 :            :         int ret;
    2240                 :            :         int slot;
    2241                 :            :         u64 nstripes;
    2242                 :          0 :         struct extent_buffer *l;
    2243                 :            :         struct btrfs_key key;
    2244                 :            :         u64 physical;
    2245                 :            :         u64 logical;
    2246                 :            :         u64 logic_end;
    2247                 :            :         u64 generation;
    2248                 :            :         int mirror_num;
    2249                 :            :         struct reada_control *reada1;
    2250                 :            :         struct reada_control *reada2;
    2251                 :            :         struct btrfs_key key_start;
    2252                 :            :         struct btrfs_key key_end;
    2253                 :            :         u64 increment = map->stripe_len;
    2254                 :            :         u64 offset;
    2255                 :            :         u64 extent_logical;
    2256                 :            :         u64 extent_physical;
    2257                 :            :         u64 extent_len;
    2258                 :            :         struct btrfs_device *extent_dev;
    2259                 :            :         int extent_mirror_num;
    2260                 :            :         int stop_loop;
    2261                 :            : 
    2262         [ #  # ]:          0 :         if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
    2263                 :            :                          BTRFS_BLOCK_GROUP_RAID6)) {
    2264         [ #  # ]:          0 :                 if (num >= nr_data_stripes(map)) {
    2265                 :            :                         return 0;
    2266                 :            :                 }
    2267                 :            :         }
    2268                 :            : 
    2269                 :            :         nstripes = length;
    2270                 :            :         offset = 0;
    2271 [ #  # ][ #  # ]:          0 :         do_div(nstripes, map->stripe_len);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
    2272         [ #  # ]:          0 :         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
    2273                 :          0 :                 offset = map->stripe_len * num;
    2274                 :          0 :                 increment = map->stripe_len * map->num_stripes;
    2275                 :            :                 mirror_num = 1;
    2276         [ #  # ]:          0 :         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
    2277                 :          0 :                 int factor = map->num_stripes / map->sub_stripes;
    2278                 :          0 :                 offset = map->stripe_len * (num / map->sub_stripes);
    2279                 :          0 :                 increment = map->stripe_len * factor;
    2280                 :          0 :                 mirror_num = num % map->sub_stripes + 1;
    2281         [ #  # ]:          0 :         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
    2282                 :          0 :                 increment = map->stripe_len;
    2283                 :          0 :                 mirror_num = num % map->num_stripes + 1;
    2284         [ #  # ]:          0 :         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
    2285                 :          0 :                 increment = map->stripe_len;
    2286                 :          0 :                 mirror_num = num % map->num_stripes + 1;
    2287                 :            :         } else {
    2288                 :          0 :                 increment = map->stripe_len;
    2289                 :            :                 mirror_num = 1;
    2290                 :            :         }
    2291                 :            : 
    2292                 :          0 :         path = btrfs_alloc_path();
    2293         [ #  # ]:          0 :         if (!path)
    2294                 :            :                 return -ENOMEM;
    2295                 :            : 
    2296                 :            :         /*
    2297                 :            :          * work on commit root. The related disk blocks are static as
    2298                 :            :          * long as COW is applied. This means it is safe to rewrite
    2299                 :            :          * them to repair disk errors without any race conditions
    2300                 :            :          */
    2301                 :          0 :         path->search_commit_root = 1;
    2302                 :          0 :         path->skip_locking = 1;
    2303                 :            : 
    2304                 :            :         /*
    2305                 :            :          * trigger the readahead for the extent tree and the csum tree and
    2306                 :            :          * wait for completion. During readahead, the scrub is officially
    2307                 :            :          * paused so as not to hold off transaction commits
    2308                 :            :          */
    2309                 :          0 :         logical = base + offset;
    2310                 :            : 
    2311 [ #  # ][ #  # ]:          0 :         wait_event(sctx->list_wait,
    2312                 :            :                    atomic_read(&sctx->bios_in_flight) == 0);
    2313                 :          0 :         scrub_blocked_if_needed(fs_info);
    2314                 :            : 
    2315                 :            :         /* FIXME it might be better to start readahead at commit root */
    2316                 :          0 :         key_start.objectid = logical;
    2317                 :          0 :         key_start.type = BTRFS_EXTENT_ITEM_KEY;
    2318                 :          0 :         key_start.offset = (u64)0;
    2319                 :          0 :         key_end.objectid = base + offset + nstripes * increment;
    2320                 :          0 :         key_end.type = BTRFS_METADATA_ITEM_KEY;
    2321                 :          0 :         key_end.offset = (u64)-1;
    2322                 :          0 :         reada1 = btrfs_reada_add(root, &key_start, &key_end);
    2323                 :            : 
    2324                 :          0 :         key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
    2325                 :          0 :         key_start.type = BTRFS_EXTENT_CSUM_KEY;
    2326                 :          0 :         key_start.offset = logical;
    2327                 :          0 :         key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
    2328                 :          0 :         key_end.type = BTRFS_EXTENT_CSUM_KEY;
    2329                 :          0 :         key_end.offset = base + offset + nstripes * increment;
    2330                 :          0 :         reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
    2331                 :            : 
    2332         [ #  # ]:          0 :         if (!IS_ERR(reada1))
    2333                 :          0 :                 btrfs_reada_wait(reada1);
    2334         [ #  # ]:          0 :         if (!IS_ERR(reada2))
    2335                 :          0 :                 btrfs_reada_wait(reada2);
    2336                 :            : 
    2337                 :            : 
    2338                 :            :         /*
    2339                 :            :          * collect all data csums for the stripe to avoid seeking during
    2340                 :            :          * the scrub. This might currently (crc32) end up being about 1MB
    2341                 :            :          */
    2342                 :          0 :         blk_start_plug(&plug);
    2343                 :            : 
    2344                 :            :         /*
    2345                 :            :          * now find all extents for each stripe and scrub them
    2346                 :            :          */
    2347                 :            :         logical = base + offset;
    2348                 :          0 :         physical = map->stripes[num].physical;
    2349                 :            :         logic_end = logical + increment * nstripes;
    2350                 :            :         ret = 0;
    2351         [ #  # ]:          0 :         while (logical < logic_end) {
    2352                 :            :                 /*
    2353                 :            :                  * canceled?
    2354                 :            :                  */
    2355 [ #  # ][ #  # ]:          0 :                 if (atomic_read(&fs_info->scrub_cancel_req) ||
    2356                 :          0 :                     atomic_read(&sctx->cancel_req)) {
    2357                 :            :                         ret = -ECANCELED;
    2358                 :            :                         goto out;
    2359                 :            :                 }
    2360                 :            :                 /*
    2361                 :            :                  * check to see if we have to pause
    2362                 :            :                  */
    2363         [ #  # ]:          0 :                 if (atomic_read(&fs_info->scrub_pause_req)) {
    2364                 :            :                         /* push queued extents */
    2365                 :          0 :                         atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
    2366                 :          0 :                         scrub_submit(sctx);
    2367                 :          0 :                         mutex_lock(&sctx->wr_ctx.wr_lock);
    2368                 :          0 :                         scrub_wr_submit(sctx);
    2369                 :          0 :                         mutex_unlock(&sctx->wr_ctx.wr_lock);
    2370 [ #  # ][ #  # ]:          0 :                         wait_event(sctx->list_wait,
    2371                 :            :                                    atomic_read(&sctx->bios_in_flight) == 0);
    2372                 :          0 :                         atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
    2373                 :          0 :                         scrub_blocked_if_needed(fs_info);
    2374                 :            :                 }
    2375                 :            : 
    2376         [ #  # ]:          0 :                 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
    2377                 :          0 :                         key.type = BTRFS_METADATA_ITEM_KEY;
    2378                 :            :                 else
    2379                 :          0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
    2380                 :          0 :                 key.objectid = logical;
    2381                 :          0 :                 key.offset = (u64)-1;
    2382                 :            : 
    2383                 :          0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    2384         [ #  # ]:          0 :                 if (ret < 0)
    2385                 :            :                         goto out;
    2386                 :            : 
    2387         [ #  # ]:          0 :                 if (ret > 0) {
    2388                 :          0 :                         ret = btrfs_previous_extent_item(root, path, 0);
    2389         [ #  # ]:          0 :                         if (ret < 0)
    2390                 :            :                                 goto out;
    2391         [ #  # ]:          0 :                         if (ret > 0) {
    2392                 :            :                                 /* there's no smaller item, so stick with the
    2393                 :            :                                  * larger one */
    2394                 :          0 :                                 btrfs_release_path(path);
    2395                 :          0 :                                 ret = btrfs_search_slot(NULL, root, &key,
    2396                 :            :                                                         path, 0, 0);
    2397         [ #  # ]:          0 :                                 if (ret < 0)
    2398                 :            :                                         goto out;
    2399                 :            :                         }
    2400                 :            :                 }
    2401                 :            : 
    2402                 :            :                 stop_loop = 0;
    2403                 :            :                 while (1) {
    2404                 :            :                         u64 bytes;
    2405                 :            : 
    2406                 :          0 :                         l = path->nodes[0];
    2407                 :          0 :                         slot = path->slots[0];
    2408         [ #  # ]:          0 :                         if (slot >= btrfs_header_nritems(l)) {
    2409                 :          0 :                                 ret = btrfs_next_leaf(root, path);
    2410         [ #  # ]:          0 :                                 if (ret == 0)
    2411                 :          0 :                                         continue;
    2412         [ #  # ]:          0 :                                 if (ret < 0)
    2413                 :            :                                         goto out;
    2414                 :            : 
    2415                 :            :                                 stop_loop = 1;
    2416                 :            :                                 break;
    2417                 :            :                         }
    2418                 :            :                         btrfs_item_key_to_cpu(l, &key, slot);
    2419                 :            : 
    2420         [ #  # ]:          0 :                         if (key.type == BTRFS_METADATA_ITEM_KEY)
    2421                 :          0 :                                 bytes = root->leafsize;
    2422                 :            :                         else
    2423                 :            :                                 bytes = key.offset;
    2424                 :            : 
    2425         [ #  # ]:          0 :                         if (key.objectid + bytes <= logical)
    2426                 :            :                                 goto next;
    2427                 :            : 
    2428         [ #  # ]:          0 :                         if (key.type != BTRFS_EXTENT_ITEM_KEY &&
    2429                 :            :                             key.type != BTRFS_METADATA_ITEM_KEY)
    2430                 :            :                                 goto next;
    2431                 :            : 
    2432         [ #  # ]:          0 :                         if (key.objectid >= logical + map->stripe_len) {
    2433                 :            :                                 /* out of this device extent */
    2434         [ #  # ]:          0 :                                 if (key.objectid >= logic_end)
    2435                 :            :                                         stop_loop = 1;
    2436                 :            :                                 break;
    2437                 :            :                         }
    2438                 :            : 
    2439                 :          0 :                         extent = btrfs_item_ptr(l, slot,
    2440                 :            :                                                 struct btrfs_extent_item);
    2441                 :            :                         flags = btrfs_extent_flags(l, extent);
    2442                 :            :                         generation = btrfs_extent_generation(l, extent);
    2443                 :            : 
    2444 [ #  # ][ #  # ]:          0 :                         if (key.objectid < logical &&
    2445                 :          0 :                             (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
    2446                 :          0 :                                 btrfs_err(fs_info,
    2447                 :            :                                            "scrub: tree block %llu spanning "
    2448                 :            :                                            "stripes, ignored. logical=%llu",
    2449                 :            :                                        key.objectid, logical);
    2450                 :          0 :                                 goto next;
    2451                 :            :                         }
    2452                 :            : 
    2453                 :            : again:
    2454                 :          0 :                         extent_logical = key.objectid;
    2455                 :            :                         extent_len = bytes;
    2456                 :            : 
    2457                 :            :                         /*
    2458                 :            :                          * trim extent to this stripe
    2459                 :            :                          */
    2460         [ #  # ]:          0 :                         if (extent_logical < logical) {
    2461                 :          0 :                                 extent_len -= logical - extent_logical;
    2462                 :            :                                 extent_logical = logical;
    2463                 :            :                         }
    2464         [ #  # ]:          0 :                         if (extent_logical + extent_len >
    2465                 :          0 :                             logical + map->stripe_len) {
    2466                 :          0 :                                 extent_len = logical + map->stripe_len -
    2467                 :            :                                              extent_logical;
    2468                 :            :                         }
    2469                 :            : 
    2470                 :          0 :                         extent_physical = extent_logical - logical + physical;
    2471                 :          0 :                         extent_dev = scrub_dev;
    2472                 :          0 :                         extent_mirror_num = mirror_num;
    2473         [ #  # ]:          0 :                         if (is_dev_replace)
    2474                 :          0 :                                 scrub_remap_extent(fs_info, extent_logical,
    2475                 :            :                                                    extent_len, &extent_physical,
    2476                 :            :                                                    &extent_dev,
    2477                 :            :                                                    &extent_mirror_num);
    2478                 :            : 
    2479                 :          0 :                         ret = btrfs_lookup_csums_range(csum_root, logical,
    2480                 :          0 :                                                 logical + map->stripe_len - 1,
    2481                 :            :                                                 &sctx->csum_list, 1);
    2482         [ #  # ]:          0 :                         if (ret)
    2483                 :            :                                 goto out;
    2484                 :            : 
    2485                 :          0 :                         ret = scrub_extent(sctx, extent_logical, extent_len,
    2486                 :            :                                            extent_physical, extent_dev, flags,
    2487                 :            :                                            generation, extent_mirror_num,
    2488                 :            :                                            extent_logical - logical + physical);
    2489         [ #  # ]:          0 :                         if (ret)
    2490                 :            :                                 goto out;
    2491                 :            : 
    2492                 :          0 :                         scrub_free_csums(sctx);
    2493         [ #  # ]:          0 :                         if (extent_logical + extent_len <
    2494                 :          0 :                             key.objectid + bytes) {
    2495                 :          0 :                                 logical += increment;
    2496                 :          0 :                                 physical += map->stripe_len;
    2497                 :            : 
    2498         [ #  # ]:          0 :                                 if (logical < key.objectid + bytes) {
    2499                 :          0 :                                         cond_resched();
    2500                 :          0 :                                         goto again;
    2501                 :            :                                 }
    2502                 :            : 
    2503         [ #  # ]:          0 :                                 if (logical >= logic_end) {
    2504                 :            :                                         stop_loop = 1;
    2505                 :            :                                         break;
    2506                 :            :                                 }
    2507                 :            :                         }
    2508                 :            : next:
    2509                 :          0 :                         path->slots[0]++;
    2510                 :            :                 }
    2511                 :          0 :                 btrfs_release_path(path);
    2512                 :          0 :                 logical += increment;
    2513                 :          0 :                 physical += map->stripe_len;
    2514                 :            :                 spin_lock(&sctx->stat_lock);
    2515         [ #  # ]:          0 :                 if (stop_loop)
    2516                 :          0 :                         sctx->stat.last_physical = map->stripes[num].physical +
    2517                 :            :                                                    length;
    2518                 :            :                 else
    2519                 :          0 :                         sctx->stat.last_physical = physical;
    2520                 :            :                 spin_unlock(&sctx->stat_lock);
    2521         [ #  # ]:          0 :                 if (stop_loop)
    2522                 :            :                         break;
    2523                 :            :         }
    2524                 :            : out:
    2525                 :            :         /* push queued extents */
    2526                 :          0 :         scrub_submit(sctx);
    2527                 :          0 :         mutex_lock(&sctx->wr_ctx.wr_lock);
    2528                 :          0 :         scrub_wr_submit(sctx);
    2529                 :          0 :         mutex_unlock(&sctx->wr_ctx.wr_lock);
    2530                 :            : 
    2531                 :          0 :         blk_finish_plug(&plug);
    2532                 :          0 :         btrfs_free_path(path);
    2533                 :          0 :         return ret < 0 ? ret : 0;
    2534                 :            : }
    2535                 :            : /*
                         :            :  * Scrub the stripes of one chunk that are located on @scrub_dev.
                         :            :  *
                         :            :  * The extent mapping covering @chunk_offset is looked up in the mapping
                         :            :  * tree of sctx->dev_root->fs_info. Every stripe of that mapping whose
                         :            :  * device has the same bdev as @scrub_dev and whose physical start equals
                         :            :  * @dev_offset is handed to scrub_stripe() together with @chunk_offset,
                         :            :  * @length and @is_dev_replace.
                         :            :  *
                         :            :  * @chunk_tree and @chunk_objectid are not used by this function.
                         :            :  *
                         :            :  * Returns -EINVAL if no mapping is found, otherwise the first non-zero
                         :            :  * value returned by scrub_stripe(), otherwise 0. Note that 0 is also
                         :            :  * returned, without scrubbing anything, when the mapping that was found
                         :            :  * does not start at @chunk_offset or is shorter than @length.
                         :            :  */
    2536                 :          0 : static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
    2537                 :            :                                           struct btrfs_device *scrub_dev,
    2538                 :            :                                           u64 chunk_tree, u64 chunk_objectid,
    2539                 :            :                                           u64 chunk_offset, u64 length,
    2540                 :            :                                           u64 dev_offset, int is_dev_replace)
    2541                 :            : {
    2542                 :            :         struct btrfs_mapping_tree *map_tree =
    2543                 :          0 :                 &sctx->dev_root->fs_info->mapping_tree;
    2544                 :            :         struct map_lookup *map;
    2545                 :            :         struct extent_map *em;
    2546                 :            :         int i;
    2547                 :            :         int ret = 0;
    2548                 :            : 
    2549                 :          0 :         read_lock(&map_tree->map_tree.lock);
    2550                 :          0 :         em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
    2551                 :            :         read_unlock(&map_tree->map_tree.lock);
    2552                 :            :         /* from here on em must be released with free_extent_map(), see out: */
    2553         [ #  # ]:          0 :         if (!em)
    2554                 :            :                 return -EINVAL;
    2555                 :            :         /* this code treats em->bdev as the chunk's struct map_lookup */
    2556                 :          0 :         map = (struct map_lookup *)em->bdev;
    2557         [ #  # ]:          0 :         if (em->start != chunk_offset)
    2558                 :            :                 goto out; /* ret is still 0: nothing scrubbed, no error reported */
    2559                 :            : 
    2560         [ #  # ]:          0 :         if (em->len < length)
    2561                 :            :                 goto out; /* ret is still 0 here as well */
    2562                 :            :         /* scrub every stripe that matches both the device and the device offset */
    2563         [ #  # ]:          0 :         for (i = 0; i < map->num_stripes; ++i) {
    2564 [ #  # ][ #  # ]:          0 :                 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
    2565                 :          0 :                     map->stripes[i].physical == dev_offset) {
    2566                 :          0 :                         ret = scrub_stripe(sctx, map, scrub_dev, i,
    2567                 :            :                                            chunk_offset, length,
    2568                 :            :                                            is_dev_replace);
    2569         [ #  # ]:          0 :                         if (ret)
    2570                 :            :                                 goto out;
    2571                 :            :                 }
    2572                 :            :         }
    2573                 :            : out:
    2574                 :          0 :         free_extent_map(em);
    2575                 :            : 
    2576                 :            :         return ret;
    2577                 :            : }
    2578                 :            : /*
                         :            :  * Walk the BTRFS_DEV_EXTENT_KEY items of @scrub_dev in sctx->dev_root and
                         :            :  * scrub the chunk behind each of them.
                         :            :  *
                         :            :  * The search starts at offset 0 of the device and is restarted after every
                         :            :  * dev extent at "offset + length" of the extent just handled. Dev extents
                         :            :  * that end at or before @start are skipped; the walk stops at the first
                         :            :  * dev extent whose offset is >= @end, at the first item that belongs to
                         :            :  * another devid or key type, or when no further leaf is available.
                         :            :  *
                         :            :  * For each remaining dev extent a block group reference is taken, the
                         :            :  * dev_replace cursors are set, scrub_chunk() is called, all queued read
                         :            :  * and write bios are submitted and waited for, and only then the result
                         :            :  * is evaluated.
                         :            :  *
                         :            :  * Returns 0 on success or a negative errno: -ENOMEM if the path cannot be
                         :            :  * allocated or sctx->stat.malloc_errors is non-zero, -ENOENT if the block
                         :            :  * group of a chunk is not found, -EIO if @is_dev_replace is set and
                         :            :  * dev_replace->num_write_errors is positive, or a negative value from the
                         :            :  * tree search or from scrub_chunk(). Positive values of ret are mapped
                         :            :  * to 0.
                         :            :  */
    2579                 :            : static noinline_for_stack
    2580                 :          0 : int scrub_enumerate_chunks(struct scrub_ctx *sctx,
    2581                 :            :                            struct btrfs_device *scrub_dev, u64 start, u64 end,
    2582                 :            :                            int is_dev_replace)
    2583                 :            : {
    2584                 :            :         struct btrfs_dev_extent *dev_extent = NULL;
    2585                 :            :         struct btrfs_path *path;
    2586                 :          0 :         struct btrfs_root *root = sctx->dev_root;
    2587                 :          0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2588                 :            :         u64 length;
    2589                 :            :         u64 chunk_tree;
    2590                 :            :         u64 chunk_objectid;
    2591                 :            :         u64 chunk_offset;
    2592                 :            :         int ret;
    2593                 :            :         int slot;
    2594                 :            :         struct extent_buffer *l;
    2595                 :            :         struct btrfs_key key;
    2596                 :            :         struct btrfs_key found_key;
    2597                 :            :         struct btrfs_block_group_cache *cache;
    2598                 :            :         struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
    2599                 :            : 
    2600                 :          0 :         path = btrfs_alloc_path();
    2601         [ #  # ]:          0 :         if (!path)
    2602                 :            :                 return -ENOMEM;
    2603                 :            :         /* search the commit root and do not take tree locks */
    2604                 :          0 :         path->reada = 2;
    2605                 :          0 :         path->search_commit_root = 1;
    2606                 :          0 :         path->skip_locking = 1;
    2607                 :            :         /* first possible dev extent key of this device */
    2608                 :          0 :         key.objectid = scrub_dev->devid;
    2609                 :          0 :         key.offset = 0ull;
    2610                 :          0 :         key.type = BTRFS_DEV_EXTENT_KEY;
    2611                 :            : 
    2612                 :            :         while (1) {
    2613                 :          0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    2614         [ #  # ]:          0 :                 if (ret < 0)
    2615                 :            :                         break;
    2616         [ #  # ]:          0 :                 if (ret > 0) {
    2617         [ #  # ]:          0 :                         if (path->slots[0] >=
    2618                 :          0 :                             btrfs_header_nritems(path->nodes[0])) {
    2619                 :          0 :                                 ret = btrfs_next_leaf(root, path);
    2620         [ #  # ]:          0 :                                 if (ret)
    2621                 :            :                                         break;
    2622                 :            :                         }
    2623                 :            :                 }
    2624                 :            : 
    2625                 :          0 :                 l = path->nodes[0];
    2626                 :          0 :                 slot = path->slots[0];
    2627                 :            : 
    2628                 :            :                 btrfs_item_key_to_cpu(l, &found_key, slot);
    2629                 :            :                 /* any of the four checks below ends the walk; ret is then >= 0 */
    2630         [ #  # ]:          0 :                 if (found_key.objectid != scrub_dev->devid)
    2631                 :            :                         break;
    2632                 :            : 
    2633         [ #  # ]:          0 :                 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
    2634                 :            :                         break;
    2635                 :            : 
    2636         [ #  # ]:          0 :                 if (found_key.offset >= end)
    2637                 :            :                         break;
    2638                 :            : 
    2639         [ #  # ]:          0 :                 if (found_key.offset < key.offset)
    2640                 :            :                         break;
    2641                 :            : 
    2642                 :          0 :                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
    2643                 :            :                 length = btrfs_dev_extent_length(l, dev_extent);
    2644                 :            :                 /* dev extent ends at or before @start: skip it and search again */
    2645         [ #  # ]:          0 :                 if (found_key.offset + length <= start) {
    2646                 :          0 :                         key.offset = found_key.offset + length;
    2647                 :          0 :                         btrfs_release_path(path);
    2648                 :          0 :                         continue;
    2649                 :            :                 }
    2650                 :            : 
    2651                 :            :                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
    2652                 :            :                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
    2653                 :            :                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
    2654                 :            : 
    2655                 :            :                 /*
    2656                 :            :                  * get a reference on the corresponding block group to prevent
    2657                 :            :                  * the chunk from going away while we scrub it
    2658                 :            :                  */
    2659                 :          0 :                 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
    2660         [ #  # ]:          0 :                 if (!cache) {
    2661                 :            :                         ret = -ENOENT;
    2662                 :            :                         break;
    2663                 :            :                 } /* the cursors below are written even when !is_dev_replace */
    2664                 :          0 :                 dev_replace->cursor_right = found_key.offset + length;
    2665                 :          0 :                 dev_replace->cursor_left = found_key.offset;
    2666                 :          0 :                 dev_replace->item_needs_writeback = 1;
    2667                 :          0 :                 ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
    2668                 :            :                                   chunk_offset, length, found_key.offset,
    2669                 :            :                                   is_dev_replace);
    2670                 :            : 
    2671                 :            :                 /*
    2672                 :            :                  * flush, submit all pending read and write bios, afterwards
    2673                 :            :                  * wait for them.
    2674                 :            :                  * Note that in the dev replace case, a read request causes
    2675                 :            :                  * write requests that are submitted in the read completion
    2676                 :            :                  * worker. Therefore in the current situation, it is required
    2677                 :            :                  * that all write requests are flushed, so that all read and
    2678                 :            :                  * write requests are really completed when bios_in_flight
    2679                 :            :                  * changes to 0.
    2680                 :            :                  */
    2681                 :          0 :                 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
    2682                 :          0 :                 scrub_submit(sctx);
    2683                 :          0 :                 mutex_lock(&sctx->wr_ctx.wr_lock);
    2684                 :          0 :                 scrub_wr_submit(sctx);
    2685                 :          0 :                 mutex_unlock(&sctx->wr_ctx.wr_lock);
    2686                 :            : 
    2687 [ #  # ][ #  # ]:          0 :                 wait_event(sctx->list_wait,
    2688                 :            :                            atomic_read(&sctx->bios_in_flight) == 0);
    2689                 :          0 :                 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
    2690 [ #  # ][ #  # ]:          0 :                 wait_event(sctx->list_wait,
    2691                 :            :                            atomic_read(&sctx->workers_pending) == 0);
    2692                 :          0 :                 scrub_blocked_if_needed(fs_info);
    2693                 :            :                 /* drop the block group reference before ret from scrub_chunk() is examined */
    2694                 :          0 :                 btrfs_put_block_group(cache);
    2695         [ #  # ]:          0 :                 if (ret)
    2696                 :            :                         break;
    2697   [ #  #  #  # ]:          0 :                 if (is_dev_replace &&
    2698                 :          0 :                     atomic64_read(&dev_replace->num_write_errors) > 0) {
    2699                 :            :                         ret = -EIO;
    2700                 :            :                         break;
    2701                 :            :                 }
    2702         [ #  # ]:          0 :                 if (sctx->stat.malloc_errors > 0) {
    2703                 :            :                         ret = -ENOMEM;
    2704                 :            :                         break;
    2705                 :            :                 }
    2706                 :            :                 /* chunk finished without error: move the left cursor up to the right one */
    2707                 :          0 :                 dev_replace->cursor_left = dev_replace->cursor_right;
    2708                 :          0 :                 dev_replace->item_needs_writeback = 1;
    2709                 :            : 
    2710                 :          0 :                 key.offset = found_key.offset + length;
    2711                 :          0 :                 btrfs_release_path(path);
    2712                 :            :         }
    2713                 :            : 
    2714                 :          0 :         btrfs_free_path(path);
    2715                 :            : 
    2716                 :            :         /*
    2717                 :            :          * ret can still be 1 from search_slot or next_leaf,
    2718                 :            :          * that's not an error
    2719                 :            :          */
    2720                 :          0 :         return ret < 0 ? ret : 0;
    2721                 :            : }
    2722                 :            : /*
                         :            :  * Queue a scrub of the super block copies of @scrub_dev.
                         :            :  *
                         :            :  * Up to BTRFS_SUPER_MIRROR_MAX copies at btrfs_sb_offset(i) are passed to
                         :            :  * scrub_pages() with BTRFS_EXTENT_FLAG_SUPER and the generation of the
                         :            :  * last committed transaction. The loop ends at the first copy that does
                         :            :  * not fit completely below scrub_dev->total_bytes.
                         :            :  *
                         :            :  * Returns -EIO if the filesystem is flagged BTRFS_FS_STATE_ERROR, the
                         :            :  * first non-zero value returned by scrub_pages() (in that case the
                         :            :  * function returns immediately, without waiting for bios already in
                         :            :  * flight), and 0 once sctx->bios_in_flight has dropped to 0.
                         :            :  */
    2723                 :          0 : static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
    2724                 :            :                                            struct btrfs_device *scrub_dev)
    2725                 :            : {
    2726                 :            :         int     i;
    2727                 :            :         u64     bytenr;
    2728                 :            :         u64     gen;
    2729                 :            :         int     ret;
    2730                 :          0 :         struct btrfs_root *root = sctx->dev_root;
    2731                 :            : 
    2732         [ #  # ]:          0 :         if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
    2733                 :            :                 return -EIO;
    2734                 :            :         /* generation handed to scrub_pages() for every super block copy */
    2735                 :          0 :         gen = root->fs_info->last_trans_committed;
    2736                 :            : 
    2737         [ #  # ]:          0 :         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
    2738                 :            :                 bytenr = btrfs_sb_offset(i);
    2739         [ #  # ]:          0 :                 if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
    2740                 :            :                         break;
    2741                 :            :                 /* bytenr is passed for all three offset arguments, i as the mirror index */
    2742                 :          0 :                 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
    2743                 :            :                                   scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
    2744                 :            :                                   NULL, 1, bytenr);
    2745         [ #  # ]:          0 :                 if (ret)
    2746                 :            :                         return ret;
    2747                 :            :         }
    2748 [ #  # ][ #  # ]:          0 :         wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
    2749                 :            : 
    2750                 :            :         return 0;
    2751                 :            : }
    2752                 :            : 
    2753                 :            : /*
    2754                 :            :  * Take a reference on the scrub worker pools of @fs_info; initialize and
    2755                 :            :  * start the pools if this is the first reference.
                         :            :  *
                         :            :  * Three pools are set up while fs_info->scrub_workers_refcnt is 0:
                         :            :  *   "scrub"    - sized 1 if @is_dev_replace, else fs_info->thread_pool_size
                         :            :  *   "scrubwrc" - sized fs_info->thread_pool_size
                         :            :  *   "scrubnc"  - sized 1
                         :            :  * (the size is the third argument of btrfs_init_workers(); presumably the
                         :            :  * maximum number of threads - confirm against its definition).
                         :            :  *
                         :            :  * Returns 0 and increments scrub_workers_refcnt on success. If one of the
                         :            :  * btrfs_start_workers() calls fails, its return value is returned and the
                         :            :  * reference count is left unchanged.
                         :            :  *
                         :            :  * The reference count is modified without any locking in this function;
                         :            :  * presumably the caller serializes get/put - confirm against the caller.
                         :            :  *
                         :            :  * NOTE(review): when starting the second or third pool fails, the pools
                         :            :  * started before it are not stopped here and the count stays 0, so a
                         :            :  * later call runs btrfs_init_workers() on them again. Confirm whether the
                         :            :  * caller cleans up, otherwise the error path should stop them.
                         :            :  */
    2756                 :          0 : static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
    2757                 :            :                                                 int is_dev_replace)
    2758                 :            : {
    2759                 :            :         int ret = 0;
    2760                 :            : 
    2761         [ #  # ]:          0 :         if (fs_info->scrub_workers_refcnt == 0) {
    2762         [ #  # ]:          0 :                 if (is_dev_replace)
    2763                 :          0 :                         btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
    2764                 :            :                                         &fs_info->generic_worker);
    2765                 :            :                 else
    2766                 :          0 :                         btrfs_init_workers(&fs_info->scrub_workers, "scrub",
    2767                 :            :                                         fs_info->thread_pool_size,
    2768                 :            :                                         &fs_info->generic_worker);
    2769                 :          0 :                 fs_info->scrub_workers.idle_thresh = 4;
    2770                 :          0 :                 ret = btrfs_start_workers(&fs_info->scrub_workers);
    2771         [ #  # ]:          0 :                 if (ret)
    2772                 :            :                         goto out;
    2773                 :          0 :                 btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
    2774                 :            :                                    "scrubwrc",
    2775                 :            :                                    fs_info->thread_pool_size,
    2776                 :            :                                    &fs_info->generic_worker);
    2777                 :          0 :                 fs_info->scrub_wr_completion_workers.idle_thresh = 2;
    2778                 :          0 :                 ret = btrfs_start_workers(
    2779                 :            :                                 &fs_info->scrub_wr_completion_workers);
    2780         [ #  # ]:          0 :                 if (ret)
    2781                 :            :                         goto out; /* "scrub" pool stays started, see NOTE(review) above */
    2782                 :          0 :                 btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
    2783                 :            :                                    &fs_info->generic_worker);
    2784                 :          0 :                 ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
    2785         [ #  # ]:          0 :                 if (ret)
    2786                 :            :                         goto out; /* "scrub" and "scrubwrc" pools stay started */
    2787                 :            :         }
    2788                 :          0 :         ++fs_info->scrub_workers_refcnt;
    2789                 :            : out:
    2790                 :          0 :         return ret;
    2791                 :            : }
    2792                 :            : /*
                         :            :  * Drop one reference on the scrub worker pools of @fs_info.
                         :            :  *
                         :            :  * When the count reaches 0 the "scrub", write completion and nocow pools
                         :            :  * are stopped. A count below 0 (more puts than gets) triggers WARN_ON().
                         :            :  * The count is modified without any locking in this function; presumably
                         :            :  * the caller serializes get/put - confirm against the caller.
                         :            :  */
    2793                 :          0 : static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
    2794                 :            : {
    2795         [ #  # ]:          0 :         if (--fs_info->scrub_workers_refcnt == 0) {
    2796                 :          0 :                 btrfs_stop_workers(&fs_info->scrub_workers);
    2797                 :          0 :                 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
    2798                 :          0 :                 btrfs_stop_workers(&fs_info->scrub_nocow_workers);
    2799                 :            :         }
    2800         [ #  # ]:          0 :         WARN_ON(fs_info->scrub_workers_refcnt < 0);
    2801                 :          0 : }
    2802                 :            : 
    2803                 :          0 : int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
    2804                 :            :                     u64 end, struct btrfs_scrub_progress *progress,
    2805                 :            :                     int readonly, int is_dev_replace)
    2806                 :            : {
    2807                 :            :         struct scrub_ctx *sctx;
    2808                 :            :         int ret;
    2809                 :          0 :         struct btrfs_device *dev;
    2810                 :            : 
    2811         [ #  # ]:          0 :         if (btrfs_fs_closing(fs_info))
    2812                 :            :                 return -EINVAL;
    2813                 :            : 
    2814                 :            :         /*
    2815                 :            :          * check some assumptions
    2816                 :            :          */
    2817         [ #  # ]:          0 :         if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
    2818                 :          0 :                 btrfs_err(fs_info,
    2819                 :            :                            "scrub: size assumption nodesize == leafsize (%d == %d) fails",
    2820                 :            :                        fs_info->chunk_root->nodesize,
    2821                 :            :                        fs_info->chunk_root->leafsize);
    2822                 :          0 :                 return -EINVAL;
    2823                 :            :         }
    2824                 :            : 
    2825         [ #  # ]:          0 :         if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
    2826                 :            :                 /*
    2827                 :            :                  * in this case scrub is unable to calculate the checksum
    2828                 :            :                  * the way scrub is implemented. Do not handle this
    2829                 :            :                  * situation at all because it won't ever happen.
    2830                 :            :                  */
    2831                 :          0 :                 btrfs_err(fs_info,
    2832                 :            :                            "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
    2833                 :            :                        fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
    2834                 :          0 :                 return -EINVAL;
    2835                 :            :         }
    2836                 :            : 
    2837         [ #  # ]:          0 :         if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
    2838                 :            :                 /* not supported for data w/o checksums */
    2839                 :          0 :                 btrfs_err(fs_info,
    2840                 :            :                            "scrub: size assumption sectorsize != PAGE_SIZE "
    2841                 :            :                            "(%d != %lu) fails",
    2842                 :            :                        fs_info->chunk_root->sectorsize, PAGE_SIZE);
    2843                 :          0 :                 return -EINVAL;
    2844                 :            :         }
    2845                 :            : 
    2846         [ #  # ]:          0 :         if (fs_info->chunk_root->nodesize >
    2847         [ #  # ]:          0 :             PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
    2848                 :            :             fs_info->chunk_root->sectorsize >
    2849                 :            :             PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
    2850                 :            :                 /*
    2851                 :            :                  * would exhaust the array bounds of pagev member in
    2852                 :            :                  * struct scrub_block
    2853                 :            :                  */
    2854                 :          0 :                 btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
    2855                 :            :                            "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
    2856                 :            :                        fs_info->chunk_root->nodesize,
    2857                 :            :                        SCRUB_MAX_PAGES_PER_BLOCK,
    2858                 :            :                        fs_info->chunk_root->sectorsize,
    2859                 :            :                        SCRUB_MAX_PAGES_PER_BLOCK);
    2860                 :          0 :                 return -EINVAL;
    2861                 :            :         }
    2862                 :            : 
    2863                 :            : 
    2864                 :          0 :         mutex_lock(&fs_info->fs_devices->device_list_mutex);
    2865                 :          0 :         dev = btrfs_find_device(fs_info, devid, NULL, NULL);
    2866 [ #  # ][ #  # ]:          0 :         if (!dev || (dev->missing && !is_dev_replace)) {
                 [ #  # ]
    2867                 :          0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2868                 :          0 :                 return -ENODEV;
    2869                 :            :         }
    2870                 :            : 
    2871                 :          0 :         mutex_lock(&fs_info->scrub_lock);
    2872 [ #  # ][ #  # ]:          0 :         if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
    2873                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2874                 :          0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2875                 :          0 :                 return -EIO;
    2876                 :            :         }
    2877                 :            : 
    2878                 :          0 :         btrfs_dev_replace_lock(&fs_info->dev_replace);
    2879 [ #  # ][ #  # ]:          0 :         if (dev->scrub_device ||
    2880         [ #  # ]:          0 :             (!is_dev_replace &&
    2881                 :          0 :              btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
    2882                 :          0 :                 btrfs_dev_replace_unlock(&fs_info->dev_replace);
    2883                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2884                 :          0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2885                 :          0 :                 return -EINPROGRESS;
    2886                 :            :         }
    2887                 :          0 :         btrfs_dev_replace_unlock(&fs_info->dev_replace);
    2888                 :            : 
    2889                 :          0 :         ret = scrub_workers_get(fs_info, is_dev_replace);
    2890         [ #  # ]:          0 :         if (ret) {
    2891                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2892                 :          0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2893                 :          0 :                 return ret;
    2894                 :            :         }
    2895                 :            : 
    2896                 :          0 :         sctx = scrub_setup_ctx(dev, is_dev_replace);
    2897         [ #  # ]:          0 :         if (IS_ERR(sctx)) {
    2898                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2899                 :          0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2900                 :          0 :                 scrub_workers_put(fs_info);
    2901                 :          0 :                 return PTR_ERR(sctx);
    2902                 :            :         }
    2903                 :          0 :         sctx->readonly = readonly;
    2904                 :          0 :         dev->scrub_device = sctx;
    2905                 :          0 :         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2906                 :            : 
    2907                 :            :         /*
    2908                 :            :          * checking @scrub_pause_req here, we can avoid
    2909                 :            :          * race between committing transaction and scrubbing.
    2910                 :            :          */
    2911                 :          0 :         __scrub_blocked_if_needed(fs_info);
    2912                 :          0 :         atomic_inc(&fs_info->scrubs_running);
    2913                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
    2914                 :            : 
    2915         [ #  # ]:          0 :         if (!is_dev_replace) {
    2916                 :            :                 /*
    2917                 :            :                  * by holding device list mutex, we can
    2918                 :            :                  * kick off writing super in log tree sync.
    2919                 :            :                  */
    2920                 :          0 :                 mutex_lock(&fs_info->fs_devices->device_list_mutex);
    2921                 :          0 :                 ret = scrub_supers(sctx, dev);
    2922                 :          0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    2923                 :            :         }
    2924                 :            : 
    2925         [ #  # ]:          0 :         if (!ret)
    2926                 :          0 :                 ret = scrub_enumerate_chunks(sctx, dev, start, end,
    2927                 :            :                                              is_dev_replace);
    2928                 :            : 
    2929 [ #  # ][ #  # ]:          0 :         wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
    2930                 :            :         atomic_dec(&fs_info->scrubs_running);
    2931                 :          0 :         wake_up(&fs_info->scrub_pause_wait);
    2932                 :            : 
    2933 [ #  # ][ #  # ]:          0 :         wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
    2934                 :            : 
    2935         [ #  # ]:          0 :         if (progress)
    2936                 :          0 :                 memcpy(progress, &sctx->stat, sizeof(*progress));
    2937                 :            : 
    2938                 :          0 :         mutex_lock(&fs_info->scrub_lock);
    2939                 :          0 :         dev->scrub_device = NULL;
    2940                 :          0 :         scrub_workers_put(fs_info);
    2941                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
    2942                 :            : 
    2943                 :          0 :         scrub_free_ctx(sctx);
    2944                 :            : 
    2945                 :          0 :         return ret;
    2946                 :            : }
    2947                 :            : 
    2948                 :          0 : void btrfs_scrub_pause(struct btrfs_root *root)
    2949                 :            : {
    2950                 :          0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2951                 :            : 
    2952                 :          0 :         mutex_lock(&fs_info->scrub_lock);
    2953                 :          0 :         atomic_inc(&fs_info->scrub_pause_req);
    2954         [ #  # ]:          0 :         while (atomic_read(&fs_info->scrubs_paused) !=
    2955                 :          0 :                atomic_read(&fs_info->scrubs_running)) {
    2956                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2957 [ #  # ][ #  # ]:          0 :                 wait_event(fs_info->scrub_pause_wait,
    2958                 :            :                            atomic_read(&fs_info->scrubs_paused) ==
    2959                 :            :                            atomic_read(&fs_info->scrubs_running));
    2960                 :          0 :                 mutex_lock(&fs_info->scrub_lock);
    2961                 :            :         }
    2962                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
    2963                 :          0 : }
    2964                 :            : 
    2965                 :          0 : void btrfs_scrub_continue(struct btrfs_root *root)
    2966                 :            : {
    2967                 :          0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2968                 :            : 
    2969                 :          0 :         atomic_dec(&fs_info->scrub_pause_req);
    2970                 :          0 :         wake_up(&fs_info->scrub_pause_wait);
    2971                 :          0 : }
    2972                 :            : 
    2973                 :          0 : int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
    2974                 :            : {
    2975                 :          0 :         mutex_lock(&fs_info->scrub_lock);
    2976         [ #  # ]:          0 :         if (!atomic_read(&fs_info->scrubs_running)) {
    2977                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2978                 :          0 :                 return -ENOTCONN;
    2979                 :            :         }
    2980                 :            : 
    2981                 :          0 :         atomic_inc(&fs_info->scrub_cancel_req);
    2982         [ #  # ]:          0 :         while (atomic_read(&fs_info->scrubs_running)) {
    2983                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    2984 [ #  # ][ #  # ]:          0 :                 wait_event(fs_info->scrub_pause_wait,
    2985                 :            :                            atomic_read(&fs_info->scrubs_running) == 0);
    2986                 :          0 :                 mutex_lock(&fs_info->scrub_lock);
    2987                 :            :         }
    2988                 :            :         atomic_dec(&fs_info->scrub_cancel_req);
    2989                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
    2990                 :            : 
    2991                 :          0 :         return 0;
    2992                 :            : }
    2993                 :            : 
    2994                 :          0 : int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
    2995                 :            :                            struct btrfs_device *dev)
    2996                 :            : {
    2997                 :            :         struct scrub_ctx *sctx;
    2998                 :            : 
    2999                 :          0 :         mutex_lock(&fs_info->scrub_lock);
    3000                 :          0 :         sctx = dev->scrub_device;
    3001         [ #  # ]:          0 :         if (!sctx) {
    3002                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    3003                 :          0 :                 return -ENOTCONN;
    3004                 :            :         }
    3005                 :          0 :         atomic_inc(&sctx->cancel_req);
    3006         [ #  # ]:          0 :         while (dev->scrub_device) {
    3007                 :          0 :                 mutex_unlock(&fs_info->scrub_lock);
    3008 [ #  # ][ #  # ]:          0 :                 wait_event(fs_info->scrub_pause_wait,
    3009                 :            :                            dev->scrub_device == NULL);
    3010                 :          0 :                 mutex_lock(&fs_info->scrub_lock);
    3011                 :            :         }
    3012                 :          0 :         mutex_unlock(&fs_info->scrub_lock);
    3013                 :            : 
    3014                 :          0 :         return 0;
    3015                 :            : }
    3016                 :            : 
    3017                 :          0 : int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
    3018                 :            :                          struct btrfs_scrub_progress *progress)
    3019                 :            : {
    3020                 :            :         struct btrfs_device *dev;
    3021                 :            :         struct scrub_ctx *sctx = NULL;
    3022                 :            : 
    3023                 :          0 :         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
    3024                 :          0 :         dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
    3025         [ #  # ]:          0 :         if (dev)
    3026                 :          0 :                 sctx = dev->scrub_device;
    3027         [ #  # ]:          0 :         if (sctx)
    3028                 :          0 :                 memcpy(progress, &sctx->stat, sizeof(*progress));
    3029                 :          0 :         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
    3030                 :            : 
    3031 [ #  # ][ #  # ]:          0 :         return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
    3032                 :            : }
    3033                 :            : 
    3034                 :          0 : static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
    3035                 :            :                                u64 extent_logical, u64 extent_len,
    3036                 :            :                                u64 *extent_physical,
    3037                 :            :                                struct btrfs_device **extent_dev,
    3038                 :            :                                int *extent_mirror_num)
    3039                 :            : {
    3040                 :            :         u64 mapped_length;
    3041                 :          0 :         struct btrfs_bio *bbio = NULL;
    3042                 :            :         int ret;
    3043                 :            : 
    3044                 :          0 :         mapped_length = extent_len;
    3045                 :          0 :         ret = btrfs_map_block(fs_info, READ, extent_logical,
    3046                 :            :                               &mapped_length, &bbio, 0);
    3047 [ #  # ][ #  # ]:          0 :         if (ret || !bbio || mapped_length < extent_len ||
         [ #  # ][ #  # ]
    3048                 :          0 :             !bbio->stripes[0].dev->bdev) {
    3049                 :          0 :                 kfree(bbio);
    3050                 :          0 :                 return;
    3051                 :            :         }
    3052                 :            : 
    3053                 :          0 :         *extent_physical = bbio->stripes[0].physical;
    3054                 :          0 :         *extent_mirror_num = bbio->mirror_num;
    3055                 :          0 :         *extent_dev = bbio->stripes[0].dev;
    3056                 :          0 :         kfree(bbio);
    3057                 :            : }
    3058                 :            : 
    3059                 :          0 : static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
    3060                 :            :                               struct scrub_wr_ctx *wr_ctx,
    3061                 :            :                               struct btrfs_fs_info *fs_info,
    3062                 :            :                               struct btrfs_device *dev,
    3063                 :            :                               int is_dev_replace)
    3064                 :            : {
    3065         [ #  # ]:          0 :         WARN_ON(wr_ctx->wr_curr_bio != NULL);
    3066                 :            : 
    3067                 :          0 :         mutex_init(&wr_ctx->wr_lock);
    3068                 :          0 :         wr_ctx->wr_curr_bio = NULL;
    3069         [ #  # ]:          0 :         if (!is_dev_replace)
    3070                 :            :                 return 0;
    3071                 :            : 
    3072         [ #  # ]:          0 :         WARN_ON(!dev->bdev);
    3073                 :          0 :         wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
    3074                 :            :                                          bio_get_nr_vecs(dev->bdev));
    3075                 :          0 :         wr_ctx->tgtdev = dev;
    3076                 :          0 :         atomic_set(&wr_ctx->flush_all_writes, 0);
    3077                 :            :         return 0;
    3078                 :            : }
    3079                 :            : 
    3080                 :          0 : static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
    3081                 :            : {
    3082                 :          0 :         mutex_lock(&wr_ctx->wr_lock);
    3083                 :          0 :         kfree(wr_ctx->wr_curr_bio);
    3084                 :          0 :         wr_ctx->wr_curr_bio = NULL;
    3085                 :          0 :         mutex_unlock(&wr_ctx->wr_lock);
    3086                 :          0 : }
    3087                 :            : 
    3088                 :          0 : static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
    3089                 :            :                             int mirror_num, u64 physical_for_dev_replace)
    3090                 :            : {
    3091                 :            :         struct scrub_copy_nocow_ctx *nocow_ctx;
    3092                 :          0 :         struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
    3093                 :            : 
    3094                 :            :         nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
    3095         [ #  # ]:          0 :         if (!nocow_ctx) {
    3096                 :            :                 spin_lock(&sctx->stat_lock);
    3097                 :          0 :                 sctx->stat.malloc_errors++;
    3098                 :            :                 spin_unlock(&sctx->stat_lock);
    3099                 :          0 :                 return -ENOMEM;
    3100                 :            :         }
    3101                 :            : 
    3102                 :          0 :         scrub_pending_trans_workers_inc(sctx);
    3103                 :            : 
    3104                 :          0 :         nocow_ctx->sctx = sctx;
    3105                 :          0 :         nocow_ctx->logical = logical;
    3106                 :          0 :         nocow_ctx->len = len;
    3107                 :          0 :         nocow_ctx->mirror_num = mirror_num;
    3108                 :          0 :         nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
    3109                 :          0 :         nocow_ctx->work.func = copy_nocow_pages_worker;
    3110                 :          0 :         INIT_LIST_HEAD(&nocow_ctx->inodes);
    3111                 :          0 :         btrfs_queue_worker(&fs_info->scrub_nocow_workers,
    3112                 :            :                            &nocow_ctx->work);
    3113                 :            : 
    3114                 :          0 :         return 0;
    3115                 :            : }
    3116                 :            : 
    3117                 :          0 : static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
    3118                 :            : {
    3119                 :            :         struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
    3120                 :            :         struct scrub_nocow_inode *nocow_inode;
    3121                 :            : 
    3122                 :            :         nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
    3123         [ #  # ]:          0 :         if (!nocow_inode)
    3124                 :            :                 return -ENOMEM;
    3125                 :          0 :         nocow_inode->inum = inum;
    3126                 :          0 :         nocow_inode->offset = offset;
    3127                 :          0 :         nocow_inode->root = root;
    3128                 :          0 :         list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
    3129                 :          0 :         return 0;
    3130                 :            : }
    3131                 :            : 
    3132                 :            : #define COPY_COMPLETE 1
    3133                 :            : 
    3134                 :          0 : static void copy_nocow_pages_worker(struct btrfs_work *work)
    3135                 :            : {
    3136                 :          0 :         struct scrub_copy_nocow_ctx *nocow_ctx =
    3137                 :            :                 container_of(work, struct scrub_copy_nocow_ctx, work);
    3138                 :          0 :         struct scrub_ctx *sctx = nocow_ctx->sctx;
    3139                 :          0 :         u64 logical = nocow_ctx->logical;
    3140                 :          0 :         u64 len = nocow_ctx->len;
    3141                 :          0 :         int mirror_num = nocow_ctx->mirror_num;
    3142                 :          0 :         u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
    3143                 :            :         int ret;
    3144                 :            :         struct btrfs_trans_handle *trans = NULL;
    3145                 :            :         struct btrfs_fs_info *fs_info;
    3146                 :            :         struct btrfs_path *path;
    3147                 :            :         struct btrfs_root *root;
    3148                 :            :         int not_written = 0;
    3149                 :            : 
    3150                 :          0 :         fs_info = sctx->dev_root->fs_info;
    3151                 :          0 :         root = fs_info->extent_root;
    3152                 :            : 
    3153                 :          0 :         path = btrfs_alloc_path();
    3154         [ #  # ]:          0 :         if (!path) {
    3155                 :            :                 spin_lock(&sctx->stat_lock);
    3156                 :          0 :                 sctx->stat.malloc_errors++;
    3157                 :            :                 spin_unlock(&sctx->stat_lock);
    3158                 :            :                 not_written = 1;
    3159                 :          0 :                 goto out;
    3160                 :            :         }
    3161                 :            : 
    3162                 :          0 :         trans = btrfs_join_transaction(root);
    3163         [ #  # ]:          0 :         if (IS_ERR(trans)) {
    3164                 :            :                 not_written = 1;
    3165                 :            :                 goto out;
    3166                 :            :         }
    3167                 :            : 
    3168                 :          0 :         ret = iterate_inodes_from_logical(logical, fs_info, path,
    3169                 :            :                                           record_inode_for_nocow, nocow_ctx);
    3170         [ #  # ]:          0 :         if (ret != 0 && ret != -ENOENT) {
    3171                 :          0 :                 btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
    3172                 :            :                         "phys %llu, len %llu, mir %u, ret %d",
    3173                 :            :                         logical, physical_for_dev_replace, len, mirror_num,
    3174                 :            :                         ret);
    3175                 :            :                 not_written = 1;
    3176                 :          0 :                 goto out;
    3177                 :            :         }
    3178                 :            : 
    3179                 :          0 :         btrfs_end_transaction(trans, root);
    3180                 :            :         trans = NULL;
    3181         [ #  # ]:          0 :         while (!list_empty(&nocow_ctx->inodes)) {
    3182                 :            :                 struct scrub_nocow_inode *entry;
    3183                 :          0 :                 entry = list_first_entry(&nocow_ctx->inodes,
    3184                 :            :                                          struct scrub_nocow_inode,
    3185                 :            :                                          list);
    3186                 :          0 :                 list_del_init(&entry->list);
    3187                 :          0 :                 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
    3188                 :            :                                                  entry->root, nocow_ctx);
    3189                 :          0 :                 kfree(entry);
    3190         [ #  # ]:          0 :                 if (ret == COPY_COMPLETE) {
    3191                 :            :                         ret = 0;
    3192                 :            :                         break;
    3193         [ #  # ]:          0 :                 } else if (ret) {
    3194                 :            :                         break;
    3195                 :            :                 }
    3196                 :            :         }
    3197                 :            : out:
    3198         [ #  # ]:          0 :         while (!list_empty(&nocow_ctx->inodes)) {
    3199                 :            :                 struct scrub_nocow_inode *entry;
    3200                 :          0 :                 entry = list_first_entry(&nocow_ctx->inodes,
    3201                 :            :                                          struct scrub_nocow_inode,
    3202                 :            :                                          list);
    3203                 :          0 :                 list_del_init(&entry->list);
    3204                 :          0 :                 kfree(entry);
    3205                 :            :         }
    3206 [ #  # ][ #  # ]:          0 :         if (trans && !IS_ERR(trans))
    3207                 :          0 :                 btrfs_end_transaction(trans, root);
    3208         [ #  # ]:          0 :         if (not_written)
    3209                 :          0 :                 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
    3210                 :            :                                             num_uncorrectable_read_errors);
    3211                 :            : 
    3212                 :          0 :         btrfs_free_path(path);
    3213                 :          0 :         kfree(nocow_ctx);
    3214                 :            : 
    3215                 :          0 :         scrub_pending_trans_workers_dec(sctx);
    3216                 :          0 : }
    3217                 :            : 
    3218                 :          0 : static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
    3219                 :            :                                       struct scrub_copy_nocow_ctx *nocow_ctx)
    3220                 :            : {
    3221                 :          0 :         struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
    3222                 :            :         struct btrfs_key key;
    3223                 :            :         struct inode *inode;
    3224                 :            :         struct page *page;
    3225                 :            :         struct btrfs_root *local_root;
    3226                 :            :         struct btrfs_ordered_extent *ordered;
    3227                 :            :         struct extent_map *em;
    3228                 :          0 :         struct extent_state *cached_state = NULL;
    3229                 :            :         struct extent_io_tree *io_tree;
    3230                 :            :         u64 physical_for_dev_replace;
    3231                 :          0 :         u64 len = nocow_ctx->len;
    3232                 :          0 :         u64 lockstart = offset, lockend = offset + len - 1;
    3233                 :            :         unsigned long index;
    3234                 :            :         int srcu_index;
    3235                 :            :         int ret = 0;
    3236                 :            :         int err = 0;
    3237                 :            : 
    3238                 :          0 :         key.objectid = root;
    3239                 :          0 :         key.type = BTRFS_ROOT_ITEM_KEY;
    3240                 :          0 :         key.offset = (u64)-1;
    3241                 :            : 
    3242                 :          0 :         srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
    3243                 :            : 
    3244                 :            :         local_root = btrfs_read_fs_root_no_name(fs_info, &key);
    3245         [ #  # ]:          0 :         if (IS_ERR(local_root)) {
    3246                 :            :                 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
    3247                 :          0 :                 return PTR_ERR(local_root);
    3248                 :            :         }
    3249                 :            : 
    3250                 :          0 :         key.type = BTRFS_INODE_ITEM_KEY;
    3251                 :          0 :         key.objectid = inum;
    3252                 :          0 :         key.offset = 0;
    3253                 :          0 :         inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
    3254                 :            :         srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
    3255         [ #  # ]:          0 :         if (IS_ERR(inode))
    3256                 :          0 :                 return PTR_ERR(inode);
    3257                 :            : 
    3258                 :            :         /* Avoid truncate/dio/punch hole.. */
    3259                 :          0 :         mutex_lock(&inode->i_mutex);
    3260                 :          0 :         inode_dio_wait(inode);
    3261                 :            : 
    3262                 :          0 :         physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
    3263                 :          0 :         io_tree = &BTRFS_I(inode)->io_tree;
    3264                 :            : 
    3265                 :          0 :         lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
    3266                 :          0 :         ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
    3267         [ #  # ]:          0 :         if (ordered) {
    3268                 :          0 :                 btrfs_put_ordered_extent(ordered);
    3269                 :          0 :                 goto out_unlock;
    3270                 :            :         }
    3271                 :            : 
    3272                 :          0 :         em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
    3273         [ #  # ]:          0 :         if (IS_ERR(em)) {
    3274                 :            :                 ret = PTR_ERR(em);
    3275                 :          0 :                 goto out_unlock;
    3276                 :            :         }
    3277                 :            : 
    3278                 :            :         /*
    3279                 :            :          * This extent does not actually cover the logical extent anymore,
    3280                 :            :          * move on to the next inode.
    3281                 :            :          */
    3282 [ #  # ][ #  # ]:          0 :         if (em->block_start > nocow_ctx->logical ||
    3283                 :          0 :             em->block_start + em->block_len < nocow_ctx->logical + len) {
    3284                 :          0 :                 free_extent_map(em);
    3285                 :          0 :                 goto out_unlock;
    3286                 :            :         }
    3287                 :          0 :         free_extent_map(em);
    3288                 :            : 
    3289         [ #  # ]:          0 :         while (len >= PAGE_CACHE_SIZE) {
    3290                 :          0 :                 index = offset >> PAGE_CACHE_SHIFT;
    3291                 :            : again:
    3292                 :          0 :                 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
    3293         [ #  # ]:          0 :                 if (!page) {
    3294                 :          0 :                         btrfs_err(fs_info, "find_or_create_page() failed");
    3295                 :            :                         ret = -ENOMEM;
    3296                 :          0 :                         goto out;
    3297                 :            :                 }
    3298                 :            : 
    3299         [ #  # ]:          0 :                 if (PageUptodate(page)) {
    3300         [ #  # ]:          0 :                         if (PageDirty(page))
    3301                 :            :                                 goto next_page;
    3302                 :            :                 } else {
    3303                 :            :                         ClearPageError(page);
    3304                 :          0 :                         err = extent_read_full_page_nolock(io_tree, page,
    3305                 :            :                                                            btrfs_get_extent,
    3306                 :            :                                                            nocow_ctx->mirror_num);
    3307         [ #  # ]:          0 :                         if (err) {
    3308                 :            :                                 ret = err;
    3309                 :            :                                 goto next_page;
    3310                 :            :                         }
    3311                 :            : 
    3312                 :            :                         lock_page(page);
    3313                 :            :                         /*
    3314                 :            :                          * If the page has been removed from the page cache,
    3315                 :            :                          * the data on it is meaningless, because it may be
    3316                 :            :                          * stale; the new data may have been written into a new
    3317                 :            :                          * page in the page cache.
    3318                 :            :                          */
    3319         [ #  # ]:          0 :                         if (page->mapping != inode->i_mapping) {
    3320                 :          0 :                                 unlock_page(page);
    3321                 :          0 :                                 page_cache_release(page);
    3322                 :          0 :                                 goto again;
    3323                 :            :                         }
    3324         [ #  # ]:          0 :                         if (!PageUptodate(page)) {
    3325                 :            :                                 ret = -EIO;
    3326                 :            :                                 goto next_page;
    3327                 :            :                         }
    3328                 :            :                 }
    3329                 :          0 :                 err = write_page_nocow(nocow_ctx->sctx,
    3330                 :            :                                        physical_for_dev_replace, page);
    3331         [ #  # ]:          0 :                 if (err)
    3332                 :            :                         ret = err;
    3333                 :            : next_page:
    3334                 :          0 :                 unlock_page(page);
    3335                 :          0 :                 page_cache_release(page);
    3336                 :            : 
    3337         [ #  # ]:          0 :                 if (ret)
    3338                 :            :                         break;
    3339                 :            : 
    3340                 :          0 :                 offset += PAGE_CACHE_SIZE;
    3341                 :          0 :                 physical_for_dev_replace += PAGE_CACHE_SIZE;
    3342                 :          0 :                 len -= PAGE_CACHE_SIZE;
    3343                 :            :         }
    3344                 :            :         ret = COPY_COMPLETE;
    3345                 :            : out_unlock:
    3346                 :          0 :         unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
    3347                 :            :                              GFP_NOFS);
    3348                 :            : out:
    3349                 :          0 :         mutex_unlock(&inode->i_mutex);
    3350                 :          0 :         iput(inode);
    3351                 :          0 :         return ret;
    3352                 :            : }
    3353                 :            : 
                                        /*
                                         * write_page_nocow - write a single page to the scrub context's write
                                         * target device (sctx->wr_ctx.tgtdev) using a one-page bio.
                                         *
                                         * @sctx:                     scrub context; supplies the target device and
                                         *                            holds the statistics updated on allocation
                                         *                            failure
                                         * @physical_for_dev_replace: position on the target device; shifted right
                                         *                            by 9 to form the bio's start sector
                                         *                            (presumably a byte offset - confirm against
                                         *                            the caller)
                                         * @page:                     page to write; PAGE_CACHE_SIZE bytes starting
                                         *                            at offset 0 are added to the bio
                                         *
                                         * Returns 0 on success, -EIO if there is no target device, the device has
                                         * no bdev, the page cannot be added to the bio or the submission reports
                                         * an error, and -ENOMEM if the bio cannot be allocated.
                                         */
    3354                 :          0 : static int write_page_nocow(struct scrub_ctx *sctx,
    3355                 :            :                             u64 physical_for_dev_replace, struct page *page)
    3356                 :            : {
    3357                 :            :         struct bio *bio;
    3358                 :            :         struct btrfs_device *dev;
    3359                 :            :         int ret;
    3360                 :            : 
    3361                 :          0 :         dev = sctx->wr_ctx.tgtdev;
    3362         [ #  # ]:          0 :         if (!dev)
    3363                 :            :                 return -EIO;
    3364         [ #  # ]:          0 :         if (!dev->bdev) {
    3365         [ #  # ]:          0 :                 printk_ratelimited(KERN_WARNING
    3366                 :            :                         "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
    3367                 :            :                 return -EIO;
    3368                 :            :         }
    3369                 :          0 :         bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
    3370         [ #  # ]:          0 :         if (!bio) {
                                                        /* Record the failed allocation in the scrub stats. */
    3371                 :            :                 spin_lock(&sctx->stat_lock);
    3372                 :          0 :                 sctx->stat.malloc_errors++;
    3373                 :            :                 spin_unlock(&sctx->stat_lock);
    3374                 :          0 :                 return -ENOMEM;
    3375                 :            :         }
    3376                 :          0 :         bio->bi_iter.bi_size = 0;
    3377                 :          0 :         bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
    3378                 :          0 :         bio->bi_bdev = dev->bdev;
                                                /*
                                                 * Anything other than a full PAGE_CACHE_SIZE result from
                                                 * bio_add_page() is treated as a failure.
                                                 */
    3379                 :          0 :         ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
    3380         [ #  # ]:          0 :         if (ret != PAGE_CACHE_SIZE) {
                                                /*
                                                 * Shared failure path: also entered via goto when the
                                                 * submission below returns non-zero.  Releases the bio and
                                                 * bumps the device's write-error statistic.
                                                 */
    3381                 :            : leave_with_eio:
    3382                 :          0 :                 bio_put(bio);
    3383                 :          0 :                 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
    3384                 :          0 :                 return -EIO;
    3385                 :            :         }
    3386                 :            : 
                                                /* A non-zero result from the submit-and-wait helper is a write error. */
    3387         [ #  # ]:          0 :         if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
    3388                 :            :                 goto leave_with_eio;
    3389                 :            : 
    3390                 :          0 :         bio_put(bio);
    3391                 :          0 :         return 0;
    3392                 :            : }

Generated by: LCOV version 1.9