Skip to content

Commit 7ee7fd8

Browse files
adam900710 authored and kdave committed
btrfs: make read verification handle bs > ps cases without large folios
The current read verification also relies on large folios to support bs > ps cases, but that introduces quite a few limitations.

To enhance read-repair to support bs > ps without large folios:

- Make btrfs_data_csum_ok() accept an array of paddrs
  This allows paddrs[] to be passed directly into btrfs_calculate_block_csum_pages().

- Make repair_one_sector() accept an array of paddrs
  So that it can submit a repair bio backed by regular pages, not only large folios. This requires us to allocate more slots at bio allocation time though.
  Also, since the caller may have only partially advanced the saved_iter for bs > ps cases, we cannot directly trust the logical bytenr from saved_iter (it can be unaligned), thus a manual round down is necessary for the logical bytenr.

- Make btrfs_check_read_bio() build an array of paddrs
  The tricky part is that we can only call btrfs_data_csum_ok() after all involved pages are assembled. This means that by the time btrfs_check_read_bio() makes that call, our offset inside the bio is already at the end of the fs block. Thus we must re-calculate @bio_offset for btrfs_data_csum_ok() and repair_one_sector().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 9e15f50 commit 7ee7fd8

File tree

3 files changed

+52
-30
lines changed

3 files changed

+52
-30
lines changed

fs/btrfs/bio.c

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
171171
struct btrfs_failed_bio *fbio = repair_bbio->private;
172172
struct btrfs_inode *inode = repair_bbio->inode;
173173
struct btrfs_fs_info *fs_info = inode->root->fs_info;
174-
struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
175174
/*
176175
* We can not move forward the saved_iter, as it will be later
177176
* utilized by repair_bbio again.
@@ -188,8 +187,14 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
188187
/* Repair bbio should be exactly one block sized. */
189188
ASSERT(repair_bbio->saved_iter.bi_size == fs_info->sectorsize);
190189

190+
btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
191+
ASSERT(slot < nr_steps);
192+
paddrs[slot] = paddr;
193+
slot++;
194+
}
195+
191196
if (repair_bbio->bio.bi_status ||
192-
!btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
197+
!btrfs_data_csum_ok(repair_bbio, dev, 0, paddrs)) {
193198
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
194199
repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
195200

@@ -204,12 +209,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
204209
return;
205210
}
206211

207-
btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
208-
ASSERT(slot < nr_steps);
209-
paddrs[slot] = paddr;
210-
slot++;
211-
}
212-
213212
do {
214213
mirror = prev_repair_mirror(fbio, mirror);
215214
btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
@@ -231,21 +230,25 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
231230
*/
232231
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
233232
u32 bio_offset,
234-
phys_addr_t paddr,
233+
phys_addr_t paddrs[],
235234
struct btrfs_failed_bio *fbio)
236235
{
237236
struct btrfs_inode *inode = failed_bbio->inode;
238237
struct btrfs_fs_info *fs_info = inode->root->fs_info;
239-
struct folio *folio = page_folio(phys_to_page(paddr));
240238
const u32 sectorsize = fs_info->sectorsize;
241-
const u32 foff = offset_in_folio(folio, paddr);
242-
const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
239+
const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
240+
const u32 nr_steps = sectorsize / step;
241+
/*
242+
* For bs > ps cases, the saved_iter can be partially moved forward.
243+
* In that case we should round it down to the block boundary.
244+
*/
245+
const u64 logical = round_down(failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
246+
sectorsize);
243247
struct btrfs_bio *repair_bbio;
244248
struct bio *repair_bio;
245249
int num_copies;
246250
int mirror;
247251

248-
ASSERT(foff + sectorsize <= folio_size(folio));
249252
btrfs_debug(fs_info, "repair read error: read error at %llu",
250253
failed_bbio->file_offset + bio_offset);
251254

@@ -265,10 +268,18 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
265268

266269
atomic_inc(&fbio->repair_count);
267270

268-
repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
271+
repair_bio = bio_alloc_bioset(NULL, nr_steps, REQ_OP_READ, GFP_NOFS,
269272
&btrfs_repair_bioset);
270-
repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
271-
bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
273+
repair_bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
274+
for (int i = 0; i < nr_steps; i++) {
275+
int ret;
276+
277+
ASSERT(offset_in_page(paddrs[i]) + step <= PAGE_SIZE);
278+
279+
ret = bio_add_page(repair_bio, phys_to_page(paddrs[i]), step,
280+
offset_in_page(paddrs[i]));
281+
ASSERT(ret == step);
282+
}
272283

273284
repair_bbio = btrfs_bio(repair_bio);
274285
btrfs_bio_init(repair_bbio, failed_bbio->inode, failed_bbio->file_offset + bio_offset,
@@ -284,10 +295,13 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
284295
{
285296
struct btrfs_inode *inode = bbio->inode;
286297
struct btrfs_fs_info *fs_info = inode->root->fs_info;
287-
u32 sectorsize = fs_info->sectorsize;
298+
const u32 sectorsize = fs_info->sectorsize;
299+
const u32 step = min(sectorsize, PAGE_SIZE);
300+
const u32 nr_steps = sectorsize / step;
288301
struct bvec_iter *iter = &bbio->saved_iter;
289302
blk_status_t status = bbio->bio.bi_status;
290303
struct btrfs_failed_bio *fbio = NULL;
304+
phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
291305
phys_addr_t paddr;
292306
u32 offset = 0;
293307

@@ -306,10 +320,16 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
306320
/* Clear the I/O error. A failed repair will reset it. */
307321
bbio->bio.bi_status = BLK_STS_OK;
308322

309-
btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
310-
if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
311-
fbio = repair_one_sector(bbio, offset, paddr, fbio);
312-
offset += sectorsize;
323+
btrfs_bio_for_each_block(paddr, &bbio->bio, iter, step) {
324+
paddrs[(offset / step) % nr_steps] = paddr;
325+
offset += step;
326+
327+
if (IS_ALIGNED(offset, sectorsize)) {
328+
if (status ||
329+
!btrfs_data_csum_ok(bbio, dev, offset - sectorsize, paddrs))
330+
fbio = repair_one_sector(bbio, offset - sectorsize,
331+
paddrs, fbio);
332+
}
313333
}
314334
if (bbio->csum != bbio->csum_inline)
315335
kvfree(bbio->csum);

fs/btrfs/btrfs_inode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,7 @@ void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info,
550550
int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
551551
const u8 * const csum_expected);
552552
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
553-
u32 bio_offset, phys_addr_t paddr);
553+
u32 bio_offset, const phys_addr_t paddrs[]);
554554
noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
555555
struct btrfs_file_extent *file_extent,
556556
bool nowait);

fs/btrfs/inode.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3420,25 +3420,27 @@ int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8
34203420
}
34213421

34223422
/*
3423-
* Verify the checksum of a single data sector.
3423+
* Verify the checksum of a single data sector, which can be scattered at
3424+
* different noncontiguous pages.
34243425
*
34253426
* @bbio: btrfs_io_bio which contains the csum
34263427
* @dev: device the sector is on
34273428
* @bio_offset: offset to the beginning of the bio (in bytes)
3428-
* @bv: bio_vec to check
3429+
* @paddrs: physical addresses which back the fs block
34293430
*
34303431
* Check if the checksum on a data block is valid. When a checksum mismatch is
34313432
* detected, report the error and fill the corrupted range with zero.
34323433
*
34333434
* Return %true if the sector is ok or had no checksum to start with, else %false.
34343435
*/
34353436
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
3436-
u32 bio_offset, phys_addr_t paddr)
3437+
u32 bio_offset, const phys_addr_t paddrs[])
34373438
{
34383439
struct btrfs_inode *inode = bbio->inode;
34393440
struct btrfs_fs_info *fs_info = inode->root->fs_info;
34403441
const u32 blocksize = fs_info->sectorsize;
3441-
struct folio *folio;
3442+
const u32 step = min(blocksize, PAGE_SIZE);
3443+
const u32 nr_steps = blocksize / step;
34423444
u64 file_offset = bbio->file_offset + bio_offset;
34433445
u64 end = file_offset + blocksize - 1;
34443446
u8 *csum_expected;
@@ -3458,7 +3460,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
34583460

34593461
csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) *
34603462
fs_info->csum_size;
3461-
if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected))
3463+
btrfs_calculate_block_csum_pages(fs_info, paddrs, csum);
3464+
if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0))
34623465
goto zeroit;
34633466
return true;
34643467

@@ -3467,9 +3470,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
34673470
bbio->mirror_num);
34683471
if (dev)
34693472
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
3470-
folio = page_folio(phys_to_page(paddr));
3471-
ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
3472-
folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize);
3473+
for (int i = 0; i < nr_steps; i++)
3474+
memzero_page(phys_to_page(paddrs[i]), offset_in_page(paddrs[i]), step);
34733475
return false;
34743476
}
34753477

0 commit comments

Comments
 (0)