@@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
410410 }
411411 }
412412 hlist_add_head_rcu (& zwplug -> node , & disk -> zone_wplugs_hash [idx ]);
413+ atomic_inc (& disk -> nr_zone_wplugs );
413414 spin_unlock_irqrestore (& disk -> zone_wplugs_lock , flags );
414415
415416 return true;
416417}
417418
418- static struct blk_zone_wplug * disk_get_zone_wplug (struct gendisk * disk ,
419- sector_t sector )
419+ static struct blk_zone_wplug * disk_get_hashed_zone_wplug (struct gendisk * disk ,
420+ sector_t sector )
420421{
421422 unsigned int zno = disk_zone_no (disk , sector );
422423 unsigned int idx = hash_32 (zno , disk -> zone_wplugs_hash_bits );
@@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
437438 return NULL ;
438439}
439440
441+ static inline struct blk_zone_wplug * disk_get_zone_wplug (struct gendisk * disk ,
442+ sector_t sector )
443+ {
444+ if (!atomic_read (& disk -> nr_zone_wplugs ))
445+ return NULL ;
446+
447+ return disk_get_hashed_zone_wplug (disk , sector );
448+ }
449+
440450static void disk_free_zone_wplug_rcu (struct rcu_head * rcu_head )
441451{
442452 struct blk_zone_wplug * zwplug =
@@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk,
503513 zwplug -> flags |= BLK_ZONE_WPLUG_UNHASHED ;
504514 spin_lock_irqsave (& disk -> zone_wplugs_lock , flags );
505515 hlist_del_init_rcu (& zwplug -> node );
516+ atomic_dec (& disk -> nr_zone_wplugs );
506517 spin_unlock_irqrestore (& disk -> zone_wplugs_lock , flags );
507518 disk_put_zone_wplug (zwplug );
508519}
@@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
593604{
594605 struct bio * bio ;
595606
607+ if (bio_list_empty (& zwplug -> bio_list ))
608+ return ;
609+
610+ pr_warn_ratelimited ("%s: zone %u: Aborting plugged BIOs\n" ,
611+ zwplug -> disk -> disk_name , zwplug -> zone_no );
596612 while ((bio = bio_list_pop (& zwplug -> bio_list )))
597613 blk_zone_wplug_bio_io_error (zwplug , bio );
598614}
@@ -1040,6 +1056,47 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10401056 return true;
10411057}
10421058
1059+ static void blk_zone_wplug_handle_native_zone_append (struct bio * bio )
1060+ {
1061+ struct gendisk * disk = bio -> bi_bdev -> bd_disk ;
1062+ struct blk_zone_wplug * zwplug ;
1063+ unsigned long flags ;
1064+
1065+ /*
1066+ * We have native support for zone append operations, so we are not
1067+ * going to handle @bio through plugging. However, we may already have a
1068+ * zone write plug for the target zone if that zone was previously
1069+ * partially written using regular writes. In such case, we risk leaving
1070+ * the plug in the disk hash table if the zone is fully written using
1071+ * zone append operations. Avoid this by removing the zone write plug.
1072+ */
1073+ zwplug = disk_get_zone_wplug (disk , bio -> bi_iter .bi_sector );
1074+ if (likely (!zwplug ))
1075+ return ;
1076+
1077+ spin_lock_irqsave (& zwplug -> lock , flags );
1078+
1079+ /*
1080+ * We are about to remove the zone write plug. But if the user
1081+ * (mistakenly) has issued regular writes together with native zone
1082+ * append, we must aborts the writes as otherwise the plugged BIOs would
1083+ * not be executed by the plug BIO work as disk_get_zone_wplug() will
1084+ * return NULL after the plug is removed. Aborting the plugged write
1085+ * BIOs is consistent with the fact that these writes will most likely
1086+ * fail anyway as there is no ordering guarantees between zone append
1087+ * operations and regular write operations.
1088+ */
1089+ if (!bio_list_empty (& zwplug -> bio_list )) {
1090+ pr_warn_ratelimited ("%s: zone %u: Invalid mix of zone append and regular writes\n" ,
1091+ disk -> disk_name , zwplug -> zone_no );
1092+ disk_zone_wplug_abort (zwplug );
1093+ }
1094+ disk_remove_zone_wplug (disk , zwplug );
1095+ spin_unlock_irqrestore (& zwplug -> lock , flags );
1096+
1097+ disk_put_zone_wplug (zwplug );
1098+ }
1099+
10431100/**
10441101 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
10451102 * @bio: The BIO being submitted
@@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
10961153 */
10971154 switch (bio_op (bio )) {
10981155 case REQ_OP_ZONE_APPEND :
1099- if (!bdev_emulates_zone_append (bdev ))
1156+ if (!bdev_emulates_zone_append (bdev )) {
1157+ blk_zone_wplug_handle_native_zone_append (bio );
11001158 return false;
1159+ }
11011160 fallthrough ;
11021161 case REQ_OP_WRITE :
11031162 case REQ_OP_WRITE_ZEROES :
@@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
12841343{
12851344 unsigned int i ;
12861345
1346+ atomic_set (& disk -> nr_zone_wplugs , 0 );
12871347 disk -> zone_wplugs_hash_bits =
12881348 min (ilog2 (pool_size ) + 1 , BLK_ZONE_WPLUG_MAX_HASH_BITS );
12891349
@@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
13381398 }
13391399 }
13401400
1401+ WARN_ON_ONCE (atomic_read (& disk -> nr_zone_wplugs ));
13411402 kfree (disk -> zone_wplugs_hash );
13421403 disk -> zone_wplugs_hash = NULL ;
13431404 disk -> zone_wplugs_hash_bits = 0 ;
@@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
15501611 }
15511612
15521613 /*
1553- * We need to track the write pointer of all zones that are not
1554- * empty nor full. So make sure we have a zone write plug for
1555- * such zone if the device has a zone write plug hash table.
1614+ * If the device needs zone append emulation, we need to track the
1615+ * write pointer of all zones that are not empty nor full. So make sure
1616+ * we have a zone write plug for such zone if the device has a zone
1617+ * write plug hash table.
15561618 */
1557- if (!disk -> zone_wplugs_hash )
1619+ if (!queue_emulates_zone_append ( disk -> queue ) || ! disk -> zone_wplugs_hash )
15581620 return 0 ;
15591621
15601622 disk_zone_wplug_sync_wp_offset (disk , zone );
0 commit comments