@@ -417,272 +417,12 @@ struct bch_set {
417417 struct bch_val v ;
418418};
419419
420- /* Extents */
421-
422- /*
423- * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
424- * preceded by checksum/compression information (bch_extent_crc32 or
425- * bch_extent_crc64).
426- *
427- * One major determining factor in the format of extents is how we handle and
428- * represent extents that have been partially overwritten and thus trimmed:
429- *
430- * If an extent is not checksummed or compressed, when the extent is trimmed we
431- * don't have to remember the extent we originally allocated and wrote: we can
432- * merely adjust ptr->offset to point to the start of the data that is currently
433- * live. The size field in struct bkey records the current (live) size of the
434- * extent, and is also used to mean "size of region on disk that we point to" in
435- * this case.
436- *
437- * Thus an extent that is not checksummed or compressed will consist only of a
438- * list of bch_extent_ptrs, with none of the fields in
439- * bch_extent_crc32/bch_extent_crc64.
440- *
441- * When an extent is checksummed or compressed, it's not possible to read only
442- * the data that is currently live: we have to read the entire extent that was
443- * originally written, and then return only the part of the extent that is
444- * currently live.
445- *
446- * Thus, in addition to the current size of the extent in struct bkey, we need
447- * to store the size of the originally allocated space - this is the
448- * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
449- * when the extent is trimmed, instead of modifying the offset field of the
450- * pointer, we keep a second smaller offset field - "offset into the original
451- * extent of the currently live region".
452- *
453- * The other major determining factor is replication and data migration:
454- *
455- * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
456- * write, we will initially write all the replicas in the same format, with the
457- * same checksum type and compression format - however, when copygc runs later (or
458- * tiering/cache promotion, anything that moves data), it is not in general
459- * going to rewrite all the pointers at once - one of the replicas may be in a
460- * bucket on one device that has very little fragmentation while another lives
461- * in a bucket that has become heavily fragmented, and thus is being rewritten
462- * sooner than the rest.
463- *
464- * Thus it will only move a subset of the pointers (or in the case of
465- * tiering/cache promotion perhaps add a single pointer without dropping any
466- * current pointers), and if the extent has been partially overwritten it must
467- * write only the currently live portion (or copygc would not be able to reduce
468- * fragmentation!) - which necessitates a different bch_extent_crc format for
469- * the new pointer.
470- *
471- * But in the interests of space efficiency, we don't want to store one
472- * bch_extent_crc for each pointer if we don't have to.
473- *
474- * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
475- * bch_extent_ptrs appended arbitrarily one after the other. We determine the
476- * type of a given entry with a scheme similar to utf8 (except we're encoding a
477- * type, not a size), encoding the type in the position of the first set bit:
478- *
479- * bch_extent_crc32 - 0b1
480- * bch_extent_ptr - 0b10
481- * bch_extent_crc64 - 0b100
482- *
483- * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
484- * bch_extent_crc64 is the least constrained).
485- *
486- * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
487- * until the next bch_extent_crc32/64.
488- *
489- * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
490- * is neither checksummed nor compressed.
491- */
492-
493420/* 128 bits, sufficient for cryptographic MACs: */
494421struct bch_csum {
495422 __le64 lo ;
496423 __le64 hi ;
497424} __packed __aligned (8 );
498425
499- #define BCH_EXTENT_ENTRY_TYPES () \
500- x(ptr, 0) \
501- x(crc32, 1) \
502- x(crc64, 2) \
503- x(crc128, 3) \
504- x(stripe_ptr, 4) \
505- x(rebalance, 5)
506- #define BCH_EXTENT_ENTRY_MAX 6
507-
508- enum bch_extent_entry_type {
509- #define x(f, n) BCH_EXTENT_ENTRY_##f = n,
510- BCH_EXTENT_ENTRY_TYPES ()
511- #undef x
512- };
513-
514- /* Compressed/uncompressed size are stored biased by 1: */
515- struct bch_extent_crc32 {
516- #if defined(__LITTLE_ENDIAN_BITFIELD )
517- __u32 type :2 ,
518- _compressed_size :7 ,
519- _uncompressed_size :7 ,
520- offset :7 ,
521- _unused :1 ,
522- csum_type :4 ,
523- compression_type :4 ;
524- __u32 csum ;
525- #elif defined (__BIG_ENDIAN_BITFIELD )
526- __u32 csum ;
527- __u32 compression_type :4 ,
528- csum_type :4 ,
529- _unused :1 ,
530- offset :7 ,
531- _uncompressed_size :7 ,
532- _compressed_size :7 ,
533- type :2 ;
534- #endif
535- } __packed __aligned (8 );
536-
537- #define CRC32_SIZE_MAX (1U << 7)
538- #define CRC32_NONCE_MAX 0
539-
540- struct bch_extent_crc64 {
541- #if defined(__LITTLE_ENDIAN_BITFIELD )
542- __u64 type :3 ,
543- _compressed_size :9 ,
544- _uncompressed_size :9 ,
545- offset :9 ,
546- nonce :10 ,
547- csum_type :4 ,
548- compression_type :4 ,
549- csum_hi :16 ;
550- #elif defined (__BIG_ENDIAN_BITFIELD )
551- __u64 csum_hi :16 ,
552- compression_type :4 ,
553- csum_type :4 ,
554- nonce :10 ,
555- offset :9 ,
556- _uncompressed_size :9 ,
557- _compressed_size :9 ,
558- type :3 ;
559- #endif
560- __u64 csum_lo ;
561- } __packed __aligned (8 );
562-
563- #define CRC64_SIZE_MAX (1U << 9)
564- #define CRC64_NONCE_MAX ((1U << 10) - 1)
565-
566- struct bch_extent_crc128 {
567- #if defined(__LITTLE_ENDIAN_BITFIELD )
568- __u64 type :4 ,
569- _compressed_size :13 ,
570- _uncompressed_size :13 ,
571- offset :13 ,
572- nonce :13 ,
573- csum_type :4 ,
574- compression_type :4 ;
575- #elif defined (__BIG_ENDIAN_BITFIELD )
576- __u64 compression_type :4 ,
577- csum_type :4 ,
578- nonce :13 ,
579- offset :13 ,
580- _uncompressed_size :13 ,
581- _compressed_size :13 ,
582- type :4 ;
583- #endif
584- struct bch_csum csum ;
585- } __packed __aligned (8 );
586-
587- #define CRC128_SIZE_MAX (1U << 13)
588- #define CRC128_NONCE_MAX ((1U << 13) - 1)
589-
590- /*
591- * @reservation - pointer hasn't been written to, just reserved
592- */
593- struct bch_extent_ptr {
594- #if defined(__LITTLE_ENDIAN_BITFIELD )
595- __u64 type :1 ,
596- cached :1 ,
597- unused :1 ,
598- unwritten :1 ,
599- offset :44 , /* 8 petabytes */
600- dev :8 ,
601- gen :8 ;
602- #elif defined (__BIG_ENDIAN_BITFIELD )
603- __u64 gen :8 ,
604- dev :8 ,
605- offset :44 ,
606- unwritten :1 ,
607- unused :1 ,
608- cached :1 ,
609- type :1 ;
610- #endif
611- } __packed __aligned (8 );
612-
613- struct bch_extent_stripe_ptr {
614- #if defined(__LITTLE_ENDIAN_BITFIELD )
615- __u64 type :5 ,
616- block :8 ,
617- redundancy :4 ,
618- idx :47 ;
619- #elif defined (__BIG_ENDIAN_BITFIELD )
620- __u64 idx :47 ,
621- redundancy :4 ,
622- block :8 ,
623- type :5 ;
624- #endif
625- };
626-
627- struct bch_extent_rebalance {
628- #if defined(__LITTLE_ENDIAN_BITFIELD )
629- __u64 type :6 ,
630- unused :34 ,
631- compression :8 , /* enum bch_compression_opt */
632- target :16 ;
633- #elif defined (__BIG_ENDIAN_BITFIELD )
634- __u64 target :16 ,
635- compression :8 ,
636- unused :34 ,
637- type :6 ;
638- #endif
639- };
640-
641- union bch_extent_entry {
642- #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
643- unsigned long type ;
644- #elif __BITS_PER_LONG == 32
645- struct {
646- unsigned long pad ;
647- unsigned long type ;
648- };
649- #else
650- #error edit for your odd byteorder.
651- #endif
652-
653- #define x (f , n ) struct bch_extent_##f f;
654- BCH_EXTENT_ENTRY_TYPES ()
655- #undef x
656- };
657-
658- struct bch_btree_ptr {
659- struct bch_val v ;
660-
661- __u64 _data [0 ];
662- struct bch_extent_ptr start [];
663- } __packed __aligned (8 );
664-
665- struct bch_btree_ptr_v2 {
666- struct bch_val v ;
667-
668- __u64 mem_ptr ;
669- __le64 seq ;
670- __le16 sectors_written ;
671- __le16 flags ;
672- struct bpos min_key ;
673- __u64 _data [0 ];
674- struct bch_extent_ptr start [];
675- } __packed __aligned (8 );
676-
677- LE16_BITMASK (BTREE_PTR_RANGE_UPDATED , struct bch_btree_ptr_v2 , flags , 0 , 1 );
678-
679- struct bch_extent {
680- struct bch_val v ;
681-
682- __u64 _data [0 ];
683- union bch_extent_entry start [];
684- } __packed __aligned (8 );
685-
686426struct bch_reservation {
687427 struct bch_val v ;
688428
@@ -691,25 +431,6 @@ struct bch_reservation {
691431 __u8 pad [3 ];
692432} __packed __aligned (8 );
693433
694- /* Maximum size (in u64s) a single pointer could be: */
695- #define BKEY_EXTENT_PTR_U64s_MAX \
696- ((sizeof(struct bch_extent_crc128) + \
697- sizeof(struct bch_extent_ptr)) / sizeof(__u64))
698-
699- /* Maximum possible size of an entire extent value: */
700- #define BKEY_EXTENT_VAL_U64s_MAX \
701- (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
702-
703- /* Maximum possible size of an entire extent, key + value: */
704- #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
705-
706- /* Btree pointers don't carry around checksums: */
707- #define BKEY_BTREE_PTR_VAL_U64s_MAX \
708- ((sizeof(struct bch_btree_ptr_v2) + \
709- sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
710- #define BKEY_BTREE_PTR_U64s_MAX \
711- (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
712-
713434struct bch_backpointer {
714435 struct bch_val v ;
715436 __u8 btree_id ;
@@ -720,6 +441,8 @@ struct bch_backpointer {
720441 struct bpos pos ;
721442} __packed __aligned (8 );
722443
444+ #include "extents_format.h"
445+
723446/* Reflink: */
724447
725448struct bch_reflink_p {
0 commit comments