diff --git a/.gitmodules b/.gitmodules index e69de29..0d6c25a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "nih/generics-sop"] + path = nih/generics-sop + url = https://github.com/Taneb/generics-sop.git diff --git a/cabal.project b/cabal.project index 97a1441..0bd277f 100644 --- a/cabal.project +++ b/cabal.project @@ -1,4 +1,4 @@ -packages: . +packages: ., nih/* package coda-lsp optimization: False diff --git a/nih/generics-sop b/nih/generics-sop new file mode 160000 index 0000000..25df5cc --- /dev/null +++ b/nih/generics-sop @@ -0,0 +1 @@ +Subproject commit 25df5ccaa28e66d4a8435e21d01cfe5d0e40efb8 diff --git a/src/Data/Binary/Succinct.hs b/src/Data/Binary/Succinct.hs index bb18ae9..833deba 100644 --- a/src/Data/Binary/Succinct.hs +++ b/src/Data/Binary/Succinct.hs @@ -2,8 +2,10 @@ module Data.Binary.Succinct ( module Data.Binary.Succinct.Get , module Data.Binary.Succinct.Put , module Data.Binary.Succinct.Blob + , module Data.Binary.Succinct.Size ) where import Data.Binary.Succinct.Get import Data.Binary.Succinct.Put import Data.Binary.Succinct.Blob +import Data.Binary.Succinct.Size diff --git a/src/Data/Binary/Succinct/Blob.hs b/src/Data/Binary/Succinct/Blob.hs index 428c57a..8dcb95a 100644 --- a/src/Data/Binary/Succinct/Blob.hs +++ b/src/Data/Binary/Succinct/Blob.hs @@ -2,117 +2,102 @@ module Data.Binary.Succinct.Blob ( Blob(..) , runPut -- guts - , metaBitCount - , shapeBitCount - , contentByteCount , inspectMeta , inspectShape , inspectContent , inspectBlob ) where -import Control.Monad (replicateM_) -import Data.Word +import Data.Binary.Succinct.Orphans () +import Data.Binary.Succinct.Put import Data.Bits -import Data.Bits.Coding as Bits -import Data.Bytes.Put import Data.ByteString as Strict import Data.ByteString.Builder as Builder import Data.ByteString.Lazy as Lazy +import Data.Semigroup import qualified Data.Vector.Storable as Storable +import Data.Vector.Storable.ByteString +import Data.Word import HaskellWorks.Data.BalancedParens.RangeMinMax as BP -import HaskellWorks.Data.RankSelect.CsPoppy as CsPoppy import HaskellWorks.Data.RankSelect.Base.Rank0 -import Data.Vector.Storable.ByteString -import HaskellWorks.Data.BalancedParens - -import Data.Binary.Succinct.Put -import Data.Binary.Succinct.Orphans () +import HaskellWorks.Data.RankSelect.CsPoppy as CsPoppy data Blob = Blob - { blobMeta :: CsPoppy + { blobSize :: Word64 + , blobMeta :: CsPoppy , blobShape :: RangeMinMax (Storable.Vector Word64) , blobContent :: Strict.ByteString } deriving Show runPutM :: PutM a -> (a, Blob) -runPutM ma = case unPutM ma' (S 0 0 0 0) of - Result a _ (W m s c) -> (a, Blob - { blobMeta = makeCsPoppy $ ws m - , blobShape = mkRangeMinMax $ ws s +runPutM ma = case unPutM ma (S 0 0 0 0) of + Result a (S i b j b') (W m s c n) -> (a, Blob + { blobSize = n + , blobMeta = makeCsPoppy $ ws $ flush8 i b m + , blobShape = mkRangeMinMax $ ws $ flush8 j b' s , blobContent = bs c }) where - pad = replicateM_ 7 $ putWord8 0 - flush8 = Bits.putAligned pad + flush :: Int -> Word8 -> Builder -> Builder + flush 0 _ xs = xs + flush _ b xs = xs <> word8 b + + flush8 :: Int -> Word8 -> Builder -> Builder + flush8 r k d = flush r k d <> stimes (7 :: Int) (word8 0) + + trim8 :: Strict.ByteString -> Strict.ByteString trim8 b = Strict.take (Strict.length b .&. complement 7) b + + bs :: Builder -> Strict.ByteString bs = Lazy.toStrict . Builder.toLazyByteString + -- TODO: use a custom untrimmed strategy or write a dedicated + -- builder -> strict bs combinator that uses a doubling buffer + -- size? we could modify that to cram everything into one buffer + -- in the end + + ws :: Builder -> Storable.Vector Word64 ws = byteStringToVector . trim8 . bs - ma' = do - result <- ma - meta flush8 - shape flush8 - -- content pad - return result runPut :: Put -> Blob runPut = snd . runPutM -rank1_ :: Rank1 v => v -> Int -> Word64 +{- +rank1_ :: Rank1 v => v -> Word64 -> Word64 rank1_ s i | i <= 0 = 0 - | otherwise = rank1 s (fromIntegral i) + | otherwise = rank1 s i -rank0_ :: Rank0 v => v -> Int -> Word64 +rank0_ :: Rank0 v => v -> Word64 -> Word64 rank0_ s i | i <= 0 = 0 - | otherwise = rank0 s (fromIntegral i) - -access :: Rank1 v => v -> Int -> Bool -access s i = toEnum $ fromIntegral $ rank1_ s i - rank1_ s (i - 1) + | otherwise = rank0 s i +-} --- Compute how many bits the shape index takes up --- We use findClose on the first paren to tell us where the last meaningful paren is -shapeBitCount :: Blob -> Int -shapeBitCount (Blob _ s _) = case findClose s 1 of - Just n -> fromIntegral n - Nothing -> 0 +access :: Rank1 v => v -> Word64 -> Word64 +access s 1 = rank1 s 1 +access s n = rank1 s n - rank1 s (n - 1) --- Compute how many bytes the content takes up -contentByteCount :: Blob -> Int -contentByteCount (Blob _ _ c) = Strict.length c - --- Compute how many bits are non-garbage in our meta index -metaBitCount :: Blob -> Int -metaBitCount b = contentByteCount b + shapeBitCount b +as :: Rank1 v => a -> a -> v -> Word64 -> a +as l r s i = case access s i of + 0 -> l + _ -> r -- Print out a string of S's and D's, corresponding to Shape or Data, from the meta index inspectMeta :: Blob -> String -inspectMeta b@(Blob m _ _) = do - i <- [1..(metaBitCount b)] - case access m i of - True -> "S" - False -> "D" +inspectMeta (Blob n m _ _) = as 'D' 'S' m <$> [1..n] -- Print out the balanced parentheses representation of our shape index inspectShape :: Blob -> String -inspectShape b@(Blob _ s _) = do - i <- [1..(shapeBitCount b)] - case access s i of - True -> "(" - False -> ")" +inspectShape (Blob n m s _) = as ')' '(' s <$> [1..rank1 m n] -- Print out our raw content buffer --- Can't figure out how to print strict bytestrings nicely... inspectContent :: Blob -> String -inspectContent (Blob _ _ _) = undefined +inspectContent (Blob _ _ _ c) = show c -- Print out a representation of the entire blob, interleaving shape and content inspectBlob :: Blob -> String -inspectBlob b@(Blob m s c) = do - i <- [1..(metaBitCount b)] +inspectBlob (Blob n m s c) = do + i <- [1..n] case access m i of - True -> case access s (fromIntegral $ rank1_ m i) of - True -> "(" - False -> ")" - False -> "{" ++ show (Strict.index c $ (fromIntegral $ rank0_ m i) - 1) ++ "}" \ No newline at end of file + 0 -> '{' : shows (Strict.index c $ fromIntegral $ rank0 m i - 1) "}" + _ -> [as ')' '(' s $ rank1 m i] diff --git a/src/Data/Binary/Succinct/Generics.hs b/src/Data/Binary/Succinct/Generics.hs new file mode 100644 index 0000000..e3f45d3 --- /dev/null +++ b/src/Data/Binary/Succinct/Generics.hs @@ -0,0 +1,228 @@ +{-# language GADTs #-} +{-# language PolyKinds #-} +{-# language DataKinds #-} +{-# language ConstraintKinds #-} +{-# language KindSignatures #-} +{-# language TypeOperators #-} +{-# language FlexibleContexts #-} +{-# language FunctionalDependencies #-} +{-# language FlexibleInstances #-} +{-# language MultiParamTypeClasses #-} +{-# language TypeApplications #-} +{-# language ScopedTypeVariables #-} +{-# language AllowAmbiguousTypes #-} +{-# language UndecidableInstances #-} +{-# language PatternSynonyms #-} + +module Data.Binary.Succinct.Generics + ( + -- * API + Shape(..), Shaped, shape + , SBool(..) + , SDecidedStrictness(..) + -- * Annotations + , Annotation(..) + , ShowAnn(..) + , Unannotated(..) + , GenType(..) + -- * Implementation + , GShape(..) + , ReifiedBool(..) + , GShaped(..) + , ReifiedDecidedStrictness(..) + ) where + +import GHC.Generics +import GHC.Types +import Data.Proxy + +-------------------------------------------------------------------------------- +-- * Reified Bools +-------------------------------------------------------------------------------- + +data SBool (t :: Bool) where + STrue :: SBool 'True + SFalse :: SBool 'False + +instance Show (SBool t) where + showsPrec _ STrue = showString "STrue" + showsPrec _ SFalse = showString "SFalse" + +class ReifiedBool (t :: Bool) where + reflectBool :: SBool t + +instance ReifiedBool 'True where + reflectBool = STrue + +instance ReifiedBool 'False where + reflectBool = SFalse + +-------------------------------------------------------------------------------- +-- * Reified Strictness +-------------------------------------------------------------------------------- + +data SDecidedStrictness (t :: DecidedStrictness) where + SDecidedLazy :: SDecidedStrictness 'DecidedLazy + SDecidedStrict :: SDecidedStrictness 'DecidedStrict + SDecidedUnpack :: SDecidedStrictness 'DecidedUnpack + +instance Show (SDecidedStrictness t) where + showsPrec d SDecidedLazy = showString "SDecidedLazy" + showsPrec d SDecidedStrict = showString "SDecidedStrict" + showsPrec d SDecidedUnpack = showString "SDecidedUnpack" + +class ReifiedDecidedStrictness (t :: DecidedStrictness) where + reflectDecidedStrictness :: SDecidedStrictness t + +instance ReifiedDecidedStrictness DecidedLazy where + reflectDecidedStrictness = SDecidedLazy + +instance ReifiedDecidedStrictness DecidedStrict where + reflectDecidedStrictness = SDecidedStrict + +instance ReifiedDecidedStrictness DecidedUnpack where + reflectDecidedStrictness = SDecidedUnpack + +-------------------------------------------------------------------------------- +-- * Reified Strictness +-------------------------------------------------------------------------------- + +data GenType = Ty | Constructors | Fields | Field + +data GShape ann (ty :: GenType) p t where + Type :: ann 'Ty t -> SBool nt -> GShape ann 'Constructors p t -> GShape ann 'Ty p (M1 D ('MetaData dc mdl pkg nt) t) + V :: GShape ann 'Constructors p V1 + S :: GShape ann 'Constructors p l -> GShape ann 'Constructors p r -> GShape ann 'Constructors p (l :+: r) + Con :: ann 'Constructors t -> GShape ann 'Fields p t -> GShape ann 'Constructors p (M1 C ci t) + P :: GShape ann 'Fields p l -> GShape ann 'Fields p r -> GShape ann 'Fields p (l :*: r) + Sel :: ann 'Fields t -> SDecidedStrictness ds -> GShape ann 'Field p t -> GShape ann 'Fields p (M1 S ('MetaSel fn su ss ds) t) + U :: GShape ann 'Fields p U1 + K :: p c => Proxy c -> GShape ann 'Field p (K1 i c) + +-------------------------------------------------------------------------------- +-- * ShowAnn +-------------------------------------------------------------------------------- + +class ShowAnn (ann :: GenType -> (* -> *) -> *) where + showsPrecAnn :: Int -> ann ty t -> ShowS + +instance ShowAnn ann => Show (GShape ann ty p t) where + showsPrec d (Type a nt cs) = showParen (d > 10) $ + showString "Type " . showsPrecAnn 11 a . showChar ' ' . showsPrec 11 nt . showChar ' ' . showsPrec 11 cs + showsPrec d (S l r) = showParen (d > 10) $ + showString "S " . showsPrec 11 l . showChar ' ' . showsPrec 11 r + showsPrec d (Con a b) = showParen (d > 10) $ + showString "Con " . showsPrecAnn 11 a . showChar ' ' . showsPrec 11 b + showsPrec _ V = showChar 'V' + showsPrec d (P l r) = showParen (d > 10) $ + showString "P " . showsPrec 11 l . showChar ' ' . showsPrec 11 r + showsPrec d (Sel a s b) = showParen (d > 10) $ showString "Sel " . showsPrecAnn 11 a . showChar ' ' . showsPrec 11 s . showChar ' ' . showsPrec 11 b + showsPrec _ U = showChar 'U' + showsPrec d (K Proxy) = showParen (d > 10) $ showString "K Proxy" + +-------------------------------------------------------------------------------- +-- * Annotations +-------------------------------------------------------------------------------- + +class Annotation (ann :: GenType -> (* -> *) -> *) (p :: * -> Constraint) where + typeAnn :: SBool nt -> GShape ann 'Constructors p t -> ann 'Ty t + conAnn :: GShape ann 'Fields p t -> ann 'Constructors t + selAnn :: SDecidedStrictness ds -> GShape ann 'Field p t -> ann 'Fields t + +-------------------------------------------------------------------------------- +-- * Smart Constructors +-------------------------------------------------------------------------------- + +-- smart constructor for ignoring annotations +pattern Type_ + :: Annotation ann p + => () + => SBool nt + -> GShape ann 'Constructors p t + -> GShape ann 'Ty p (M1 D ('MetaData dc mdl pkg nt) t) +pattern Type_ nt cs <- Type _a nt cs where + Type_ nt cs = Type (typeAnn nt cs) nt cs + +pattern Con_ + :: Annotation ann p + => () + => GShape ann 'Fields p t + -> GShape ann 'Constructors p (M1 C ci t) +pattern Con_ fs <- Con _a fs where + Con_ fs = Con (conAnn fs) fs + +pattern Sel_ + :: Annotation ann p + => () + => SDecidedStrictness ds + -> GShape ann 'Field p t + -> GShape ann 'Fields p (M1 S ('MetaSel fn su ss ds) t) +pattern Sel_ ds f <- Sel _a ds f where + Sel_ ds f = Sel (selAnn ds f) ds f + +instance Eq (GShape ann ty p t) where + _ == _ = True + +instance Ord (GShape ann ty p t) where + compare _ _ = EQ + +-------------------------------------------------------------------------------- +-- * Shape Reflection +-------------------------------------------------------------------------------- + +shape :: forall p a ann. (Shaped p a, Annotation ann p) => Shape ann p a +shape = Shape $ gshape @('Ty) @p @(Rep a) + +newtype Shape ann p a = Shape { runShape :: GShape ann 'Ty p (Rep a) } + deriving Show + +instance Eq (Shape ann p a) where + _ == _ = True + +instance Ord (Shape ann p a) where + compare _ _ = EQ + +class (Generic a, GShaped 'Ty p (Rep a)) => Shaped p a +instance (Generic a, GShaped 'Ty p (Rep a)) => Shaped p a + +-------------------------------------------------------------------------------- +-- * Generic Shape Reflection +-------------------------------------------------------------------------------- + +class GShaped ty p t | t -> ty where + gshape :: Annotation ann p => GShape ann ty p t + +instance (ReifiedBool nt, GShaped Constructors p t) => GShaped Ty p (M1 D ('MetaData dc md pkg nt) t) where + gshape = Type_ reflectBool (gshape @_ @p) + +instance (GShaped Constructors p l, GShaped Constructors p r) => GShaped Constructors p (l :+: r) where + gshape = S (gshape @_ @p) (gshape @_ @p) + +instance GShaped Fields p t => GShaped Constructors p (M1 C ci t) where + gshape = Con_ (gshape @_ @p) + +instance (GShaped Fields p l, GShaped Fields p r) => GShaped Fields p (l :*: r) where + gshape = P (gshape @_ @p) (gshape @_ @p) + +instance (ReifiedDecidedStrictness ds, GShaped Field p t) => GShaped Fields p (M1 S ('MetaSel fn su ss ds) t) where + gshape = Sel_ reflectDecidedStrictness (gshape @_ @p) where + +instance GShaped Fields p U1 where + gshape = U + +instance p c => GShaped Field p (K1 i c) where + gshape = K Proxy + +-------------------------------------------------------------------------------- +-- * Unnannotated +-------------------------------------------------------------------------------- + +data Unannotated (ty :: GenType) (t :: * -> *) = Unannotated + +instance ShowAnn Unannotated where + showsPrecAnn _ Unannotated = showString "Unannotated" + +instance Annotation Unannotated p where + typeAnn _ _ = Unannotated + conAnn _ = Unannotated + selAnn _ _ = Unannotated diff --git a/src/Data/Binary/Succinct/Get.hs b/src/Data/Binary/Succinct/Get.hs index c042c4a..be50246 100644 --- a/src/Data/Binary/Succinct/Get.hs +++ b/src/Data/Binary/Succinct/Get.hs @@ -8,16 +8,23 @@ {-# language ScopedTypeVariables #-} {-# language TypeApplications #-} {-# language TypeOperators #-} +{-# options_ghc -Wno-all #-} -- shut up for now module Data.Binary.Succinct.Get ( Get(..) - , get8 +{- , Gettable(..) + -- guts , liftGet + , get8 + , rest + , focused +-} ) where import Control.Monad (ap) import Data.Binary.Succinct.Blob import Data.ByteString as Strict +import Data.ByteString.UTF8 as UTF8 import Data.Functor.Compose as F import Data.Functor.Product as F import Data.Functor.Sum as F @@ -34,6 +41,7 @@ import qualified Generics.SOP.GGP as SOP import HaskellWorks.Data.BalancedParens.RangeMinMax as BP import HaskellWorks.Data.BalancedParens.BalancedParens as BP +import HaskellWorks.Data.BalancedParens as BP import HaskellWorks.Data.RankSelect.Base.Rank0 import HaskellWorks.Data.RankSelect.Base.Rank1 import HaskellWorks.Data.RankSelect.Base.Select1 @@ -41,12 +49,30 @@ import HaskellWorks.Data.RankSelect.Base.Select1 newtype Get a = Get { runGet :: Blob -> Word64 -> a } deriving Functor +-- () -> -- no bytes +-- (a,b) -> (a)b -- storable optimizations would let us skip the parens if either were fixed sized? +-- (a,b,c) -> (a)(b)c +-- Left a -> 0a +-- Right b -> 1b +-- Nothing -> 0 +-- Just a -> 1a +-- [] -> 0 +-- (a:as) -> 1(a)as +-- ("hello",65) -> (1(h)1(e)1(l)1(l)1(o)0)65 +-- +-- (1h1e1l1l1o0)65 with storable optimizations? +-- +-- storable optimizations would let us skip the parens around the child entirely +-- for things that store as fixed size + instance Applicative Get where pure a = Get $ \_ _ -> a (<*>) = ap instance Monad Get where - m >>= k = Get $ \e s -> runGet (k (runGet m e s)) e s + m >>= k = Get $ \e i -> runGet (k (runGet m e i)) e i + +{- shapely :: (RangeMinMax (Storable.Vector Word64) -> Word64 -> Maybe Word64) @@ -75,18 +101,40 @@ liftGet g = Get $ \(Blob meta _ content) i -> Left e -> error e Right a -> a +rest :: Get ByteString +rest = Get $ \(Blob meta _ content) i -> Strict.drop (fromIntegral $ rank0 meta i) content + +focused :: Get ByteString +focused = Get $ \ (Blob meta shape content) i -> let + j = rank0 meta i + ending = Strict.drop (fromIntegral j) content + in case rank0 meta . select1 meta <$> findClose shape (i - j) of + Just k -> Strict.take (fromIntegral $ k - j) ending + Nothing -> ending + -------------------------------------------------------------------------------- -- * Gettable -------------------------------------------------------------------------------- +{- + general case: + * Store nth data constructor @Foo a b c@ as @(n(a)(b)(c))@ + + optimizations: + * if there is only one field, this doesn't store the child parens: @Just a -> (1a)@ + * if there is no field, no child parens at all. @Nothing -> (0)@ + * if there is only one constructor, don't store the tag: @((a,b)) -> (a(b))@ + + furter optimizations possible: + * if all parts have known size, no parens at all (including parent parens) +-} + class Gettable a where get :: Get a - default get :: (G.Generic a, SOP.GTo a, SOP.All2 Gettable (SOP.GCode a)) - => Get a + default get :: (G.Generic a, SOP.GTo a, SOP.All2 Gettable (SOP.GCode a)) => Get a get = gget -gget :: forall a. (G.Generic a, SOP.GTo a, SOP.All2 Gettable (SOP.GCode a)) - => Get a +gget :: forall a. (G.Generic a, SOP.GTo a, SOP.All2 Gettable (SOP.GCode a)) => Get a gget = case SOP.shape :: SOP.Shape (SOP.GCode a) of SOP.ShapeCons SOP.ShapeNil -> SOP.gto . SOP.SOP . SOP.Z <$> move firstChild (products SOP.shape) @@ -114,9 +162,16 @@ gget = case SOP.shape :: SOP.Shape (SOP.GCode a) of return $ SOP.I a SOP.:* as instance Gettable () + instance Gettable Word8 where get = get8 +instance Gettable Char where + get = tweak <$> focused where + tweak bs = case decode bs of + Just (c,_) -> c + Nothing -> error "bad input" + instance Gettable Word16 where get = liftGet S.getWord16le instance Gettable Word32 where get = liftGet S.getWord32le instance Gettable Word64 where get = liftGet S.getWord64le @@ -124,6 +179,7 @@ instance Gettable Int8 where get = liftGet S.getInt8 instance Gettable Int16 where get = liftGet S.getInt16le instance Gettable Int32 where get = liftGet S.getInt32le instance Gettable Int64 where get = liftGet S.getInt64le +-- TODO: Gettable Integer, Gettable Int? instance Gettable (Proxy a) instance Gettable a => Gettable (Maybe a) @@ -133,3 +189,5 @@ instance (Gettable a, Gettable b) => Gettable (Either a b) instance Gettable (f (g a)) => Gettable (F.Compose f g a) instance (Gettable (f a), Gettable (g a)) => Gettable (F.Product f g a) instance (Gettable (f a), Gettable (g a)) => Gettable (F.Sum f g a) + +-} diff --git a/src/Data/Binary/Succinct/Put.hs b/src/Data/Binary/Succinct/Put.hs index ae729ba..b19b021 100644 --- a/src/Data/Binary/Succinct/Put.hs +++ b/src/Data/Binary/Succinct/Put.hs @@ -3,6 +3,9 @@ {-# language FlexibleContexts #-} {-# language BangPatterns #-} {-# language EmptyCase #-} +{-# language AllowAmbiguousTypes #-} +{-# language TypeApplications #-} +{-# language ScopedTypeVariables #-} {-# language GADTs #-} {-# language TypeOperators #-} {-# options_ghc -funbox-strict-fields #-} @@ -21,16 +24,16 @@ module Data.Binary.Succinct.Put {- .Internal -} , gput ) where -import Control.Monad (ap, replicateM_) +import Control.Monad (ap) import Data.Bits -import Data.Bits.Coding -import Data.Bytes.Put +import Data.Semigroup import Data.ByteString as Strict -import Data.ByteString.Builder +import Data.ByteString.Builder as Builder +import Data.ByteString.UTF8 as UTF8 import Data.Int import Data.Proxy import qualified Data.Serialize.Put as S --- import Data.Void +import Data.Void import Data.Word import qualified GHC.Generics as G import Data.Functor.Compose as F @@ -39,25 +42,16 @@ import Data.Functor.Sum as F import qualified Generics.SOP as SOP import qualified Generics.SOP.GGP as SOP -putLSB :: MonadPut m => Bool -> Coding m () -putLSB v = Coding $ \k i b -> - if i == 7 - then do - putWord8 (pushBit b i v) - k () 0 0 - else (k () $! i + 1) $! pushBit b i v - where - pushBit w i False = clearBit w i - pushBit w i True = setBit w i +import Data.Binary.Succinct.Size data S = S !Int !Word8 !Int !Word8 -data W = W !Builder !Builder !Builder +data W = W !Builder !Builder !Builder !Word64 instance Semigroup W where - W a b c <> W d e f = W (a <> d) (b <> e) (c <> f) + W a b c n <> W d e f m = W (a <> d) (b <> e) (c <> f) (n + m) instance Monoid W where - mempty = W mempty mempty mempty + mempty = W mempty mempty mempty 0 mappend = (<>) data Result a = Result a {-# UNPACK #-} !S {-# UNPACK #-} !W @@ -77,116 +71,160 @@ instance Monad PutM where Result a s' w -> case unPutM (f a) s' of Result b s'' w' -> Result b s'' (w <> w') --- your job is to properly deal with managing meta, shape and content coherently - -meta :: Coding S.PutM a -> PutM a -meta m = PutM $ \(S o1 d1 o2 d2) -> case S.runPutMBuilder (runCoding m go o1 d1) of - ((a,o1',d1'), builder) -> Result a (S o1' d1' o2 d2) (W builder mempty mempty) - where - go :: a -> Int -> Word8 -> S.PutM (a, Int, Word8) - go a o1' d1' = pure (a, o1', d1') - -shape :: Coding S.PutM a -> PutM a -shape m = PutM $ \(S o1 d1 o2 d2) -> case S.runPutMBuilder (runCoding m go o2 d2) of - ((a, o2', d2'), b) -> Result a (S o1 d1 o2' d2') (W mempty b mempty) - where - go :: a -> Int -> Word8 -> S.PutM (a, Int, Word8) - go a o2' d2' = pure (a, o2', d2') +push :: Bool -> Int -> Word8 -> (Builder, Int, Word8) +push v i b + | i == 7 = (Builder.word8 b', 0, 0) + | otherwise = (mempty, i + 1, b') + where b' = if v then setBit b i else b +{-# INLINE push #-} + +meta :: Bool -> PutM () +meta v = PutM $ \(S i b j c) -> case push v i b of + (m,i',b') -> Result () (S i' b' j c) $ W m mempty mempty 1 + +shape :: Bool -> PutM () +shape v = PutM $ \(S i b j c) -> case push v j c of + (s,j',c') -> case push True i b of + (m, i', b') -> Result () (S i' b' j' c') $ W m s mempty 1 + +-- push a run of 0s into the meta buffer +metas :: Int -> PutM () +metas k + | k <= 0 = pure () + | otherwise = PutM $ \(S i b j c) -> case divMod (i + k) 8 of + (0,r) -> Result () (S r b j c) $ W mempty mempty mempty (fromIntegral k) + (q,r) -> Result () (S r 0 j c) $ + W (Builder.word8 b <> stimesMonoid (q-1) (Builder.word8 0)) + mempty + mempty + (fromIntegral k) -- should this log how much is put and just automatically scribble into shape? -- PutM doesn't give us enough info to do that efficiently. content :: S.PutM a -> PutM a content m = PutM $ \s -> case S.runPutMBuilder m of - (a, b) -> Result a s (W mempty mempty b) - -{- - meta - 11110000110000111100000011 poppy, compact, not succinct - / \ - content shape - #1 #2 #2 'h' 'i' (((()()))()) - 000010111011 --} + (a, b) -> Result a s (W mempty mempty b 0) -putParen :: Bool -> Put -putParen p = do - meta $ putLSB True - shape $ putLSB p +-- meta +-- 11110000110000111100000011 poppy, compact, not succinct +-- / \ +-- content shape +-- #1 #2 #2 'h' 'i' (((()()))()) +-- 000010111011 putParens :: Put -> Put -putParens p = putParen True *> p <* putParen False +putParens p = shape True *> p <* shape False + +putBS :: ByteString -> Put +putBS bs = putN (Strict.length bs) (S.putByteString bs) put8 :: Word8 -> Put -put8 w = meta (putLSB False) *> content (putWord8 w) +put8 w = meta False *> content (S.putWord8 w) putN :: Int -> S.Put -> Put --- TODO: replace that replicateM_ !!! -putN i w = meta (replicateM_ i $ putLSB False) *> content w +putN i w = metas i *> content w putN_ :: S.Put -> Put --- TODO: replace that replicateM_ !!! -putN_ w = meta (replicateM_ n $ putLSB False) *> content (S.putByteString bs) - where - bs = S.runPut w - n = Strict.length bs +putN_ w = putN (Strict.length bs) (S.putByteString bs) where + bs = S.runPut w -------------------------------------------------------------------------------- -- * Puttable -------------------------------------------------------------------------------- -class Puttable a where +class Sized a => Puttable a where put :: a -> Put default put :: (G.Generic a, SOP.GFrom a, SOP.All2 Puttable (SOP.GCode a)) => a -> Put put = gput +{- +(((a,b),c),d) -- (((a)b)c)d +((a,b),(c,d)) -- ((a)b)(c)d +(a,(b,(c,d))) -- (a)(b)(c)d + +-- with b and c of known size: +(((a,b),c),d) -- (((a)b)c)d +((a,b),(c,d)) -- ((a)b)cd +(a,(b,(c,d))) -- (a)bcd +-} + gput :: (G.Generic a, SOP.GFrom a, SOP.All2 Puttable (SOP.GCode a)) => a -> Put gput xs0 = case SOP.lengthSList sop of + -- skip storing the data constructor when we have only one constructor + -- TODO: skip when we only have one _possible_ constructor (skip size=Any constructors) 1 -> case sop of - SOP.SOP (SOP.Z xs) -> putParens (products xs) + SOP.Z xs -> products xs _ -> error "the impossible happened" _ -> sums 0 sop where - sop = SOP.gfrom xs0 + SOP.SOP sop = SOP.gfrom xs0 - sums :: SOP.All2 Puttable xss => Word8 -> SOP.SOP SOP.I xss -> Put - sums !acc (SOP.SOP (SOP.Z xs)) = putParens (put8 acc *> products xs) - sums acc (SOP.SOP (SOP.S xss)) = sums (acc + 1) (SOP.SOP xss) + sums :: SOP.All2 Puttable xss => Word8 -> SOP.NS (SOP.NP SOP.I) xss -> Put + sums !acc (SOP.Z xs) = put8 acc *> products xs + sums acc (SOP.S xss) = sums (acc + 1) xss products :: SOP.All Puttable xs => SOP.NP SOP.I xs -> Put products SOP.Nil = pure () - products (SOP.I x SOP.:* xs) = putParens (put x) *> products xs + products (SOP.I x SOP.:* xs) = products1 x xs -instance Puttable Word64 where - put = putN 8 . S.putWord64le + -- the last field is written without parens + -- TODO: the last variable width field should be written without parens + products1 :: (Puttable x, SOP.All Puttable xs) => x -> SOP.NP SOP.I xs -> Put + products1 x SOP.Nil = put x + products1 x (SOP.I y SOP.:* ys) = putWithParens x *> products1 y ys -instance Puttable Word32 where - put = putN 4 . S.putWord32le +-- TODO: skip parens on fields from left to right when the field itself is strict all the way down +-- and can have its length calculated from its contents? +putWithParens :: forall a. Puttable a => a -> Put +putWithParens = case size @a of + SVariable -> putParens . put + _ -> put -instance Puttable Word16 where - put = putN 2 . S.putWord16le +-- (String,Int64,String) +-- ((a)bc) instance Puttable Word8 where put = putN 1 . S.putWord8 -instance Puttable Int64 where - put = putN 8 . S.putInt64le +instance Puttable Word16 where + put = putN 2 . S.putWord16le -instance Puttable Int32 where - put = putN 4 . S.putInt32le +instance Puttable Word32 where + put = putN 4 . S.putWord32le -instance Puttable Int16 where - put = putN 2 . S.putInt16le +instance Puttable Word64 where + put = putN 8 . S.putWord64le instance Puttable Int8 where put = putN 1 . S.putInt8 --- instance Puttable Void -- TODO: fix GSumFrom in Generics.SOP to allow V1 -instance Puttable () -instance Puttable (Proxy a) +instance Puttable Int16 where + put = putN 2 . S.putInt16le + +instance Puttable Int32 where + put = putN 4 . S.putInt32le + +instance Puttable Int64 where + put = putN 8 . S.putInt64le + +instance Puttable Char where + put = putBS . UTF8.fromString . pure + +instance Puttable Bool +instance (Puttable a, Puttable b) => Puttable (Either a b) instance Puttable a => Puttable (Maybe a) +instance Puttable Ordering +instance Puttable (Proxy a) +--instance Puttable Void instance Puttable a => Puttable [a] +instance Puttable () instance (Puttable a, Puttable b) => Puttable (a, b) -instance (Puttable a, Puttable b) => Puttable (Either a b) -instance Puttable (f (g a)) => Puttable (Compose f g a) +instance (Puttable a, Puttable b, Puttable c) => Puttable (a, b, c) +instance (Puttable a, Puttable b, Puttable c, Puttable d) => Puttable (a, b, c, d) +instance (Puttable a, Puttable b, Puttable c, Puttable d, Puttable e) => Puttable (a, b, c, d, e) +instance (Puttable a, Puttable b, Puttable c, Puttable d, Puttable e, Puttable f) => Puttable (a, b, c, d, e, f) +instance (Puttable a, Puttable b, Puttable c, Puttable d, Puttable e, Puttable f, Puttable g) => Puttable (a, b, c, d, e, f, g) + +instance Puttable (f (g a)) => Puttable (F.Compose f g a) instance (Puttable (f a), Puttable (g a)) => Puttable (F.Product f g a) instance (Puttable (f a), Puttable (g a)) => Puttable (F.Sum f g a) diff --git a/src/Data/Binary/Succinct/Size.hs b/src/Data/Binary/Succinct/Size.hs new file mode 100644 index 0000000..df30369 --- /dev/null +++ b/src/Data/Binary/Succinct/Size.hs @@ -0,0 +1,162 @@ +{-# Language AllowAmbiguousTypes #-} +{-# Language TypeApplications #-} +{-# Language ScopedTypeVariables #-} +{-# Language DefaultSignatures #-} +{-# Language FlexibleContexts #-} +{-# Language StandaloneDeriving #-} +{-# Language MonoLocalBinds #-} +{-# Language GADTs #-} +{-# Language DataKinds #-} +{-# Language TypeOperators #-} +{-# Language PolyKinds #-} +{-# Language TypeFamilies #-} +{-# Language UndecidableInstances #-} + +module Data.Binary.Succinct.Size + ( Sized(..) + , Size(..) + , gsize + , Sing(SAny, SVariable, SExactly) + , (/\), type (/\) + , (\/), type (\/) + , Pad(..) + ) where + +import Data.Functor.Compose as F +import Data.Functor.Product as F +import Data.Functor.Sum as F +import Data.Int +import Data.Proxy +import Data.Singletons +import Data.Singletons.TypeLits +import Data.Singletons.Prelude.Bool +import Data.Singletons.Prelude.Eq +import Data.Singletons.Prelude.Num +import qualified Data.Type.Equality as DTE +import Data.Void +import Data.Word +import Generics.SOP hiding (sing, Sing, SingI) +import Generics.SOP.GGP +import qualified GHC.Generics as GHC +import GHC.TypeNats (Nat) + +data Size = Any | Variable | Exactly Nat -- kind + +data instance Sing (s :: Size) where + SAny :: Sing 'Any + SVariable :: Sing 'Variable + SExactly :: Sing n -> Sing ('Exactly n) +deriving instance Show (Sing (s :: Size)) + +instance SingI 'Any where sing = SAny +instance SingI 'Variable where sing = SVariable +instance KnownNat n => SingI ('Exactly n) where sing = SExactly SNat + +class Sized (a :: *) where + type SizeOf a :: Size + type SizeOf a = GSizeOf (GCode a) + size :: Sing (SizeOf a) + default size :: (GHC.Generic a, GFrom a, All2 Sized (GCode a), SizeOf a ~ GSizeOf (GCode a)) => Sing (SizeOf a) + size = gsize @(GCode a) + +-- @((\/), Any)@ is a monoid +type family (\/) (a :: Size) (b :: Size) :: Size where + 'Any \/ x = x + x \/ 'Any = x + 'Variable \/ _ = 'Variable + _ \/ 'Variable = 'Variable + 'Exactly i \/ 'Exactly j = If (i DTE.== j) ('Exactly i) 'Variable + +(\/) :: Sing a -> Sing b -> Sing (a \/ b) +SAny \/ x = x +x \/ SAny = x +SVariable \/ _ = SVariable +_ \/ SVariable = SVariable +SExactly p \/ SExactly q = sIf (p %== q) (SExactly p) SVariable +infixr 2 \/ + + +-- @((/\), Exactly 0)@ is a monoid +type family (/\) (a :: Size) (b :: Size) :: Size where + 'Any /\ _ = 'Any + _ /\ 'Any = 'Any + 'Variable /\ _ = 'Variable + _ /\ 'Variable = 'Variable + 'Exactly i /\ 'Exactly j = 'Exactly (i + j) + +(/\) :: Sing a -> Sing b -> Sing (a /\ b) +SAny /\ _ = SAny +_ /\ SAny = SAny +SVariable /\ SVariable = SVariable +SVariable /\ SExactly _ = SVariable +SExactly _ /\ SVariable = SVariable +SExactly p /\ SExactly q = SExactly (p %+ q) +infixr 3 /\ + +-- If the sum is not unary, add a byte for the tag +type family GSizeOf (xss :: [[*]]) :: Size where + GSizeOf '[xs] = GSizeOfProduct xs + GSizeOf '[] = 'Any + GSizeOf (xs ': xs' ': xss) = 'Exactly 1 /\ GSizeOfSum (xs ': xs' ': xss) + +type family GSizeOfSum (a :: [[*]]) :: Size where + GSizeOfSum '[] = 'Any + GSizeOfSum (xs ': xss) = GSizeOfProduct xs \/ GSizeOfSum xss + +type family GSizeOfProduct (a :: [*]) :: Size where + GSizeOfProduct '[] = 'Exactly 0 + GSizeOfProduct (x ': xs) = SizeOf x /\ GSizeOfProduct xs + +newtype Sz a = Sz (Sing (SizeOf a)) + +gsize :: forall xss. All2 Sized xss => Sing (GSizeOf xss) +gsize = gsizeOf @xss $ unPOP $ hcpure (Proxy @Sized) ksize where + ksize :: forall x. Sized x => Sz x + ksize = Sz (size @x) + + gsizeOf :: NP (NP Sz) yss -> Sing (GSizeOf yss) + gsizeOf (np :* Nil) = gsizeOfProduct np + gsizeOf Nil = SAny + gsizeOf (np :* np' :* nps) = SExactly (SNat @1) /\ gsizeOfSum (np :* np' :* nps) + + gsizeOfSum :: NP (NP Sz) yss -> Sing (GSizeOfSum yss) + gsizeOfSum Nil = SAny + gsizeOfSum (np :* nps) = gsizeOfProduct np \/ gsizeOfSum nps + + gsizeOfProduct :: NP Sz ys -> Sing (GSizeOfProduct ys) + gsizeOfProduct Nil = SExactly (SNat @0) + gsizeOfProduct (Sz sz :* szs) = sz /\ gsizeOfProduct szs + +data Pad (n :: Nat) = Pad + +instance KnownNat n => Sized (Pad n) where type SizeOf (Pad n) = 'Exactly n; size = sing + +instance Sized Word8 where type SizeOf Word8 = 'Exactly 1; size = sing +instance Sized Word16 where type SizeOf Word16 = 'Exactly 2; size = sing +instance Sized Word32 where type SizeOf Word32 = 'Exactly 4; size = sing +instance Sized Word64 where type SizeOf Word64 = 'Exactly 8; size = sing +instance Sized Int8 where type SizeOf Int8 = 'Exactly 1; size = sing +instance Sized Int16 where type SizeOf Int16 = 'Exactly 2; size = sing +instance Sized Int32 where type SizeOf Int32 = 'Exactly 4; size = sing +instance Sized Int64 where type SizeOf Int64 = 'Exactly 8; size = sing + +instance Sized [a] where type SizeOf [a] = 'Variable; size = sing +instance Sized Char where type SizeOf Char = 'Variable; size = sing + +instance Sized Bool +instance (Sized a, Sized b) => Sized (Either a b) +instance Sized a => Sized (Maybe a) +instance Sized Ordering +instance Sized (Proxy a) +instance Sized Void +instance Sized () +instance (Sized a, Sized b) => Sized (a, b) +instance (Sized a, Sized b, Sized c) => Sized (a, b, c) +instance (Sized a, Sized b, Sized c, Sized d) => Sized (a, b, c, d) +instance (Sized a, Sized b, Sized c, Sized d, Sized e) => Sized (a, b, c, d, e) +instance (Sized a, Sized b, Sized c, Sized d, Sized e, Sized f) => Sized (a, b, c, d, e, f) +instance (Sized a, Sized b, Sized c, Sized d, Sized e, Sized f, Sized g) => Sized (a, b, c, d, e, f, g) + +instance Sized (f (g a)) => Sized (F.Compose f g a) +instance (Sized (f a), Sized (g a)) => Sized (F.Product f g a) +instance (Sized (f a), Sized (g a)) => Sized (F.Sum f g a) diff --git a/succinct-binary.cabal b/succinct-binary.cabal index 428e239..91b835f 100644 --- a/succinct-binary.cabal +++ b/succinct-binary.cabal @@ -24,9 +24,7 @@ source-repository head library build-depends: - base >= 4 && < 5, - bits, - bytes, + base >= 4 && < 5, bytestring, cereal >= 0.5.7, contravariant, @@ -34,7 +32,9 @@ library hw-rankselect >= 0.12.0.4, hw-rankselect-base, hw-balancedparens, + singletons >= 2.4, spool, + utf8-string, vector hs-source-dirs: src @@ -44,5 +44,6 @@ library Data.Binary.Succinct.Get Data.Binary.Succinct.Orphans Data.Binary.Succinct.Put + Data.Binary.Succinct.Size ghc-options: -Wall