diff --git a/Cargo.lock b/Cargo.lock index 736f2b1461..e5a2f4c983 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,22 +116,22 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -161,8 +161,8 @@ dependencies = [ "serde_bytes", "serde_json", "snap", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror 2.0.17", "uuid", "xz2", @@ -187,8 +187,8 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror 2.0.17", "uuid", "zstd", @@ -223,9 +223,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ "arrow-arith", "arrow-array", @@ -244,23 +244,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -269,30 +269,34 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", - "num", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -301,15 +305,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ "arrow-array", "arrow-cast", @@ -322,21 +326,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ "arrow-array", "arrow-buffer", @@ -350,9 +355,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -361,20 +366,22 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.12.1", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ "arrow-array", "arrow-buffer", @@ -385,9 +392,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ "arrow-array", "arrow-buffer", @@ -398,33 +405,33 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" dependencies = [ - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ "arrow-array", "arrow-buffer", @@ -432,7 +439,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -585,7 +592,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -620,7 +627,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -646,9 +653,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.8" +version = "1.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37cf2b6af2a95a20e266782b4f76f1a5e12bf412a9db2de9c1e9123b9d8c0ad8" +checksum = "a0149602eeaf915158e14029ba0c78dedb8c08d554b024d54c8f239aab46511d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -676,9 +683,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.8" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faf26925f4a5b59eb76722b63c2892b1d70d06fa053c72e4a100ec308c1d47bc" +checksum = "b01c9521fa01558f750d183c8c68c81b0155b9d193a4ba7f84c36bd1b6d04a06" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -688,9 +695,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.1" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" +checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" dependencies = [ "aws-lc-sys", "zeroize", @@ -698,11 +705,10 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.32.3" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" +checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" dependencies = [ - "bindgen", "cc", "cmake", "dunce", @@ -711,9 +717,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.12" +version = "1.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa006bb32360ed90ac51203feafb9d02e3d21046e1fd3a450a404b90ea73e5d" +checksum = "7ce527fb7e53ba9626fc47824f25e256250556c40d8f81d27dd92aa38239d632" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -735,9 +741,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.126.0" +version = "1.132.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9c10a11584c0b619c9e478143072c4028c39017f98534e206156a7e94188be" +checksum = "35638d8e6ef97adb7f3154ffc618bbe1d631a503b6d8328b94af77b7615fbeb2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -757,9 +763,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.41.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "761f176da526badb4c3dbd67ee1da2faf3dc1e537ed229355f7590d80595ae35" +checksum = "ce68b5d4652e6248827e472c67df8773ae6ab3946ff176de8d3ee7c295299efd" dependencies = [ "aws-credential-types", "aws-runtime", @@ -779,9 +785,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.86.0" +version = "1.90.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0abbfab841446cce6e87af853a3ba2cc1bc9afcd3f3550dd556c43d434c86d" +checksum = "4f18e53542c522459e757f81e274783a78f8c81acdfc8d1522ee8a18b5fb1c66" dependencies = [ "aws-credential-types", "aws-runtime", @@ -801,9 +807,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.89.0" +version = "1.92.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "695dc67bb861ccb8426c9129b91c30e266a0e3d85650cafdf62fcca14c8fd338" +checksum = "532f4d866012ffa724a4385c82e8dd0e59f0ca0e600f3f22d4c03b6824b34e4a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -823,9 +829,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.88.0" +version = "1.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d30990923f4f675523c51eb1c0dec9b752fb267b36a61e83cbc219c9d86da715" +checksum = "1be6fbbfa1a57724788853a623378223fe828fc4c09b146c992f0c95b6256174" dependencies = [ "aws-credential-types", "aws-runtime", @@ -846,9 +852,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.5" +version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68" +checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -879,15 +885,16 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.4" +version = "0.62.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671" +checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -899,9 +906,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1053b5e587e6fa40ce5a79ea27957b04ba660baa02b28b7436f64850152234f1" +checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -912,13 +919,13 @@ dependencies = [ "http 1.4.0", "http-body 0.4.6", "hyper 0.14.32", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.24.2", "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.34", + "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-pki-types", "tokio", @@ -929,9 +936,9 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.6" +version = "0.61.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff418fc8ec5cadf8173b10125f05c2e7e1d46771406187b2c878557d4503390" +checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54" dependencies = [ "aws-smithy-types", ] @@ -957,9 +964,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.3" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ab99739082da5347660c556689256438defae3bcefd66c52b095905730e404" +checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -981,9 +988,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.1" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3683c5b152d2ad753607179ed71988e8cfd52964443b4f74fd8e552d0bbfeb46" +checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -998,9 +1005,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.3" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f5b3a7486f6690ba25952cabf1e7d75e34d69eaff5081904a47bc79074d6457" +checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" dependencies = [ "base64-simd", "bytes", @@ -1024,18 +1031,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.11" +version = "0.60.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c34127e8c624bc2999f3b657e749c1393bedc9cd97b92a804db8ced4d2e163" +checksum = "eab77cdd036b11056d2a30a7af7b775789fb024bf216acc13884c6c97752ae56" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.9" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2fd329bf0e901ff3f60425691410c69094dc2a1f34b331f37bfc4e9ac1565a1" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1104,26 +1111,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" -[[package]] -name = "bindgen" -version = "0.72.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.108", -] - [[package]] name = "bitflags" version = "2.10.0" @@ -1220,14 +1207,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "borsh" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" dependencies = [ "borsh-derive", "cfg_aliases", @@ -1235,15 +1222,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1365,9 +1352,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.43" +version = "1.2.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" +checksum = "c481bdbf0ed3b892f6f806287d72acd515b352a4ec27a208489b8c1bc839633a" dependencies = [ "find-msvc-tools", "jobserver", @@ -1375,15 +1362,6 @@ dependencies = [ "shlex", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -1430,22 +1408,11 @@ dependencies = [ "inout", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -1453,9 +1420,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -1472,7 +1439,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1516,12 +1483,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1616,9 +1582,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" dependencies = [ "crc-catalog", ] @@ -1688,9 +1654,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1724,7 +1690,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1758,7 +1724,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1772,7 +1738,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1783,7 +1749,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1794,7 +1760,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1813,12 +1779,11 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1829,6 +1794,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", @@ -1851,7 +1817,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "hex", "itertools 0.14.0", "log", "object_store", @@ -1859,6 +1824,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1870,9 +1836,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1885,7 +1851,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1896,9 +1861,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ -1908,10 +1873,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1919,16 +1885,18 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0b9c821d14e79070f42ea3a6d6618ced04d94277f0a32301918d7a022c250f" +checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", + "chrono", "clap", "datafusion", + "datafusion-common", "dirs", "env_logger", "futures", @@ -1945,20 +1913,19 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", "apache-avro 0.20.0", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "libc", "log", "object_store", @@ -1972,9 +1939,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", "log", @@ -1983,9 +1950,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", @@ -2008,9 +1975,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -2018,47 +1983,64 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-avro" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10d40b6953ebc9099b37adfd12fde97eb73ff0cee44355c6dea64b8a4537d561" +checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" dependencies = [ "apache-avro 0.20.0", "arrow", "async-trait", "bytes", - "chrono", - "datafusion-catalog", "datafusion-common", "datafusion-datasource", - "datafusion-execution", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "num-traits", "object_store", - "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -2070,74 +2052,67 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", - "hex", "itertools 0.14.0", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -2156,9 +2131,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -2169,7 +2144,8 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.12.0", + "indexmap 2.12.1", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -2178,22 +2154,22 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", @@ -2211,6 +2187,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -2220,9 +2197,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", "arrow", @@ -2241,9 +2218,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", "arrow", @@ -2254,9 +2231,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -2264,6 +2241,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -2276,9 +2254,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -2292,9 +2270,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -2310,9 +2288,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2320,20 +2298,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -2341,7 +2319,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "log", "recursive", @@ -2351,9 +2329,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", "arrow", @@ -2364,9 +2342,8 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph 0.8.3", @@ -2374,9 +2351,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -2389,9 +2366,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", "arrow", @@ -2403,9 +2380,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -2417,15 +2394,14 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", "arrow", @@ -2444,7 +2420,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "log", "parking_lot", @@ -2454,12 +2430,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -2472,35 +2447,26 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-spark" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613efb6666a7d42fcb922b90cd0daa2b25ea486d141350e5d3e86e46df28309a" +checksum = "97a8d6fed24c80dd403dcc6afec33766a599d1b72575f222237f01429b2e58ba" dependencies = [ "arrow", + "bigdecimal", "chrono", "crc32fast", "datafusion-catalog", @@ -2508,24 +2474,24 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", - "datafusion-macros", "log", + "rand 0.9.2", "sha1", "url", - "xxhash-rust", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "recursive", "regex", @@ -2534,9 +2500,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17598193dd875ca895400c51ccab1c30fceb1855220dc60aa415a4db7c95a2d7" +checksum = "a6830e357705e0d54fda6e3ce70a87c2b255197563c6463d668520cbfc1e0b7c" dependencies = [ "arrow", "async-trait", @@ -2561,14 +2527,15 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa011a3814d91a03ab655ad41bbe5e57b203b2859281af8fe2c30aebbbcc5d9" +checksum = "2505af06d103a55b4e8ded0c6aeb6c72a771948da939c0bd3f8eee67af475a9c" dependencies = [ "async-recursion", "async-trait", "chrono", "datafusion", + "half", "itertools 0.14.0", "object_store", "pbjson-types", @@ -2618,7 +2585,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2628,7 +2595,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2678,7 +2645,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2729,7 +2696,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2770,7 +2737,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2903,9 +2870,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "fixedbitset" @@ -2957,6 +2924,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3073,7 +3046,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -3088,6 +3061,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -3108,9 +3087,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -3173,7 +3152,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3192,7 +3171,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.4.0", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3238,14 +3217,19 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "hashlink" @@ -3412,9 +3396,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -3456,22 +3440,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.4.0", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-util", - "rustls 0.23.34", + "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "52e9a2a24dc5c6821e71a7030e1e14b7b632acac55c40e9d2e082c621261bb56" dependencies = [ "base64 0.22.1", "bytes", @@ -3480,7 +3464,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper 1.7.0", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", @@ -3568,9 +3552,8 @@ dependencies = [ "serde_repr", "serde_with", "smol", - "strum 0.27.2", + "strum", "tempfile", - "thrift", "tokio", "typed-builder", "url", @@ -3689,7 +3672,7 @@ dependencies = [ "itertools 0.13.0", "regex", "sqlx", - "strum 0.27.2", + "strum", "tempfile", "tokio", ] @@ -3912,21 +3895,21 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] [[package]] name = "indicatif" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ "console", "portable-atomic", @@ -3969,9 +3952,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -4009,28 +3992,28 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "jiff-tzdb-platform", "log", "portable-atomic", "portable-atomic-util", - "serde", - "windows-sys 0.59.0", + "serde_core", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4060,9 +4043,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -4157,19 +4140,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" - -[[package]] -name = "libloading" -version = "0.8.9" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link", -] +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libm" @@ -4282,9 +4255,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ "twox-hash", ] @@ -4349,19 +4322,13 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.12.0" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990" +checksum = "0adbe6e92a6ce0fd6c4aac593fdfd3e3950b0f61b1a63aa9731eb6fd85776fa3" dependencies = [ "serde", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -4406,25 +4373,26 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "mockito" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7760e0e418d9b7e5777c0374009ca4c93861b9066f18cb334a20ce50ab63aa48" +checksum = "7e0603425789b4a70fcc4ac4f5a46a566c116ee3e2a6b768dc623f7719c611de" dependencies = [ "assert-json-diff", "bytes", "colored", - "futures-util", + "futures-core", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-util", "log", + "pin-project-lite", "rand 0.9.2", "regex", "serde_json", @@ -4474,7 +4442,7 @@ checksum = "b40e46c845ac234bcba19db7ab252bc2778cbadd516a466d2f12b1580852d136" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4500,7 +4468,7 @@ checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4549,16 +4517,6 @@ dependencies = [ "libc", ] -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -4568,20 +4526,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -4595,11 +4539,10 @@ dependencies = [ [[package]] name = "num-bigint-dig" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" dependencies = [ - "byteorder", "lazy_static", "libm", "num-integer", @@ -4645,17 +4588,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -4666,16 +4598,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "num_enum" version = "0.7.5" @@ -4695,7 +4617,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4722,12 +4644,12 @@ dependencies = [ "http 1.4.0", "http-body-util", "humantime", - "hyper 1.7.0", + "hyper 1.8.1", "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "rand 0.9.2", "reqwest", "ring", @@ -4775,7 +4697,7 @@ dependencies = [ "log", "md-5", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "reqsign", "reqwest", "serde", @@ -4870,9 +4792,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4889,10 +4811,11 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", "ring", @@ -4913,31 +4836,31 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pbjson" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "serde", ] [[package]] name = "pbjson-build" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "prost", "prost-types", ] [[package]] name = "pbjson-types" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" dependencies = [ "bytes", "chrono", @@ -4990,7 +4913,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.12.0", + "indexmap 2.12.1", ] [[package]] @@ -5001,7 +4924,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", ] @@ -5062,7 +4985,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5263,7 +5186,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5286,9 +5209,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -5296,9 +5219,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" dependencies = [ "heck", "itertools 0.14.0", @@ -5310,28 +5233,28 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.108", + "syn 2.0.111", "tempfile", ] [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ "prost", ] @@ -5383,7 +5306,7 @@ checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5404,9 +5327,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -5424,7 +5347,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.34", + "rustls 0.23.35", "socket2 0.6.1", "thiserror 2.0.17", "tokio", @@ -5444,7 +5367,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.34", + "rustls 0.23.35", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -5469,9 +5392,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -5585,7 +5508,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5625,7 +5548,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5665,14 +5588,20 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "regress" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145bb27393fe455dd64d6cbc8d059adfa392590a45eadf079c01b11857e7b010" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" dependencies = [ - "hashbrown 0.15.5", + "hashbrown 0.16.1", "memchr", ] +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.4.2" @@ -5734,7 +5663,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.27.7", "hyper-util", "js-sys", @@ -5742,7 +5671,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.34", + "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-pki-types", "serde", @@ -5760,7 +5689,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] @@ -5803,7 +5732,7 @@ checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" dependencies = [ "bytes", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "munge", "ptr_meta 0.3.1", "rancor", @@ -5832,7 +5761,7 @@ checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5847,9 +5776,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +checksum = "40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88" dependencies = [ "const-oid", "digest", @@ -5866,6 +5795,35 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.111", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -5938,9 +5896,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", "once_cell", @@ -5995,9 +5953,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" +checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" dependencies = [ "web-time", "zeroize", @@ -6112,9 +6070,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" dependencies = [ "dyn-clone", "ref-cast", @@ -6131,7 +6089,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6256,7 +6214,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6267,7 +6225,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6291,7 +6249,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6312,7 +6270,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6329,17 +6287,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10574371d41b0d9b2cff89418eda27da52bcaff2cc8741db26382a77c29131f1" +checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" dependencies = [ "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.0", + "indexmap 2.12.1", "schemars 0.9.0", - "schemars 1.0.4", + "schemars 1.1.0", "serde_core", "serde_json", "serde_with_macros", @@ -6348,14 +6306,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a72d8216842fdd57820dc78d840bef99248e35fb2554ff923319e60f2d686b" +checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6364,7 +6322,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "ryu", "serde", @@ -6410,9 +6368,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] @@ -6552,9 +6510,9 @@ dependencies = [ [[package]] name = "sonic-simd" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b421f7b6aa4a5de8f685aaf398dfaa828346ee639d2b1c1061ab43d40baa6223" +checksum = "5707edbfb34a40c9f2a55fa09a49101d9fec4e0cc171ce386086bd9616f34257" dependencies = [ "cfg-if", ] @@ -6605,9 +6563,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", @@ -6622,7 +6580,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6656,12 +6614,12 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "memchr", "once_cell", "percent-encoding", - "rustls 0.23.34", + "rustls 0.23.35", "serde", "serde_json", "sha2", @@ -6684,7 +6642,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6705,7 +6663,7 @@ dependencies = [ "sha2", "sqlx-core", "sqlx-sqlite", - "syn 2.0.108", + "syn 2.0.111", "tokio", "url", ] @@ -6848,32 +6806,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros 0.27.2", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.108", + "strum_macros", ] [[package]] @@ -6885,7 +6824,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6900,9 +6839,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.58.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" +checksum = "21f1cb6d0bcd097a39fc25f7236236be29881fe122e282e4173d6d007a929927" dependencies = [ "heck", "pbjson", @@ -6918,7 +6857,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.108", + "syn 2.0.111", "typify", "walkdir", ] @@ -6942,9 +6881,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -6968,7 +6907,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7028,7 +6967,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7039,7 +6978,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7051,15 +6990,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - [[package]] name = "thrift" version = "0.17.0" @@ -7068,9 +6998,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", "integer-encoding 3.0.4", - "log", "ordered-float 2.10.1", - "threadpool", ] [[package]] @@ -7163,7 +7091,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7182,7 +7110,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.34", + "rustls 0.23.35", "tokio", ] @@ -7199,9 +7127,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -7246,7 +7174,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", "serde_spanned", "toml_datetime 0.6.11", @@ -7260,7 +7188,7 @@ version = "0.23.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "toml_datetime 0.7.3", "toml_parser", "winnow", @@ -7298,9 +7226,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "9cf146f99d442e8e68e585f5d798ccd3cad9a7835b917e09728880a862706456" dependencies = [ "bitflags", "bytes", @@ -7328,9 +7256,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "log", "pin-project-lite", @@ -7340,20 +7268,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -7372,9 +7300,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -7413,7 +7341,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7424,9 +7352,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "typify" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7144144e97e987c94758a3017c920a027feac0799df325d6df4fc8f08d02068e" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" dependencies = [ "typify-impl", "typify-macro", @@ -7434,9 +7362,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062879d46aa4c9dfe0d33b035bbaf512da192131645d05deacb7033ec8581a09" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" dependencies = [ "heck", "log", @@ -7447,16 +7375,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.108", + "syn 2.0.111", "thiserror 2.0.17", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9708a3ceb6660ba3f8d2b8f0567e7d4b8b198e2b94d093b8a6077a751425de9e" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" dependencies = [ "proc-macro2", "quote", @@ -7465,7 +7393,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.108", + "syn 2.0.111", "typify-impl", ] @@ -7477,24 +7405,24 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-properties" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" @@ -7516,9 +7444,9 @@ checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unit-prefix" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" [[package]] name = "unsafe-libyaml" @@ -7564,13 +7492,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -7696,9 +7624,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -7709,9 +7637,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -7722,9 +7650,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7732,22 +7660,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] @@ -7767,9 +7695,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -7791,14 +7719,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ "rustls-pki-types", ] @@ -7843,7 +7771,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7854,7 +7782,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -8114,9 +8042,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] @@ -8148,12 +8076,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xxhash-rust" -version = "0.8.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" - [[package]] name = "xz2" version = "0.1.7" @@ -8188,28 +8110,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -8229,7 +8151,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] @@ -8269,7 +8191,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0512155534..e926d26816 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,20 +36,23 @@ version = "0.7.0" license = "Apache-2.0" repository = "https://github.com/apache/iceberg-rust" # Check the MSRV policy in README.md before changing this -rust-version = "1.87" +rust-version = "1.88" + +[workspace.lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(FALSE)'] } [workspace.dependencies] anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" -arrow-arith = "56.2" -arrow-array = "56.2" -arrow-buffer = "56.2" -arrow-cast = "56.2" -arrow-ord = "56.2" -arrow-schema = "56.2" -arrow-select = "56.2" -arrow-string = "56.2" +arrow-arith = "57.0" +arrow-array = "57.0" +arrow-buffer = "57.0" +arrow-cast = "57.0" +arrow-ord = "57.0" +arrow-schema = "57.0" +arrow-select = "57.0" +arrow-string = "57.0" as-any = "0.3.2" async-trait = "0.1.89" aws-config = "1.8.7" @@ -62,9 +65,9 @@ bytes = "1.10" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } ctor = "0.2.8" -datafusion = "50" -datafusion-cli = "50" -datafusion-sqllogictest = "50" +datafusion = "51.0" +datafusion-cli = "51.0" +datafusion-sqllogictest = "51.0" derive_builder = "0.20" dirs = "6" enum-ordinalize = "4.3.0" @@ -101,7 +104,7 @@ num-bigint = "0.4.6" once_cell = "1.20" opendal = "0.55.0" ordered-float = "4" -parquet = "56.2" +parquet = "57.0" pilota = "0.11.10" port_scanner = "0.1.5" pretty_assertions = "1.4" diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 814c9afb35..a7244cc8dd 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -180,9 +180,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -202,23 +202,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -228,25 +228,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,15 +262,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,21 +283,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" dependencies = [ "arrow-array", "arrow-buffer", @@ -308,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -320,19 +324,21 @@ dependencies = [ "chrono", "half", "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +349,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" dependencies = [ "arrow-array", "arrow-data", @@ -355,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,34 +374,34 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ "bitflags", - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,7 +409,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -1029,11 +1035,9 @@ dependencies = [ [[package]] name = "datafusion" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1044,6 +1048,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", @@ -1072,6 +1077,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1084,8 +1090,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1098,7 +1103,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1110,8 +1114,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1121,10 +1124,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1133,13 +1137,11 @@ dependencies = [ [[package]] name = "datafusion-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", - "base64", "chrono", "half", "hashbrown 0.14.5", @@ -1158,8 +1160,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "futures", "log", @@ -1169,8 +1170,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-compression", @@ -1193,9 +1193,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1203,22 +1201,42 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "50.3.0" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1231,48 +1249,41 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -1282,21 +1293,18 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" [[package]] name = "datafusion-execution" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1315,8 +1323,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1328,6 +1335,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.12.0", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -1337,8 +1345,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1350,8 +1357,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ddb7c4e645df080c27dad13a198d191da328dd1c98e198664a7a0f64b335cc" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "abi_stable", "arrow", @@ -1359,6 +1365,7 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-common", "datafusion-functions-aggregate-common", "datafusion-proto", "datafusion-proto-common", @@ -1372,8 +1379,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "arrow-buffer", @@ -1391,6 +1397,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -1401,8 +1408,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1422,8 +1428,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1435,8 +1440,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "arrow-ord", @@ -1444,6 +1448,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -1457,8 +1462,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1473,8 +1477,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1491,8 +1494,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1501,10 +1503,9 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", "syn 2.0.108", ] @@ -1512,8 +1513,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "chrono", @@ -1532,8 +1532,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1546,7 +1545,6 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.12.0", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph", @@ -1555,8 +1553,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1570,8 +1567,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1584,8 +1580,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1597,15 +1592,13 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1635,14 +1628,24 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7df9f606892e6af45763d94d210634eec69b9bb6ced5353381682ff090028a3" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "chrono", - "datafusion", + "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", "datafusion-expr", + "datafusion-functions-table", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto-common", "object_store", "prost", @@ -1651,8 +1654,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b14f288ca4ef77743d9672cafecf3adfffff0b9b04af9af79ecbeaaf736901" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1662,11 +1664,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -1680,35 +1680,24 @@ dependencies = [ [[package]] name = "datafusion-session" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", "indexmap 2.12.0", @@ -1993,6 +1982,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2784,20 +2779,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -2833,28 +2814,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -3001,9 +2960,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -3022,11 +2981,11 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -3156,9 +3115,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -3166,9 +3125,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", @@ -3221,9 +3180,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -3238,19 +3197,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -3258,9 +3216,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3270,9 +3228,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", "proc-macro2", @@ -3526,6 +3484,12 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.4.2" @@ -3667,6 +3631,35 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.108", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -4008,9 +4001,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 953d4a98fc..499dd4c72c 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -31,11 +31,11 @@ license = "Apache-2.0" crate-type = ["cdylib"] [dependencies] -arrow = { version = "56", features = ["pyarrow", "chrono-tz"] } +arrow = { version = "57.0", features = ["pyarrow", "chrono-tz"] } iceberg = { path = "../../crates/iceberg" } -pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] } +pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } -datafusion-ffi = { version = "50" } +datafusion-ffi = { version = "51.0" } tokio = { version = "1.46.1", default-features = false } [profile.release] diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs index 24e9f061dd..c159d573fc 100644 --- a/bindings/python/src/transform.rs +++ b/bindings/python/src/transform.rs @@ -24,46 +24,46 @@ use pyo3::prelude::*; use crate::error::to_py_err; #[pyfunction] -pub fn identity(py: Python, array: PyObject) -> PyResult { +pub fn identity(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Identity) } #[pyfunction] -pub fn void(py: Python, array: PyObject) -> PyResult { +pub fn void(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Void) } #[pyfunction] -pub fn year(py: Python, array: PyObject) -> PyResult { +pub fn year(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Year) } #[pyfunction] -pub fn month(py: Python, array: PyObject) -> PyResult { +pub fn month(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Month) } #[pyfunction] -pub fn day(py: Python, array: PyObject) -> PyResult { +pub fn day(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Day) } #[pyfunction] -pub fn hour(py: Python, array: PyObject) -> PyResult { +pub fn hour(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Hour) } #[pyfunction] -pub fn bucket(py: Python, array: PyObject, num_buckets: u32) -> PyResult { +pub fn bucket(py: Python, array: Py, num_buckets: u32) -> PyResult> { apply(py, array, Transform::Bucket(num_buckets)) } #[pyfunction] -pub fn truncate(py: Python, array: PyObject, width: u32) -> PyResult { +pub fn truncate(py: Python, array: Py, width: u32) -> PyResult> { apply(py, array, Transform::Truncate(width)) } -fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult { +fn apply(py: Python, array: Py, transform: Transform) -> PyResult> { // import let array = ArrayData::from_pyarrow_bound(array.bind(py))?; let array = make_array(array); @@ -71,7 +71,7 @@ fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult, m: &Bound<'_, PyModule>) -> PyResult<()> { diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index dce287ed6e..37a7996f80 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -151,33 +151,33 @@ impl GlueCatalog { async fn new(config: GlueCatalogConfig) -> Result { let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await; let mut file_io_props = config.props.clone(); - if !file_io_props.contains_key(S3_ACCESS_KEY_ID) { - if let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) { - file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); - } + if !file_io_props.contains_key(S3_ACCESS_KEY_ID) + && let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) + { + file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); } - if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) { - if let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) { - file_io_props.insert( - S3_SECRET_ACCESS_KEY.to_string(), - secret_access_key.to_string(), - ); - } + if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) + && let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) + { + file_io_props.insert( + S3_SECRET_ACCESS_KEY.to_string(), + secret_access_key.to_string(), + ); } - if !file_io_props.contains_key(S3_REGION) { - if let Some(region) = file_io_props.get(AWS_REGION_NAME) { - file_io_props.insert(S3_REGION.to_string(), region.to_string()); - } + if !file_io_props.contains_key(S3_REGION) + && let Some(region) = file_io_props.get(AWS_REGION_NAME) + { + file_io_props.insert(S3_REGION.to_string(), region.to_string()); } - if !file_io_props.contains_key(S3_SESSION_TOKEN) { - if let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) { - file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); - } + if !file_io_props.contains_key(S3_SESSION_TOKEN) + && let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) + { + file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); } - if !file_io_props.contains_key(S3_ENDPOINT) { - if let Some(aws_endpoint) = config.uri.as_ref() { - file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); - } + if !file_io_props.contains_key(S3_ENDPOINT) + && let Some(aws_endpoint) = config.uri.as_ref() + { + file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); } let client = aws_sdk_glue::Client::new(&sdk_config); diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 895a5cf5e4..8a775a07b7 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -28,6 +28,9 @@ keywords = ["iceberg"] license = { workspace = true } repository = { workspace = true } +[lints] +workspace = true + [features] default = ["storage-memory", "storage-fs", "storage-s3", "tokio"] storage-all = ["storage-memory", "storage-fs", "storage-s3", "storage-gcs"] @@ -87,7 +90,6 @@ serde_repr = { workspace = true } serde_with = { workspace = true } smol = { workspace = true, optional = true } strum = { workspace = true, features = ["derive"] } -thrift = { workspace = true } tokio = { workspace = true, optional = false, features = ["sync"] } typed-builder = { workspace = true } url = { workspace = true } diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index ab5a96f751..af0ae21762 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -485,10 +485,10 @@ impl ArrowReader { // we need to call next() to update the cache with the newly positioned value. delete_vector_iter.advance_to(next_row_group_base_idx); // Only update the cache if the cached value is stale (in the skipped range) - if let Some(cached_idx) = next_deleted_row_idx_opt { - if cached_idx < next_row_group_base_idx { - next_deleted_row_idx_opt = delete_vector_iter.next(); - } + if let Some(cached_idx) = next_deleted_row_idx_opt + && cached_idx < next_row_group_base_idx + { + next_deleted_row_idx_opt = delete_vector_iter.next(); } // still increment the current page base index but then skip to the next row group @@ -842,10 +842,10 @@ impl ArrowReader { }; // If all row groups were filtered out, return an empty RowSelection (select no rows) - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups.is_empty() { - return Ok(RowSelection::from(Vec::new())); - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups.is_empty() + { + return Ok(RowSelection::from(Vec::new())); } let mut selected_row_groups_idx = 0; @@ -878,10 +878,10 @@ impl ArrowReader { results.push(selections_for_page); - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups_idx == selected_row_groups.len() { - break; - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups_idx == selected_row_groups.len() + { + break; } } @@ -1012,14 +1012,13 @@ fn apply_name_mapping_to_arrow_schema( let mut metadata = field.metadata().clone(); - if let Some(mapped_field) = mapped_field_opt { - if let Some(field_id) = mapped_field.field_id() { - // Field found in mapping with a field_id → assign it - metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); - } - // If field_id is None, leave the field without an ID (will be filtered by projection) + if let Some(mapped_field) = mapped_field_opt + && let Some(field_id) = mapped_field.field_id() + { + // Field found in mapping with a field_id → assign it + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); } - // If field not found in mapping, leave it without an ID (will be filtered by projection) + // If field_id is None, leave the field without an ID (will be filtered by projection) Field::new(field.name(), field.data_type().clone(), field.is_nullable()) .with_metadata(metadata) @@ -1896,7 +1895,7 @@ message schema { assert_eq!(err.kind(), ErrorKind::DataInvalid); assert_eq!( err.to_string(), - "DataInvalid => Unsupported Arrow data type: Duration(Microsecond)".to_string() + "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string() ); // Omitting field c2, we still get an error due to c3 being selected @@ -2122,7 +2121,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); @@ -2303,7 +2302,7 @@ message schema { let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_path = format!("{}/multi_row_group.parquet", &table_location); + let file_path = format!("{table_location}/multi_row_group.parquet"); // Force each batch into its own row group for testing byte range filtering. let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( @@ -2507,7 +2506,7 @@ message schema { let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/old_file.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/old_file.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -2613,7 +2612,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2647,7 +2646,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -2712,15 +2711,14 @@ message schema { // Step 4: Verify we got 199 rows (not 200) let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read: {}", total_rows); + println!("Total rows read: {total_rows}"); println!("Expected: 199 rows (deleted row 199 which had id=200)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 199, - "Expected 199 rows after deleting row 199, but got {} rows. \ - The bug causes position deletes in later row groups to be ignored.", - total_rows + "Expected 199 rows after deleting row 199, but got {total_rows} rows. \ + The bug causes position deletes in later row groups to be ignored." ); // Verify the deleted row (id=200) is not present @@ -2807,7 +2805,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2841,7 +2839,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -2931,16 +2929,15 @@ message schema { // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read from row group 1: {}", total_rows); + println!("Total rows read from row group 1: {total_rows}"); println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 99, - "Expected 99 rows from row group 1 after deleting position 199, but got {} rows. \ + "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \ The bug causes position deletes to be lost when advance_to() is followed by next() \ - when skipping unselected row groups.", - total_rows + when skipping unselected row groups." ); // Verify the deleted row (id=200) is not present @@ -3029,7 +3026,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -3063,7 +3060,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -3209,7 +3206,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3222,7 +3219,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3306,7 +3303,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3319,7 +3316,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 3], @@ -3392,7 +3389,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3405,7 +3402,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3480,7 +3477,7 @@ message schema { .set_max_row_group_size(2) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); // Write 6 rows in 3 batches (will create 3 row groups) @@ -3505,7 +3502,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3546,7 +3543,7 @@ message schema { assert_eq!(all_values.len(), 6); for i in 0..6 { - assert_eq!(all_names[i], format!("name_{}", i)); + assert_eq!(all_names[i], format!("name_{i}")); assert_eq!(all_values[i], i as i32); } } @@ -3621,7 +3618,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3634,7 +3631,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3718,7 +3715,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3730,7 +3727,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 5, 2], @@ -3820,7 +3817,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3839,7 +3836,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3978,7 +3975,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/data.parquet", table_location), + data_file_path: format!("{table_location}/data.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], diff --git a/crates/iceberg/src/arrow/record_batch_projector.rs b/crates/iceberg/src/arrow/record_batch_projector.rs index 45de0212e8..7028eee961 100644 --- a/crates/iceberg/src/arrow/record_batch_projector.rs +++ b/crates/iceberg/src/arrow/record_batch_projector.rs @@ -133,25 +133,24 @@ impl RecordBatchProjector { { for (pos, field) in fields.iter().enumerate() { let id = field_id_fetch_func(field)?; - if let Some(id) = id { - if target_field_id == id { - index_vec.push(pos); - return Ok(Some(field.clone())); - } + if let Some(id) = id + && target_field_id == id + { + index_vec.push(pos); + return Ok(Some(field.clone())); } - if let DataType::Struct(inner) = field.data_type() { - if searchable_field_func(field) { - if let Some(res) = Self::fetch_field_index( - inner, - index_vec, - target_field_id, - field_id_fetch_func, - searchable_field_func, - )? { - index_vec.push(pos); - return Ok(Some(res)); - } - } + if let DataType::Struct(inner) = field.data_type() + && searchable_field_func(field) + && let Some(res) = Self::fetch_field_index( + inner, + index_vec, + target_field_id, + field_id_fetch_func, + searchable_field_func, + )? + { + index_vec.push(pos); + return Ok(Some(res)); } } Ok(None) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index a20adb6a5a..d79299f765 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -492,7 +492,7 @@ impl RecordBatchTransformer { let this_field_id = field_id_str.parse().map_err(|e| { Error::new( ErrorKind::DataInvalid, - format!("field id not parseable as an i32: {}", e), + format!("field id not parseable as an i32: {e}"), ) })?; @@ -615,7 +615,7 @@ impl RecordBatchTransformer { (dt, _) => { return Err(Error::new( ErrorKind::Unexpected, - format!("unexpected target column type {}", dt), + format!("unexpected target column type {dt}"), )); } }) diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index f1cf225bb4..eb675dff31 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -258,15 +258,15 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { "The partner is not a decimal128 array", ) })?; - if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() { - if *arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" - ), - )); - } + if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() + && (*arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale) + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" + ), + )); } Ok(array.iter().map(|v| v.map(Literal::decimal)).collect()) } @@ -348,10 +348,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { } else if let Some(array) = partner.as_any().downcast_ref::() { Ok(array.iter().map(|v| v.map(Literal::string)).collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a string array", - )); + )) } } PrimitiveType::Uuid => { @@ -415,10 +415,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { .map(|v| v.map(|v| Literal::binary(v.to_vec()))) .collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a binary array", - )); + )) } } } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 27d5edaedb..f3a521379e 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -1000,13 +1000,13 @@ mod _serde_set_statistics { snapshot_id, statistics, } = SetStatistics::deserialize(deserializer)?; - if let Some(snapshot_id) = snapshot_id { - if snapshot_id != statistics.snapshot_id { - return Err(serde::de::Error::custom(format!( - "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", - statistics.snapshot_id - ))); - } + if let Some(snapshot_id) = snapshot_id + && snapshot_id != statistics.snapshot_id + { + return Err(serde::de::Error::custom(format!( + "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", + statistics.snapshot_id + ))); } Ok(statistics) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index f382bf079e..df8a10193c 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -36,7 +36,7 @@ impl DeleteVector { } } - pub fn iter(&self) -> DeleteVectorIterator { + pub fn iter(&self) -> DeleteVectorIterator<'_> { let outer = self.inner.bitmaps(); DeleteVectorIterator { outer, inner: None } } @@ -93,10 +93,10 @@ impl Iterator for DeleteVectorIterator<'_> { type Item = u64; fn next(&mut self) -> Option { - if let Some(inner) = &mut self.inner { - if let Some(inner_next) = inner.bitmap_iter.next() { - return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); - } + if let Some(inner) = &mut self.inner + && let Some(inner_next) = inner.bitmap_iter.next() + { + return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); } if let Some((high_bits, next_bitmap)) = self.outer.next() { diff --git a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs index abbd136cb1..770163ae95 100644 --- a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs @@ -161,10 +161,10 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { _predicate: &BoundPredicate, ) -> crate::Result { let field = self.field_summary_for_reference(reference); - if let Some(contains_nan) = field.contains_nan { - if !contains_nan { - return ROWS_CANNOT_MATCH; - } + if let Some(contains_nan) = field.contains_nan + && !contains_nan + { + return ROWS_CANNOT_MATCH; } if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) { @@ -389,16 +389,16 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { return ROWS_MIGHT_MATCH; } - if prefix.as_bytes().eq(&lower_bound[..prefix_len]) { - if let Some(upper_bound) = &field.upper_bound { - // if upper is shorter than the prefix then upper can't start with the prefix - if prefix_len > upper_bound.len() { - return ROWS_MIGHT_MATCH; - } + if prefix.as_bytes().eq(&lower_bound[..prefix_len]) + && let Some(upper_bound) = &field.upper_bound + { + // if upper is shorter than the prefix then upper can't start with the prefix + if prefix_len > upper_bound.len() { + return ROWS_MIGHT_MATCH; + } - if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { - return ROWS_CANNOT_MATCH; - } + if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { + return ROWS_CANNOT_MATCH; } } } diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 3745d94d18..66e2898532 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -23,7 +23,7 @@ use fnv::FnvHashSet; use ordered_float::OrderedFloat; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::metadata::RowGroupMetaData; -use parquet::file::page_index::index::Index; +use parquet::file::page_index::column_index::ColumnIndexMetaData; use parquet::file::page_index::offset_index::OffsetIndexMetaData; use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; @@ -59,7 +59,7 @@ impl PageNullCount { } pub(crate) struct PageIndexEvaluator<'a> { - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, iceberg_field_id_to_parquet_column_index: &'a HashMap, @@ -69,7 +69,7 @@ pub(crate) struct PageIndexEvaluator<'a> { impl<'a> PageIndexEvaluator<'a> { pub(crate) fn new( - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -92,7 +92,7 @@ impl<'a> PageIndexEvaluator<'a> { /// matching the filter predicate. pub(crate) fn eval( filter: &'a BoundPredicate, - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -240,137 +240,135 @@ impl<'a> PageIndexEvaluator<'a> { fn apply_predicate_to_column_index( predicate: F, field_type: &PrimitiveType, - column_index: &Index, + column_index: &ColumnIndexMetaData, row_counts: &[usize], ) -> Result>> where F: Fn(Option, Option, PageNullCount) -> Result, { let result: Result> = match column_index { - Index::NONE => { + ColumnIndexMetaData::NONE => { return Ok(None); } - Index::BOOLEAN(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::BOOLEAN(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - item.max.map(|val| { + max.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT32(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::INT32(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - item.max - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT64(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::INT64(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - item.max - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FLOAT(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::FLOAT(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - item.max.map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::DOUBLE(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::DOUBLE(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - item.max.map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::BYTE_ARRAY(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::BYTE_ARRAY(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.clone().map(|val| { + min.map(|val| { Datum::new( field_type.clone(), - PrimitiveLiteral::String( - String::from_utf8(val.data().to_vec()).unwrap(), - ), + PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - item.max.clone().map(|val| { + max.map(|val| { Datum::new( field_type.clone(), - PrimitiveLiteral::String( - String::from_utf8(val.data().to_vec()).unwrap(), - ), + PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FIXED_LEN_BYTE_ARRAY(_) => { + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'FIXED_LEN_BYTE_ARRAY' index type in column_index", )); } - Index::INT96(_) => { + ColumnIndexMetaData::INT96(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'INT96' index type in column_index", @@ -547,16 +545,16 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> { return Ok(false); } - if let Some(min) = min { - if min.gt(datum) { - return Ok(false); - } + if let Some(min) = min + && min.gt(datum) + { + return Ok(false); } - if let Some(max) = max { - if max.lt(datum) { - return Ok(false); - } + if let Some(max) = max + && max.lt(datum) + { + return Ok(false); } Ok(true) @@ -787,28 +785,164 @@ mod tests { use std::collections::HashMap; use std::sync::Arc; - use parquet::arrow::arrow_reader::RowSelector; - use parquet::basic::{LogicalType as ParquetLogicalType, Type as ParquetPhysicalType}; - use parquet::data_type::ByteArray; - use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; - use parquet::file::page_index::index::{Index, NativeIndex, PageIndex}; - use parquet::file::page_index::offset_index::OffsetIndexMetaData; - use parquet::file::statistics::Statistics; - use parquet::format::{BoundaryOrder, PageLocation}; - use parquet::schema::types::{ - ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, + use arrow_array::{ArrayRef, Float32Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use parquet::arrow::ArrowWriter; + use parquet::arrow::arrow_reader::{ + ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelector, }; + use parquet::file::metadata::ParquetMetaData; + use parquet::file::properties::WriterProperties; use rand::{Rng, thread_rng}; + use tempfile::NamedTempFile; use super::PageIndexEvaluator; use crate::expr::{Bind, Reference}; use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type}; use crate::{ErrorKind, Result}; + /// Helper function to create a test parquet file with page indexes + /// and return the metadata needed for testing + fn create_test_parquet_file() -> Result<(Arc, NamedTempFile)> { + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let props = WriterProperties::builder() + .set_data_page_row_count_limit(1024) + .set_write_batch_size(512) + .build(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + + let mut batches = vec![]; + + // Batch 1: 1024 rows - strings with AARDVARK, BEAR, BISON + let float_vals: Vec> = vec![None; 1024]; + let mut string_vals = vec![]; + string_vals.push(Some("AARDVARK".to_string())); + for _ in 1..1023 { + string_vals.push(Some("BEAR".to_string())); + } + string_vals.push(Some("BISON".to_string())); + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 2: 1024 rows - all DEER + let float_vals: Vec> = vec![None; 1024]; + let string_vals = vec![Some("DEER".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 3: 1024 rows - float 0-10 + let mut float_vals = vec![]; + for i in 0..1024 { + float_vals.push(Some(i as f32 * 10.0 / 1024.0)); + } + let mut string_vals = vec![]; + string_vals.push(Some("GIRAFFE".to_string())); + string_vals.push(None); + for _ in 2..1024 { + string_vals.push(Some("HIPPO".to_string())); + } + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 4: 1024 rows - float 10-20 + let mut float_vals = vec![None]; + for i in 1..1024 { + float_vals.push(Some(10.0 + i as f32 * 10.0 / 1024.0)); + } + let string_vals = vec![Some("HIPPO".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Write rows one at a time to give the writer a chance to split into pages + for batch in &batches { + for i in 0..batch.num_rows() { + writer.write(&batch.slice(i, 1)).unwrap(); + } + } + + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata().clone(); + + Ok((metadata, temp_file)) + } + + /// Get the test metadata components for testing + fn get_test_metadata( + metadata: &ParquetMetaData, + ) -> ( + Vec, + Vec, + &parquet::file::metadata::RowGroupMetaData, + ) { + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + (column_index, offset_index, row_group_metadata) + } + #[test] fn eval_matches_no_rows_for_empty_row_group() -> Result<()> { - let row_group_metadata = create_row_group_metadata(0, 0, None, 0, None)?; - let (column_index, offset_index) = create_page_index()?; + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let empty_float: ArrayRef = Arc::new(Float32Array::from(Vec::>::new())); + let empty_string: ArrayRef = Arc::new(StringArray::from(Vec::>::new())); + let empty_batch = + RecordBatch::try_new(arrow_schema.clone(), vec![empty_float, empty_string]).unwrap(); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema, None).unwrap(); + writer.write(&empty_batch).unwrap(); + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata(); + + if metadata.num_row_groups() == 0 || metadata.row_group(0).num_rows() == 0 { + return Ok(()); + } let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; @@ -816,27 +950,28 @@ mod tests { .greater_than(Datum::float(1.0)) .bind(iceberg_schema_ref.clone(), false)?; + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![]; - - assert_eq!(result, expected); + assert_eq!(result.len(), 0); Ok(()) } #[test] fn eval_is_null_select_only_pages_with_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -847,15 +982,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::select(1024), - RowSelector::skip(1024), RowSelector::select(2048), + RowSelector::skip(1024), + RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -865,9 +1000,8 @@ mod tests { #[test] fn eval_is_not_null_dont_select_pages_with_all_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -878,12 +1012,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -892,9 +1026,8 @@ mod tests { #[test] fn eval_is_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -905,7 +1038,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -919,9 +1052,8 @@ mod tests { #[test] fn eval_not_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -932,7 +1064,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -946,9 +1078,8 @@ mod tests { #[test] fn eval_inequality_nan_datum_all_rows_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -959,12 +1090,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -973,9 +1104,8 @@ mod tests { #[test] fn eval_inequality_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -986,16 +1116,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1005,9 +1134,8 @@ mod tests { #[test] fn eval_eq_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1018,16 +1146,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Pages 0-1: all null (skip) + // Page 2: 0-10 (select, might contain 5.0) + // Page 3: 10-20 (skip, min > 5.0) let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1037,9 +1167,8 @@ mod tests { #[test] fn eval_not_eq_all_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1050,7 +1179,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1064,9 +1193,8 @@ mod tests { #[test] fn eval_starts_with_error_float_col() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1077,7 +1205,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), ); @@ -1089,11 +1217,13 @@ mod tests { #[test] fn eval_starts_with_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test starts_with on string column where only some pages match + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing starts_with("B") should select only page 0 let filter = Reference::new("col_string") .starts_with(Datum::string("B")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1102,16 +1232,13 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![ - RowSelector::select(512), - RowSelector::skip(3536), - RowSelector::select(48), - ]; + // Page 0 has "BEAR" and "BISON" (starts with B), rest don't + let expected = vec![RowSelector::select(1024), RowSelector::skip(3072)]; assert_eq!(result, expected); @@ -1121,11 +1248,13 @@ mod tests { #[test] fn eval_not_starts_with_pages_containing_value_except_pages_with_min_and_max_equal_to_prefix_and_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test not_starts_with where one page has ALL values starting with prefix + // Our file has page 1 with all "DEER" (min="DEER", max="DEER") + // Testing not_starts_with("DE") should skip page 1 where all values start with "DE" let filter = Reference::new("col_string") .not_starts_with(Datum::string("DE")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1134,15 +1263,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0: mixed values (select) + // Page 1: all "DEER" starting with "DE" (skip) + // Pages 2-3: other values not all starting with "DE" (select) let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(3072), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(2048), ]; assert_eq!(result, expected); @@ -1153,10 +1285,8 @@ mod tests { #[test] fn eval_in_length_of_set_above_limit_all_rows() -> Result<()> { let mut rng = thread_rng(); - - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1167,7 +1297,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1181,30 +1311,32 @@ mod tests { #[test] fn eval_in_valid_set_size_some_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test is_in with multiple values using min/max bounds + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing is_in(["AARDVARK", "GIRAFFE"]) - both are in different pages let filter = Reference::new("col_string") - .is_in([Datum::string("AARDVARK"), Datum::string("ICEBERG")]) + .is_in([Datum::string("AARDVARK"), Datum::string("GIRAFFE")]) .bind(iceberg_schema_ref.clone(), false)?; let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0 contains "AARDVARK", page 1 doesn't contain either, page 2 contains "GIRAFFE", page 3 doesn't let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(2976), - RowSelector::skip(48), - RowSelector::select(48), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(1024), + RowSelector::skip(1024), ]; assert_eq!(result, expected); @@ -1235,183 +1367,4 @@ mod tests { Ok((iceberg_schema_ref, field_id_map)) } - - fn build_parquet_schema_descriptor() -> Result> { - let field_1 = Arc::new( - parquetSchemaType::primitive_type_builder("col_float", ParquetPhysicalType::FLOAT) - .with_id(Some(1)) - .build()?, - ); - - let field_2 = Arc::new( - parquetSchemaType::primitive_type_builder( - "col_string", - ParquetPhysicalType::BYTE_ARRAY, - ) - .with_id(Some(2)) - .with_logical_type(Some(ParquetLogicalType::String)) - .build()?, - ); - - let group_type = Arc::new( - parquetSchemaType::group_type_builder("all") - .with_id(Some(1000)) - .with_fields(vec![field_1, field_2]) - .build()?, - ); - - let schema_descriptor = SchemaDescriptor::new(group_type); - let schema_descriptor_arc = Arc::new(schema_descriptor); - Ok(schema_descriptor_arc) - } - - fn create_row_group_metadata( - num_rows: i64, - col_1_num_vals: i64, - col_1_stats: Option, - col_2_num_vals: i64, - col_2_stats: Option, - ) -> Result { - let schema_descriptor_arc = build_parquet_schema_descriptor()?; - - let column_1_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(0).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_float".to_string()]), - )); - - let column_2_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(1).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_string".to_string()]), - )); - - let mut col_1_meta = - ColumnChunkMetaData::builder(column_1_desc_ptr).set_num_values(col_1_num_vals); - if let Some(stats1) = col_1_stats { - col_1_meta = col_1_meta.set_statistics(stats1) - } - - let mut col_2_meta = - ColumnChunkMetaData::builder(column_2_desc_ptr).set_num_values(col_2_num_vals); - if let Some(stats2) = col_2_stats { - col_2_meta = col_2_meta.set_statistics(stats2) - } - - let row_group_metadata = RowGroupMetaData::builder(schema_descriptor_arc) - .set_num_rows(num_rows) - .set_column_metadata(vec![ - col_1_meta.build()?, - // .set_statistics(Statistics::float(None, None, None, 1, false)) - col_2_meta.build()?, - ]) - .build(); - - Ok(row_group_metadata?) - } - - fn create_page_index() -> Result<(Vec, Vec)> { - let idx_float = Index::FLOAT(NativeIndex:: { - indexes: vec![ - PageIndex { - min: None, - max: None, - null_count: Some(1024), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some(0.0), - max: Some(10.0), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some(10.0), - max: Some(20.0), - null_count: Some(1), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }, - ], - boundary_order: BoundaryOrder(0), // UNORDERED - }); - - let idx_string = Index::BYTE_ARRAY(NativeIndex:: { - indexes: vec![ - PageIndex { - min: Some("AA".into()), - max: Some("DD".into()), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some("DE".into()), - max: Some("DE".into()), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some("DF".into()), - max: Some("UJ".into()), - null_count: Some(1), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: Some(48), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }, - ], - boundary_order: BoundaryOrder(0), // UNORDERED - }); - - let page_locs_float = vec![ - PageLocation::new(0, 1024, 0), - PageLocation::new(1024, 1024, 1024), - PageLocation::new(2048, 1024, 2048), - PageLocation::new(3072, 1024, 3072), - ]; - - let page_locs_string = vec![ - PageLocation::new(0, 512, 0), - PageLocation::new(512, 512, 512), - PageLocation::new(1024, 2976, 1024), - PageLocation::new(4000, 48, 4000), - PageLocation::new(4048, 48, 4048), - ]; - - Ok((vec![idx_float, idx_string], vec![ - OffsetIndexMetaData { - page_locations: page_locs_float, - unencoded_byte_array_data_bytes: None, - }, - OffsetIndexMetaData { - page_locations: page_locs_string, - unencoded_byte_array_data_bytes: None, - }, - ])) - } } diff --git a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs index e9bed775ef..7c652e2068 100644 --- a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs @@ -129,10 +129,10 @@ impl<'a> StrictMetricsEvaluator<'a> { self.upper_bound(field_id) }; - if let Some(bound) = bound { - if cmp_fn(bound, datum) { - return ROWS_MUST_MATCH; - } + if let Some(bound) = bound + && cmp_fn(bound, datum) + { + return ROWS_MUST_MATCH; } ROWS_MIGHT_NOT_MATCH @@ -219,10 +219,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result { let field_id = reference.field().id; - if let Some(&nan_count) = self.nan_count(field_id) { - if nan_count == 0 { - return ROWS_MUST_MATCH; - } + if let Some(&nan_count) = self.nan_count(field_id) + && nan_count == 0 + { + return ROWS_MUST_MATCH; } if self.contains_nulls_only(field_id) { @@ -258,10 +258,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result { let field_id = reference.field().id; - if let Some(lower) = self.lower_bound(field_id) { - if lower.is_nan() { - return ROWS_MIGHT_NOT_MATCH; - } + if let Some(lower) = self.lower_bound(field_id) + && lower.is_nan() + { + return ROWS_MIGHT_NOT_MATCH; } self.visit_inequality(reference, datum, PartialOrd::gt, true) diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 60854b8bae..4c30ca2ec5 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -296,18 +296,18 @@ mod tests { check_record_batches( record_batch.try_collect::>().await.unwrap(), expect![[r#" - Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ diff --git a/crates/iceberg/src/inspect/metadata_table.rs b/crates/iceberg/src/inspect/metadata_table.rs index 92571db181..d5e9d60869 100644 --- a/crates/iceberg/src/inspect/metadata_table.rs +++ b/crates/iceberg/src/inspect/metadata_table.rs @@ -71,12 +71,12 @@ impl<'a> MetadataTable<'a> { } /// Get the snapshots table. - pub fn snapshots(&self) -> SnapshotsTable { + pub fn snapshots(&self) -> SnapshotsTable<'_> { SnapshotsTable::new(self.0) } /// Get the manifests table. - pub fn manifests(&self) -> ManifestsTable { + pub fn manifests(&self) -> ManifestsTable<'_> { ManifestsTable::new(self.0) } } diff --git a/crates/iceberg/src/inspect/snapshots.rs b/crates/iceberg/src/inspect/snapshots.rs index 6081ec165b..fbed7ec11e 100644 --- a/crates/iceberg/src/inspect/snapshots.rs +++ b/crates/iceberg/src/inspect/snapshots.rs @@ -151,14 +151,14 @@ mod tests { check_record_batches( batch_stream.try_collect::>().await.unwrap(), expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ 2018-01-04T21:22:35.770+00:00, 2019-04-12T20:29:15.770+00:00, diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index d5f2ad8fab..2300c42c01 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -175,7 +175,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, should start with {}", path, prefix), + format!("Invalid gcs url: {path}, should start with {prefix}"), )) } } @@ -190,7 +190,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, should start with {}", path, prefix), + format!("Invalid oss url: {path}, should start with {prefix}"), )) } } diff --git a/crates/iceberg/src/io/storage_azdls.rs b/crates/iceberg/src/io/storage_azdls.rs index fe12167f6f..5abb0cd6e0 100644 --- a/crates/iceberg/src/io/storage_azdls.rs +++ b/crates/iceberg/src/io/storage_azdls.rs @@ -165,7 +165,7 @@ impl FromStr for AzureStorageScheme { "wasbs" => Ok(AzureStorageScheme::Wasbs), _ => Err(Error::new( ErrorKind::DataInvalid, - format!("Unexpected Azure Storage scheme: {}", s), + format!("Unexpected Azure Storage scheme: {s}"), )), } } @@ -397,11 +397,11 @@ mod tests { let config = azdls_config_parse(properties); match expected { Some(expected_config) => { - assert!(config.is_ok(), "Test case {} failed: {:?}", name, config); - assert_eq!(config.unwrap(), expected_config, "Test case: {}", name); + assert!(config.is_ok(), "Test case {name} failed: {config:?}"); + assert_eq!(config.unwrap(), expected_config, "Test case: {name}"); } None => { - assert!(config.is_err(), "Test case {} expected error.", name); + assert!(config.is_err(), "Test case {name} expected error."); } } } @@ -495,14 +495,14 @@ mod tests { let result = azdls_create_operator(input.0, &input.1, &input.2); match expected { Some((expected_filesystem, expected_path)) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); let (op, relative_path) = result.unwrap(); assert_eq!(op.info().name(), expected_filesystem); assert_eq!(relative_path, expected_path); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -543,11 +543,11 @@ mod tests { let result = input.parse::(); match expected { Some(expected_path) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); - assert_eq!(result.unwrap(), expected_path, "Test case: {}", name); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); + assert_eq!(result.unwrap(), expected_path, "Test case: {name}"); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -593,7 +593,7 @@ mod tests { for (name, path, expected) in test_cases { let endpoint = path.as_endpoint(); - assert_eq!(endpoint, expected, "Test case: {}", name); + assert_eq!(endpoint, expected, "Test case: {name}"); } } } diff --git a/crates/iceberg/src/io/storage_gcs.rs b/crates/iceberg/src/io/storage_gcs.rs index 8c3d914c86..7718df603f 100644 --- a/crates/iceberg/src/io/storage_gcs.rs +++ b/crates/iceberg/src/io/storage_gcs.rs @@ -71,20 +71,20 @@ pub(crate) fn gcs_config_parse(mut m: HashMap) -> Result Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, bucket is required", path), + format!("Invalid gcs url: {path}, bucket is required"), ) })?; diff --git a/crates/iceberg/src/io/storage_oss.rs b/crates/iceberg/src/io/storage_oss.rs index 8bfffc6ca8..e82dda23a5 100644 --- a/crates/iceberg/src/io/storage_oss.rs +++ b/crates/iceberg/src/io/storage_oss.rs @@ -56,7 +56,7 @@ pub(crate) fn oss_config_build(cfg: &OssConfig, path: &str) -> Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, missing bucket", path), + format!("Invalid oss url: {path}, missing bucket"), ) })?; diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index fcf9afed1f..f069e0e2f9 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -134,20 +134,20 @@ pub(crate) fn s3_config_parse(mut m: HashMap) -> Result Deserialize<'de> for StructType { let type_val: String = map.next_value()?; if type_val != "struct" { return Err(serde::de::Error::custom(format!( - "expected type 'struct', got '{}'", - type_val + "expected type 'struct', got '{type_val}'" ))); } } diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index ebb0590bcf..389ac7a1fd 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -388,10 +388,10 @@ impl ManifestWriter { self.existing_rows += entry.data_file.record_count; } } - if entry.is_alive() { - if let Some(seq_num) = entry.sequence_number { - self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); - } + if entry.is_alive() + && let Some(seq_num) = entry.sequence_number + { + self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); } self.manifest_entries.push(entry); Ok(()) diff --git a/crates/iceberg/src/spec/schema/prune_columns.rs b/crates/iceberg/src/spec/schema/prune_columns.rs index 5a2f0b50fc..14f1bfd25f 100644 --- a/crates/iceberg/src/spec/schema/prune_columns.rs +++ b/crates/iceberg/src/spec/schema/prune_columns.rs @@ -110,19 +110,19 @@ impl SchemaVisitor for PruneColumn { if self.select_full_types { Ok(Some(*field.field_type.clone())) } else if field.field_type.is_struct() { - return Ok(Some(Type::Struct(PruneColumn::project_selected_struct( + Ok(Some(Type::Struct(PruneColumn::project_selected_struct( value, - )?))); + )?))) } else if !field.field_type.is_nested() { - return Ok(Some(*field.field_type.clone())); + Ok(Some(*field.field_type.clone())) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "Can't project list or map field directly when not selecting full type." .to_string(), ) .with_context("field_id", field.id.to_string()) - .with_context("field_type", field.field_type.to_string())); + .with_context("field_type", field.field_type.to_string())) } } else { Ok(value) @@ -174,20 +174,20 @@ impl SchemaVisitor for PruneColumn { Ok(Some(Type::List(list.clone()))) } else if list.element_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(value).unwrap(); - return Ok(Some(Type::List(PruneColumn::project_list( + Ok(Some(Type::List(PruneColumn::project_list( list, Type::Struct(projected_struct), - )?))); + )?))) } else if list.element_field.field_type.is_primitive() { - return Ok(Some(Type::List(list.clone()))); + Ok(Some(Type::List(list.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, List element {} of type {} was selected", list.element_field.id, list.element_field.field_type ), - )); + )) } } else if let Some(result) = value { Ok(Some(Type::List(PruneColumn::project_list(list, result)?))) @@ -208,26 +208,26 @@ impl SchemaVisitor for PruneColumn { } else if map.value_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(Some(value.unwrap())).unwrap(); - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, Type::Struct(projected_struct), - )?))); + )?))) } else if map.value_field.field_type.is_primitive() { - return Ok(Some(Type::Map(map.clone()))); + Ok(Some(Type::Map(map.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, Map value {} of type {} was selected", map.value_field.id, map.value_field.field_type ), - )); + )) } } else if let Some(value_result) = value { - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, value_result, - )?))); + )?))) } else if self.selected.contains(&map.key_field.id) { Ok(Some(Type::Map(map.clone()))) } else { diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 06b32cc847..48b715da59 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -390,18 +390,18 @@ impl TableMetadata { } fn construct_refs(&mut self) { - if let Some(current_snapshot_id) = self.current_snapshot_id { - if !self.refs.contains_key(MAIN_BRANCH) { - self.refs - .insert(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id: current_snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, - }, - }); - } + if let Some(current_snapshot_id) = self.current_snapshot_id + && !self.refs.contains_key(MAIN_BRANCH) + { + self.refs + .insert(MAIN_BRANCH.to_string(), SnapshotReference { + snapshot_id: current_snapshot_id, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }); } } @@ -572,17 +572,17 @@ impl TableMetadata { let main_ref = self.refs.get(MAIN_BRANCH); if self.current_snapshot_id.is_some() { - if let Some(main_ref) = main_ref { - if main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Current snapshot id does not match main branch ({:?} != {:?})", - self.current_snapshot_id.unwrap_or_default(), - main_ref.snapshot_id - ), - )); - } + if let Some(main_ref) = main_ref + && main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Current snapshot id does not match main branch ({:?} != {:?})", + self.current_snapshot_id.unwrap_or_default(), + main_ref.snapshot_id + ), + )); } } else if main_ref.is_some() { return Err(Error::new( @@ -606,22 +606,21 @@ impl TableMetadata { )); } - if self.format_version >= FormatVersion::V2 { - if let Some(snapshot) = self + if self.format_version >= FormatVersion::V2 + && let Some(snapshot) = self .snapshots .values() .find(|snapshot| snapshot.sequence_number() > self.last_sequence_number) - { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", - snapshot.snapshot_id(), - snapshot.sequence_number(), - self.last_sequence_number - ), - )); - } + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", + snapshot.snapshot_id(), + snapshot.sequence_number(), + self.last_sequence_number + ), + )); } Ok(()) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6068716eff..354dc1889c 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -711,10 +711,10 @@ impl Transform { PredicateOperator::GreaterThan => Some(PredicateOperator::GreaterThanOrEq), PredicateOperator::StartsWith => match datum.literal() { PrimitiveLiteral::String(s) => { - if let Some(w) = width { - if s.len() == w as usize { - return Some(PredicateOperator::Eq); - }; + if let Some(w) = width + && s.len() == w as usize + { + return Some(PredicateOperator::Eq); }; Some(*op) } @@ -757,47 +757,45 @@ impl Transform { _ => false, }; - if should_adjust { - if let &PrimitiveLiteral::Int(v) = transformed.literal() { - match op { - PredicateOperator::LessThan - | PredicateOperator::LessThanOrEq - | PredicateOperator::In => { - if v < 0 { + if should_adjust && let &PrimitiveLiteral::Int(v) = transformed.literal() { + match op { + PredicateOperator::LessThan + | PredicateOperator::LessThanOrEq + | PredicateOperator::In => { + if v < 0 { + // # TODO + // An ugly hack to fix. Refine the increment and decrement logic later. + match self { + Transform::Day => { + return Some(AdjustedProjection::Single(Datum::date(v + 1))); + } + _ => { + return Some(AdjustedProjection::Single(Datum::int(v + 1))); + } + } + }; + } + PredicateOperator::Eq => { + if v < 0 { + let new_set = FnvHashSet::from_iter(vec![ + transformed.to_owned(), // # TODO // An ugly hack to fix. Refine the increment and decrement logic later. - match self { - Transform::Day => { - return Some(AdjustedProjection::Single(Datum::date(v + 1))); + { + match self { + Transform::Day => Datum::date(v + 1), + _ => Datum::int(v + 1), } - _ => { - return Some(AdjustedProjection::Single(Datum::int(v + 1))); - } - } - }; - } - PredicateOperator::Eq => { - if v < 0 { - let new_set = FnvHashSet::from_iter(vec![ - transformed.to_owned(), - // # TODO - // An ugly hack to fix. Refine the increment and decrement logic later. - { - match self { - Transform::Day => Datum::date(v + 1), - _ => Datum::int(v + 1), - } - }, - ]); - return Some(AdjustedProjection::Set(new_set)); - } - } - _ => { - return None; + }, + ]); + return Some(AdjustedProjection::Set(new_set)); } } - }; - } + _ => { + return None; + } + } + }; None } diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 0e99d44dfe..73343a9a1a 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -447,7 +447,7 @@ fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec, expected_type: & let avro_value = Value::Bytes(input_bytes); let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap(); let result = raw_literal.try_into(expected_type); - assert!(result.is_err(), "Expected error but got: {:?}", result); + assert!(result.is_err(), "Expected error but got: {result:?}"); } #[test] diff --git a/crates/iceberg/src/spec/view_metadata_builder.rs b/crates/iceberg/src/spec/view_metadata_builder.rs index 9f542a7c61..38041ca625 100644 --- a/crates/iceberg/src/spec/view_metadata_builder.rs +++ b/crates/iceberg/src/spec/view_metadata_builder.rs @@ -478,10 +478,10 @@ impl ViewMetadataBuilder { // as it might panic if the metadata is invalid. self.metadata.validate()?; - if let Some(previous) = self.previous_view_version.take() { - if !allow_replace_drop_dialects(&self.metadata.properties) { - require_no_dialect_dropped(&previous, self.metadata.current_version())?; - } + if let Some(previous) = self.previous_view_version.take() + && !allow_replace_drop_dialects(&self.metadata.properties) + { + require_no_dialect_dropped(&previous, self.metadata.current_version())?; } let _expired_versions = self.expire_versions(); diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 4116264a14..8ddaa26698 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -518,7 +518,7 @@ mod test_row_lineage { fn file_with_rows(record_count: u64) -> DataFile { DataFileBuilder::default() .content(DataContentType::Data) - .file_path(format!("test/{}.parquet", record_count)) + .file_path(format!("test/{record_count}.parquet")) .file_format(DataFileFormat::Parquet) .file_size_in_bytes(100) .record_count(record_count) diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index d59828ce31..c8bf26a174 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -276,13 +276,13 @@ impl<'a> SnapshotProducer<'a> { "Partition field should only be primitive type.", ) })?; - if let Some(value) = value { - if !field.compatible(&value.as_primitive_literal().unwrap()) { - return Err(Error::new( - ErrorKind::DataInvalid, - "Partition value is not compatible partition type", - )); - } + if let Some(value) = value + && !field.compatible(&value.as_primitive_literal().unwrap()) + { + return Err(Error::new( + ErrorKind::DataInvalid, + "Partition value is not compatible partition type", + )); } } Ok(()) diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 5cf031a9fb..356c2cb43d 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -27,12 +27,9 @@ use itertools::Itertools; use parquet::arrow::AsyncArrowWriter; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::async_writer::AsyncFileWriter as ArrowAsyncFileWriter; -use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; +use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::WriterProperties; use parquet::file::statistics::Statistics; -use parquet::format::FileMetaData; -use parquet::thrift::{TCompactOutputProtocol, TSerializable}; -use thrift::protocol::TOutputProtocol; use super::{FileWriter, FileWriterBuilder}; use crate::arrow::{ @@ -349,29 +346,6 @@ impl ParquetWriter { Ok(data_files) } - fn thrift_to_parquet_metadata(&self, file_metadata: FileMetaData) -> Result { - let mut buffer = Vec::new(); - { - let mut protocol = TCompactOutputProtocol::new(&mut buffer); - file_metadata - .write_to_out_protocol(&mut protocol) - .map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to write parquet metadata") - .with_source(err) - })?; - - protocol.flush().map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to flush protocol").with_source(err) - })?; - } - - let parquet_metadata = ParquetMetaDataReader::decode_metadata(&buffer).map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to decode parquet metadata").with_source(err) - })?; - - Ok(parquet_metadata) - } - /// `ParquetMetadata` to data file builder pub(crate) fn parquet_to_data_file_builder( schema: SchemaRef, @@ -564,14 +538,7 @@ impl FileWriter for ParquetWriter { })?; Ok(vec![]) } else { - let parquet_metadata = - Arc::new(self.thrift_to_parquet_metadata(metadata).map_err(|err| { - Error::new( - ErrorKind::Unexpected, - "Failed to convert metadata from thrift to parquet.", - ) - .with_source(err) - })?); + let parquet_metadata = Arc::new(metadata); Ok(vec![Self::parquet_to_data_file_builder( self.schema, diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 06246ab660..a93e494d48 100644 --- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -197,18 +197,18 @@ where ); } - if self.should_roll() { - if let Some(inner) = self.inner.take() { - // close the current writer, roll to a new file - self.data_file_builders.extend(inner.close().await?); - - // start a new writer - self.inner = Some( - self.inner_builder - .build(self.new_output_file(partition_key)?) - .await?, - ); - } + if self.should_roll() + && let Some(inner) = self.inner.take() + { + // close the current writer, roll to a new file + self.data_file_builders.extend(inner.close().await?); + + // start a new writer + self.inner = Some( + self.inner_builder + .build(self.new_output_file(partition_key)?) + .await?, + ); } // write the input diff --git a/crates/iceberg/tests/file_io_gcs_test.rs b/crates/iceberg/tests/file_io_gcs_test.rs index 161285ae6f..9fbcdadd0e 100644 --- a/crates/iceberg/tests/file_io_gcs_test.rs +++ b/crates/iceberg/tests/file_io_gcs_test.rs @@ -68,7 +68,7 @@ mod tests { FileIOBuilder::new("gcs") .with_props(vec![ - (GCS_SERVICE_PATH, format!("http://{}", addr)), + (GCS_SERVICE_PATH, format!("http://{addr}")), (GCS_NO_AUTH, "true".to_string()), ]) .build() @@ -81,13 +81,13 @@ mod tests { bucket_data.insert("name", name); let client = reqwest::Client::new(); - let endpoint = format!("http://{}/storage/v1/b", server_addr); + let endpoint = format!("http://{server_addr}/storage/v1/b"); client.post(endpoint).json(&bucket_data).send().await?; Ok(()) } fn get_gs_path() -> String { - format!("gs://{}", FAKE_GCS_BUCKET) + format!("gs://{FAKE_GCS_BUCKET}") } #[tokio::test] diff --git a/crates/integrations/datafusion/src/physical_plan/repartition.rs b/crates/integrations/datafusion/src/physical_plan/repartition.rs index 8ad87fd1cc..2d1d7f862c 100644 --- a/crates/integrations/datafusion/src/physical_plan/repartition.rs +++ b/crates/integrations/datafusion/src/physical_plan/repartition.rs @@ -159,9 +159,8 @@ fn determine_partitioning_strategy( // Case 2: Partitioned table missing _partition column (normally this should not happen) (true, Err(_)) => Err(DataFusionError::Plan(format!( - "Partitioned table input missing {} column. \ - Ensure projection happens before repartitioning.", - PROJECTED_PARTITION_VALUE_COLUMN + "Partitioned table input missing {PROJECTED_PARTITION_VALUE_COLUMN} column. \ + Ensure projection happens before repartitioning." ))), // Case 3: Unpartitioned table, always use RoundRobinBatch @@ -508,8 +507,7 @@ mod tests { assert!( column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()), - "Should use _partition column, got: {:?}", - column_names + "Should use _partition column, got: {column_names:?}" ); } _ => panic!("Expected Hash partitioning with Identity transform"), @@ -733,8 +731,7 @@ mod tests { .collect(); assert!( column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()), - "Should use _partition column for mixed transforms with Identity, got: {:?}", - column_names + "Should use _partition column for mixed transforms with Identity, got: {column_names:?}" ); } _ => panic!("Expected Hash partitioning for table with identity transforms"), diff --git a/crates/integrations/datafusion/src/physical_plan/sort.rs b/crates/integrations/datafusion/src/physical_plan/sort.rs index 2a57e16e43..ede2547535 100644 --- a/crates/integrations/datafusion/src/physical_plan/sort.rs +++ b/crates/integrations/datafusion/src/physical_plan/sort.rs @@ -53,8 +53,7 @@ pub(crate) fn sort_by_partition(input: Arc) -> DFResult Result<()> { check_record_batches( snapshots, expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ ], snapshot_id: PrimitiveArray @@ -386,18 +386,18 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( manifests, expect![[r#" - Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ @@ -504,8 +504,8 @@ async fn test_insert_into() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "foo1", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "foo2", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }"#]], + Field { "foo1": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "foo2": Utf8, metadata: {"PARQUET:field_id": "2"} }"#]], expect![[r#" foo1: PrimitiveArray [ @@ -658,9 +658,9 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "profile", data_type: Struct([Field { name: "address", data_type: Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, Field { name: "contact", data_type: Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "profile": nullable Struct("address": Struct("street": non-null Utf8, metadata: {"PARQUET:field_id": "6"}, "city": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "zip": non-null Int32, metadata: {"PARQUET:field_id": "8"}), metadata: {"PARQUET:field_id": "4"}, "contact": Struct("email": Utf8, metadata: {"PARQUET:field_id": "9"}, "phone": Utf8, metadata: {"PARQUET:field_id": "10"}), metadata: {"PARQUET:field_id": "5"}), metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -679,7 +679,7 @@ async fn test_insert_into_nested() -> Result<()> { valid, ] [ - -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }])) + -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, metadata: {"PARQUET:field_id": "8"} }])) StructArray -- validity: [ @@ -706,7 +706,7 @@ async fn test_insert_into_nested() -> Result<()> { 95113, ] ] - -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }])) + -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "10"} }])) StructArray -- validity: [ @@ -757,13 +757,13 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][street]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][city]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][zip]", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][email]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][phone]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][street]": nullable Utf8, metadata: {"PARQUET:field_id": "6"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][city]": nullable Utf8, metadata: {"PARQUET:field_id": "7"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][zip]": nullable Int32, metadata: {"PARQUET:field_id": "8"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][email]": nullable Utf8, metadata: {"PARQUET:field_id": "9"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][phone]": nullable Utf8, metadata: {"PARQUET:field_id": "10"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -884,9 +884,9 @@ async fn test_insert_into_partitioned() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "category", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "category": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "value": Utf8, metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -923,25 +923,22 @@ async fn test_insert_into_partitioned() -> Result<()> { let file_io = table.file_io(); // List files under each expected partition path - let electronics_path = format!("{}/data/category=electronics", table_location); - let books_path = format!("{}/data/category=books", table_location); - let clothing_path = format!("{}/data/category=clothing", table_location); + let electronics_path = format!("{table_location}/data/category=electronics"); + let books_path = format!("{table_location}/data/category=books"); + let clothing_path = format!("{table_location}/data/category=clothing"); // Verify partition directories exist and contain data files assert!( file_io.exists(&electronics_path).await?, - "Expected partition directory: {}", - electronics_path + "Expected partition directory: {electronics_path}" ); assert!( file_io.exists(&books_path).await?, - "Expected partition directory: {}", - books_path + "Expected partition directory: {books_path}" ); assert!( file_io.exists(&clothing_path).await?, - "Expected partition directory: {}", - clothing_path + "Expected partition directory: {clothing_path}" ); Ok(()) diff --git a/crates/integrations/playground/src/main.rs b/crates/integrations/playground/src/main.rs index c522209957..94068bb558 100644 --- a/crates/integrations/playground/src/main.rs +++ b/crates/integrations/playground/src/main.rs @@ -24,6 +24,7 @@ use clap::Parser; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_cli::exec; +use datafusion_cli::object_storage::instrumented::InstrumentedObjectStoreRegistry; use datafusion_cli::print_format::PrintFormat; use datafusion_cli::print_options::{MaxRows, PrintOptions}; use iceberg_playground::{ICEBERG_PLAYGROUND_VERSION, IcebergCatalogList}; @@ -94,6 +95,7 @@ async fn main_inner() -> anyhow::Result<()> { quiet: args.quiet, maxrows: args.maxrows, color: args.color, + instrumented_registry: Arc::new(InstrumentedObjectStoreRegistry::new()), }; let rc = match args.rc { diff --git a/rust-toolchain.toml b/rust-toolchain.toml index ff7d1f7fbb..a7ab41ac63 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -19,6 +19,7 @@ # and only MSRV is required. # # The channel is exactly same day for our MSRV. +# Updated to 1.88 MSRV for testing with datafusion main [toolchain] -channel = "nightly-2025-03-28" +channel = "nightly-2025-06-23" components = ["rustfmt", "clippy"]