From 3aaf5a1b06446a92b9eea5e94771121cb81a7765 Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Mon, 24 Nov 2025 15:40:14 +0800 Subject: [PATCH 1/8] setup --- .../datastructure/pattern/TreePattern.java | 256 ++++++++++++------ 1 file changed, 176 insertions(+), 80 deletions(-) diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index e9b0fbe2180e..aa13508debb2 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -140,60 +140,72 @@ public static TreePattern parsePipePatternFromSourceParameters( final boolean isTreeModelDataAllowedToBeCaptured = isTreeModelDataAllowToBeCaptured(sourceParameters); - // 1. Define the default inclusion pattern (matches all, "root.**") - // This is used if no inclusion patterns are specified. - final TreePattern defaultInclusionPattern = - buildUnionPattern( - isTreeModelDataAllowedToBeCaptured, - Collections.singletonList( - new IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, null))); - - // 2. Parse INCLUSION patterns using the helper - final TreePattern inclusionPattern = - parsePatternUnion( + // 1. 
Parse INCLUSION patterns into a list + List inclusionPatterns = + parsePatternList( sourceParameters, isTreeModelDataAllowedToBeCaptured, - // 'path' keys (IoTDB wildcard) EXTRACTOR_PATH_KEY, SOURCE_PATH_KEY, - // 'pattern' keys (Prefix or IoTDB via format) EXTRACTOR_PATTERN_KEY, - SOURCE_PATTERN_KEY, - // Default pattern if no keys are found - defaultInclusionPattern); + SOURCE_PATTERN_KEY); + + // If no inclusion patterns are specified, use default "root.**" + if (inclusionPatterns.isEmpty()) { + inclusionPatterns = + new ArrayList<>( + Collections.singletonList( + new IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, null))); + } - // 3. Parse EXCLUSION patterns using the helper - final TreePattern exclusionPattern = - parsePatternUnion( + // 2. Parse EXCLUSION patterns into a list + List exclusionPatterns = + parsePatternList( sourceParameters, isTreeModelDataAllowedToBeCaptured, - // 'path.exclusion' keys (IoTDB wildcard) EXTRACTOR_PATH_EXCLUSION_KEY, SOURCE_PATH_EXCLUSION_KEY, - // 'pattern.exclusion' keys (Prefix) EXTRACTOR_PATTERN_EXCLUSION_KEY, - SOURCE_PATTERN_EXCLUSION_KEY, - // Default for exclusion is "match nothing" (null) - null); - - // 4. Combine inclusion and exclusion - if (exclusionPattern == null) { - // No exclusion defined, return the inclusion pattern directly - return inclusionPattern; - } else { - // If both inclusion and exclusion patterns support IoTDB operations, - // use the specialized ExclusionIoTDBTreePattern - if (inclusionPattern instanceof IoTDBTreePatternOperations - && exclusionPattern instanceof IoTDBTreePatternOperations) { - return new WithExclusionIoTDBTreePattern( - isTreeModelDataAllowedToBeCaptured, - (IoTDBTreePatternOperations) inclusionPattern, - (IoTDBTreePatternOperations) exclusionPattern); - } - // Both are defined, wrap them in an ExclusionTreePattern - return new WithExclusionTreePattern( - isTreeModelDataAllowedToBeCaptured, inclusionPattern, exclusionPattern); + SOURCE_PATTERN_EXCLUSION_KEY); + + // 3. 
Optimize the lists: remove redundant patterns (e.g., if "root.**" exists, "root.db" is + // redundant) + inclusionPatterns = optimizePatterns(inclusionPatterns); + exclusionPatterns = optimizePatterns(exclusionPatterns); + + // 4. Prune inclusion patterns: if an inclusion pattern is fully covered by an exclusion + // pattern, remove it + inclusionPatterns = pruneInclusionPatterns(inclusionPatterns, exclusionPatterns); + + // 5. Check if the resulting inclusion pattern is empty + if (inclusionPatterns.isEmpty()) { + throw new PipeException( + "Pipe: The inclusion pattern is empty after pruning by the exclusion pattern. " + + "This pipe pattern will match nothing."); + } + + // 6. Build final patterns + final TreePattern finalInclusionPattern = + buildUnionPattern(isTreeModelDataAllowedToBeCaptured, inclusionPatterns); + + if (exclusionPatterns.isEmpty()) { + return finalInclusionPattern; + } + + final TreePattern finalExclusionPattern = + buildUnionPattern(isTreeModelDataAllowedToBeCaptured, exclusionPatterns); + + // 7. Combine inclusion and exclusion + if (finalInclusionPattern instanceof IoTDBTreePatternOperations + && finalExclusionPattern instanceof IoTDBTreePatternOperations) { + return new WithExclusionIoTDBTreePattern( + isTreeModelDataAllowedToBeCaptured, + (IoTDBTreePatternOperations) finalInclusionPattern, + (IoTDBTreePatternOperations) finalExclusionPattern); } + + return new WithExclusionTreePattern( + isTreeModelDataAllowedToBeCaptured, finalInclusionPattern, finalExclusionPattern); } /** @@ -274,65 +286,145 @@ public static TreePattern parsePatternFromString( } /** - * A private helper method to parse a set of 'path' and 'pattern' keys into a single union - * TreePattern. This contains the original logic of parsePipePatternFromSourceParameters. - * - * @param sourceParameters The source parameters. - * @param isTreeModelDataAllowedToBeCaptured Flag for TreePattern constructor. 
- * @param extractorPathKey Key for extractor path (e.g., "extractor.path"). - * @param sourcePathKey Key for source path (e.g., "source.path"). - * @param extractorPatternKey Key for extractor pattern (e.g., "extractor.pattern"). - * @param sourcePatternKey Key for source pattern (e.g., "source.pattern"). - * @param defaultPattern The pattern to return if both path and pattern are null. If this - * parameter is null, this method returns null. - * @return The parsed TreePattern, or defaultPattern, or null if defaultPattern is null and no - * patterns are specified. + * Helper method to parse pattern parameters into a list of patterns without creating the Union + * object immediately. */ - private static TreePattern parsePatternUnion( + private static List parsePatternList( final PipeParameters sourceParameters, final boolean isTreeModelDataAllowedToBeCaptured, final String extractorPathKey, final String sourcePathKey, final String extractorPatternKey, - final String sourcePatternKey, - final TreePattern defaultPattern) { + final String sourcePatternKey) { final String path = sourceParameters.getStringByKeys(extractorPathKey, sourcePathKey); final String pattern = sourceParameters.getStringByKeys(extractorPatternKey, sourcePatternKey); - // 1. If both "source.path" and "source.pattern" are specified, their union will be used. - if (path != null && pattern != null) { - final List result = new ArrayList<>(); - // Parse "source.path" as IoTDB-style path. + final List result = new ArrayList<>(); + + if (path != null) { result.addAll( parseMultiplePatterns( path, p -> new IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p))); - // Parse "source.pattern" using the helper method. + } + + if (pattern != null) { result.addAll( parsePatternsFromPatternParameter( pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured)); - return buildUnionPattern(isTreeModelDataAllowedToBeCaptured, result); } - // 2. 
If only "source.path" is specified, it will be interpreted as an IoTDB-style path. - if (path != null) { - return buildUnionPattern( - isTreeModelDataAllowedToBeCaptured, - parseMultiplePatterns( - path, p -> new IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p))); + return result; + } + + /** + * Removes patterns from the list that are covered by other patterns in the same list. For + * example, if "root.**" and "root.db.**" are present, "root.db.**" is removed. + */ + private static List optimizePatterns(final List patterns) { + if (patterns == null || patterns.isEmpty()) { + return new ArrayList<>(); + } + if (patterns.size() == 1) { + return patterns; } - // 3. If only "source.pattern" is specified, parse it using the helper method. - if (pattern != null) { - return buildUnionPattern( - isTreeModelDataAllowedToBeCaptured, - parsePatternsFromPatternParameter( - pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured)); + final List optimized = new ArrayList<>(); + // Determine coverage using base paths + for (int i = 0; i < patterns.size(); i++) { + final TreePattern current = patterns.get(i); + boolean isCoveredByOther = false; + + for (int j = 0; j < patterns.size(); j++) { + if (i == j) { + continue; + } + final TreePattern other = patterns.get(j); + + // If 'other' covers 'current', 'current' is redundant. + // Note: if they mutually cover each other (duplicates), we must ensure we keep one. + // We use index comparison to break ties for exact duplicates. + if (covers(other, current)) { + if (covers(current, other)) { + // Both cover each other (likely identical). Keep the one with smaller index. + if (j < i) { + isCoveredByOther = true; + break; + } + } else { + // Strict coverage + isCoveredByOther = true; + break; + } + } + } + + if (!isCoveredByOther) { + optimized.add(current); + } + } + return optimized; + } + + /** + * Prunes patterns from the inclusion list that are fully covered by ANY pattern in the exclusion + * list. 
+ */ + private static List pruneInclusionPatterns( + final List inclusion, final List exclusion) { + if (inclusion == null || inclusion.isEmpty()) { + return new ArrayList<>(); + } + if (exclusion == null || exclusion.isEmpty()) { + return inclusion; } - // 4. If neither "source.path" nor "source.pattern" is specified, - // return the provided default pattern (which may be null). - return defaultPattern; + final List prunedInclusion = new ArrayList<>(); + for (final TreePattern inc : inclusion) { + boolean isFullyExcluded = false; + for (final TreePattern exc : exclusion) { + if (covers(exc, inc)) { + isFullyExcluded = true; + break; + } + } + if (!isFullyExcluded) { + prunedInclusion.add(inc); + } + } + return prunedInclusion; + } + + /** Checks if 'coverer' pattern fully covers 'coveree' pattern. */ + private static boolean covers(final TreePattern coverer, final TreePattern coveree) { + try { + final List covererPaths = coverer.getBaseInclusionPaths(); + final List covereePaths = coveree.getBaseInclusionPaths(); + + if (covererPaths.isEmpty() || covereePaths.isEmpty()) { + return false; + } + + // Logic: For 'coverer' to cover 'coveree', ALL paths in 'coveree' must be included + // by at least one path in 'coverer'. + for (final PartialPath sub : covereePaths) { + boolean isSubCovered = false; + for (final PartialPath sup : covererPaths) { + if (sup.include(sub)) { + isSubCovered = true; + break; + } + } + if (!isSubCovered) { + return false; + } + } + return true; + } catch (final Exception e) { + // In case of path parsing errors or unsupported operations, assume no coverage + // to be safe and avoid aggressive pruning. 
+ return false; + } } /** @@ -412,6 +504,10 @@ private static List parseMultiplePatterns( */ private static TreePattern buildUnionPattern( final boolean isTreeModelDataAllowedToBeCaptured, final List patterns) { + if (patterns.size() == 1) { + return patterns.get(0); + } + // Check if all instances in the list are of type IoTDBTreePattern boolean allIoTDB = true; for (final TreePattern p : patterns) { From 4ac016a588048cad1d6f772c9e7a05704d76a34c Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Mon, 24 Nov 2025 15:41:20 +0800 Subject: [PATCH 2/8] reset --- .../iotdb/commons/pipe/datastructure/pattern/TreePattern.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index aa13508debb2..b189bc4e61d4 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an From 3712c1d89a0ada536bfb3b6955995cb9a3d7d53c Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Mon, 1 Dec 2025 14:22:18 +0800 Subject: [PATCH 3/8] add UT --- .../pipe/pattern/TreePatternPruningTest.java | 168 ++++++++++++++++++ .../pattern/UnionIoTDBTreePattern.java | 6 + .../WithExclusionIoTDBTreePattern.java | 11 ++ .../pattern/WithExclusionTreePattern.java | 11 ++ 4 files changed, 196 insertions(+) create mode 100644 iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java new file mode 100644 index 000000000000..798c6d5cef19 --- /dev/null +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.pipe.pattern; + +import org.apache.iotdb.commons.pipe.config.constant.PipeSourceConstant; +import org.apache.iotdb.commons.pipe.datastructure.pattern.IoTDBTreePattern; +import org.apache.iotdb.commons.pipe.datastructure.pattern.TreePattern; +import org.apache.iotdb.commons.pipe.datastructure.pattern.UnionIoTDBTreePattern; +import org.apache.iotdb.commons.pipe.datastructure.pattern.WithExclusionIoTDBTreePattern; +import org.apache.iotdb.commons.pipe.datastructure.pattern.WithExclusionTreePattern; +import org.apache.iotdb.pipe.api.customizer.parameter.PipeParameters; +import org.apache.iotdb.pipe.api.exception.PipeException; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; + +public class TreePatternPruningTest { + + @Test + public void testUnionInternalPruning_Cover() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATH_KEY, "root.db.d1.*,root.db.d1.s1"); + } + }); + + final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); + + Assert.assertTrue("Should be IoTDBTreePattern", result instanceof IoTDBTreePattern); + Assert.assertEquals("root.db.d1.*", result.getPattern()); + } + + @Test + public void testUnionInternalPruning_Duplicates() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATH_KEY, "root.db.d1,root.db.d1"); + } + }); + + final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); + + Assert.assertTrue(result instanceof IoTDBTreePattern); + Assert.assertEquals("root.db.d1", result.getPattern()); + } + + @Test + public void testInclusionPrunedByExclusion_Partial() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATH_KEY, "root.sg.d1,root.sg.d2"); + put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY, "root.sg.d1"); + } + }); + + final TreePattern result = 
TreePattern.parsePipePatternFromSourceParameters(params); + + Assert.assertTrue(result instanceof WithExclusionIoTDBTreePattern); + final WithExclusionIoTDBTreePattern exclusionPattern = (WithExclusionIoTDBTreePattern) result; + + Assert.assertEquals("root.sg.d2", exclusionPattern.getInclusionPattern().getPattern()); + Assert.assertEquals("root.sg.d1", exclusionPattern.getExclusionPattern().getPattern()); + } + + @Test + public void testInclusionPrunedByExclusion_FullCoverage_Exception() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATH_KEY, "root.sg.d1"); + put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY, "root.sg.**"); + } + }); + + try { + TreePattern.parsePipePatternFromSourceParameters(params); + Assert.fail("Should throw PipeException because Exclusion fully covers Inclusion"); + } catch (final PipeException ignored) { + // Expected exception + } + } + + @Test + public void testComplexPruning() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATH_KEY, "root.sg.A,root.sg.B,root.sg.A.sub"); + put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY, "root.sg.A,root.sg.A.**"); + } + }); + + final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); + + Assert.assertTrue(result instanceof WithExclusionIoTDBTreePattern); + final WithExclusionIoTDBTreePattern excPattern = (WithExclusionIoTDBTreePattern) result; + + Assert.assertEquals("root.sg.B", excPattern.getInclusionPattern().getPattern()); + } + + @Test + public void testComplexPruning_Prefix() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATTERN_KEY, "root.sg.A,root.sg.B,root.sg.A.sub"); + put(PipeSourceConstant.SOURCE_PATTERN_EXCLUSION_KEY, "root.sg.A"); + put(PipeSourceConstant.SOURCE_PATTERN_FORMAT_KEY, "prefix"); + } + }); + + final TreePattern result = 
TreePattern.parsePipePatternFromSourceParameters(params); + + Assert.assertTrue(result instanceof WithExclusionTreePattern); + final WithExclusionTreePattern excPattern = (WithExclusionTreePattern) result; + + Assert.assertEquals("root.sg.B", excPattern.getInclusionPattern().getPattern()); + } + + @Test + public void testUnionPreservedWhenNotCovered() { + final PipeParameters params = + new PipeParameters( + new HashMap() { + { + put(PipeSourceConstant.SOURCE_PATH_KEY, "root.sg.d1,root.sg.d2"); + put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY, "root.other"); + } + }); + + final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); + + Assert.assertTrue(result instanceof WithExclusionIoTDBTreePattern); + final WithExclusionIoTDBTreePattern excResult = (WithExclusionIoTDBTreePattern) result; + + Assert.assertTrue(excResult.getInclusionPattern() instanceof UnionIoTDBTreePattern); + final UnionIoTDBTreePattern unionInc = (UnionIoTDBTreePattern) excResult.getInclusionPattern(); + Assert.assertEquals(2, unionInc.getPatterns().size()); + } +} diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java index 1d047ed89b8e..379232ad2534 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java @@ -21,6 +21,7 @@ import org.apache.iotdb.commons.path.PartialPath; import org.apache.iotdb.commons.path.PathPatternTree; +import org.apache.iotdb.commons.utils.TestOnly; import org.apache.tsfile.file.metadata.IDeviceID; @@ -55,6 +56,11 @@ public UnionIoTDBTreePattern(final IoTDBTreePattern pattern) { this.patterns = Collections.singletonList(pattern); } + @TestOnly + public List 
getPatterns() { + return patterns; + } + //////////////////////////// Tree Pattern Operations //////////////////////////// @Override diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java index f2e580e9cdfe..a2ac63327cca 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java @@ -21,6 +21,7 @@ import org.apache.iotdb.commons.path.PartialPath; import org.apache.iotdb.commons.path.PathPatternTree; +import org.apache.iotdb.commons.utils.TestOnly; import org.apache.tsfile.file.metadata.IDeviceID; @@ -56,6 +57,16 @@ public WithExclusionIoTDBTreePattern( this(true, inclusionPattern, exclusionPattern); } + @TestOnly + public IoTDBTreePatternOperations getInclusionPattern() { + return inclusionPattern; + } + + @TestOnly + public IoTDBTreePatternOperations getExclusionPattern() { + return exclusionPattern; + } + //////////////////////////// Tree Pattern Operations //////////////////////////// @Override diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java index 0cf1d0d1179a..772f54420701 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java @@ -20,6 +20,7 @@ package org.apache.iotdb.commons.pipe.datastructure.pattern; import 
org.apache.iotdb.commons.path.PartialPath; +import org.apache.iotdb.commons.utils.TestOnly; import org.apache.tsfile.file.metadata.IDeviceID; @@ -48,6 +49,16 @@ public WithExclusionTreePattern( TreePattern.checkAndLogPatternCoverage(inclusionPattern, exclusionPattern); } + @TestOnly + public TreePattern getInclusionPattern() { + return inclusionPattern; + } + + @TestOnly + public TreePattern getExclusionPattern() { + return exclusionPattern; + } + @Override public String getPattern() { return "INCLUSION(" From 129f8b5815ff0ef80b01561c88c2b99871946590 Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Sun, 7 Dec 2025 16:52:57 +0800 Subject: [PATCH 4/8] apply review --- .../pipe/pattern/TreePatternPruningTest.java | 30 ++----- .../datastructure/pattern/TreePattern.java | 87 +++++++++++++++++-- 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java index 798c6d5cef19..23b6c334b5cc 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java @@ -21,10 +21,9 @@ import org.apache.iotdb.commons.pipe.config.constant.PipeSourceConstant; import org.apache.iotdb.commons.pipe.datastructure.pattern.IoTDBTreePattern; +import org.apache.iotdb.commons.pipe.datastructure.pattern.PrefixTreePattern; import org.apache.iotdb.commons.pipe.datastructure.pattern.TreePattern; import org.apache.iotdb.commons.pipe.datastructure.pattern.UnionIoTDBTreePattern; -import org.apache.iotdb.commons.pipe.datastructure.pattern.WithExclusionIoTDBTreePattern; -import org.apache.iotdb.commons.pipe.datastructure.pattern.WithExclusionTreePattern; import org.apache.iotdb.pipe.api.customizer.parameter.PipeParameters; import 
org.apache.iotdb.pipe.api.exception.PipeException; @@ -80,11 +79,8 @@ public void testInclusionPrunedByExclusion_Partial() { final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); - Assert.assertTrue(result instanceof WithExclusionIoTDBTreePattern); - final WithExclusionIoTDBTreePattern exclusionPattern = (WithExclusionIoTDBTreePattern) result; - - Assert.assertEquals("root.sg.d2", exclusionPattern.getInclusionPattern().getPattern()); - Assert.assertEquals("root.sg.d1", exclusionPattern.getExclusionPattern().getPattern()); + Assert.assertTrue(result instanceof IoTDBTreePattern); + Assert.assertEquals("root.sg.d2", result.getPattern()); } @Test @@ -119,10 +115,8 @@ public void testComplexPruning() { final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); - Assert.assertTrue(result instanceof WithExclusionIoTDBTreePattern); - final WithExclusionIoTDBTreePattern excPattern = (WithExclusionIoTDBTreePattern) result; - - Assert.assertEquals("root.sg.B", excPattern.getInclusionPattern().getPattern()); + Assert.assertTrue(result instanceof IoTDBTreePattern); + Assert.assertEquals("root.sg.B", result.getPattern()); } @Test @@ -139,10 +133,8 @@ public void testComplexPruning_Prefix() { final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); - Assert.assertTrue(result instanceof WithExclusionTreePattern); - final WithExclusionTreePattern excPattern = (WithExclusionTreePattern) result; - - Assert.assertEquals("root.sg.B", excPattern.getInclusionPattern().getPattern()); + Assert.assertTrue(result instanceof PrefixTreePattern); + Assert.assertEquals("root.sg.B", result.getPattern()); } @Test @@ -158,11 +150,7 @@ public void testUnionPreservedWhenNotCovered() { final TreePattern result = TreePattern.parsePipePatternFromSourceParameters(params); - Assert.assertTrue(result instanceof WithExclusionIoTDBTreePattern); - final WithExclusionIoTDBTreePattern excResult = (WithExclusionIoTDBTreePattern) 
result; - - Assert.assertTrue(excResult.getInclusionPattern() instanceof UnionIoTDBTreePattern); - final UnionIoTDBTreePattern unionInc = (UnionIoTDBTreePattern) excResult.getInclusionPattern(); - Assert.assertEquals(2, unionInc.getPatterns().size()); + Assert.assertTrue(result instanceof UnionIoTDBTreePattern); + Assert.assertEquals("root.sg.d1,root.sg.d2", result.getPattern()); } } diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index b189bc4e61d4..b9a8f7752936 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -179,12 +179,23 @@ public static TreePattern parsePipePatternFromSourceParameters( // 5. Check if the resulting inclusion pattern is empty if (inclusionPatterns.isEmpty()) { - throw new PipeException( - "Pipe: The inclusion pattern is empty after pruning by the exclusion pattern. " - + "This pipe pattern will match nothing."); + final String msg = + String.format( + "Pipe: The provided exclusion pattern fully covers the inclusion pattern. " + + "This pipe pattern will match nothing. " + + "Inclusion: %s, Exclusion: %s", + sourceParameters.getStringByKeys(EXTRACTOR_PATTERN_KEY, SOURCE_PATTERN_KEY), + sourceParameters.getStringByKeys( + EXTRACTOR_PATTERN_EXCLUSION_KEY, SOURCE_PATTERN_EXCLUSION_KEY)); + LOGGER.warn(msg); + throw new PipeException(msg); } - // 6. Build final patterns + // 6. Prune exclusion patterns: if an exclusion pattern does not overlap with + // ANY of the remaining inclusion patterns, it is useless and should be removed. + exclusionPatterns = pruneIrrelevantExclusions(inclusionPatterns, exclusionPatterns); + + // 7. 
Build final patterns final TreePattern finalInclusionPattern = buildUnionPattern(isTreeModelDataAllowedToBeCaptured, inclusionPatterns); @@ -195,7 +206,7 @@ public static TreePattern parsePipePatternFromSourceParameters( final TreePattern finalExclusionPattern = buildUnionPattern(isTreeModelDataAllowedToBeCaptured, exclusionPatterns); - // 7. Combine inclusion and exclusion + // 8. Combine inclusion and exclusion if (finalInclusionPattern instanceof IoTDBTreePatternOperations && finalExclusionPattern instanceof IoTDBTreePatternOperations) { return new WithExclusionIoTDBTreePattern( @@ -395,6 +406,37 @@ private static List pruneInclusionPatterns( return prunedInclusion; } + /** + * Prunes patterns from the exclusion list that do NOT overlap with any of the remaining inclusion + * patterns. + */ + private static List pruneIrrelevantExclusions( + final List inclusion, final List exclusion) { + if (exclusion == null || exclusion.isEmpty()) { + return new ArrayList<>(); + } + if (inclusion == null || inclusion.isEmpty()) { + // If inclusion is empty, exclusion is irrelevant anyway, but usually this case + // throws exception earlier. + return new ArrayList<>(); + } + + final List relevantExclusion = new ArrayList<>(); + for (final TreePattern exc : exclusion) { + boolean overlapsWithAnyInclusion = false; + for (final TreePattern inc : inclusion) { + if (overlaps(exc, inc)) { + overlapsWithAnyInclusion = true; + break; + } + } + if (overlapsWithAnyInclusion) { + relevantExclusion.add(exc); + } + } + return relevantExclusion; + } + /** Checks if 'coverer' pattern fully covers 'coveree' pattern. */ private static boolean covers(final TreePattern coverer, final TreePattern coveree) { try { @@ -423,6 +465,41 @@ private static boolean covers(final TreePattern coverer, final TreePattern cover } catch (final Exception e) { // In case of path parsing errors or unsupported operations, assume no coverage // to be safe and avoid aggressive pruning. 
+ LOGGER.warn( + "Pipe: Failed to check if pattern [{}] covers [{}]. Assuming false.", + coverer.getPattern(), + coveree.getPattern(), + e); + return false; + } + } + + /** Checks if 'patternA' overlaps with 'patternB'. */ + private static boolean overlaps(final TreePattern patternA, final TreePattern patternB) { + try { + final List pathsA = patternA.getBaseInclusionPaths(); + final List pathsB = patternB.getBaseInclusionPaths(); + + if (pathsA.isEmpty() || pathsB.isEmpty()) { + return false; + } + + // Logic: Check if ANY path in A overlaps with ANY path in B. + for (final PartialPath pathA : pathsA) { + for (final PartialPath pathB : pathsB) { + if (pathA.overlapWith(pathB)) { + return true; + } + } + } + return false; + } catch (final Exception e) { + // Best effort check + LOGGER.warn( + "Pipe: Failed to check if pattern [{}] overlaps with [{}]. Assuming false.", + patternA.getPattern(), + patternB.getPattern(), + e); return false; } } From 387578e698e4d285882050f6e234e6d3265f86e3 Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Sun, 7 Dec 2025 17:14:26 +0800 Subject: [PATCH 5/8] use trie for optimizePatterns --- .../datastructure/pattern/TreePattern.java | 170 +++++++++++++++--- .../WithExclusionIoTDBTreePattern.java | 13 -- .../pattern/WithExclusionTreePattern.java | 13 -- 3 files changed, 142 insertions(+), 54 deletions(-) diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index b9a8f7752936..f6459124ad03 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -19,9 +19,11 @@ package org.apache.iotdb.commons.pipe.datastructure.pattern; +import org.apache.iotdb.commons.conf.IoTDBConstant; import 
org.apache.iotdb.commons.path.PartialPath; import org.apache.iotdb.commons.pipe.config.constant.PipeSourceConstant; import org.apache.iotdb.commons.pipe.config.constant.SystemConstant; +import org.apache.iotdb.commons.utils.TestOnly; import org.apache.iotdb.pipe.api.customizer.parameter.PipeParameters; import org.apache.iotdb.pipe.api.exception.PipeException; @@ -32,7 +34,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.function.Function; import java.util.stream.Collectors; @@ -331,6 +335,19 @@ private static List parsePatternList( /** * Removes patterns from the list that are covered by other patterns in the same list. For * example, if "root.**" and "root.db.**" are present, "root.db.**" is removed. + * + *

<p>Optimization Strategy: + * + * <ol>

    + *
  <li>Sort First: Patterns are sorted by "broadness" (shortest length and most wildcards + * first). This ensures that dominating patterns (like {@code root.**}) are processed first. + *
  <li>Filter with Trie: Instead of comparing every pattern against every other pattern + * (O(N^2)), we build a Trie to check for coverage. For each pattern, we check if it is + * already "covered" by the Trie. If it is, we discard it; if not, we add it to the Trie. + * </ol>
+ * + *
<p>
Time Complexity: O(N * L), where N is the number of patterns and L is the average + * path length. */ private static List optimizePatterns(final List patterns) { if (patterns == null || patterns.isEmpty()) { @@ -340,40 +357,54 @@ private static List optimizePatterns(final List patter return patterns; } + // 1. Sort patterns by "Broadness" + // Heuristic: Shorter paths and paths with wildcards should come first. + // This allows us to insert 'root.**' first, so we can quickly skip 'root.sg.d1' later. + final List sortedPatterns = new ArrayList<>(patterns); + sortedPatterns.sort( + (o1, o2) -> { + // We can only approximate comparison here since TreePattern represents multiple paths. + // We use the first inclusion path as a representative. + final PartialPath p1 = o1.getBaseInclusionPaths().get(0); + final PartialPath p2 = o2.getBaseInclusionPaths().get(0); + + // 1. Length: Shorter is generally broader (e.g., root.** vs root.sg.d1) + final int lenCompare = Integer.compare(p1.getNodeLength(), p2.getNodeLength()); + if (lenCompare != 0) return lenCompare; + + // 2. Wildcards: Pattern with wildcards is broader (e.g., root.sg.* vs root.sg.d1) + final boolean w1 = p1.hasWildcard(); + final boolean w2 = p2.hasWildcard(); + if (w1 && !w2) return -1; + if (!w1 && w2) return 1; + + // 3. Deterministic tie-breaker + return p1.compareTo(p2); + }); + + // 2. Filter using Trie + final PatternTrie trie = new PatternTrie(); final List optimized = new ArrayList<>(); - // Determine coverage using base paths - for (int i = 0; i < patterns.size(); i++) { - final TreePattern current = patterns.get(i); - boolean isCoveredByOther = false; - - for (int j = 0; j < patterns.size(); j++) { - if (i == j) { - continue; - } - final TreePattern other = patterns.get(j); - - // If 'other' covers 'current', 'current' is redundant. - // Note: if they mutually cover each other (duplicates), we must ensure we keep one. - // We use index comparison to break ties for exact duplicates. 
- if (covers(other, current)) { - if (covers(current, other)) { - // Both cover each other (likely identical). Keep the one with smaller index. - if (j < i) { - isCoveredByOther = true; - break; - } - } else { - // Strict coverage - isCoveredByOther = true; - break; - } + + for (final TreePattern pattern : sortedPatterns) { + boolean isCovered = true; + // A pattern is redundant only if ALL its base paths are covered by the Trie + for (final PartialPath path : pattern.getBaseInclusionPaths()) { + if (!trie.isCovered(path)) { + isCovered = false; + break; } } - if (!isCoveredByOther) { - optimized.add(current); + if (!isCovered) { + optimized.add(pattern); + // Add all its paths to the Trie to cover future patterns + for (final PartialPath path : pattern.getBaseInclusionPaths()) { + trie.add(path); + } } } + return optimized; } @@ -663,6 +694,7 @@ public static boolean isTreeModelDataAllowToBeCaptured(final PipeParameters sour * @return An int array `[coveredCount, totalInclusionPaths]` for testing non-failing scenarios. * @throws PipeException If the inclusion pattern is fully covered by the exclusion pattern. */ + @TestOnly public static int[] checkAndLogPatternCoverage( final TreePattern inclusion, final TreePattern exclusion) throws PipeException { if (inclusion == null || exclusion == null) { @@ -726,4 +758,86 @@ public static int[] checkAndLogPatternCoverage( return new int[] {coveredCount, inclusionPaths.size()}; } + + /** A specialized Trie to efficiently check path coverage. */ + private static class PatternTrie { + private final TrieNode root = new TrieNode(); + + private static class TrieNode { + // Children nodes mapped by path segment + Map children = new HashMap<>(); + // Marks if a pattern ends here (e.g., "root.sg" is a set path) + boolean isLeaf = false; + // Special flags for optimization + boolean isMultiLevelWildcard = false; // Ends with ** + } + + /** Adds a path to the Trie. 
*/ + public void add(final PartialPath path) { + TrieNode node = root; + final String[] nodes = path.getNodes(); + + for (final String segment : nodes) { + // If we are at a node that is already a MultiLevelWildcard (**), + // everything below is already covered. We can stop adding. + if (node.isMultiLevelWildcard) { + return; + } + + node = node.children.computeIfAbsent(segment, k -> new TrieNode()); + + // If this segment is **, mark it. + // Note: In IoTDB PartialPath, ** is usually the last node or specific wildcard. + if (segment.equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD)) { + node.isMultiLevelWildcard = true; + // Optimization: clear children as ** covers everything + node.children.clear(); + node.isLeaf = true; + return; + } + } + node.isLeaf = true; + } + + /** + * Checks if the given path is covered by any existing pattern in the Trie. e.g., if Trie has + * "root.sg.**", then "root.sg.d1.s1" returns true. + */ + public boolean isCovered(final PartialPath path) { + return checkCoverage(root, path.getNodes(), 0); + } + + private boolean checkCoverage(final TrieNode node, final String[] pathNodes, final int index) { + // 1. If the Trie node is a Multi-Level Wildcard (**), it covers everything remainder + if (node.isMultiLevelWildcard) { + return true; + } + + // 2. If we reached the end of the query path + if (index >= pathNodes.length) { + // The path is covered if the Trie also ends here (isLeaf) + // Example: Trie="root.sg", Path="root.sg" -> Covered + // Example: Trie="root.sg.d1", Path="root.sg" -> Not Covered (Trie is more specific) + return node.isLeaf; + } + + final String currentSegment = pathNodes[index]; + + // 3. 
Direct Match or Single Level Wildcard (*) in Trie + // Check exact match child + final TrieNode child = node.children.get(currentSegment); + if (child != null && checkCoverage(child, pathNodes, index + 1)) { + return true; + } + + // Check if Trie has a '*' child (One Level Wildcard) + // '*' in Trie covers any single level in Path + final TrieNode wildcardChild = node.children.get(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD); + if (wildcardChild != null) { + return checkCoverage(wildcardChild, pathNodes, index + 1); + } + + return false; + } + } } diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java index a2ac63327cca..677507603392 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java @@ -21,7 +21,6 @@ import org.apache.iotdb.commons.path.PartialPath; import org.apache.iotdb.commons.path.PathPatternTree; -import org.apache.iotdb.commons.utils.TestOnly; import org.apache.tsfile.file.metadata.IDeviceID; @@ -47,8 +46,6 @@ public WithExclusionIoTDBTreePattern( super(isTreeModelDataAllowedToBeCaptured); this.inclusionPattern = inclusionPattern; this.exclusionPattern = exclusionPattern; - - TreePattern.checkAndLogPatternCoverage(inclusionPattern, exclusionPattern); } public WithExclusionIoTDBTreePattern( @@ -57,16 +54,6 @@ public WithExclusionIoTDBTreePattern( this(true, inclusionPattern, exclusionPattern); } - @TestOnly - public IoTDBTreePatternOperations getInclusionPattern() { - return inclusionPattern; - } - - @TestOnly - public IoTDBTreePatternOperations getExclusionPattern() { - return exclusionPattern; - } - //////////////////////////// Tree Pattern 
Operations //////////////////////////// @Override diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java index 772f54420701..eb255ed8392b 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java @@ -20,7 +20,6 @@ package org.apache.iotdb.commons.pipe.datastructure.pattern; import org.apache.iotdb.commons.path.PartialPath; -import org.apache.iotdb.commons.utils.TestOnly; import org.apache.tsfile.file.metadata.IDeviceID; @@ -45,18 +44,6 @@ public WithExclusionTreePattern( super(isTreeModelDataAllowedToBeCaptured); this.inclusionPattern = inclusionPattern; this.exclusionPattern = exclusionPattern; - - TreePattern.checkAndLogPatternCoverage(inclusionPattern, exclusionPattern); - } - - @TestOnly - public TreePattern getInclusionPattern() { - return inclusionPattern; - } - - @TestOnly - public TreePattern getExclusionPattern() { - return exclusionPattern; } @Override From 0c32d1195c61fae7fc11e0a78695cec59ecb2c56 Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Thu, 11 Dec 2025 14:54:10 +0800 Subject: [PATCH 6/8] apply review --- .../datastructure/pattern/TreePattern.java | 124 ++++++++++++++---- 1 file changed, 95 insertions(+), 29 deletions(-) diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index f6459124ad03..8ee7fc5de608 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ 
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -370,13 +370,19 @@ private static List optimizePatterns(final List patter // 1. Length: Shorter is generally broader (e.g., root.** vs root.sg.d1) final int lenCompare = Integer.compare(p1.getNodeLength(), p2.getNodeLength()); - if (lenCompare != 0) return lenCompare; + if (lenCompare != 0) { + return lenCompare; + } // 2. Wildcards: Pattern with wildcards is broader (e.g., root.sg.* vs root.sg.d1) final boolean w1 = p1.hasWildcard(); final boolean w2 = p2.hasWildcard(); - if (w1 && !w2) return -1; - if (!w1 && w2) return 1; + if (w1 && !w2) { + return -1; + } + if (!w1 && w2) { + return 1; + } // 3. Deterministic tie-breaker return p1.compareTo(p2); @@ -505,7 +511,7 @@ private static boolean covers(final TreePattern coverer, final TreePattern cover } } - /** Checks if 'patternA' overlaps with 'patternB'. */ + /** Checks if 'patternA' overlaps with 'patternB' using a Trie optimization. */ private static boolean overlaps(final TreePattern patternA, final TreePattern patternB) { try { final List pathsA = patternA.getBaseInclusionPaths(); @@ -515,17 +521,21 @@ private static boolean overlaps(final TreePattern patternA, final TreePattern pa return false; } - // Logic: Check if ANY path in A overlaps with ANY path in B. + // Optimization: Build Trie from the smaller list (usually) or just patternB + // to avoid O(N^2) comparisons. + final PatternTrie trie = new PatternTrie(); + for (final PartialPath path : pathsB) { + trie.add(path); + } + + // Check if any path in A overlaps with the Trie constructed from B for (final PartialPath pathA : pathsA) { - for (final PartialPath pathB : pathsB) { - if (pathA.overlapWith(pathB)) { - return true; - } + if (trie.overlaps(pathA)) { + return true; } } return false; } catch (final Exception e) { - // Best effort check LOGGER.warn( "Pipe: Failed to check if pattern [{}] overlaps with [{}]. 
Assuming false.", patternA.getPattern(), @@ -764,8 +774,11 @@ private static class PatternTrie { private final TrieNode root = new TrieNode(); private static class TrieNode { - // Children nodes mapped by path segment + // Children nodes mapped by specific path segments (excluding *) Map children = new HashMap<>(); + // Optimized field for One Level Wildcard (*) child to reduce map lookups + TrieNode wildcardNode = null; + // Marks if a pattern ends here (e.g., "root.sg" is a set path) boolean isLeaf = false; // Special flags for optimization @@ -784,25 +797,31 @@ public void add(final PartialPath path) { return; } - node = node.children.computeIfAbsent(segment, k -> new TrieNode()); - - // If this segment is **, mark it. - // Note: In IoTDB PartialPath, ** is usually the last node or specific wildcard. + // Check for Multi-Level Wildcard (**) if (segment.equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD)) { node.isMultiLevelWildcard = true; // Optimization: clear children as ** covers everything - node.children.clear(); + node.children = Collections.emptyMap(); + node.wildcardNode = null; node.isLeaf = true; return; } + + // Check for One-Level Wildcard (*) + if (segment.equals(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD)) { + if (node.wildcardNode == null) { + node.wildcardNode = new TrieNode(); + } + node = node.wildcardNode; + } else { + // Regular specific node + node = node.children.computeIfAbsent(segment, k -> new TrieNode()); + } } node.isLeaf = true; } - /** - * Checks if the given path is covered by any existing pattern in the Trie. e.g., if Trie has - * "root.sg.**", then "root.sg.d1.s1" returns true. - */ + /** Checks if the given path is covered by any existing pattern in the Trie. */ public boolean isCovered(final PartialPath path) { return checkCoverage(root, path.getNodes(), 0); } @@ -816,28 +835,75 @@ private boolean checkCoverage(final TrieNode node, final String[] pathNodes, fin // 2. 
If we reached the end of the query path if (index >= pathNodes.length) { // The path is covered if the Trie also ends here (isLeaf) - // Example: Trie="root.sg", Path="root.sg" -> Covered - // Example: Trie="root.sg.d1", Path="root.sg" -> Not Covered (Trie is more specific) return node.isLeaf; } final String currentSegment = pathNodes[index]; - // 3. Direct Match or Single Level Wildcard (*) in Trie - // Check exact match child + // 3. Direct Match in Trie final TrieNode child = node.children.get(currentSegment); if (child != null && checkCoverage(child, pathNodes, index + 1)) { return true; } - // Check if Trie has a '*' child (One Level Wildcard) - // '*' in Trie covers any single level in Path - final TrieNode wildcardChild = node.children.get(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD); - if (wildcardChild != null) { - return checkCoverage(wildcardChild, pathNodes, index + 1); + // 4. Single Level Wildcard (*) in Trie + // Access direct field instead of map lookup + if (node.wildcardNode != null) { + return checkCoverage(node.wildcardNode, pathNodes, index + 1); } return false; } + + /** Checks if the given path overlaps with any pattern in the Trie. */ + public boolean overlaps(final PartialPath path) { + return checkOverlap(root, path.getNodes(), 0); + } + + private boolean checkOverlap(final TrieNode node, final String[] pathNodes, final int index) { + // 1. If Trie has '**', it overlaps everything. + if (node.isMultiLevelWildcard) { + return true; + } + + // 2. If Query Path has '**', it overlaps everything remaining in this valid branch. + if (index < pathNodes.length + && pathNodes[index].equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD)) { + return true; + } + + // 3. End of Query Path: Overlap exists if Trie also ends here. + if (index >= pathNodes.length) { + return node.isLeaf; + } + + final String pNode = pathNodes[index]; + + // 4. 
Case: Query Node is '*' (matches any child in Trie, both specific and wildcard) + if (pNode.equals(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD)) { + // Check all specific children + for (final TrieNode child : node.children.values()) { + if (checkOverlap(child, pathNodes, index + 1)) { + return true; + } + } + // Check wildcard child + if (node.wildcardNode != null) { + return checkOverlap(node.wildcardNode, pathNodes, index + 1); + } + return false; + } + + // 5. Case: Query Node is specific (e.g., "d1") + // 5a. Check exact match in Trie + final TrieNode exactChild = node.children.get(pNode); + if (exactChild != null && checkOverlap(exactChild, pathNodes, index + 1)) { + return true; + } + + // 5b. Check '*' in Trie (matches specific query node) + // Access direct field instead of map lookup + return node.wildcardNode != null && checkOverlap(node.wildcardNode, pathNodes, index + 1); + } } } From 6e3cf377c2f860faac4591f7f4921a2f0e2c8d0b Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Thu, 11 Dec 2025 14:56:32 +0800 Subject: [PATCH 7/8] remove useless --- .../iotdb/commons/pipe/datastructure/pattern/TreePattern.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index 8ee7fc5de608..eb5dec88fd5e 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -847,7 +847,6 @@ private boolean checkCoverage(final TrieNode node, final String[] pathNodes, fin } // 4. 
Single Level Wildcard (*) in Trie - // Access direct field instead of map lookup if (node.wildcardNode != null) { return checkCoverage(node.wildcardNode, pathNodes, index + 1); } @@ -902,7 +901,6 @@ private boolean checkOverlap(final TrieNode node, final String[] pathNodes, fina } // 5b. Check '*' in Trie (matches specific query node) - // Access direct field instead of map lookup return node.wildcardNode != null && checkOverlap(node.wildcardNode, pathNodes, index + 1); } } From e7dcdf0ac533d23986ec320bb54b7744575666b1 Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Thu, 11 Dec 2025 15:02:06 +0800 Subject: [PATCH 8/8] improve pruneInclusionPatterns & pruneIrrelevantExclusions --- .../datastructure/pattern/TreePattern.java | 137 ++++++++---------- 1 file changed, 58 insertions(+), 79 deletions(-) diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java index eb5dec88fd5e..abc4406ddb7f 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java @@ -417,6 +417,19 @@ private static List optimizePatterns(final List patter /** * Prunes patterns from the inclusion list that are fully covered by ANY pattern in the exclusion * list. + * + *

<p>Optimization Strategy: + * + * <ol>

    + *
  <li>Build Exclusion Trie: Construct a Trie containing all paths from the exclusion + * list. This aggregates the coverage of all exclusion patterns into a single structure. + *
  <li>Check Coverage: Iterate through the inclusion list. For each inclusion pattern, + * check if ALL of its represented paths are covered by the Exclusion Trie. If so, the + * pattern is redundant and removed. + * </ol>
+ * + *
<p>
Time Complexity: O((N + M) * L), where N is the number of inclusion patterns, M is + * the number of exclusion patterns, and L is the average path length. */ private static List pruneInclusionPatterns( final List inclusion, final List exclusion) { @@ -427,15 +440,29 @@ private static List pruneInclusionPatterns( return inclusion; } + // 1. Build Trie with all exclusion paths + // The Trie represents the union of all excluded areas. + final PatternTrie exclusionTrie = new PatternTrie(); + for (final TreePattern exc : exclusion) { + for (final PartialPath path : exc.getBaseInclusionPaths()) { + exclusionTrie.add(path); + } + } + final List prunedInclusion = new ArrayList<>(); + // 2. Filter inclusion patterns for (final TreePattern inc : inclusion) { - boolean isFullyExcluded = false; - for (final TreePattern exc : exclusion) { - if (covers(exc, inc)) { - isFullyExcluded = true; + boolean isFullyExcluded = true; + // An inclusion pattern is fully excluded only if ALL its constituent base paths + // are covered by the exclusion Trie. + for (final PartialPath path : inc.getBaseInclusionPaths()) { + if (!exclusionTrie.isCovered(path)) { + isFullyExcluded = false; break; } } + + // If not fully excluded (i.e., at least one path survives), keep it. if (!isFullyExcluded) { prunedInclusion.add(inc); } @@ -446,6 +473,19 @@ private static List pruneInclusionPatterns( /** * Prunes patterns from the exclusion list that do NOT overlap with any of the remaining inclusion * patterns. + * + *

<p>Optimization Strategy: + * + * <ol>

    + *
  <li>Build Inclusion Trie: Construct a Trie containing all paths from the inclusion + * list. This aggregates the search space of inclusion patterns. + *
  <li>Filter Exclusions: Iterate through the exclusion list. For each exclusion pattern, + * check if it overlaps with the Inclusion Trie. Only exclusions that overlap with at least + * one inclusion pattern are kept. + * </ol>
+ * + *
<p>
Time Complexity: O((N + M) * L), where N is the number of inclusion patterns, M is + * the number of exclusion patterns, and L is the average path length. */ private static List pruneIrrelevantExclusions( final List inclusion, final List exclusion) { @@ -453,20 +493,30 @@ private static List pruneIrrelevantExclusions( return new ArrayList<>(); } if (inclusion == null || inclusion.isEmpty()) { - // If inclusion is empty, exclusion is irrelevant anyway, but usually this case - // throws exception earlier. + // If inclusion is empty, exclusion is irrelevant anyway. return new ArrayList<>(); } + // 1. Build Trie from Inclusion Patterns + final PatternTrie inclusionTrie = new PatternTrie(); + for (final TreePattern inc : inclusion) { + for (final PartialPath path : inc.getBaseInclusionPaths()) { + inclusionTrie.add(path); + } + } + + // 2. Filter Exclusion Patterns using the Trie final List relevantExclusion = new ArrayList<>(); for (final TreePattern exc : exclusion) { boolean overlapsWithAnyInclusion = false; - for (final TreePattern inc : inclusion) { - if (overlaps(exc, inc)) { + // An exclusion pattern is relevant if ANY of its base paths overlap with the inclusion Trie + for (final PartialPath path : exc.getBaseInclusionPaths()) { + if (inclusionTrie.overlaps(path)) { overlapsWithAnyInclusion = true; break; } } + if (overlapsWithAnyInclusion) { relevantExclusion.add(exc); } @@ -474,77 +524,6 @@ private static List pruneIrrelevantExclusions( return relevantExclusion; } - /** Checks if 'coverer' pattern fully covers 'coveree' pattern. */ - private static boolean covers(final TreePattern coverer, final TreePattern coveree) { - try { - final List covererPaths = coverer.getBaseInclusionPaths(); - final List covereePaths = coveree.getBaseInclusionPaths(); - - if (covererPaths.isEmpty() || covereePaths.isEmpty()) { - return false; - } - - // Logic: For 'coverer' to cover 'coveree', ALL paths in 'coveree' must be included - // by at least one path in 'coverer'. 
- for (final PartialPath sub : covereePaths) { - boolean isSubCovered = false; - for (final PartialPath sup : covererPaths) { - if (sup.include(sub)) { - isSubCovered = true; - break; - } - } - if (!isSubCovered) { - return false; - } - } - return true; - } catch (final Exception e) { - // In case of path parsing errors or unsupported operations, assume no coverage - // to be safe and avoid aggressive pruning. - LOGGER.warn( - "Pipe: Failed to check if pattern [{}] covers [{}]. Assuming false.", - coverer.getPattern(), - coveree.getPattern(), - e); - return false; - } - } - - /** Checks if 'patternA' overlaps with 'patternB' using a Trie optimization. */ - private static boolean overlaps(final TreePattern patternA, final TreePattern patternB) { - try { - final List pathsA = patternA.getBaseInclusionPaths(); - final List pathsB = patternB.getBaseInclusionPaths(); - - if (pathsA.isEmpty() || pathsB.isEmpty()) { - return false; - } - - // Optimization: Build Trie from the smaller list (usually) or just patternB - // to avoid O(N^2) comparisons. - final PatternTrie trie = new PatternTrie(); - for (final PartialPath path : pathsB) { - trie.add(path); - } - - // Check if any path in A overlaps with the Trie constructed from B - for (final PartialPath pathA : pathsA) { - if (trie.overlaps(pathA)) { - return true; - } - } - return false; - } catch (final Exception e) { - LOGGER.warn( - "Pipe: Failed to check if pattern [{}] overlaps with [{}]. Assuming false.", - patternA.getPattern(), - patternB.getPattern(), - e); - return false; - } - } - /** * A private helper method to parse a list of {@link TreePattern}s from the "pattern" parameter, * considering its "format".