apache
diff --git a/‎docs/content/docs/dev/table/sql/queries/hints.md‎
Lines changed: 43 additions & 0 deletions b/‎docs/content/docs/dev/table/sql/queries/hints.md‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎docs/content/docs/dev/table/tuning.md‎
Lines changed: 26 additions & 8 deletions b/‎docs/content/docs/dev/table/tuning.md‎
Lines changed: 26 additions & 8 deletions
diff --git a/‎flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/hint/FlinkHintStrategies.java‎
Lines changed: 5 additions & 0 deletions b/‎flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/hint/FlinkHintStrategies.java‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/hint/JoinStrategy.java‎
Lines changed: 10 additions & 1 deletion b/‎flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/hint/JoinStrategy.java‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/rules/logical/JoinToMultiJoinRule.java‎
Lines changed: 39 additions & 1 deletion b/‎flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/rules/logical/JoinToMultiJoinRule.java‎
Lines changed: 39 additions & 1 deletion
diff --git a/‎flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/plan/optimize/program/FlinkStreamProgram.scala‎
Lines changed: 14 additions & 16 deletions b/‎flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/plan/optimize/program/FlinkStreamProgram.scala‎
Lines changed: 14 additions & 16 deletions
diff --git a/‎flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/MultiJoinSemanticTests.java‎
Lines changed: 2 additions & 1 deletion b/‎flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/MultiJoinSemanticTests.java‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/MultiJoinTestPrograms.java‎
Lines changed: 30 additions & 0 deletions b/‎flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/MultiJoinTestPrograms.java‎
Lines changed: 30 additions & 0 deletions
@@ -279,6 +279,49 @@ SELECT /*+ NEST_LOOP(t1) */ * FROM t1 JOIN t2 ON t1.id = t2.id;
 SELECT /*+ NEST_LOOP(t1, t3) */ * FROM t1 JOIN t2 ON t1.id = t2.id JOIN t3 ON t1.id = t3.id;
 ```
 
+#### MULTI_JOIN
+
+{{< label Streaming >}}
+
+`MULTI_JOIN` suggests that Flink uses the `MultiJoin operator` to process multiple regular joins simultaneously. This type of join hint is recommended when you have multiple joins that share at least one common join key and experience large intermediate state or record amplification. The MultiJoin operator eliminates intermediate state by processing joins across various input streams simultaneously, which can significantly reduce state size and improve performance in some cases.
+
+For more details on the MultiJoin operator, including when to use it and configuration options, see [Multiple Regular Joins]({{< ref "docs/dev/table/tuning" >}}#multiple-regular-joins).
+
+{{< hint info >}}
+Note:
+- The MULTI_JOIN hint can specify table names or table aliases. If a table has an alias, the hint must use the alias name.
+- At least one key must be shared between the join conditions for the MultiJoin operator to be applied.
+- When specified, the MULTI_JOIN hint applies to the tables listed in the hint within the current query block.
+{{< /hint >}}
+
+##### Examples
+
+```sql
+CREATE TABLE t1 (id BIGINT, name STRING, age INT) WITH (...);
+CREATE TABLE t2 (id BIGINT, name STRING, age INT) WITH (...);
+CREATE TABLE t3 (id BIGINT, name STRING, age INT) WITH (...);
+
+-- Flink will use the MultiJoin operator for the three-way join.
+SELECT /*+ MULTI_JOIN(t1, t2, t3) */ * FROM t1 
+JOIN t2 ON t1.id = t2.id 
+JOIN t3 ON t1.id = t3.id;
+
+-- Using table names instead of aliases.
+SELECT /*+ MULTI_JOIN(Users, Orders, Payments) */ * FROM Users 
+INNER JOIN Orders ON Users.user_id = Orders.user_id 
+INNER JOIN Payments ON Users.user_id = Payments.user_id;
+
+-- Partial match: only t1 and t2 will use MultiJoin, t3 will use regular join.
+SELECT /*+ MULTI_JOIN(t1, t2) */ * FROM t1 
+JOIN t2 ON t1.id = t2.id 
+JOIN t3 ON t1.id = t3.id;
+
+-- Combining MULTI_JOIN with STATE_TTL hint.
+SELECT /*+ MULTI_JOIN(t1, t2, t3), STATE_TTL('t1'='1d', 't2'='2d', 't3'='12h') */ * FROM t1 
+JOIN t2 ON t1.id = t2.id 
+JOIN t3 ON t1.id = t3.id;
+```
+
 #### LOOKUP
 
 {{< label Streaming >}}
 
@@ -307,9 +307,9 @@ MiniBatch optimization is disabled by default for regular join. In order to enab
 
 {{< label Streaming >}}
 
-Streaming Flink jobs with multiple non-temporal regular joins often experience operational instability and performance degradation due to large state sizes. This is often because the intermediate state created by a chain of joins is much larger than the input state itself. In Flink 2.1, we introduce a new multi-join operator, an optimization designed to significantly reduce state size and improve performance for join pipelines that involve record amplification and large intermediate state. This new operator eliminates the need to store intermediate state for joins across multiple tables by processing joins across various input streams simultaneously. This "zero intermediate state" approach primarily targets state reduction, offering substantial benefits in resource consumption and operational stability.
+Streaming Flink jobs with multiple non-temporal regular joins often experience operational instability and performance degradation due to large state sizes. This is often because the intermediate state created by a chain of joins is much larger than the input state itself. In Flink 2.1, we introduce a new multi-join operator, an optimization designed to significantly reduce state size and improve performance for join pipelines that involve record amplification and large intermediate state. This new operator eliminates the need to store intermediate state for joins across multiple tables by processing joins across various input streams simultaneously. This "zero intermediate state" approach primarily targets state reduction, offering substantial benefits in resource consumption and operational stability in some cases. The trade-off is reduced storage in exchange for additional computation, because intermediate results are recomputed when needed instead of being stored.
 
-In most joins, a significant portion of processing time is spent fetching records from the state. The efficiency of the MultiJoin operator largely depends on the size of this intermediate state. In a common scenario where a pipeline experiences record amplification—meaning each join produces more data and records than the previous one, the MultiJoin operator is more efficient. This is because it keeps the state on which the operator interacts much smaller, leading to a more stable operator. If a chain of joins actually produces less state than the original records, the MultiJoin operator will still use less state overall. However, in this specific case, binary joins might perform better because the state that the final joins need to operate on is smaller. 
+In most joins, a significant portion of processing time is spent fetching records from the state. The efficiency of the MultiJoin operator largely depends on the size of this intermediate state and the selectivity of the common join key(s). In a common scenario where a pipeline experiences record amplification—meaning each join produces more data and records than the previous one—the MultiJoin operator is more efficient. This is because it keeps the state that the operator interacts with much smaller, leading to a more stable operator. If a chain of joins actually produces less state than the original records, the MultiJoin operator will still use less state overall. However, in this specific case, binary joins might perform better because the state that the final joins need to operate on is smaller.
 
 ### The MultiJoin Operator
 The main benefits of the MultiJoin operator are:
@@ -318,21 +318,39 @@ The main benefits of the MultiJoin operator are:
 2) Improved performance for chained joins with record amplification.
 3) Improved stability: linear state growth with amount of records processed, instead of polynomial growth with binary joins. 
 
-Also, pipelines with MultiJoin instead of binary joins usually have faster initialization and recovery times due to smaller state and fewer amount of nodes.
+Also, pipelines with MultiJoin instead of binary joins usually have faster initialization and recovery times due to smaller state and fewer nodes.
 
 ### When to enable the MultiJoin?
 
-If your job has multiple joins that share at least one common join key, and you observe that the intermediate state in the intermediate joins is larger than the inputs sources, consider enabling the MultiJoin operator.
+If your job has multiple joins that share at least one common join key, and you observe that the intermediate state in the intermediate joins is larger than the input sources, consider enabling the MultiJoin operator.
+
+Recommended use cases:
+- The common join key(s) have a high selectivity (the number of records per key is small)
+- Statement with several chained joins and considerable intermediate state
+- No considerable data skew on the common join key(s)
+- Joins generate large state (50+ GB)
+
+If your common join key(s) exhibit low selectivity (i.e., a high number of rows sharing the same key value), the MultiJoin operator's required recomputation of the intermediate state can severely impact performance. In such scenarios, binary joins are recommended, as these will partition the data using all join keys.
 
 ### How to enable the MultiJoin?
 
-To enable this optimization, set the following configuration
+To enable this optimization globally for all eligible joins, set the following configuration:
 
 ```sql
 SET 'table.optimizer.multi-join.enabled' = 'true';
 ```
 
-Important: This is currently in an experimental state - there are open optimizations and breaking changes might be implemented in this version. We currently support only streaming INNER/LEFT joins. Support for RIGHT joins will be added soon. Due to records partitioning, you need at least one key that is shared between the join conditions, see:
+Alternatively, you can enable the MultiJoin operator for specific tables using the `MULTI_JOIN` hint:
+
+```sql
+SELECT /*+ MULTI_JOIN(t1, t2, t3) */ * FROM t1 
+JOIN t2 ON t1.id = t2.id 
+JOIN t3 ON t1.id = t3.id;
+```
+
+The hint approach allows you to selectively apply the MultiJoin optimization to specific query blocks without enabling it globally. You can specify either table names or table aliases in the hint. For more details on the MULTI_JOIN hint, see [Join Hints]({{< ref "docs/dev/table/sql/queries/hints" >}}#multi_join).
+
+Important: This feature is currently experimental - further optimizations and breaking changes may still be introduced. We currently support only streaming INNER/LEFT joins. Because records are partitioned by the join key, at least one key must be shared between the join conditions. For example:
 
 - Supported: A JOIN B ON A.key = B.key JOIN C ON A.key = C.key (Partition by key)
 - Supported: A JOIN B ON A.key = B.key JOIN C ON B.key = C.key (Partition by key via transitivity)
@@ -349,9 +367,9 @@ For this 10-way join above, involving record amplification, we've observed signi
 - Performance: 2x to over 100x+ increase in processed records when both at 100% busyness.
 - State Size: 3x to over 1000x+ smaller as intermediate state grows.
 
-The total state is always smaller with the MultiJoin operator. In this case, the performance is initially the same, but as the intermediate state grows, the performance of binary joins degrade and the multi join remains stable and outperforms.
+The total state is always smaller with the MultiJoin operator. In this case, the performance is initially the same, but as the intermediate state grows, the performance of binary joins degrades, while the MultiJoin operator remains stable and outperforms them.
 
-This general benchmark for the 10-way join was run with the following configuration: 10 upsert kafka topics, 10 parallelism, 1 record per second per topic. We used rocksdb with unaligned checkpoints and with incremental checkpoints. Each job ran in one TaskManager containing 8GB process memory, 1GB off-heap memory and 20% network memory. The JobManager had 4GB process memory. The host machine contained a M1 processor chip, 32GB RAM and 1TB SSD. The sink uses a blackhole connector so we only benchmark the joins. The SQL used to generate the benchmark data had this structure:
+This general benchmark for the 10-way join was run with the following configuration: 1 record per tenant_id (high selectivity), 10 upsert Kafka topics, a parallelism of 10, and 1 record per second per topic. We used RocksDB with unaligned and incremental checkpoints. Each job ran in one TaskManager with 8GB process memory, 1GB off-heap memory and 20% network memory. The JobManager had 4GB process memory. The host machine had an M1 processor chip, 32GB RAM and a 1TB SSD. The sink uses a blackhole connector so we only benchmark the joins. The SQL used to generate the benchmark data had this structure:
 
 ```sql
 INSERT INTO JoinResultsMJ
 
@@ -121,6 +121,11 @@ public static HintStrategyTable createHintStrategyTable() {
                                                 HintPredicates.CORRELATE, HintPredicates.JOIN))
                                 .optionChecker(LOOKUP_NON_EMPTY_KV_OPTION_CHECKER)
                                 .build())
+                .hintStrategy(
+                        JoinStrategy.MULTI_JOIN.getJoinHintName(),
+                        HintStrategy.builder(HintPredicates.JOIN)
+                                .optionChecker(NON_EMPTY_LIST_OPTION_CHECKER)
+                                .build())
                 .hintStrategy(
                         StateTtlHint.STATE_TTL.getHintName(),
                         HintStrategy.builder(
 
@@ -48,7 +48,14 @@ public enum JoinStrategy {
     NEST_LOOP("NEST_LOOP"),
 
     /** Instructs the optimizer to use lookup join strategy. Only accept key-value hint options. */
-    LOOKUP("LOOKUP");
+    LOOKUP("LOOKUP"),
+
+    /**
+     * Instructs the optimizer to use multi-way join strategy for streaming queries. This hint
+     * allows specifying multiple tables to be joined together in a single {@link
+     * org.apache.flink.table.runtime.operators.join.stream.StreamingMultiJoinOperator}.
+     */
+    MULTI_JOIN("MULTI_JOIN");
 
     private final String joinHintName;
 
@@ -83,6 +90,8 @@ public static boolean validOptions(String hintName, List<String> options) {
                 return options.size() > 0;
             case LOOKUP:
                 return null == options || options.size() == 0;
+            case MULTI_JOIN:
+                return options.size() > 0;
         }
         return false;
     }
 
@@ -18,12 +18,16 @@
 
 package org.apache.flink.table.planner.plan.rules.logical;
 
+import org.apache.flink.table.api.TableConfig;
 import org.apache.flink.table.api.TableException;
+import org.apache.flink.table.api.config.OptimizerConfigOptions;
 import org.apache.flink.table.planner.calcite.FlinkTypeFactory;
 import org.apache.flink.table.planner.hint.FlinkHints;
+import org.apache.flink.table.planner.hint.JoinStrategy;
 import org.apache.flink.table.planner.hint.StateTtlHint;
 import org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalMultiJoin;
 import org.apache.flink.table.planner.plan.utils.IntervalJoinUtil;
+import org.apache.flink.table.planner.utils.ShortcutUtils;
 import org.apache.flink.table.runtime.operators.join.stream.keyselector.AttributeBasedJoinKeyExtractor;
 import org.apache.flink.table.runtime.operators.join.stream.keyselector.JoinKeyExtractor;
 import org.apache.flink.table.types.logical.RowType;
@@ -170,7 +174,41 @@ public boolean matches(RelOptRuleCall call) {
             return false;
         }
 
-        return origJoin.getJoinType().projectsRight();
+        if (!origJoin.getJoinType().projectsRight()) {
+            return false;
+        }
+
+        // Enable multi-join if either config is enabled OR MULTI_JOIN hint is present
+        return isEnabledViaConfig(origJoin) || hasMultiJoinHint(origJoin);
+    }
+
+    /**
+     * Tells whether multi-join optimization is switched on through the table configuration.
+     *
+     * @param join the join node whose table config is looked up
+     * @return the value of {@link OptimizerConfigOptions#TABLE_OPTIMIZER_MULTI_JOIN_ENABLED} read
+     *     from the table config unwrapped from {@code join}
+     */
+    private boolean isEnabledViaConfig(Join join) {
+        return ShortcutUtils.unwrapTableConfig(join)
+                .get(OptimizerConfigOptions.TABLE_OPTIMIZER_MULTI_JOIN_ENABLED);
+    }
+
+    /**
+     * Checks if the MULTI_JOIN hint is present on the join node.
+     *
+     * <p>Note: By the time this rule sees the join, the QueryHintsResolver has already validated
+     * the hint. If the hint is present with valid options, it means both sides of this join were
+     * mentioned in the original hint and have been validated.
+     *
+     * @param join the join node
+     * @return true if MULTI_JOIN hint is present and valid
+     */
+    private boolean hasMultiJoinHint(Join join) {
+        return join.getHints().stream()
+                .anyMatch(
+                        hint ->
+                                JoinStrategy.MULTI_JOIN.getJoinHintName().equals(hint.hintName)
+                                        && !hint.listOptions.isEmpty());
     }
 
     @Override
 
@@ -233,22 +233,20 @@ object FlinkStreamProgram {
     }
 
     // multi-join
-    if (tableConfig.get(OptimizerConfigOptions.TABLE_OPTIMIZER_MULTI_JOIN_ENABLED)) {
-      chainedProgram.addLast(
-        MULTI_JOIN,
-        FlinkGroupProgramBuilder
-          .newBuilder[StreamOptimizeContext]
-          .addProgram(
-            FlinkHepRuleSetProgramBuilder.newBuilder
-              .setHepRulesExecutionType(HEP_RULES_EXECUTION_TYPE.RULE_COLLECTION)
-              .setHepMatchOrder(HepMatchOrder.BOTTOM_UP)
-              .add(FlinkStreamRuleSets.MULTI_JOIN_RULES)
-              .build(),
-            "merge binary regular joins into MultiJoin"
-          )
-          .build()
-      )
-    }
+    chainedProgram.addLast(
+      MULTI_JOIN,
+      FlinkGroupProgramBuilder
+        .newBuilder[StreamOptimizeContext]
+        .addProgram(
+          FlinkHepRuleSetProgramBuilder.newBuilder
+            .setHepRulesExecutionType(HEP_RULES_EXECUTION_TYPE.RULE_COLLECTION)
+            .setHepMatchOrder(HepMatchOrder.BOTTOM_UP)
+            .add(FlinkStreamRuleSets.MULTI_JOIN_RULES)
+            .build(),
+          "merge binary regular joins into MultiJoin"
+        )
+        .build()
+    )
 
     // project rewrite
     chainedProgram.addLast(
 
@@ -54,6 +54,7 @@ public List<TableTestProgram> programs() {
                 MultiJoinTestPrograms.MULTI_JOIN_WITH_TIME_ATTRIBUTES_IN_CONDITIONS_MATERIALIZATION,
                 MultiJoinTestPrograms.MULTI_JOIN_TWO_WAY_INNER_JOIN_WITH_WHERE_IN,
                 MultiJoinTestPrograms.MULTI_JOIN_THREE_WAY_INNER_JOIN_MULTI_KEY_TYPES,
-                MultiJoinTestPrograms.MULTI_JOIN_FOUR_WAY_MIXED_JOIN_MULTI_KEY_TYPES_SHUFFLED);
+                MultiJoinTestPrograms.MULTI_JOIN_FOUR_WAY_MIXED_JOIN_MULTI_KEY_TYPES_SHUFFLED,
+                MultiJoinTestPrograms.MULTI_JOIN_THREE_WAY_INNER_JOIN_WITH_HINT);
     }
 }
@@ -1823,4 +1823,34 @@ public class MultiJoinTestPrograms {
                                     + "LEFT JOIN Shipments4K AS S ON U.k3 = S.k3 AND U.k2 > 150 AND U.k4 = S.k4 "
                                     + "WHERE U.k2 > 50")
                     .build();
+
+    /**
+     * Three-way inner join of Users, Orders and Payments on {@code user_id}, requested through a
+     * {@code MULTI_JOIN(u, o, p)} hint that references the table aliases. The multi-join config
+     * option is explicitly set to false, so the hint is the only thing in this program asking for
+     * the multi-join rewrite.
+     */
+    public static final TableTestProgram MULTI_JOIN_THREE_WAY_INNER_JOIN_WITH_HINT =
+            TableTestProgram.of(
+                            "three-way-inner-join-with-hint",
+                            "three way inner join using MULTI_JOIN hint")
+                    .setupConfig(OptimizerConfigOptions.TABLE_OPTIMIZER_MULTI_JOIN_ENABLED, false)
+                    .setupTableSource(USERS_SOURCE)
+                    .setupTableSource(ORDERS_SOURCE)
+                    .setupTableSource(PAYMENTS_SOURCE)
+                    .setupTableSink(
+                            SinkTestStep.newBuilder("sink")
+                                    .addSchema(
+                                            "user_id STRING",
+                                            "name STRING",
+                                            "order_id STRING",
+                                            "payment_id STRING")
+                                    .consumedValues(
+                                            "+I[1, Gus, order1, payment1]",
+                                            "+I[2, Bob, order2, payment2]",
+                                            "+I[2, Bob, order3, payment2]",
+                                            "+I[1, Gus, order1, payment3]")
+                                    .testMaterializedData()
+                                    .build())
+                    .runSql(
+                            "INSERT INTO sink "
+                                    + "SELECT /*+ MULTI_JOIN(u, o, p) */ u.user_id, u.name, o.order_id, p.payment_id "
+                                    + "FROM Users u "
+                                    + "INNER JOIN Orders o ON u.user_id = o.user_id "
+                                    + "INNER JOIN Payments p ON u.user_id = p.user_id")
+                    .build();
 }
Original file line number	Diff line number	Diff line change
`@@ -54,6 +54,7 @@ public List<TableTestProgram> programs() {`
`54`	`54`	`MultiJoinTestPrograms.MULTI_JOIN_WITH_TIME_ATTRIBUTES_IN_CONDITIONS_MATERIALIZATION,`
`55`	`55`	`MultiJoinTestPrograms.MULTI_JOIN_TWO_WAY_INNER_JOIN_WITH_WHERE_IN,`
`56`	`56`	`MultiJoinTestPrograms.MULTI_JOIN_THREE_WAY_INNER_JOIN_MULTI_KEY_TYPES,`
`57`		`- MultiJoinTestPrograms.MULTI_JOIN_FOUR_WAY_MIXED_JOIN_MULTI_KEY_TYPES_SHUFFLED);`
	`57`	`+ MultiJoinTestPrograms.MULTI_JOIN_FOUR_WAY_MIXED_JOIN_MULTI_KEY_TYPES_SHUFFLED,`
	`58`	`+ MultiJoinTestPrograms.MULTI_JOIN_THREE_WAY_INNER_JOIN_WITH_HINT);`
`58`	`59`	`}`
`59`	`60`	`}`