
Commit 0301834

Cosmos Spark: Fixing ItemCountPerTriggerHint handling in Structured Streaming (Azure#27101)

* Cosmos Spark: Fixing ItemCountPerTriggerHint handling in Structured Streaming
* Updating changelog
* Update CosmosPartitionPlanner.scala
* Making endLsn calculation more robust
* Update sdk/cosmos/azure-cosmos-spark_3_2-12/src/main/scala/com/azure/cosmos/spark/CosmosPartitionPlanner.scala
* Update sdk/cosmos/azure-cosmos-spark_3_2-12/src/main/scala/com/azure/cosmos/spark/CosmosPartitionPlanner.scala
* Update sdk/cosmos/azure-cosmos-spark_3_2-12/src/main/scala/com/azure/cosmos/spark/CosmosPartitionPlanner.scala
* Update sdk/cosmos/azure-cosmos-spark_3_2-12/src/main/scala/com/azure/cosmos/spark/CosmosPartitionPlanner.scala
* Update CosmosPartitionPlanner.scala
* Update PartitionMetadataCache.scala

Co-authored-by: Matias Quaranta <ealsur@users.noreply.github.com>
1 parent 32ed314 commit 0301834

13 files changed: +854 / -132 lines


sdk/cosmos/azure-cosmos-spark_3-1_2-12/CHANGELOG.md (2 additions, 0 deletions)

```diff
@@ -7,6 +7,8 @@
 #### Breaking Changes
 
 #### Bugs Fixed
+* Fixed an issue preventing preferred regions configured in `spark.cosmos.preferredRegionsList` from being used - See [PR 27084](https://github.com/Azure/azure-sdk-for-java/pull/27084)
+* Fixed `spark.cosmos.changeFeed.itemCountPerTriggerHint` handling when using structured streaming - there was an issue that would reduce the throughput in subsequent micro batches too aggressively. - See [PR 27101](https://github.com/Azure/azure-sdk-for-java/pull/27101)
 
 #### Other Changes
```
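For context, a minimal sketch of a structured-streaming change feed read that sets this hint is shown below. The `cosmos.oltp.changeFeed` format name and the `spark.cosmos.*` option keys follow the connector's public configuration; the endpoint, key, database, container, and checkpoint values are placeholders.

```scala
// Minimal sketch: read the Cosmos DB change feed as a structured stream with
// itemCountPerTriggerHint. Endpoint, key, database, container, and checkpoint
// values are placeholders - replace them with real ones.
import org.apache.spark.sql.SparkSession

object ChangeFeedHintSample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("cosmos-changefeed-sample").getOrCreate()

    val changeFeed = spark.readStream
      .format("cosmos.oltp.changeFeed")
      .option("spark.cosmos.accountEndpoint", "https://<account>.documents.azure.com:443/")
      .option("spark.cosmos.accountKey", "<account-key>")
      .option("spark.cosmos.database", "<database>")
      .option("spark.cosmos.container", "<container>")
      // Soft upper bound on the number of change feed items pulled per micro batch;
      // the fix in this commit spreads this budget across feed ranges without
      // throttling subsequent micro batches too aggressively.
      .option("spark.cosmos.changeFeed.itemCountPerTriggerHint", "100000")
      .load()

    changeFeed.writeStream
      .format("console")
      .option("checkpointLocation", "/tmp/cosmos-changefeed-checkpoint") // placeholder path
      .start()
      .awaitTermination()
  }
}
```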

sdk/cosmos/azure-cosmos-spark_3-1_2-12/src/main/scala/com/azure/cosmos/spark/ChangeFeedMicroBatchStream.scala (10 additions, 10 deletions)

```diff
@@ -74,15 +74,15 @@ private class ChangeFeedMicroBatchStream
     assert(startOffset.isInstanceOf[ChangeFeedOffset], "Argument 'startOffset' is not a change feed offset.")
     assert(endOffset.isInstanceOf[ChangeFeedOffset], "Argument 'endOffset' is not a change feed offset.")
 
-    log.logInfo(s"--> planInputPartitions.$streamId, startOffset: ${startOffset.json()} - endOffset: ${endOffset.json()}")
+    log.logDebug(s"--> planInputPartitions.$streamId, startOffset: ${startOffset.json()} - endOffset: ${endOffset.json()}")
     val start = startOffset.asInstanceOf[ChangeFeedOffset]
     val end = endOffset.asInstanceOf[ChangeFeedOffset]
 
     val startChangeFeedState = new String(java.util.Base64.getUrlDecoder.decode(start.changeFeedState))
-    log.logInfo(s"Start-ChangeFeedState.$streamId: $startChangeFeedState")
+    log.logDebug(s"Start-ChangeFeedState.$streamId: $startChangeFeedState")
 
     val endChangeFeedState = new String(java.util.Base64.getUrlDecoder.decode(end.changeFeedState))
-    log.logInfo(s"End-ChangeFeedState.$streamId: $endChangeFeedState")
+    log.logDebug(s"End-ChangeFeedState.$streamId: $endChangeFeedState")
 
     assert(end.inputPartitions.isDefined, "Argument 'endOffset.inputPartitions' must not be null or empty.")
 
@@ -100,7 +100,7 @@ private class ChangeFeedMicroBatchStream
    * Returns a factory to create a `PartitionReader` for each `InputPartition`.
    */
   override def createReaderFactory(): PartitionReaderFactory = {
-    log.logInfo(s"--> createReaderFactory.$streamId")
+    log.logDebug(s"--> createReaderFactory.$streamId")
     ChangeFeedScanPartitionReaderFactory(config, schema, cosmosClientStateHandle, diagnosticsConfig)
   }
 
@@ -121,7 +121,7 @@ private class ChangeFeedMicroBatchStream
   // serialize them in the end offset returned to avoid any IO calls for the actual partitioning
   override def latestOffset(startOffset: Offset, readLimit: ReadLimit): Offset = {
 
-    log.logInfo(s"--> latestOffset.$streamId")
+    log.logDebug(s"--> latestOffset.$streamId")
 
     val startChangeFeedOffset = startOffset.asInstanceOf[ChangeFeedOffset]
     val offset = CosmosPartitionPlanner.getLatestOffset(
@@ -138,11 +138,11 @@ private class ChangeFeedMicroBatchStream
     )
 
     if (offset.changeFeedState != startChangeFeedOffset.changeFeedState) {
-      log.logInfo(s"<-- latestOffset.$streamId - new offset ${offset.json()}")
+      log.logDebug(s"<-- latestOffset.$streamId - new offset ${offset.json()}")
       this.latestOffsetSnapshot = Some(offset)
       offset
     } else {
-      log.logInfo(s"<-- latestOffset.$streamId - Finished returning null")
+      log.logDebug(s"<-- latestOffset.$streamId - Finished returning null")
 
       this.latestOffsetSnapshot = None
 
@@ -173,7 +173,7 @@ private class ChangeFeedMicroBatchStream
       newOffsetJson
     }
 
-    log.logInfo(s"MicroBatch stream $streamId: Initial offset '$offsetJson'.")
+    log.logDebug(s"MicroBatch stream $streamId: Initial offset '$offsetJson'.")
     ChangeFeedOffset(offsetJson, None)
   }
 
@@ -210,15 +210,15 @@ private class ChangeFeedMicroBatchStream
    * equal to `end` and will only request offsets greater than `end` in the future.
    */
   override def commit(offset: Offset): Unit = {
-    log.logInfo(s"MicroBatch stream $streamId: Committed offset '${offset.json()}'.")
+    log.logDebug(s"MicroBatch stream $streamId: Committed offset '${offset.json()}'.")
   }
 
   /**
    * Stop this source and free any resources it has allocated.
    */
   override def stop(): Unit = {
     clientCacheItem.close()
-    log.logInfo(s"MicroBatch stream $streamId: stopped.")
+    log.logDebug(s"MicroBatch stream $streamId: stopped.")
   }
 }
 // scalastyle:on multiple.string.literals
```
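These log statements all move from info to debug level, so the per-micro-batch offset details disappear from default logs. If they are still needed for troubleshooting, they can be re-enabled selectively; a sketch, assuming the log4j 1.x API bundled with the Spark 3.1/3.2 runtime and that the connector logs under its package name:

```scala
// Sketch: raise verbosity only for the Cosmos Spark connector package.
// Assumes log4j 1.x on the classpath (Spark 3.1/3.2) and the
// com.azure.cosmos.spark logger name.
import org.apache.log4j.{Level, Logger}

Logger.getLogger("com.azure.cosmos.spark").setLevel(Level.DEBUG)
```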

sdk/cosmos/azure-cosmos-spark_3-2_2-12/CHANGELOG.md (2 additions, 0 deletions)

```diff
@@ -7,6 +7,8 @@
 #### Breaking Changes
 
 #### Bugs Fixed
+* Fixed an issue preventing preferred regions configured in `spark.cosmos.preferredRegionsList` from being used - See [PR 27084](https://github.com/Azure/azure-sdk-for-java/pull/27084)
+* Fixed `spark.cosmos.changeFeed.itemCountPerTriggerHint` handling when using structured streaming - there was an issue that would reduce the throughput in subsequent micro batches too aggressively. - See [PR 27101](https://github.com/Azure/azure-sdk-for-java/pull/27101)
 
 #### Other Changes
```

sdk/cosmos/azure-cosmos-spark_3-2_2-12/src/main/scala/com/azure/cosmos/spark/ChangeFeedMicroBatchStream.scala (10 additions, 10 deletions)

```diff
@@ -74,15 +74,15 @@ private class ChangeFeedMicroBatchStream
     assert(startOffset.isInstanceOf[ChangeFeedOffset], "Argument 'startOffset' is not a change feed offset.")
     assert(endOffset.isInstanceOf[ChangeFeedOffset], "Argument 'endOffset' is not a change feed offset.")
 
-    log.logInfo(s"--> planInputPartitions.$streamId, startOffset: ${startOffset.json()} - endOffset: ${endOffset.json()}")
+    log.logDebug(s"--> planInputPartitions.$streamId, startOffset: ${startOffset.json()} - endOffset: ${endOffset.json()}")
     val start = startOffset.asInstanceOf[ChangeFeedOffset]
     val end = endOffset.asInstanceOf[ChangeFeedOffset]
 
     val startChangeFeedState = new String(java.util.Base64.getUrlDecoder.decode(start.changeFeedState))
-    log.logInfo(s"Start-ChangeFeedState.$streamId: $startChangeFeedState")
+    log.logDebug(s"Start-ChangeFeedState.$streamId: $startChangeFeedState")
 
     val endChangeFeedState = new String(java.util.Base64.getUrlDecoder.decode(end.changeFeedState))
-    log.logInfo(s"End-ChangeFeedState.$streamId: $endChangeFeedState")
+    log.logDebug(s"End-ChangeFeedState.$streamId: $endChangeFeedState")
 
     assert(end.inputPartitions.isDefined, "Argument 'endOffset.inputPartitions' must not be null or empty.")
 
@@ -100,7 +100,7 @@ private class ChangeFeedMicroBatchStream
    * Returns a factory to create a `PartitionReader` for each `InputPartition`.
    */
   override def createReaderFactory(): PartitionReaderFactory = {
-    log.logInfo(s"--> createReaderFactory.$streamId")
+    log.logDebug(s"--> createReaderFactory.$streamId")
     ChangeFeedScanPartitionReaderFactory(config, schema, cosmosClientStateHandle, diagnosticsConfig)
   }
 
@@ -121,7 +121,7 @@ private class ChangeFeedMicroBatchStream
   // serialize them in the end offset returned to avoid any IO calls for the actual partitioning
   override def latestOffset(startOffset: Offset, readLimit: ReadLimit): Offset = {
 
-    log.logInfo(s"--> latestOffset.$streamId")
+    log.logDebug(s"--> latestOffset.$streamId")
 
     val startChangeFeedOffset = startOffset.asInstanceOf[ChangeFeedOffset]
     val offset = CosmosPartitionPlanner.getLatestOffset(
@@ -138,11 +138,11 @@ private class ChangeFeedMicroBatchStream
     )
 
     if (offset.changeFeedState != startChangeFeedOffset.changeFeedState) {
-      log.logInfo(s"<-- latestOffset.$streamId - new offset ${offset.json()}")
+      log.logDebug(s"<-- latestOffset.$streamId - new offset ${offset.json()}")
       this.latestOffsetSnapshot = Some(offset)
       offset
     } else {
-      log.logInfo(s"<-- latestOffset.$streamId - Finished returning null")
+      log.logDebug(s"<-- latestOffset.$streamId - Finished returning null")
 
       this.latestOffsetSnapshot = None
 
@@ -173,7 +173,7 @@ private class ChangeFeedMicroBatchStream
       newOffsetJson
     }
 
-    log.logInfo(s"MicroBatch stream $streamId: Initial offset '$offsetJson'.")
+    log.logDebug(s"MicroBatch stream $streamId: Initial offset '$offsetJson'.")
     ChangeFeedOffset(offsetJson, None)
   }
 
@@ -210,15 +210,15 @@ private class ChangeFeedMicroBatchStream
    * equal to `end` and will only request offsets greater than `end` in the future.
    */
   override def commit(offset: Offset): Unit = {
-    log.logInfo(s"MicroBatch stream $streamId: Committed offset '${offset.json()}'.")
+    log.logDebug(s"MicroBatch stream $streamId: Committed offset '${offset.json()}'.")
   }
 
   /**
    * Stop this source and free any resources it has allocated.
    */
   override def stop(): Unit = {
     clientCacheItem.close()
-    log.logInfo(s"MicroBatch stream $streamId: stopped.")
+    log.logDebug(s"MicroBatch stream $streamId: stopped.")
   }
 }
 // scalastyle:on multiple.string.literals
```

sdk/cosmos/azure-cosmos-spark_3_2-12/src/main/scala/com/azure/cosmos/spark/CosmosPartitionPlanner.scala (41 additions, 16 deletions)

```diff
@@ -93,15 +93,23 @@ private object CosmosPartitionPlanner extends BasicLoggingTrait {
   }
 
   private[this] def getContinuationTokenLsnOfFirstItem(items: Iterable[ObjectNode]): Option[String] = {
+    getLsnOfFirstItem(items) match {
+      case Some(firstLsn) =>
+        Some(SparkBridgeImplementationInternal.toContinuationToken(firstLsn))
+      case None => None
+    }
+  }
+
+  private[spark] def getLsnOfFirstItem(items: Iterable[ObjectNode]): Option[Long] = {
     items
       .collectFirst({
         case item: ObjectNode if item != null =>
           val lsnNode = item.get(LsnAttributeName)
           if (lsnNode != null && lsnNode.isNumber) {
-            // when grabbing the LSN from the item we need to use the item's LSN -1
-            // to ensure we would retrieve this item again
             Some(
-              SparkBridgeImplementationInternal.toContinuationToken(lsnNode.asLong() - 1))
+              // when grabbing the LSN from the item we need to use the item's LSN -1
+              // to ensure we would retrieve this item again
+              lsnNode.asLong() - 1)
           } else {
             None
           }
@@ -432,7 +440,7 @@ private object CosmosPartitionPlanner extends BasicLoggingTrait {
     val scaleFactor = if (storageSizeInMB == 0) {
       1
     } else {
-      progressWeightFactor * storageSizeInMB.toDouble
+      progressWeightFactor * storageSizeInMB
     }
 
     val planningInfo = PartitionPlanningInfo(
@@ -466,18 +474,35 @@ private object CosmosPartitionPlanner extends BasicLoggingTrait {
       .map(metadata => {
         val endLsn = readLimit match {
           case _: ReadAllAvailable => metadata.latestLsn
-          case _: ReadMaxRows =>
-            val gap = math.max(0, metadata.latestLsn - metadata.startLsn)
-            val weightFactor = metadata.getWeightedLsnGap.toDouble / totalWeightedLsnGap.get
-            val allowedRate = (weightFactor * gap).toLong.max(1)
-            if (isDebugLogEnabled) {
-              val calculateDebugLine = s"calculateEndLsn - gap $gap weightFactor $weightFactor " +
-                s"documentCount ${metadata.documentCount} latestLsn ${metadata.latestLsn} " +
-                s"startLsn ${metadata.startLsn} allowedRate $allowedRate weightedGap ${metadata.getWeightedLsnGap}"
-              logDebug(calculateDebugLine)
-            }
+          case maxRowsLimit: ReadMaxRows =>
+            if (totalWeightedLsnGap.get <= maxRowsLimit.maxRows) {
+              if (isDebugLogEnabled) {
+                val calculateDebugLine = s"calculateEndLsn (feedRange: ${metadata.feedRange}) - avg. Docs " +
+                  s"per LSN: ${metadata.getAvgItemsPerLsn} documentCount ${metadata.documentCount} firstLsn " +
+                  s"${metadata.firstLsn} latestLsn ${metadata.latestLsn} startLsn ${metadata.startLsn} weightedGap " +
+                  s"${metadata.getWeightedLsnGap} effectiveEndLsn ${metadata.latestLsn} maxRows ${maxRowsLimit.maxRows}"
+                logDebug(calculateDebugLine)
+              }
+              metadata.latestLsn
+            } else {
+              // the weight of this feedRange compared to other feedRanges
+              val feedRangeWeightFactor = metadata.getWeightedLsnGap.toDouble / totalWeightedLsnGap.get
+
+              val allowedRate = (feedRangeWeightFactor * maxRowsLimit.maxRows() / metadata.getAvgItemsPerLsn)
+                .toLong
+                .max(1)
+              val effectiveEndLsn = math.min(metadata.latestLsn, metadata.startLsn + allowedRate)
+              if (isDebugLogEnabled) {
+                val calculateDebugLine = s"calculateEndLsn (feedRange: ${metadata.feedRange}) - avg. Docs/LSN: " +
+                  s"${metadata.getAvgItemsPerLsn} feedRangeWeightFactor $feedRangeWeightFactor documentCount " +
+                  s"${metadata.documentCount} firstLsn ${metadata.firstLsn} latestLsn ${metadata.latestLsn} startLsn " +
+                  s"${metadata.startLsn} allowedRate $allowedRate weightedGap ${metadata.getWeightedLsnGap} " +
+                  s"effectiveEndLsn $effectiveEndLsn maxRows $maxRowsLimit.maxRows"
+                logDebug(calculateDebugLine)
+              }
 
-            math.min(metadata.latestLsn, metadata.startLsn + allowedRate)
+              effectiveEndLsn
+            }
           case _: ReadMaxFiles => throw new IllegalStateException("ReadLimitMaxFiles not supported by this source.")
         }
 
@@ -496,7 +521,7 @@ private object CosmosPartitionPlanner extends BasicLoggingTrait {
         } else if (effectiveEndLsn <= metadata.startLsn) {
           // If progress has caught up with estimation already make sure we only use one Spark partition
           // for the physical partition in Cosmos
-          1 / storageSizeInMB.toDouble
+          1 / storageSizeInMB
         } else {
           // Use weight factor based on progress. This estimate assumes equal distribution of storage
           // size per LSN - which is a "good enough" simplification
```
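The reworked `ReadMaxRows` branch budgets the requested row count across feed ranges instead of scaling the raw LSN gap, which is what previously throttled subsequent micro batches too aggressively. A self-contained sketch of the same arithmetic, detached from the planner's types and with simplified names:

```scala
// Sketch of the endLsn budgeting above: maxRows comes from
// itemCountPerTriggerHint, weightedLsnGap approximates the number of pending
// documents in this feed range, avgItemsPerLsn the average documents per LSN.
def effectiveEndLsn(
    maxRows: Long,
    totalWeightedLsnGap: Long,
    weightedLsnGap: Long,
    avgItemsPerLsn: Double,
    startLsn: Long,
    latestLsn: Long): Long = {
  if (totalWeightedLsnGap <= maxRows) {
    // Everything pending fits into this micro batch - drain up to the latest LSN.
    latestLsn
  } else {
    // Give this feed range a share of maxRows proportional to its pending weight,
    // then convert the row budget back into an LSN distance (at least 1 to
    // guarantee forward progress).
    val feedRangeWeightFactor = weightedLsnGap.toDouble / totalWeightedLsnGap
    val allowedRate = (feedRangeWeightFactor * maxRows / avgItemsPerLsn).toLong.max(1)
    math.min(latestLsn, startLsn + allowedRate)
  }
}

// Example: a 10,000-row hint split over two equally weighted ranges gives each
// range a 5,000-row budget; at 2 docs per LSN that is 2,500 LSNs of progress.
val end = effectiveEndLsn(maxRows = 10000L, totalWeightedLsnGap = 50000L,
  weightedLsnGap = 25000L, avgItemsPerLsn = 2.0, startLsn = 1000L, latestLsn = 30000L)
// feedRangeWeightFactor = 0.5, allowedRate = (0.5 * 10000 / 2.0) = 2500
// => end = min(30000, 1000 + 2500) = 3500
```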

sdk/cosmos/azure-cosmos-spark_3_2-12/src/main/scala/com/azure/cosmos/spark/PartitionMetadata.scala (23 additions, 8 deletions)

```diff
@@ -24,15 +24,16 @@ private object PartitionMetadata {
     feedRange: NormalizedRange,
     documentCount: Long,
     totalDocumentSizeInKB: Long,
-    continuationToken: String,
+    firstLsn: Option[Long],
+    fromNowContinuationToken: String,
     startLsn: Long = 0,
     endLsn: Option[Long] = None): PartitionMetadata = {
     // scalastyle:on parameter.number
 
     val nowEpochMs = Instant.now().toEpochMilli
 
     val latestLsn = SparkBridgeImplementationInternal.extractLsnFromChangeFeedContinuation(
-      continuationToken)
+      fromNowContinuationToken)
 
     PartitionMetadata(
       userConfig,
@@ -42,6 +43,7 @@ private object PartitionMetadata {
       feedRange,
       documentCount,
       totalDocumentSizeInKB,
+      firstLsn,
       latestLsn,
       startLsn,
       endLsn,
@@ -60,6 +62,7 @@ private[cosmos] case class PartitionMetadata
     feedRange: NormalizedRange,
     documentCount: Long,
     totalDocumentSizeInKB: Long,
+    firstLsn: Option[Long],
     latestLsn: Long,
     startLsn: Long,
     endLsn: Option[Long],
@@ -83,6 +86,7 @@ private[cosmos] case class PartitionMetadata
       subRange,
       this.documentCount,
       this.totalDocumentSizeInKB,
+      this.firstLsn,
       this.latestLsn,
       startLsn,
       this.endLsn,
@@ -100,6 +104,7 @@ private[cosmos] case class PartitionMetadata
       this.feedRange,
       this.documentCount,
       this.totalDocumentSizeInKB,
+      this.firstLsn,
       this.latestLsn,
       startLsn,
       Some(explicitEndLsn),
@@ -110,14 +115,24 @@ private[cosmos] case class PartitionMetadata
 
   def getWeightedLsnGap: Long = {
     val progressFactor = math.max(this.latestLsn - this.startLsn, 0)
-    val averageItemsPerLsn = if (this.documentCount == 0) {
-      1d
+    if (progressFactor == 0) {
+      0
     } else {
-      this.latestLsn / this.documentCount.toDouble
+      val averageItemsPerLsn = getAvgItemsPerLsn
+
+      val weightedGap: Double = progressFactor * averageItemsPerLsn
+      // Any double less than 1 gets rounded to 0 when toLong is invoked
+      weightedGap.toLong.max(1)
     }
+  }
 
-    val weightedGap: Double = progressFactor * averageItemsPerLsn
-    // Any double less than 1 gets rounded to 0 when toLong is invoked
-    weightedGap.toLong.max(1)
+  def getAvgItemsPerLsn: Double = {
+    if (this.firstLsn.isEmpty) {
+      math.max(1d, this.documentCount.toDouble / this.latestLsn)
+    } else if (this.documentCount == 0 || (this.latestLsn - this.firstLsn.get) <= 0) {
+      1d
+    } else {
+      this.documentCount.toDouble / (this.latestLsn - this.firstLsn.get)
+    }
   }
 }
```
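The new `getAvgItemsPerLsn` bases the density estimate on the first observed LSN when one is available, so it reflects the retained change feed window rather than the partition's entire LSN history, and `getWeightedLsnGap` now returns 0 when there is no gap at all. A standalone sketch of the same calculation with illustrative numbers:

```scala
// Sketch of the weighted-gap estimate above, outside the case class.
def avgItemsPerLsn(documentCount: Long, firstLsn: Option[Long], latestLsn: Long): Double =
  firstLsn match {
    case None =>
      // No first LSN observed yet - fall back to density over the full LSN range.
      math.max(1d, documentCount.toDouble / latestLsn)
    case Some(first) if documentCount == 0 || latestLsn - first <= 0 => 1d
    case Some(first) => documentCount.toDouble / (latestLsn - first)
  }

def weightedLsnGap(startLsn: Long, latestLsn: Long, avgItemsPerLsn: Double): Long = {
  val progressFactor = math.max(latestLsn - startLsn, 0)
  if (progressFactor == 0) 0
  // Any double below 1 would truncate to 0 via toLong, so clamp to at least 1.
  else (progressFactor * avgItemsPerLsn).toLong.max(1)
}

// Example: 12,000 documents between LSN 2,000 and 10,000 => 1.5 docs/LSN;
// with 4,000 LSNs still to read, the weighted gap is ~6,000 pending documents.
val avg = avgItemsPerLsn(documentCount = 12000L, firstLsn = Some(2000L), latestLsn = 10000L) // 1.5
val gap = weightedLsnGap(startLsn = 6000L, latestLsn = 10000L, avgItemsPerLsn = avg)         // 6000
```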
