dask_ml/model_selection/_incremental.py (57 changes: 32 additions and 25 deletions)

@@ -198,34 +198,40 @@ async def _fit(
else:
    y_test = await client.scatter(y_test)

# Convert to batches of delayed objects of numpy arrays
X_train = sorted(futures_of(X_train), key=lambda f: f.key)
y_train = sorted(futures_of(y_train), key=lambda f: f.key)
assert len(X_train) == len(y_train)

train_eg = await client.gather(client.map(len, y_train))
msg = "[CV%s] For training there are between %d and %d examples in each chunk"
logger.info(msg, prefix, min(train_eg), max(train_eg))
if hasattr(X_train, 'npartitions'):
Member:
Can this if-statement be changed to if isinstance(X_train, da.Array)?

gioxc88 (Author), Oct 29, 2020:
The reason I don't want to restrict the if-statement to isinstance(X_train, da.Array) is that, potentially, any custom data structure, like the hypothetical CustomFrame, could expose a dask-like API by implementing npartitions and __dask_graph__.

In that case, for example, I (or any other user) could later extend CustomFrame to work with data larger than memory, and this would still work with Hyperband even though it is not a da.Array, because the API is compatible.

With this in mind, maybe a good compromise could be:

if dask.is_dask_collection(X_train) ?

Member:
if dask.is_dask_collection(X_train)

I think that's a better choice (though @TomAugspurger might have more input).

Member:
Yeah, I think is_dask_collection is better.
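
To make the duck-typing argument concrete, here is a minimal, self-contained sketch (not part of the PR) of a hypothetical CustomFrame that is not a da.Array but still passes dask.is_dask_collection; the dask releases current at the time of this discussion treat any object whose __dask_graph__() returns a non-None graph as a collection:

import dask
import dask.array as da
import numpy as np


class CustomFrame:
    """Hypothetical dask-like container; illustration only."""

    def __init__(self, blocks):
        self.blocks = [np.asarray(b) for b in blocks]

    @property
    def npartitions(self):
        return len(self.blocks)

    def __dask_graph__(self):
        # A plain dict is a valid low-level dask graph.
        return {("customframe-block", i): block for i, block in enumerate(self.blocks)}


cf = CustomFrame([[1, 2], [3, 4]])
print(isinstance(cf, da.Array))     # False
print(dask.is_dask_collection(cf))  # True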

    # Convert to batches of delayed objects of numpy arrays
    X_train = sorted(futures_of(X_train), key=lambda f: f.key)
    y_train = sorted(futures_of(y_train), key=lambda f: f.key)
    assert len(X_train) == len(y_train)

    train_eg = await client.gather(client.map(len, y_train))
    msg = "[CV%s] For training there are between %d and %d examples in each chunk"
    logger.info(msg, prefix, min(train_eg), max(train_eg))
Member:
Why won't this code work?

train_eg =  ...
msg = ...
if len(train_eg):
    logger.info(msg, prefix, min(train_eg), max(train_eg))

This avoids min([]) as mentioned in #748 (comment).

Then, I think the if-statement could be moved inside get_futures:

def get_futures(partial_fit_calls):
    if not isinstance(X_train, da.Array):
        return X_train, y_train
    ...  # existing implementation

gioxc88 (Author), Oct 29, 2020:
You are right, I was not careful when I read your comment; this would work.

The reason I did not implement it that way is that saying if len(train_eg): (or, even shorter, if train_eg:) is the same as checking isinstance(X_train, da.Array), because train_eg will be empty only if X_train is not a da.Array. This means that in your implementation you are still executing these lines even when they are not needed:

X_train = sorted(futures_of(X_train), key=lambda f: f.key)
y_train = sorted(futures_of(y_train), key=lambda f: f.key)
assert len(X_train) == len(y_train)

train_eg = await client.gather(client.map(len, y_train))

Finally, the reason it might be better not to move the if-statement inside the get_futures function is that it forces you to check the condition every time the function is called, which is not ideal for dask collections with a large number of partitions.

Member:
I think the logging message should be issued if the dataset object has an implementation of __len__. The computation of train_eg only happens once (train_eg is a list of ints).

I don't like two separate definitions of get_futures, especially if they both return training data.


def get_futures(partial_fit_calls):
    """Policy to get training data futures

    Currently we compute once, and then keep in memory.
    Presumably in the future we'll want to let data drop and recompute.
    This function handles that policy internally, and also controls random
    access to training data.
    """
    # Shuffle blocks going forward to get uniform-but-random access
    while partial_fit_calls >= len(order):
        L = list(range(len(X_train)))
        rng.shuffle(L)
        order.extend(L)
    j = order[partial_fit_calls]
    return X_train[j], y_train[j]
# __addition__ start
else:
    def get_futures(partial_fit_calls):
        return X_train, y_train
# __addition__ end
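
For comparison, a self-contained sketch of the single-definition approach discussed above, with plain lists standing in for the per-chunk futures and a use_dask flag standing in for the dask.is_dask_collection(X_train) check; the names mirror the surrounding diff, but this is only an illustration, not the merged code:

import random

rng = random.Random(0)
use_dask = True                     # stand-in for dask.is_dask_collection(X_train)
X_train = [[1, 2], [3, 4], [5, 6]]  # stand-in: one entry per chunk/future
y_train = [[0, 1], [1, 0], [0, 0]]

# Guarding the log line avoids min([]) / max([]) on the in-memory path.
train_eg = [len(y) for y in y_train] if use_dask else []
if train_eg:
    print("between %d and %d examples in each chunk" % (min(train_eg), max(train_eg)))

order = []  # order by which we process training data futures


def get_futures(partial_fit_calls):
    """Single policy function covering both the dask and in-memory cases."""
    if not use_dask:
        return X_train, y_train
    # Shuffle blocks going forward to get uniform-but-random access
    while partial_fit_calls >= len(order):
        L = list(range(len(X_train)))
        rng.shuffle(L)
        order.extend(L)
    j = order[partial_fit_calls]
    return X_train[j], y_train[j]


print(get_futures(0))
print(get_futures(1))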

# Order by which we process training data futures
order = []

def get_futures(partial_fit_calls):
    """Policy to get training data futures

    Currently we compute once, and then keep in memory.
    Presumably in the future we'll want to let data drop and recompute.
    This function handles that policy internally, and also controls random
    access to training data.
    """
    # Shuffle blocks going forward to get uniform-but-random access
    while partial_fit_calls >= len(order):
        L = list(range(len(X_train)))
        rng.shuffle(L)
        order.extend(L)
    j = order[partial_fit_calls]
    return X_train[j], y_train[j]

# Submit initial partial_fit and score computations on first batch of data
X_future, y_future = get_futures(0)
X_future_2, y_future_2 = get_futures(1)
@@ -566,7 +572,8 @@ def _get_train_test_split(self, X, y, **kwargs):
X, y : dask.array.Array
"""
if self.test_size is None:
    test_size = min(0.2, 1 / X.npartitions)
    npartitions = getattr(X, 'npartitions', 1)
    test_size = min(0.2, 1 / npartitions)
else:
    test_size = self.test_size
X_train, X_test, y_train, y_test = train_test_split(
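
A quick illustration of what the getattr fallback changes (FakeCollection is just a stand-in for an object that exposes npartitions; the values are illustrative):

class FakeCollection:
    npartitions = 10  # pretend: a dask collection split into 10 partitions


for X in ([1, 2, 3], FakeCollection()):  # plain in-memory list vs. partitioned object
    npartitions = getattr(X, 'npartitions', 1)
    print(type(X).__name__, min(0.2, 1 / npartitions))
# list 0.2            -> in-memory data falls back to the default 20% test split
# FakeCollection 0.1  -> with many partitions, test_size shrinks to one partition's worth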