diff --git a/autotest/conftest.py b/autotest/conftest.py index 036ce9e67..53650cc00 100644 --- a/autotest/conftest.py +++ b/autotest/conftest.py @@ -7,6 +7,8 @@ import pytest from modflow_devtools.misc import is_in_ci +from autotest.test_grid_cases import GridCases + # import modflow-devtools fixtures pytest_plugins = ["modflow_devtools.fixtures", "modflow_devtools.snapshots"] @@ -79,6 +81,51 @@ def patch_macos_ci_matplotlib(): matplotlib.use("agg") +# grid fixtures (from GridCases) + + +@pytest.fixture +def minimal_vertex_grid(): + """Minimal 2-cell vertex grid.""" + return GridCases.vertex_minimal() + + +@pytest.fixture +def minimal_unstructured_grid(): + """Minimal 2-cell unstructured grid.""" + return GridCases.unstructured_minimal() + + +@pytest.fixture +def triangular_vertex_grid(): + """Triangular vertex grid using Delaunay triangulation.""" + return GridCases.vertex_triangular() + + +@pytest.fixture +def triangular_unstructured_grid(): + """Triangular unstructured grid using Delaunay triangulation.""" + return GridCases.unstructured_triangular() + + +@pytest.fixture +def simple_vertex_grid(): + """10x10 rectangular vertex grid (100 cells, 10x10 each).""" + return GridCases.vertex_rectangular(nrow=10, ncol=10, cell_size=10.0) + + +@pytest.fixture +def rotated_vertex_grid(): + """Rotated 5x5 rectangular vertex grid.""" + return GridCases.vertex_rectangular(nrow=5, ncol=5, cell_size=20.0, angrot=45.0) + + +@pytest.fixture +def layered_unstructured_grid(): + """3-layer unstructured grid for 3D testing.""" + return GridCases.unstructured_layered() + + # pytest configuration hooks diff --git a/autotest/test_grid.py b/autotest/test_grid.py index 6f1211e60..bbf7a7a67 100644 --- a/autotest/test_grid.py +++ b/autotest/test_grid.py @@ -26,6 +26,7 @@ gridlist_to_disv_gridprops, to_cvfd, ) +from flopy.utils.geospatial_index import GeospatialIndex from flopy.utils.triangle import Triangle from flopy.utils.voronoi import VoronoiGrid @@ -1603,6 +1604,53 @@ def test_geo_dataframe(structured_grid, vertex_grid, unstructured_grid): raise AssertionError(f"Cell vertices incorrect for node={node}") +def test_structured_boundary_tiebreaker(simple_structured_grid): + """Test StructuredGrid uses lowest row/col for boundary points.""" + grid = simple_structured_grid + + # Boundary at x=10 -> col 0 + _, col = grid.intersect(10.0, 95.0) + assert col == 0 + + # Boundary at y=90 -> row 0 + row, _ = grid.intersect(5.0, 90.0) + assert row == 0 + + +def test_return_type_structured(simple_structured_grid): + """Test StructuredGrid return types are consistent.""" + grid = simple_structured_grid + + # Scalar inside -> int + row, col = grid.intersect(5.0, 95.0) + assert isinstance(row, (int, np.integer)) + assert isinstance(col, (int, np.integer)) + + # Scalar outside -> nan + row, col = grid.intersect(150.0, 50.0, forgive=True) + assert np.isnan(row) and np.isnan(col) + + # Scalar with z inside -> int + lay, row, col = grid.intersect(5.0, 95.0, z=5.0) + assert all(isinstance(v, (int, np.integer)) for v in [lay, row, col]) + + # Scalar with z outside -> nan + lay, row, col = grid.intersect(150.0, 50.0, z=5.0, forgive=True) + assert all(np.isnan(v) for v in [lay, row, col]) + + # Array all inside -> any integer dtype + rows, cols = grid.intersect(np.array([5.0, 55.0]), np.array([95.0, 95.0])) + assert np.issubdtype(rows.dtype, np.integer) + assert np.issubdtype(cols.dtype, np.integer) + + # Array with outside -> must be float64 (due to NaNs) + rows, cols = grid.intersect( + np.array([5.0, 150.0]), np.array([95.0, 50.0]), forgive=True + ) + assert rows.dtype == np.float64 + assert cols.dtype == np.float64 + + def test_unstructured_iverts_cleanup(): grid = GridCases.structured_small() @@ -1657,18 +1705,108 @@ def test_unstructured_iverts_cleanup(): raise AssertionError("Improper number of vertices for cleaned 'shared' iverts") -def test_structured_grid_intersect_edge_case(simple_structured_grid): - """Test StructuredGrid.intersect() edge case: out-of-bounds x,y with z. +# ============================================================================ +# GeospatialIndex-specific Tests +# ============================================================================ + - This tests the specific case where x,y are out of bounds and z is provided. - The expected behavior is to return (None, nan, nan). +@pytest.mark.parametrize( + "grid_fixture,grid_type,expected_cells,expected_points_per_cell", + [ + ("minimal_vertex_grid", "vertex", 2, 5), + ("minimal_unstructured_grid", "unstructured", 2, 5), + ("simple_vertex_grid", "vertex", 100, 5), + ], +) +def test_index_build( + grid_fixture, grid_type, expected_cells, expected_points_per_cell, request +): + """Test building GeospatialIndex for different grid types.""" + grid = request.getfixturevalue(grid_fixture) + index = GeospatialIndex(grid) + + assert index.grid.grid_type == grid_type + assert index.grid is grid + assert index.tree is not None + assert index.points.shape[0] == expected_cells * expected_points_per_cell + assert len(np.unique(index.point_to_cell)) == expected_cells + + +def test_index_repr(simple_vertex_grid): + """Test string representation of GeospatialIndex.""" + index = GeospatialIndex(simple_vertex_grid) + repr_str = repr(index) + + assert "GeospatialIndex" in repr_str + assert "vertex grid" in repr_str + assert "100 cells" in repr_str + assert "500 indexed points" in repr_str + + +@pytest.mark.parametrize("k", [1, 5, 10, 20]) +def test_index_k_parameter(simple_vertex_grid, k): + """Test that different k values still find correct cell.""" + index = GeospatialIndex(simple_vertex_grid) + + assert index.query_point(5.0, 95.0, k=k) == 0 + + cellids = index.query_points( + np.array([5.0, 55.0, 95.0]), + np.array([95.0, 95.0, 5.0]), + k=k, + ) + np.testing.assert_array_equal(cellids, [0, 5, 99]) + + +def test_index_thin_sliver_cell(): + """Test GeospatialIndex finds points in very thin "sliver" cells. + + Tests the cell center + vertices KD-tree approach for cells where the + cell center may be far from the query point. """ - grid = simple_structured_grid + from matplotlib.path import Path + + np.random.seed(42) + + # Create base random points with thin sliver vertices + n_points = 8 + x_verts = np.random.uniform(0, 100, n_points).tolist() + y_verts = np.random.uniform(0, 100, n_points).tolist() + + for i in range(4): + x_verts.append(50.0 + i * 0.05) # Very thin: 0.15 units wide + y_verts.append(i * 33.33) # Tall: 100 units high + + points = np.column_stack([x_verts, y_verts]) + tri = Delaunay(points) + + vertices = [[i, x_verts[i], y_verts[i]] for i in range(len(x_verts))] + cell2d = [] + for i, simplex in enumerate(tri.simplices): + cell_x = np.mean([x_verts[j] for j in simplex]) + cell_y = np.mean([y_verts[j] for j in simplex]) + cell2d.append([i, cell_x, cell_y, len(simplex)] + list(simplex)) + + ncells = len(cell2d) + grid = VertexGrid( + vertices=vertices, + cell2d=cell2d, + top=np.ones(ncells) * 10.0, + botm=np.zeros(ncells), + ) + + index = GeospatialIndex(grid) + + # Test points in sliver region + test_points = [(50.025, 50.0), (50.075, 25.0), (50.025, 75.0)] + found_count = 0 - # Test out-of-bounds x,y with z coordinate - lay, row, col = grid.intersect(-50.0, -50.0, z=5.0, forgive=True) + for x, y in test_points: + result = index.query_point(x, y, k=20) + if not np.isnan(result): + xv, yv, _ = grid.xyzvertices + verts = np.column_stack([xv[result], yv[result]]) + if Path(verts).contains_point((x, y), radius=1e-9): + found_count += 1 - # Expected behavior: lay=None, row=nan, col=nan - assert lay is None, f"Expected lay=None, got {lay}" - assert np.isnan(row), f"Expected row=nan, got {row}" - assert np.isnan(col), f"Expected col=nan, got {col}" + assert found_count > 0, f"Should find points in sliver cells, found {found_count}/3" diff --git a/autotest/test_grid_cases.py b/autotest/test_grid_cases.py index 9415a237b..12432002e 100644 --- a/autotest/test_grid_cases.py +++ b/autotest/test_grid_cases.py @@ -2,6 +2,7 @@ from tempfile import TemporaryDirectory import numpy as np +from scipy.spatial import Delaunay from flopy.discretization import StructuredGrid, UnstructuredGrid, VertexGrid from flopy.utils.triangle import Triangle @@ -387,3 +388,167 @@ def voronoi_many_polygons(): grid = VertexGrid(**gridprops, nlay=1) return ncpl, vor, gridprops, grid + + # ========================================================================= + # Minimal grids for GeospatialIndex testing + # ========================================================================= + + @staticmethod + def _minimal_geometry(): + """Create shared 2-cell geometry used by multiple grid methods.""" + vertices = [ + [0, 0.0, 1.0], + [1, 1.0, 1.0], + [2, 2.0, 1.0], + [3, 0.0, 0.0], + [4, 1.0, 0.0], + [5, 2.0, 0.0], + ] + iverts = [[0, 1, 4, 3], [1, 2, 5, 4]] + xcenters = [0.5, 1.5] + ycenters = [0.5, 0.5] + return vertices, iverts, xcenters, ycenters + + @staticmethod + def _triangular_geometry(seed=42, n_points=30): + """Create shared Delaunay triangulation geometry.""" + np.random.seed(seed) + x_verts = np.random.uniform(0, 100, n_points) + y_verts = np.random.uniform(0, 100, n_points) + points = np.column_stack([x_verts, y_verts]) + + tri = Delaunay(points) + vertices = [[i, x_verts[i], y_verts[i]] for i in range(len(x_verts))] + iverts = tri.simplices.tolist() + xcenters = np.mean(points[tri.simplices], axis=1)[:, 0] + ycenters = np.mean(points[tri.simplices], axis=1)[:, 1] + + return vertices, iverts, xcenters, ycenters + + @staticmethod + def vertex_minimal(): + """Create a minimal 2-cell vertex grid.""" + vertices, iverts, xcenters, ycenters = GridCases._minimal_geometry() + cell2d = [ + [0, xcenters[0], ycenters[0], 4] + iverts[0], + [1, xcenters[1], ycenters[1], 4] + iverts[1], + ] + return VertexGrid(vertices=vertices, cell2d=cell2d, nlay=1) + + @staticmethod + def unstructured_minimal(): + """Create a minimal 2-cell unstructured grid.""" + vertices, iverts, xcenters, ycenters = GridCases._minimal_geometry() + return UnstructuredGrid( + vertices=vertices, iverts=iverts, xcenters=xcenters, ycenters=ycenters + ) + + @staticmethod + def vertex_triangular(seed=42, n_points=30): + """Create a triangular vertex grid using Delaunay triangulation.""" + vertices, iverts, xcenters, ycenters = GridCases._triangular_geometry( + seed, n_points + ) + cell2d = [ + [i, xcenters[i], ycenters[i], 3] + iverts[i] for i in range(len(iverts)) + ] + ncells = len(cell2d) + return VertexGrid( + vertices=vertices, + cell2d=cell2d, + top=np.ones(ncells) * 10.0, + botm=np.zeros(ncells), + ) + + @staticmethod + def unstructured_triangular(seed=42, n_points=30): + """Create a triangular unstructured grid using Delaunay triangulation.""" + vertices, iverts, xcenters, ycenters = GridCases._triangular_geometry( + seed, n_points + ) + ncells = len(iverts) + return UnstructuredGrid( + vertices=vertices, + iverts=iverts, + xcenters=xcenters, + ycenters=ycenters, + top=np.ones(ncells) * 10.0, + botm=np.zeros(ncells), + ) + + @staticmethod + def vertex_rectangular(nrow=10, ncol=10, cell_size=10.0, angrot=0.0): + """Create a rectangular vertex grid. + + Parameters + ---------- + nrow : int + Number of rows. + ncol : int + Number of columns. + cell_size : float + Size of each cell (square cells). + angrot : float + Grid rotation angle in degrees. + """ + vertices = [] + vid = 0 + for i in range(nrow + 1): + for j in range(ncol + 1): + x = j * cell_size + y = (nrow - i) * cell_size + vertices.append([vid, x, y]) + vid += 1 + + cell2d = [] + cellid = 0 + for i in range(nrow): + for j in range(ncol): + xc = (j + 0.5) * cell_size + yc = (nrow - i - 0.5) * cell_size + v0 = i * (ncol + 1) + j + v1 = v0 + 1 + v2 = v1 + (ncol + 1) + v3 = v0 + (ncol + 1) + cell2d.append([cellid, xc, yc, 4, v0, v1, v2, v3]) + cellid += 1 + + top = np.ones(nrow * ncol) * 10.0 + botm = np.zeros(nrow * ncol) + + return VertexGrid( + vertices=vertices, cell2d=cell2d, top=top, botm=botm, nlay=1, angrot=angrot + ) + + @staticmethod + def unstructured_layered(): + """Create a 3-layer unstructured grid for 3D testing. + + Grid has 2 cells per layer, 3 layers total = 6 cells. + Cell IDs: 0-1 (layer 0), 2-3 (layer 1), 4-5 (layer 2). + Z elevations: layer 0 (10-7), layer 1 (7-4), layer 2 (4-1). + + Uses 2D indexing with layer search (grid_varies_by_layer=False). + """ + vertices, base_iverts, base_xc, base_yc = GridCases._minimal_geometry() + + nlay = 3 + ncpl = 2 + + # Only provide iverts for first layer (grid_varies_by_layer=False) + iverts = base_iverts + xcenters = list(base_xc) + ycenters = list(base_yc) + + top = np.array([10.0, 10.0, 7.0, 7.0, 4.0, 4.0]) + botm = np.array([7.0, 7.0, 4.0, 4.0, 1.0, 1.0]) + + return UnstructuredGrid( + vertices=vertices, + iverts=iverts, + xcenters=xcenters, + ycenters=ycenters, + top=top, + botm=botm, + ncpl=np.array([ncpl] * nlay), + ) diff --git a/flopy/discretization/structuredgrid.py b/flopy/discretization/structuredgrid.py index 0d2df58fd..e7bc920b3 100644 --- a/flopy/discretization/structuredgrid.py +++ b/flopy/discretization/structuredgrid.py @@ -1023,37 +1023,38 @@ def intersect(self, x, y, z=None, local=False, forgive=False): # get the cell edges in local coordinates xe, ye = self.xyedges - # Vectorized row/col calculation + # Fully vectorized row/col calculation n_points = len(x) - rows = np.full(n_points, np.nan if forgive else -1, dtype=float) - cols = np.full(n_points, np.nan if forgive else -1, dtype=float) - - for i in range(n_points): - xi, yi = x[i], y[i] - - # Find column - xcomp = xi > xe - if np.all(xcomp) or not np.any(xcomp): - if forgive: - cols[i] = np.nan - else: - raise ValueError( - f"x, y point given is outside of the model area: ({xi}, {yi})" - ) - else: - cols[i] = np.asarray(xcomp).nonzero()[0][-1] - - # Find row - ycomp = yi < ye - if np.all(ycomp) or not np.any(ycomp): - if forgive: - rows[i] = np.nan - else: - raise ValueError( - f"x, y point given is outside of the model area: ({xi}, {yi})" - ) - else: - rows[i] = np.asarray(ycomp).nonzero()[0][-1] + rows = np.full(n_points, np.nan, dtype=float) + cols = np.full(n_points, np.nan, dtype=float) + + # Vectorized column finding using searchsorted + # When point is on edge, use lower column index (tie-breaking rule) + # side="left" ensures x==edge goes to the cell on the left + cols_valid = np.searchsorted(xe, x, side="left") - 1 + # searchsorted returns index in [0, len(xe)], but valid cols are [0, ncol-1] + cols_mask = (cols_valid >= 0) & (cols_valid < self.ncol) + cols[cols_mask] = cols_valid[cols_mask] + + # Vectorized row finding using searchsorted + # When point is on edge, use lower row index (tie-breaking rule) + # Since ye is in descending order (top to bottom), we need to flip it + # side="right" on flipped array ensures y==edge goes to the cell below + ye_flipped = ye[::-1] + rows_flipped = np.searchsorted(ye_flipped, y, side="right") + rows_valid = len(ye) - 1 - rows_flipped + rows_mask = (rows_valid >= 0) & (rows_valid < self.nrow) + rows[rows_mask] = rows_valid[rows_mask] + + # Check for errors if not forgiving + if not forgive: + invalid_mask = np.isnan(rows) | np.isnan(cols) + if np.any(invalid_mask): + idx = np.where(invalid_mask)[0][0] + raise ValueError( + f"x, y point given is outside of the model area: " + f"({x[idx]}, {y[idx]})" + ) # If either row or col is NaN, set both to NaN invalid_mask = np.isnan(rows) | np.isnan(cols) @@ -1078,39 +1079,55 @@ def intersect(self, x, y, z=None, local=False, forgive=False): int ) if np.all(valid_mask) else cols - # Handle z-coordinate - lays = np.full(n_points, np.nan if forgive else -1, dtype=float) - - for i in range(n_points): - if np.isnan(rows[i]) or np.isnan(cols[i]): - continue + # Handle z-coordinate - vectorized layer finding + lays = np.full(n_points, np.nan, dtype=float) + + # Only process points that have valid row/col + valid_mask = ~(np.isnan(rows) | np.isnan(cols)) + valid_indices = np.where(valid_mask)[0] + + if len(valid_indices) > 0: + # Extract valid coordinates and z-values + valid_rows = rows[valid_indices].astype(int) + valid_cols = cols[valid_indices].astype(int) + valid_z = z[valid_indices] + n_valid = len(valid_indices) + + # Extract top/bottom elevations for all valid points and all layers + # Shape: (n_valid, n_layers + 1) + tops_bottoms = self.top_botm[:, valid_rows, valid_cols].T + + # Check which layer each point belongs to + # For each point, check if top[layer] >= z >= bottom[layer] + # Shape: (n_valid, n_layers) + in_layer = (tops_bottoms[:, :-1] >= valid_z[:, np.newaxis]) & ( + valid_z[:, np.newaxis] >= tops_bottoms[:, 1:] + ) - row, col = int(rows[i]), int(cols[i]) - zi = z[i] + # Find the first (topmost) layer for each point + # argmax returns the first True value, or 0 if all False + layer_indices = np.argmax(in_layer, axis=1) - for layer in range(self.__nlay): - if ( - self.top_botm[layer, row, col] - >= zi - >= self.top_botm[layer + 1, row, col] - ): - lays[i] = layer - break + # Set layer values only where a valid layer was found + found_layer = in_layer[np.arange(n_valid), layer_indices] + lays[valid_indices[found_layer]] = layer_indices[found_layer] - if np.isnan(lays[i]) and not forgive: - raise ValueError( - f"point given is outside the model area: ({x[i]}, {y[i]}, {zi})" - ) + # Check for errors if not forgiving + if not forgive: + not_found = ~found_layer + if np.any(not_found): + idx = valid_indices[not_found][0] + raise ValueError( + f"point given is outside the model area: " + f"({x[idx]}, {y[idx]}, {z[idx]})" + ) # Return results if is_scalar_input: lay, row, col = lays[0], rows[0], cols[0] if not np.isnan(lay): lay, row, col = int(lay), int(row), int(col) - else: - # When x,y are out of bounds: lay=None, row/col keep NaN - lay = None - # row and col already have their NaN values + # Otherwise lay, row, col keep their nan values for consistency return lay, row, col else: valid_3d = ~np.isnan(lays) & ~np.isnan(rows) & ~np.isnan(cols) diff --git a/flopy/discretization/unstructuredgrid.py b/flopy/discretization/unstructuredgrid.py index f34764dd6..232db14db 100644 --- a/flopy/discretization/unstructuredgrid.py +++ b/flopy/discretization/unstructuredgrid.py @@ -7,6 +7,7 @@ from matplotlib.path import Path from ..utils.geometry import is_clockwise, transform +from ..utils.geospatial_index import GeospatialIndex from .grid import CachedData, Grid @@ -25,13 +26,17 @@ class UnstructuredGrid(Grid): size nodes, if the grid_varies_by_nodes argument is true, or it must be of size ncpl[0] if the same 2d spatial grid is used for each layer. xcenters : list or ndarray - list of x center coordinates for all cells in the grid if the grid - varies by layer or for all cells in a layer if the same grid is used - for all layers + x-coordinates of cell centers for all cells in the grid if the grid + varies by layer, or for all cells in a layer if the same grid is used + for all layers. These are typically geometric centroids but may be + any representative point. For convex cells (triangles, rectangles), + the arithmetic mean of vertices is equivalent to the true centroid. + For concave cells, the provided center should ideally lie inside the + cell boundary for optimal spatial indexing performance. ycenters : list or ndarray - list of y center coordinates for all cells in the grid if the grid - varies by layer or for all cells in a layer if the same grid is used - for all layers + y-coordinates of cell centers for all cells in the grid if the grid + varies by layer, or for all cells in a layer if the same grid is used + for all layers. See ``xcenters`` for details on center point placement. top : list or ndarray top elevations for all cells in the grid. botm : list or ndarray @@ -770,60 +775,32 @@ def intersect(self, x, y, z=None, local=False, forgive=False): # transform x and y to real-world coordinates x, y = super().get_coords(x, y) - xv, yv, zv = self.xyzvertices + # Build or retrieve geospatial index for fast queries + if not hasattr(self, "_geospatial_index") or self._geospatial_index is None: + self._geospatial_index = GeospatialIndex(self) - if self.grid_varies_by_layer: - ncpl = self.nnodes - else: - ncpl = self.ncpl[0] + # Use GeospatialIndex for spatial queries + # For grid_varies_by_layer=True, z is required (3D query) + # For grid_varies_by_layer=False, z-search handled internally + cellids = self._geospatial_index.query_points(x, y, z=z) # Initialize result array - n_points = len(x) - results = np.full(n_points, np.nan if forgive else -1, dtype=float) - - # Process each point - for i in range(n_points): - xi, yi = x[i], y[i] - zi = z[i] if z is not None else None - found = False - - for icell2d in range(ncpl): - xa = np.array(xv[icell2d]) - ya = np.array(yv[icell2d]) - # x and y at least have to be within the bounding box of the cell - if ( - np.any(xi <= xa) - and np.any(xi >= xa) - and np.any(yi <= ya) - and np.any(yi >= ya) - ): - if is_clockwise(xa, ya): - radius = -1e-9 - else: - radius = 1e-9 - path = Path(np.stack((xa, ya)).transpose()) - # use a small radius, so that the edge of the cell is included - if path.contains_point((xi, yi), radius=radius): - if zi is None: - results[i] = icell2d - found = True - break - - # Search through layers for z-coordinate - cell_idx_3d = icell2d - for lay in range(self.nlay): - if zv[0, cell_idx_3d] >= zi >= zv[1, cell_idx_3d]: - results[i] = cell_idx_3d - found = True - break - # Move to next layer - if lay < self.nlay - 1 and not self.grid_varies_by_layer: - cell_idx_3d += self.ncpl[lay] - if found: - break - - if not found and not forgive: - raise ValueError(f"point ({xi}, {yi}) is outside of the model area") + results = cellids.copy() + + # Error checking for points not found + if not forgive: + unfound_mask = np.isnan(cellids) + if np.any(unfound_mask): + idx = np.where(unfound_mask)[0][0] + if z is not None: + raise ValueError( + f"point ({x[idx]}, {y[idx]}, {z[idx]}) is outside of " + f"the model area" + ) + else: + raise ValueError( + f"point ({x[idx]}, {y[idx]}) is outside of the model area" + ) # Return scalar if input was scalar, otherwise return array if is_scalar_input: diff --git a/flopy/discretization/vertexgrid.py b/flopy/discretization/vertexgrid.py index dc5be8710..bbf90bb38 100644 --- a/flopy/discretization/vertexgrid.py +++ b/flopy/discretization/vertexgrid.py @@ -5,6 +5,7 @@ from matplotlib.path import Path from ..utils.geometry import is_clockwise, transform +from ..utils.geospatial_index import GeospatialIndex from .grid import CachedData, Grid @@ -15,9 +16,16 @@ class for a vertex model grid Parameters ---------- vertices - list of vertices that make up the grid + list of vertices that make up the grid. Each vertex is + [iv, xv, yv] where iv is the vertex number. cell2d - list of cells and their vertices + list of cells and their vertices. Each cell is + [icell2d, xc, yc, ncvert, icvert1, icvert2, ...] where xc, yc are + cell center coordinates. These are typically geometric centroids but + may be any representative point inside the cell. For convex cells + (triangles, rectangles), the arithmetic mean of vertices equals the + true centroid. For concave cells, the center should ideally lie + inside the cell boundary for optimal spatial indexing performance. top : list or ndarray top elevations for all cells in the grid. botm : list or ndarray @@ -400,57 +408,40 @@ def intersect(self, x, y, z=None, local=False, forgive=False): # transform x and y to real-world coordinates x, y = super().get_coords(x, y) - xv, yv, zv = self.xyzvertices + # Build or retrieve geospatial index for fast queries + if not hasattr(self, "_geospatial_index") or self._geospatial_index is None: + self._geospatial_index = GeospatialIndex(self) - # Initialize result arrays - n_points = len(x) - results = np.full(n_points, np.nan if forgive else -1, dtype=float) + # Use GeospatialIndex for spatial queries + # For VertexGrid, z-search is handled internally by GeospatialIndex + # which returns layer index if z is not None: - lays = np.full(n_points, np.nan if forgive else -1, dtype=float) - - # Process each point - for i in range(n_points): - xi, yi = x[i], y[i] - found = False - - # Check each cell - for icell2d in range(self.ncpl): - xa = np.array(xv[icell2d]) - ya = np.array(yv[icell2d]) - # x and y at least have to be within the bounding box of the cell - if ( - np.any(xi <= xa) - and np.any(xi >= xa) - and np.any(yi <= ya) - and np.any(yi >= ya) - ): - path = Path(np.stack((xa, ya)).transpose()) - # use a small radius, so that the edge of the cell is included - if is_clockwise(xa, ya): - radius = -1e-9 - else: - radius = 1e-9 - if path.contains_point((xi, yi), radius=radius): - results[i] = icell2d - found = True - - if z is not None: - zi = z[i] - for lay in range(self.nlay): - if ( - self.top_botm[lay, icell2d] - >= zi - >= self.top_botm[lay + 1, icell2d] - ): - lays[i] = lay - break - - break - - if not found and not forgive: - raise ValueError( - f"point given is outside of the model area: ({xi}, {yi})" - ) + lays_raw = self._geospatial_index.query_points(x, y, z=z) + # GeospatialIndex returns layer index for VertexGrid + lays = lays_raw.copy() + # Get icell2d from 2D query (all layers have same 2D geometry) + cellids = self._geospatial_index.query_points(x, y, z=None) + else: + cellids = self._geospatial_index.query_points(x, y, z=None) + + # Vectorized processing of results + results = cellids.copy() + + # Check for unfound points if not forgiving + if not forgive: + unfound_mask = np.isnan(cellids) + if np.any(unfound_mask): + idx = np.where(unfound_mask)[0][0] + if z is not None: + raise ValueError( + f"point given is outside of the model area: " + f"({x[idx]}, {y[idx]}, {z[idx]})" + ) + else: + raise ValueError( + f"point given is outside of the model area: " + f"({x[idx]}, {y[idx]})" + ) # Return results if z is None: diff --git a/flopy/utils/geospatial_index.py b/flopy/utils/geospatial_index.py new file mode 100644 index 000000000..4c2635b44 --- /dev/null +++ b/flopy/utils/geospatial_index.py @@ -0,0 +1,646 @@ +""" +Geospatial indexing for FloPy vertex and unstructured grids. + +Provides efficient spatial queries using KD-tree with cell centers +AND vertices for robust edge case handling, plus pre-computed ConvexHull +equations for fast point-in-polygon testing. + +Note: StructuredGrid has its own optimized spatial methods and does not +use this index. +""" + +import numpy as np +from scipy.spatial import ConvexHull, cKDTree + + +class GeospatialIndex: + """ + Geospatial index for efficient geometric queries on vertex/unstructured grids. + + Uses KD-tree indexing with cell centers + vertices to find candidate cells, + then pre-computed ConvexHull hyperplane equations for fast vectorized + point-in-polygon testing. + + The cell center + vertices approach ensures edge cases are handled: + - Points near cell boundaries + - Points in thin/sliver cells where the cell center may be far from the query + + Note + ---- + This index uses the grid's ``xcellcenters`` and ``ycellcenters`` properties, + which represent user-provided or computed cell center coordinates. These + are not necessarily true geometric centroids (center of mass). For convex + polygons like triangles and rectangles, the difference is negligible. For + concave or irregular cells, the cell center may fall outside the cell + boundary. The index handles this by also indexing all cell vertices, + ensuring robust spatial queries regardless of cell center placement. + + Note: StructuredGrid has its own optimized spatial methods and should not + use this index. + + Parameters + ---------- + grid : VertexGrid or UnstructuredGrid + A FloPy vertex or unstructured grid object + epsilon : float, optional + Tolerance for point-in-cell tests. Used for both bounding box + expansion and ConvexHull hyperplane distance tests. Ensures boundary + points are included in adjacent cells. + + Attributes + ---------- + grid : Grid + The grid object this index was built for + epsilon : float + Tolerance for geometric tests + is_3d : bool + True if index uses 3D coordinates (x,y,z), False for 2D (x,y only). + Automatically True when grid has grid_varies_by_layer=True. + tree : scipy.spatial.cKDTree + KD-tree of cell centers + vertices for fast spatial queries. + Uses 2D (x,y) or 3D (x,y,z) depending on is_3d. + point_to_cell : ndarray + Mapping from KD-tree point index to cell index + hull_equations : list + Pre-computed ConvexHull equations for each cell (2D cells only) + bounding_boxes : ndarray + Pre-computed bounding boxes for each cell + + Examples + -------- + >>> from flopy.discretization import VertexGrid + >>> from flopy.utils.geospatial_index import GeospatialIndex + >>> + >>> # Create a simple triangular grid + >>> grid = VertexGrid(vertices, cell2d) + >>> index = GeospatialIndex(grid) + >>> + >>> # Single point query + >>> cellid = index.query_point(x=5.5, y=5.5) + >>> + >>> # Multiple points (vectorized) + >>> cellids = index.query_points(x=[1.5, 5.5, 9.5], y=[1.5, 5.5, 9.5]) + """ + + def __init__(self, grid, epsilon=1e-3): + """ + Build geospatial index for a vertex or unstructured grid. + + Parameters + ---------- + grid : VertexGrid or UnstructuredGrid + A FloPy vertex or unstructured grid + epsilon : float, optional + Tolerance for point-in-cell tests. + Used for bounding box expansion and ConvexHull tests. + """ + self.grid = grid + self.epsilon = epsilon + + self._build_index() + + @property + def is_3d(self): + """True if grid geometry varies by layer (requires 3D indexing).""" + return getattr(self.grid, "grid_varies_by_layer", False) + + def _build_index(self): + """ + Build KD-tree with centroids + vertices and pre-compute + ConvexHull equations. + + For 3D grids (grid_varies_by_layer=True), indexes all nnodes with x,y,z. + For 2D grids, indexes 2D cells with x,y only. + """ + points = [] + point_to_cell = [] + + # Get grid dimensions for vertex/unstructured grids + if self.grid.grid_type not in ("vertex", "unstructured"): + raise ValueError( + f"GeospatialIndex only supports vertex and unstructured grids, " + f"got: {self.grid.grid_type}" + ) + + if self.is_3d: + # 3D indexing: index all nnodes with x,y,z coordinates + self.ncells = self.grid.nnodes + self._build_3d_index(points, point_to_cell) + else: + # 2D indexing: index only 2D cells with x,y coordinates + if hasattr(self.grid, "ncpl"): + ncpl = self.grid.ncpl + if ncpl is None: + # ncpl not set, fall back to xcellcenters + self.ncells = len(self.grid.xcellcenters) + elif np.isscalar(ncpl): + # VertexGrid: ncpl is scalar + self.ncells = ncpl + else: + # UnstructuredGrid: ncpl is array + # Use first layer's cell count for 2D spatial indexing + if len(ncpl) > 0: + self.ncells = ncpl[0] + else: + self.ncells = len(self.grid.xcellcenters) + else: + self.ncells = len(self.grid.xcellcenters) + self._build_vertex_index(points, point_to_cell) + + # Build KD-tree from centroids + vertices + self.points = np.array(points) + self.point_to_cell = np.array(point_to_cell, dtype=int) + self.tree = cKDTree(self.points) + + # Pre-compute ConvexHull equations and bounding boxes (2D only) + if not self.is_3d: + self._precompute_hulls() + else: + # For 3D, store z-bounds for each cell + self._precompute_3d_bounds() + + def _build_vertex_index(self, points, point_to_cell): + """Build index for vertex/unstructured grid - centroids + vertices.""" + # Disable copy cache to avoid expensive deepcopy during index build + original_copy_cache = getattr(self.grid, "_copy_cache", True) + self.grid._copy_cache = False + + xc = self.grid.xcellcenters + yc = self.grid.ycellcenters + xv, yv, _ = self.grid.xyzvertices + + for cellid in range(self.ncells): + # Add centroid + points.append([xc[cellid], yc[cellid]]) + point_to_cell.append(cellid) + + # Add all cell vertices + cell_xv = xv[cellid] + cell_yv = yv[cellid] + for vi in range(len(cell_xv)): + points.append([cell_xv[vi], cell_yv[vi]]) + point_to_cell.append(cellid) + + # Restore copy cache setting + self.grid._copy_cache = original_copy_cache + + def _build_3d_index(self, points, point_to_cell): + """Build 3D index for grid_varies_by_layer grids. + + Includes centroids + vertices with z-coordinates. + """ + # Disable copy cache to avoid expensive deepcopy during index build + original_copy_cache = getattr(self.grid, "_copy_cache", True) + self.grid._copy_cache = False + + xc = np.asarray(self.grid.xcellcenters) + yc = np.asarray(self.grid.ycellcenters) + xv, yv, zv = self.grid.xyzvertices + + # Get z-coordinates for cell centroids (use mid-point of top/bottom) + zc = (zv[0] + zv[1]) / 2.0 + + # For multi-layer unstructured grids, xc/yc may be per-layer (ncpl values) + # while we iterate over all nnodes. Build mapping from node to layer-local cell. + ncpl = getattr(self.grid, "ncpl", None) + if ncpl is not None and not np.isscalar(ncpl): + ncpl = np.atleast_1d(ncpl) + xc_is_per_layer = len(xc) < self.ncells + else: + xc_is_per_layer = False + + # Build xc_idx mapping vectorized + cellids = np.arange(self.ncells) + if xc_is_per_layer: + cumulative_ncpl = np.concatenate([[0], np.cumsum(ncpl[:-1])]) + layers = np.searchsorted(np.cumsum(ncpl), cellids, side="right") + xc_idx = cellids - cumulative_ncpl[layers] + else: + xc_idx = cellids + + # Add centroids + centroids = np.column_stack([xc[xc_idx], yc[xc_idx], zc]) + centroid_cells = cellids + + # Add vertices - must loop due to variable vertex count per cell + vertex_points = [] + vertex_cells = [] + zc_mid = (zv[0] + zv[1]) / 2.0 + for cellid in range(self.ncells): + cell_xv = np.atleast_1d(xv[cellid]) + cell_yv = np.atleast_1d(yv[cellid]) + cell_z = zc_mid[cellid] + n_verts = len(cell_xv) + vertex_points.append( + np.column_stack([cell_xv, cell_yv, np.full(n_verts, cell_z)]) + ) + vertex_cells.append(np.full(n_verts, cellid, dtype=int)) + + # Combine centroids and vertices + all_points = np.vstack([centroids] + vertex_points) + all_cells = np.concatenate([centroid_cells] + vertex_cells) + + points.extend(all_points.tolist()) + point_to_cell.extend(all_cells.tolist()) + + # Restore copy cache setting + self.grid._copy_cache = original_copy_cache + + def _precompute_hulls(self): + """Pre-compute ConvexHull equations and bounding boxes for all cells.""" + # Disable copy cache to avoid expensive deepcopy during precomputation + original_copy_cache = getattr(self.grid, "_copy_cache", True) + self.grid._copy_cache = False + + self.hull_equations = [] + self.bounding_boxes = np.zeros((self.ncells, 4)) # xmin, xmax, ymin, ymax + + for cellid in range(self.ncells): + verts = self._get_cell_vertices(cellid) + + # Handle empty or degenerate cells + if len(verts) < 3: + self.bounding_boxes[cellid] = [np.inf, -np.inf, np.inf, -np.inf] + self.hull_equations.append(None) + continue + + # Compute bounding box + self.bounding_boxes[cellid] = [ + verts[:, 0].min(), + verts[:, 0].max(), + verts[:, 1].min(), + verts[:, 1].max(), + ] + + # Compute ConvexHull equations + try: + hull = ConvexHull(verts) + self.hull_equations.append(hull.equations) + except Exception: + # Degenerate geometry + self.hull_equations.append(None) + + # Restore copy cache setting + self.grid._copy_cache = original_copy_cache + + def _precompute_3d_bounds(self): + """Pre-compute 3D bounding boxes (x,y,z bounds) for all cells.""" + xv, yv, zv = self.grid.xyzvertices + + # Store 3D bounding boxes: [xmin, xmax, ymin, ymax, zmin, zmax] + self.bounding_boxes_3d = np.zeros((self.ncells, 6)) + + for cellid in range(self.ncells): + cell_xv = np.atleast_1d(xv[cellid]) + cell_yv = np.atleast_1d(yv[cellid]) + cell_z_top = zv[0, cellid] + cell_z_bot = zv[1, cellid] + + self.bounding_boxes_3d[cellid] = [ + np.min(cell_xv), + np.max(cell_xv), + np.min(cell_yv), + np.max(cell_yv), + min(cell_z_top, cell_z_bot), + max(cell_z_top, cell_z_bot), + ] + + def _get_cell_vertices(self, cellid): + """Get vertices for a cell.""" + xv, yv, _ = self.grid.xyzvertices + return np.column_stack([xv[cellid], yv[cellid]]) + + def query_point(self, x, y, z=None, k=None): + """ + Find cell containing a single point. + + Parameters + ---------- + x, y : float + Point coordinates + z : float, optional + Z-coordinate. Required for 3D grids (grid_varies_by_layer=True). + k : int, optional + Number of unique cells to check (default 30) + + Returns + ------- + cellid : int or np.nan + Cell index containing the point, or np.nan if outside grid + """ + z_array = np.array([z]) if z is not None else None + result = self.query_points(np.array([x]), np.array([y]), z=z_array, k=k) + cellid = result[0] + return int(cellid) if not np.isnan(cellid) else np.nan + + def query_points(self, x, y, z=None, k=None): + """ + Find cells containing multiple points (vectorized). + + Uses KD-tree to find k nearest unique cells, then tests for containment. + For 2D grids: uses ConvexHull testing. + For 3D grids: uses 3D bounding box testing. + + Parameters + ---------- + x, y : array-like + Point coordinates (must have same length) + z : array-like, optional + Z-coordinates. Required for 3D grids (grid_varies_by_layer=True). + For 2D grids with layers, z-search is handled internally. + k : int, optional + Number of unique candidate cells to check per point (default 30) + + Returns + ------- + cellids : ndarray + Array of cell indices (np.nan for points outside grid). + For 3D grids, returns 3D cell index (nnodes). + For 2D grids, returns 2D cell index (ncpl). + """ + x = np.atleast_1d(x) + y = np.atleast_1d(y) + + if len(x) != len(y): + raise ValueError("x and y must have the same length") + + # For 3D grids, z is required + if self.is_3d: + if z is None: + raise ValueError( + "Z-coordinate required for 3D grids (grid_varies_by_layer=True)" + ) + z = np.atleast_1d(z) + if len(z) != len(x): + raise ValueError("z must have the same length as x and y") + else: + if z is not None: + z = np.atleast_1d(z) + if len(z) != len(x): + raise ValueError("z must have the same length as x and y") + + if k is None: + k = 30 # Default: check 30 unique cells + + # Build query points (2D or 3D) + if self.is_3d: + points = np.column_stack([x, y, z]) + else: + points = np.column_stack([x, y]) + + n_points = len(points) + results = np.full(n_points, np.nan, dtype=float) + + # For each point, query KD-tree to get k unique candidate cells + for i in range(n_points): + point = points[i] + + # Query KD-tree adaptively to get k unique cells + candidates = self._get_k_unique_cells(point, k) + + # Collect all matching cells for tie-breaking + matching_cells = [] + + # Test candidates in order (nearest first) + for cellid in candidates: + if self.is_3d: + # 3D: check 3D bounding box + if self._point_in_cell_3d(point, cellid): + matching_cells.append(cellid) + else: + # 2D: use ConvexHull test + if self._point_in_cell_vectorized(point[:2], cellid): + # Found 2D cell, now check z if provided + if z is None: + matching_cells.append(cellid) + else: + # Search through layers to find the right z + cell_3d = self._find_layer_for_z(cellid, z[i]) + if cell_3d is not None: + matching_cells.append(cell_3d) + + # Apply grid-specific tie-breaking when multiple cells match + if len(matching_cells) > 0: + results[i] = self._apply_tiebreaker(matching_cells) + + # Return int array if all points found, float array otherwise (to preserve nan) + valid_mask = ~np.isnan(results) + if np.all(valid_mask): + return results.astype(int) + return results + + def _get_k_unique_cells(self, point, k): + """ + Query KD-tree to get k unique candidate cells. + + Since KD-tree contains centroids + vertices, we may need to query + more than k points to get k unique cells. + + Parameters + ---------- + point : ndarray + Query point [x, y] + k : int + Number of unique cells desired + + Returns + ------- + unique_cells : ndarray + Array of up to k unique cell indices, ordered by distance + """ + # Query k*5 points to get k unique cells (accounting for vertices) + k_points = min(k * 5, len(self.points)) + + distances, indices = self.tree.query(point, k=k_points) + + # Handle scalar result + if np.isscalar(indices): + indices = [indices] + + # Extract unique cells while preserving order + seen = set() + unique_cells = [] + for idx in indices: + cellid = self.point_to_cell[idx] + if cellid not in seen: + seen.add(cellid) + unique_cells.append(cellid) + if len(unique_cells) >= k: + break + + # If we still don't have k unique cells, query more points + while len(unique_cells) < k and k_points < len(self.points): + k_points = min(k_points * 2, len(self.points)) + distances, indices = self.tree.query(point, k=k_points) + + if np.isscalar(indices): + indices = [indices] + + for idx in indices: + cellid = self.point_to_cell[idx] + if cellid not in seen: + seen.add(cellid) + unique_cells.append(cellid) + if len(unique_cells) >= k: + break + + if k_points >= len(self.points): + break # Can't query any more points + + return np.array(unique_cells[:k], dtype=int) + + def _point_in_cell_vectorized(self, point, cellid): + """ + Test if point is inside cell using pre-computed bounding box + ConvexHull. + + Parameters + ---------- + point : ndarray + Point coordinates [x, y] + cellid : int + Cell index + + Returns + ------- + bool + True if point is inside cell + """ + # Fast bounding box rejection with epsilon tolerance for edge cases + # Expand bounding box slightly to handle points on boundaries + bbox = self.bounding_boxes[cellid] + if not ( + bbox[0] - self.epsilon <= point[0] <= bbox[1] + self.epsilon + and bbox[2] - self.epsilon <= point[1] <= bbox[3] + self.epsilon + ): + return False + + # Precise ConvexHull test + hull_eq = self.hull_equations[cellid] + if hull_eq is not None: + # Vectorized hyperplane test: Ax + By + C <= epsilon + # Tolerance allows points on edges to be included + distances = point @ hull_eq[:, :-1].T + hull_eq[:, -1] + return np.all(distances <= self.epsilon) + else: + # Degenerate geometry - should rarely happen + return False + + def _point_in_cell_3d(self, point, cellid): + """ + Test if 3D point is inside cell using 3D bounding box. + + Parameters + ---------- + point : ndarray + Point coordinates [x, y, z] + cellid : int + Cell index + + Returns + ------- + bool + True if point is inside cell's 3D bounding box + """ + # Use 3D bounding box test with epsilon tolerance + bbox = self.bounding_boxes_3d[cellid] + return ( + bbox[0] - self.epsilon <= point[0] <= bbox[1] + self.epsilon + and bbox[2] - self.epsilon <= point[1] <= bbox[3] + self.epsilon + and bbox[4] - self.epsilon <= point[2] <= bbox[5] + self.epsilon + ) + + def _apply_tiebreaker(self, matching_cells): + """ + Apply tie-breaking when multiple cells match. + + For vertex/unstructured grids: choose cell with lowest cell ID. + + Parameters + ---------- + matching_cells : list + List of cell indices that all contain the point + + Returns + ------- + cellid : int + The selected cell after tie-breaking + """ + if len(matching_cells) == 1: + return matching_cells[0] + + return min(matching_cells) + + def _find_layer_for_z(self, icell2d, z): + """ + Find 3D cell index for a 2D cell and z-coordinate. + + Searches through layers to find which layer contains the z-coordinate. + + Parameters + ---------- + icell2d : int + 2D cell index (from layer 0) + z : float + Z-coordinate + + Returns + ------- + cell_3d : int or None + 3D cell index, or None if z not found in any layer + """ + # Get z-bounds for all cells + _, _, zv = self.grid.xyzvertices + + # For VertexGrid: same 2D geometry for all layers + if self.grid.grid_type == "vertex": + # Search through all layers to find which contains z + # For VertexGrid: zv[lay, icell2d] = top, zv[lay+1, icell2d] = bottom + for lay in range(self.grid.nlay): + z_top = zv[lay, icell2d] + z_bot = zv[lay + 1, icell2d] + z_min = min(z_top, z_bot) + z_max = max(z_top, z_bot) + + if z_min <= z <= z_max: + # Found the layer! Return the layer index + return lay + + # z not found in any layer + return None + + # For UnstructuredGrid: compute 3D cell index using vectorized search + elif self.grid.grid_type == "unstructured": + # Build array of 3D cell indices for this 2D cell across all layers + if hasattr(self.grid, "ncpl") and isinstance(self.grid.ncpl, np.ndarray): + # Compute cumulative sum to get 3D cell indices + ncpl_cumsum = np.concatenate([[0], np.cumsum(self.grid.ncpl[:-1])]) + cell_indices_3d = icell2d + ncpl_cumsum + else: + # Fallback: assume constant ncpl + cell_indices_3d = ( + icell2d + np.arange(self.grid.nlay) * self.grid.ncpl[0] + ) + + # Get z-bounds for all layers of this 2D cell + z_tops = zv[0, cell_indices_3d] + z_bots = zv[1, cell_indices_3d] + z_mins = np.minimum(z_tops, z_bots) + z_maxs = np.maximum(z_tops, z_bots) + + # Find layers containing z (vectorized) + mask = (z_mins <= z) & (z <= z_maxs) + matching_layers = np.where(mask)[0] + + if len(matching_layers) > 0: + # Return first matching layer's 3D cell index + return cell_indices_3d[matching_layers[0]] + + return None + + def __repr__(self): + """String representation.""" + return ( + f"GeospatialIndex({self.grid.grid_type} grid, " + f"{self.ncells} cells, " + f"{len(self.points)} indexed points)" + )