1+ import pyarrow as pa
2+
class GeoHasher:
    """Geohash encoder/decoder that exchanges values as PyArrow scalars.

    A geohash repeatedly bisects the longitude and latitude ranges,
    interleaving the resulting bits (longitude first), and writes every
    5 bits as one character of the geohash base32 alphabet.

    Args:
        precision: Hash length in characters. Values outside 1-12 are
            clamped into that range.

    Attributes:
        precision: Clamped hash length used by ``encode``.
        bits: Number of interleaved bits in a hash (``precision * 5``).
        mask: Integer with the low ``bits`` bits set.
    """

    # Geohash alphabet: digits plus lowercase letters without a, i, l, o.
    # This is NOT the RFC 4648 base32 alphabet, so a generic base32 codec
    # cannot be used to pack or unpack hashes.
    _ALPHABET = '0123456789bcdefghjkmnpqrstuvwxyz'
    BASE32 = pa.array(list(_ALPHABET))
    BASE32_MAP = {c: i for i, c in enumerate(_ALPHABET)}

    # Arrow type of the value returned by ``decode``.
    _CELL_TYPE = pa.struct([
        ('lon', pa.float64()),
        ('lat', pa.float64()),
        ('lon_err', pa.float64()),
        ('lat_err', pa.float64()),
    ])

    # Arrow type of the value returned by ``neighbors``.
    _NEIGHBORS_TYPE = pa.struct([
        ('center', pa.string()),
        ('n', pa.string()),
        ('ne', pa.string()),
        ('e', pa.string()),
        ('se', pa.string()),
        ('s', pa.string()),
        ('sw', pa.string()),
        ('w', pa.string()),
        ('nw', pa.string()),
        ('bounds', pa.struct([
            ('lat_step', pa.float64()),
            ('lon_step', pa.float64()),
        ])),
    ])

    # (field name, latitude direction, longitude direction) per neighbor.
    _DIRECTIONS = (
        ('n', 1, 0),
        ('ne', 1, 1),
        ('e', 0, 1),
        ('se', -1, 1),
        ('s', -1, 0),
        ('sw', -1, -1),
        ('w', 0, -1),
        ('nw', 1, -1),
    )

    def __init__(self, precision: int = 10):
        self.precision = min(max(precision, 1), 12)
        self.bits = self.precision * 5
        # Plain integer bit mask; PyArrow has no ``bit_mask`` helper.
        self.mask = (1 << self.bits) - 1

    @staticmethod
    def _as_py(value):
        """Return the Python value of an Arrow scalar; pass others through."""
        return value.as_py() if isinstance(value, pa.Scalar) else value

    @staticmethod
    def _encode_bits(lat, lon, n_bits):
        """Return the first ``n_bits`` interleaved bisection bits as bools.

        Latitude is clamped to [-90, 90] and longitude is wrapped into
        [-180, 180) before bisecting.
        """
        lat = max(-90.0, min(90.0, lat))
        lon = ((lon + 180.0) % 360.0) - 180.0
        lat_lo, lat_hi = -90.0, 90.0
        lon_lo, lon_hi = -180.0, 180.0
        flags = []
        for i in range(n_bits):
            if i % 2:  # odd positions carry latitude bits
                mid = (lat_lo + lat_hi) / 2
                upper = lat >= mid
                # A set bit means "upper half", so the LOWER bound moves up.
                if upper:
                    lat_lo = mid
                else:
                    lat_hi = mid
            else:  # even positions carry longitude bits
                mid = (lon_lo + lon_hi) / 2
                upper = lon >= mid
                if upper:
                    lon_lo = mid
                else:
                    lon_hi = mid
            flags.append(upper)
        return flags

    def encode(self, lat: float, lon: float) -> pa.StringScalar:
        """Encode coordinates to a geohash of ``self.precision`` characters.

        Args:
            lat: Latitude in degrees (float or Arrow scalar); clamped to
                [-90, 90].
            lon: Longitude in degrees (float or Arrow scalar); wrapped into
                [-180, 180).

        Returns:
            The geohash as an Arrow string scalar.
        """
        flags = self._encode_bits(self._as_py(lat), self._as_py(lon), self.bits)
        return self._pack_bits(pa.array(flags, type=pa.bool_()))

    def decode(self, geohash: pa.StringScalar) -> pa.StructScalar:
        """Decode a geohash to its cell center with error margins.

        All characters of ``geohash`` are decoded, whatever the instance
        precision is.

        Args:
            geohash: Geohash as an Arrow string scalar or ``str``;
                upper-case letters are accepted.

        Returns:
            Struct with fields: lon, lat, lon_err, lat_err. The error
            fields are half the cell width/height in degrees.

        Raises:
            ValueError: If ``geohash`` is empty, not a string, or contains
                a character outside the geohash alphabet.
        """
        lon_lo, lon_hi = -180.0, 180.0
        lat_lo, lat_hi = -90.0, 90.0
        for i, flag in enumerate(self._unpack_bits(geohash).to_pylist()):
            if i % 2 == 0:  # longitude bit
                mid = (lon_lo + lon_hi) / 2
                if flag:
                    lon_lo = mid
                else:
                    lon_hi = mid
            else:  # latitude bit
                mid = (lat_lo + lat_hi) / 2
                if flag:
                    lat_lo = mid
                else:
                    lat_hi = mid

        # ``pa.struct`` only builds a data type; the value itself is made
        # with ``pa.scalar`` from a dict plus that type.
        return pa.scalar(
            {
                'lon': (lon_lo + lon_hi) / 2,
                'lat': (lat_lo + lat_hi) / 2,
                'lon_err': (lon_hi - lon_lo) / 2,
                'lat_err': (lat_hi - lat_lo) / 2,
            },
            type=self._CELL_TYPE,
        )

    def _pack_bits(self, bits: pa.BooleanArray) -> pa.StringScalar:
        """Pack bits (most significant first) into a geohash string.

        Bits are consumed in groups of 5; a short final group is padded
        with zero bits on the right. Null entries count as zero.
        """
        flags = bits.to_pylist() if isinstance(bits, pa.Array) else list(bits)
        chars = []
        for start in range(0, len(flags), 5):
            group = flags[start:start + 5]
            value = 0
            for flag in group:
                value = (value << 1) | (1 if flag else 0)
            value <<= 5 - len(group)
            chars.append(self._ALPHABET[value])
        return pa.scalar(''.join(chars), type=pa.string())

    def _unpack_bits(self, geohash: pa.StringScalar) -> pa.BooleanArray:
        """Unpack a geohash string into its bits, 5 per character.

        Raises:
            ValueError: If the input is empty, not a string, or contains a
                character outside the geohash alphabet.
        """
        text = self._as_py(geohash)
        if not isinstance(text, str) or not text:
            raise ValueError('geohash must be a non-empty string')
        flags = []
        for ch in text.lower():
            try:
                value = self.BASE32_MAP[ch]
            except KeyError:
                raise ValueError(
                    f'invalid geohash character {ch!r}'
                ) from None
            flags.extend(
                bool((value >> shift) & 1) for shift in range(4, -1, -1)
            )
        return pa.array(flags, type=pa.bool_())

    def neighbors(self, geohash: pa.StringScalar) -> pa.StructScalar:
        """Calculate the 8 geohashes adjacent to ``geohash``.

        Neighbors have the same length as the input hash. Latitude is
        clamped at the poles, so for a cell in the top or bottom row the
        out-of-range neighbors fall back onto that same row. Longitude
        wraps around the antimeridian.

        Args:
            geohash: Input geohash as an Arrow string scalar or ``str``.

        Returns:
            Arrow struct containing:
                - center: The input geohash
                - n/ne/e/se/s/sw/w/nw: Neighboring geohashes
                - bounds: Cell size in degrees (lat_step, lon_step)

        Raises:
            ValueError: If ``geohash`` is not a valid geohash.
        """
        cell = self.decode(geohash).as_py()
        text = self._as_py(geohash)
        # Step by a FULL cell (twice the half-size error) so each probe
        # lands on the center of the adjacent cell, not on a shared edge.
        lat_step = cell['lat_err'] * 2
        lon_step = cell['lon_err'] * 2
        n_bits = 5 * len(text)

        result = {'center': text}
        for name, dlat, dlon in self._DIRECTIONS:
            flags = self._encode_bits(
                cell['lat'] + dlat * lat_step,
                cell['lon'] + dlon * lon_step,
                n_bits,
            )
            packed = self._pack_bits(pa.array(flags, type=pa.bool_()))
            result[name] = packed.as_py()
        result['bounds'] = {'lat_step': lat_step, 'lon_step': lon_step}
        return pa.scalar(result, type=self._NEIGHBORS_TYPE)
0 commit comments