
Commit e0263c2

committed
first draft alg impl with tests
1 parent f3ace19 commit e0263c2

28 files changed, +1599 −0 lines changed

.devcontainer/Dockerfile

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
FROM mcr.microsoft.com/devcontainers/python:1-3.11-bullseye

# Install system dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        redis-server \
        libsnappy-dev \
        libzstd-dev \
    && apt-get clean -y && \
    rm -rf /var/lib/apt/lists/*

# Copy requirements first for better cache utilization
COPY requirements.txt /tmp/pip-tmp/
RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \
    && rm -rf /tmp/pip-tmp

# Configure Redis for testing
RUN mkdir -p /var/lib/redis && \
    chown -R redis:redis /var/lib/redis && \
    sed -i 's/bind 127.0.0.1/bind 0.0.0.0/' /etc/redis/redis.conf

.devcontainer/devcontainer.json

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
{
    "name": "algokit-dev",
    "dockerComposeFile": [
        "./docker-compose.yml"
    ],
    "service": "code",
    "workspaceFolder": "/workspace",
    "customizations": {
        "vscode": {
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance",
                "ms-azuretools.vscode-docker",
                "ms-azuretools.vscode-docker-compose"
            ],
            "settings": {
                "python.defaultInterpreterPath": "/usr/local/bin/python",
                "python.linting.enabled": true,
                "python.linting.pylintEnabled": true,
                "python.testing.pytestEnabled": true,
                "python.testing.pytestArgs": ["tests/"]
            }
        }
    },
    "postCreateCommand": "sudo service redis-server start && pytest tests/",
    "forwardPorts": [6379], // optional: exposes Redis port
    "remoteUser": "vscode"
}

.devcontainer/docker-compose.yml

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
version: '3.8'

services:
  code:
    build:
      context: ..
      dockerfile: .devcontainer/Dockerfile
    volumes:
      - ..:/workspace:cached
    command: sleep infinity
    environment:
      - REDIS_HOST=redis
      - PYTHONPATH=/workspace/src
    depends_on:
      redis:
        condition: service_healthy

  redis:
    image: redis:6-alpine
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 1s
      timeout: 3s
      retries: 30

volumes:
  redis_data:
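The compose file injects REDIS_HOST into the dev container, so application code reaches Redis by service name rather than a hard-coded address. A minimal connectivity check (a sketch, assuming the redis-py client from requirements.txt and the service names defined above):

import os

import redis

# docker-compose sets REDIS_HOST=redis; fall back to localhost for runs outside the container
client = redis.Redis(host=os.environ.get("REDIS_HOST", "localhost"), port=6379)
assert client.ping()  # same PING command the compose healthcheck uses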

.github/dependabot.yml

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for more information:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# https://containers.dev/guide/dependabot

version: 2
updates:
  - package-ecosystem: "devcontainers"
    directory: "/"
    schedule:
      interval: weekly

requirements.txt

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
pyarrow>=8.0.0
pytest>=7.0
hypothesis>=6.0
redis>=4.0
numpy>=1.0

src/__init__.py

Whitespace-only changes.

src/algorithms/bloom_filter.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
import hashlib
import math


class BloomFilter:
    """Space-efficient probabilistic membership tester backed by a flat byte array

    Args:
        capacity: Expected maximum number of elements
        error_rate: Acceptable false positive rate in (0, 1)
    """
    def __init__(self, capacity: int, error_rate: float):
        self.size = self._calc_size(capacity, error_rate)
        self.hash_count = self._calc_hash_count(capacity, self.size)
        # Zero-initialized bit array: one byte stores eight bits
        self.bits = bytearray((self.size + 7) // 8)

    def add(self, item: str):
        """Insert item into filter"""
        h = int.from_bytes(hashlib.blake2s(item.encode()).digest(), 'big')
        for s in range(self.hash_count):
            # Derive the k probe positions from a single digest
            idx = (h + s * 1299721) % self.size
            self.bits[idx >> 3] |= 1 << (idx & 7)

    def __contains__(self, item: str) -> bool:
        """Check membership; false positives possible, false negatives are not"""
        h = int.from_bytes(hashlib.blake2s(item.encode()).digest(), 'big')
        for s in range(self.hash_count):
            idx = (h + s * 1299721) % self.size
            if not self.bits[idx >> 3] & (1 << (idx & 7)):
                return False
        return True

    @staticmethod
    def _calc_size(n: int, p: float) -> int:
        """Optimal bit count: m = -n * ln(p) / (ln 2)^2"""
        return math.ceil(-(n * math.log(p)) / (math.log(2) ** 2))

    @staticmethod
    def _calc_hash_count(n: int, m: int) -> int:
        """Optimal hash count: k = (m / n) * ln 2"""
        return math.ceil((m / n) * math.log(2))
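A quick usage sketch (capacity and error rate are illustrative): for capacity 10,000 and a 1% error rate the sizing formulas above yield m ≈ 95,851 bits and k = 7 hash probes.

bf = BloomFilter(capacity=10_000, error_rate=0.01)
bf.add("alice")
assert "alice" in bf      # inserted items are always found
print("bob" in bf)        # usually False; True with roughly 1% probability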

src/algorithms/consistent_hash.py

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
import hashlib

import numpy as np
import pyarrow as pa
import pyarrow.compute as pc


class ConsistentHash:
    """PyArrow-backed consistent hashing with weighted nodes

    Args:
        nodes: Initial nodes with weights {node: weight}
        replicas: Base number of virtual nodes per weight unit
    """
    def __init__(self, nodes: dict, replicas: int = 100):
        self.replicas = replicas
        schema = pa.schema([('hash', pa.uint64()), ('node', pa.string())])
        self.ring = pa.Table.from_arrays(
            [pa.array([], type=pa.uint64()), pa.array([], type=pa.string())],
            schema=schema
        )
        for node, weight in nodes.items():
            self.add_weighted_node(node, weight)

    def add_weighted_node(self, node: str, weight: int = 1):
        """Add node with weight; the ring stays sorted by hash"""
        virtual_nodes = weight * self.replicas
        hashes = pa.array([
            int.from_bytes(
                hashlib.blake2s(f"{node}-{i}".encode()).digest()[:8],
                'little'
            ) for i in range(virtual_nodes)
        ], type=pa.uint64())
        nodes = pa.array([node] * virtual_nodes, type=pa.string())
        new_entries = pa.Table.from_arrays([hashes, nodes], names=['hash', 'node'])
        self.ring = pa.concat_tables([self.ring, new_entries]).sort_by('hash')

    def remove_node(self, node: str):
        """Remove all virtual nodes for a physical node"""
        mask = pc.not_equal(self.ring['node'], node)
        self.ring = self.ring.filter(mask)

    def get_node(self, key: str) -> str:
        """Find the owning node for key via binary search on the sorted ring"""
        key_hash = int.from_bytes(
            hashlib.blake2s(key.encode()).digest()[:8],
            'little'
        )
        hashes = self.ring['hash'].to_numpy()
        # First virtual node clockwise from key_hash
        idx = int(np.searchsorted(hashes, key_hash))
        if idx == len(hashes):
            idx = 0  # wrap around the ring
        return self.ring['node'][idx].as_py()

    def balance_quality(self) -> float:
        """Coefficient of variation of vnode counts (0 = perfectly balanced)"""
        node_counts = pc.value_counts(self.ring['node'])
        counts = node_counts.field('counts')
        mean = len(self.ring) / len(counts)
        return pc.stddev(counts).as_py() / mean
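A quick usage sketch (node names and key are hypothetical): cache-b carries twice the weight of cache-a, so it receives twice the virtual nodes and roughly twice the keys; removing a node remaps only the keys it owned.

ch = ConsistentHash({"cache-a": 1, "cache-b": 2}, replicas=100)
print(ch.get_node("user:42"))     # stable owner for this key
ch.remove_node("cache-a")         # only keys owned by cache-a move
print(ch.get_node("user:42"), ch.balance_quality())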

src/algorithms/geo_hash.py

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
class GeoHasher:
    """Geohash encoder/decoder

    Args:
        precision: Hash length (1-12)
    """
    BASE32 = '0123456789bcdefghjkmnpqrstuvwxyz'
    BASE32_MAP = {c: i for i, c in enumerate(BASE32)}

    def __init__(self, precision: int = 10):
        self.precision = min(max(precision, 1), 12)
        self.bits = self.precision * 5

    def encode(self, lat: float, lon: float) -> str:
        """Encode coordinates to geohash"""
        lat = max(-90.0, min(90.0, lat))           # clamp latitude
        lon = ((lon + 180.0) % 360.0) - 180.0      # wrap longitude
        lat_range, lon_range = [-90.0, 90.0], [-180.0, 180.0]
        bits = []
        for i in range(self.bits):
            # Even bits refine longitude, odd bits refine latitude
            rng, value = (lat_range, lat) if i % 2 else (lon_range, lon)
            mid = (rng[0] + rng[1]) / 2
            bit = value >= mid
            rng[0 if bit else 1] = mid  # keep the half containing value
            bits.append(bit)
        return self._pack_bits(bits)

    def decode(self, geohash: str) -> dict:
        """Decode geohash to coordinates with error margins

        Returns:
            Dict with keys: lon, lat, lon_err, lat_err
        """
        lon_range, lat_range = [-180.0, 180.0], [-90.0, 90.0]
        for i, bit in enumerate(self._unpack_bits(geohash)):
            rng = lat_range if i % 2 else lon_range
            mid = (rng[0] + rng[1]) / 2
            rng[0 if bit else 1] = mid
        return {
            'lon': (lon_range[0] + lon_range[1]) / 2,
            'lat': (lat_range[0] + lat_range[1]) / 2,
            'lon_err': (lon_range[1] - lon_range[0]) / 2,
            'lat_err': (lat_range[1] - lat_range[0]) / 2,
        }

    def _pack_bits(self, bits: list) -> str:
        """Pack bits into a base32 string, 5 bits per character, MSB first"""
        chars = []
        for i in range(0, len(bits), 5):
            value = 0
            for bit in bits[i:i + 5]:
                value = (value << 1) | int(bit)
            chars.append(self.BASE32[value])
        return ''.join(chars)

    def _unpack_bits(self, geohash: str) -> list:
        """Unpack a base32 string into a bit list, MSB first"""
        bits = []
        for c in geohash:
            value = self.BASE32_MAP[c]
            bits.extend(bool((value >> s) & 1) for s in range(4, -1, -1))
        return bits

    def neighbors(self, geohash: str) -> dict:
        """
        Calculate all 8 adjacent geohashes

        Args:
            geohash: Input geohash string
        Returns:
            Dict containing:
            - center: the input geohash
            - n/ne/e/se/s/sw/w/nw: neighboring geohashes
            - bounds: cell step sizes (lat_step, lon_step)
        """
        decoded = self.decode(geohash)
        lat, lon = decoded['lat'], decoded['lon']
        # Full cell dimensions: stepping by 2 * err from the center
        # lands on the neighboring cell's center
        lat_step, lon_step = decoded['lat_err'] * 2, decoded['lon_err'] * 2
        steps = {
            'n': (lat_step, 0.0), 'ne': (lat_step, lon_step),
            'e': (0.0, lon_step), 'se': (-lat_step, lon_step),
            's': (-lat_step, 0.0), 'sw': (-lat_step, -lon_step),
            'w': (0.0, -lon_step), 'nw': (lat_step, -lon_step),
        }
        result = {'center': geohash}
        for name, (dlat, dlon) in steps.items():
            # encode() clamps latitude and wraps longitude internally
            result[name] = self.encode(lat + dlat, lon + dlon)
        result['bounds'] = {'lat_step': lat_step, 'lon_step': lon_step}
        return result
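A quick usage sketch, using the well-known geohash test point (57.64911, 10.40744), which encodes to 'u4pruyd' at precision 7:

gh = GeoHasher(precision=7)
code = gh.encode(57.64911, 10.40744)   # -> 'u4pruyd'
decoded = gh.decode(code)
print(code, decoded['lat'], decoded['lon'], gh.neighbors(code)['n'])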

src/algorithms/hyper_log_log.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
import math
import hashlib

import numpy as np


class HyperLogLog:
    """Cardinality estimator backed by a NumPy register array

    Args:
        precision: Accuracy vs memory tradeoff (4-16)
    """
    def __init__(self, precision: int = 12):
        self.p = precision
        self.m = 1 << precision
        # Mutable registers (pyarrow arrays are immutable)
        self.reg = np.zeros(self.m, dtype=np.uint8)

    def add(self, item: str):
        """Add item to cardinality estimate"""
        # Use 64 bits of the digest: low p bits pick the register,
        # the remaining 64-p bits determine the rank
        h = int.from_bytes(hashlib.sha256(item.encode()).digest()[:8], 'big')
        idx = h & (self.m - 1)
        w = h >> self.p
        rank = (64 - self.p) - w.bit_length() + 1  # leading zeros + 1
        self.reg[idx] = max(self.reg[idx], rank)

    def count(self) -> int:
        """Get cardinality estimate"""
        alpha = 0.7213 / (1 + 1.079 / self.m)  # bias correction for m >= 128
        e = alpha * self.m ** 2 / float(np.sum(np.power(2.0, -self.reg.astype(np.float64))))
        zeros = int(np.count_nonzero(self.reg == 0))
        if e <= 2.5 * self.m and zeros:
            # Small-range (linear counting) correction
            return int(self.m * math.log(self.m / zeros))
        return int(e)
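A quick accuracy sketch (values are illustrative; the expected relative error is about 1.04/sqrt(m), roughly 1.6% at precision 12):

hll = HyperLogLog(precision=12)
for i in range(100_000):
    hll.add(f"user-{i}")
print(hll.count())   # close to 100000, typically within a few percent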
