Skip to content

Commit 9f8b66f

Browse files
committed
wrapped up some conflicts in patches
2 parents 5b81f51 + 136abbf commit 9f8b66f

File tree

6 files changed

+204
-160
lines changed

6 files changed

+204
-160
lines changed

ice_scrapers/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees – typically these are operated by private contractors pursuant to their agreements with local governments.",
4040
},
4141
"DOD": {
42-
"expanded_name": "Department of Defence",
42+
"expanded_name": "Department of Defense",
4343
"description": "Department of Defense facilities - Often Army bases",
4444
},
4545
"FAMILY": {
@@ -66,14 +66,14 @@
6666
"expanded_name": "Other",
6767
"description": "Facilities including but not limited to transportation-related facilities, hotels, and/or other facilities",
6868
},
69-
"Unknown": {
70-
"expanded_name": "Unknown",
71-
"description": "A facility whose type could not be identified",
72-
},
7369
"SPC": {
7470
"expanded_name": "Service Processing Center",
7571
"description": "A facility owned by the government and staffed by a combination of federal and contract employees.",
7672
},
73+
"Unknown": {
74+
"expanded_name": "Unknown",
75+
"description": "A facility whose type could not be identified",
76+
},
7777
"USMS": {
7878
"expanded_name": "United States Marshals Service",
7979
"description": "A facility primarily contracted with the USMS for housing of USMS detainees, in which ICE contracts with the USMS for bed space.",
@@ -134,9 +134,9 @@
134134
from .utils import ( # noqa: E402
135135
get_ice_scrape_pages, # noqa: F401
136136
repair_locality, # noqa: F401
137-
repair_name, # noqa: F401
138137
repair_street, # noqa: F401
139138
repair_zip, # noqa: F401
139+
repair_name, # noqa: F401
140140
special_facilities, # noqa: F401
141141
update_facility, # noqa: F401
142142
)

ice_scrapers/facilities_scraper.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
repair_locality,
88
repair_street,
99
repair_zip,
10+
repair_name,
1011
special_facilities,
1112
update_facility,
1213
)
@@ -28,6 +29,7 @@ def scrape_facilities(facilities_data: dict) -> dict:
2829
logger.info("Starting to scrape ICE.gov detention facilities...")
2930
facilities_data["scraped_date"] = datetime.datetime.now(datetime.UTC)
3031
urls = get_ice_scrape_pages(base_scrape_url)
32+
3133
scraped_count = 0
3234
for page_num, url in enumerate(urls):
3335
logger.info("Scraping page %s/%s...", page_num + 1, len(urls))
@@ -53,7 +55,7 @@ def scrape_facilities(facilities_data: dict) -> dict:
5355
if cleaned:
5456
addr["locality"] = locality
5557
facility["_repaired_record"] = True
56-
name, cleaned = repair_locality(facility["name"], addr["locality"])
58+
name, cleaned = repair_name(facility["name"], addr["locality"])
5759
if cleaned:
5860
facility["name"] = name
5961
facility["_repaired_record"] = True
@@ -181,6 +183,7 @@ def _scrape_page(page_url: str) -> list:
181183
facilities.append(facility_data)
182184

183185
logger.info(" Extracted %s facilities from page", len(facilities))
186+
184187
return facilities
185188

186189

@@ -193,6 +196,7 @@ def _find_facility_patterns(container):
193196
r"([A-Z][^|]+(?:\|[^|]+)?)\s*([A-Z][^A-Z]*Field Office)",
194197
r"([^-]+)\s*-\s*([A-Z][^A-Z]*Field Office)",
195198
]
199+
196200
text_content = container.get_text()
197201

198202
for pattern in facility_patterns:

ice_scrapers/spreadsheet_load.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
2727
base_xlsx_url = "https://www.ice.gov/detain/detention-management"
2828
filename = f"{SCRIPT_DIR}{os.sep}detentionstats.xlsx"
29-
3029
# extracted ADP sheet header list 2025-09-07
3130
facility_sheet_header = [
3231
"Name",
@@ -198,5 +197,5 @@ def load_sheet(keep_sheet: bool = True, force_download: bool = True) -> dict:
198197
details["field_office"]["id"] = row["AOR"]
199198
details["address_str"] = full_address
200199
results[full_address] = details
201-
logger.info(" Loaded %s facilties", len(results.keys()))
200+
logger.info(" Loaded %s facilities", len(results.keys()))
202201
return results

ice_scrapers/utils.py

Lines changed: 61 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,67 @@ def special_facilities(facility: dict) -> dict:
3535
return facility
3636

3737

38+
def repair_name(name: str, locality: str) -> Tuple[str, bool]:
    """Replace a known-bad facility name with its corrected form.

    Each entry in the table pairs a bad name with the locality it appears in,
    so identically named facilities in different places (for example the two
    "ORANGE COUNTY JAIL" variants) are repaired independently. Both the name
    and the locality must match exactly (case-sensitive) for a repair to apply.

    Args:
        name: Facility name as received from the data source.
        locality: Locality (city) of the facility, used to disambiguate names.

    Returns:
        A ``(name, cleaned)`` tuple. ``name`` is the replacement when an entry
        matched, otherwise the input unchanged; ``cleaned`` is True only when a
        replacement was made.
    """
    # NOTE: only the first matching entry is applied, so every
    # (match, locality) pair must appear once and every "replace" value must
    # already be the final, correct spelling -- replacements are not chained.
    matches = [
        {"match": "ALEXANDRIA STAGING FACILI", "replace": "Alexandria Staging Facility", "locality": "ALEXANDRIA"},
        {"match": "ORANGE COUNTY JAIL (NY)", "replace": "ORANGE COUNTY JAIL", "locality": "GOSHEN"},
        {"match": "NORTH LAKE CORRECTIONAL F", "replace": "NORTH LAKE CORRECTIONAL FACILITY", "locality": "BALDWIN"},
        {"match": "PHELPS COUNTY JAIL (MO)", "replace": "Phelps County Jail", "locality": "ROLLA"},
        {
            "match": "PENNINGTON COUNTY JAIL (SOUTH DAKOTA)",
            "replace": "PENNINGTON COUNTY JAIL",
            "locality": "RAPID CITY",
        },
        {
            "match": "CORR. CTR OF NORTHWEST OHIO",
            "replace": "CORRECTIONS CENTER OF NORTHWEST OHIO",
            "locality": "STRYKER",
        },
        {
            "match": "FOLKSTON D RAY ICE PROCES",
            "replace": "D. RAY JAMES CORRECTIONAL INSTITUTION",
            "locality": "FOLKSTON",
        },
        {"match": "COLLIER COUNTY NAPLES JAIL CENTER", "replace": "COLLIER COUNTY JAIL", "locality": "NAPLES"},
        {
            # Previously rewritten to "IAM SECURE ...", which the table itself
            # lists below as a bad name; go straight to the corrected spelling.
            "match": "IAH SECURE ADULT DETENTION FACILITY (POLK)",
            "replace": "IAH SECURE ADULT DET. FACILITY",
            "locality": "LIVINGSTON",
        },
        {"match": "ORANGE COUNTY JAIL (FL)", "replace": "ORANGE COUNTY JAIL", "locality": "ORLANDO"},
        {"match": "CLARK COUNTY JAIL (IN)", "replace": "CLARK COUNTY JAIL", "locality": "JEFFERSONVILLE"},
        {"match": "PRINCE EDWARD COUNTY (FARMVILLE)", "replace": "ICA - FARMVILLE", "locality": "FARMVILLE"},
        {"match": "PHELPS COUNTY JAIL (NE)", "replace": "PHELPS COUNTY JAIL", "locality": "HOLDREGE"},
        {
            "match": "WASHINGTON COUNTY JAIL (PURGATORY CORRECTIONAL FAC",
            "replace": "WASHINGTON COUNTY JAIL",
            "locality": "HURRICANE",
        },
        {"match": "ETOWAH COUNTY JAIL (ALABAMA)", "replace": "ETOWAH COUNTY JAIL", "locality": "GADSDEN"},
        {"match": "BURLEIGH COUNTY", "replace": "BURLEIGH COUNTY JAIL", "locality": "BISMARCK"},
        {"match": "NELSON COLEMAN CORRECTION", "replace": "NELSON COLEMAN CORRECTIONS CENTER", "locality": "KILLONA"},
        {
            # The only CIMMARRON entry: an earlier duplicate with the
            # misspelled replacement "CIMMARRON CORRECTIONAL FACILITY" used to
            # shadow this one and made the corrected spelling unreachable.
            "match": "CIMMARRON CORR FACILITY",
            "replace": "CIMARRON CORRECTIONAL FACILITY",
            "locality": "CUSHING",
        },
        {
            "match": "IAM SECURE ADULT DET. FACILITY",
            "replace": "IAH SECURE ADULT DET. FACILITY",
            "locality": "LIVINGSTON",
        },
    ]
    for m in matches:
        if m["match"] == name and m["locality"] == locality:
            return m["replace"], True
    return name, False
97+
98+
3899
def repair_street(street: str, locality: str = "") -> Tuple[str, bool]:
39100
"""Generally, we'll let the spreadsheet win arguments just to be consistent"""
40101
street_filters = [
@@ -156,67 +217,6 @@ def repair_street(street: str, locality: str = "") -> Tuple[str, bool]:
156217
return street, cleaned
157218

158219

159-
def repair_name(name: str, locality: str) -> Tuple[str, bool]:
160-
"""Even facility names are occasionally bad"""
161-
matches = [
162-
{"match": "ALEXANDRIA STAGING FACILI", "replace": "Alexandria Staging Facility", "locality": "ALEXANDRIA"},
163-
{"match": "ORANGE COUNTY JAIL (NY)", "replace": "ORANGE COUNTY JAIL", "locality": "GOSHEN"},
164-
{"match": "NORTH LAKE CORRECTIONAL F", "replace": "NORTH LAKE CORRECTIONAL FACILITY", "locality": "BALDWIN"},
165-
{"match": "PHELPS COUNTY JAIL (MO)", "replace": "Phelps County Jail", "locality": "ROLLA"},
166-
{
167-
"match": "PENNINGTON COUNTY JAIL (SOUTH DAKOTA)",
168-
"replace": "PENNINGTON COUNTY JAIL",
169-
"locality": "RAPID CITY",
170-
},
171-
{
172-
"match": "CORR. CTR OF NORTHWEST OHIO",
173-
"replace": "CORRECTIONS CENTER OF NORTHWEST OHIO",
174-
"locality": "STRYKER",
175-
},
176-
{
177-
"match": "FOLKSTON D RAY ICE PROCES",
178-
"replace": "D. RAY JAMES CORRECTIONAL INSTITUTION",
179-
"locality": "FOLKSTON",
180-
},
181-
{"match": "COLLIER COUNTY NAPLES JAIL CENTER", "replace": "COLLIER COUNTY JAIL", "locality": "NAPLES"},
182-
{
183-
"match": "IAH SECURE ADULT DETENTION FACILITY (POLK)",
184-
"replace": "IAM SECURE ADULT DET. FACILITY",
185-
"locality": "LIVINGSTON",
186-
},
187-
{"match": "CIMMARRON CORR FACILITY", "replace": "CIMMARRON CORRECTIONAL FACILITY", "locality": "CUSHING"},
188-
{"match": "ORANGE COUNTY JAIL (FL)", "replace": "ORANGE COUNTY JAIL", "locality": "ORLANDO"},
189-
{"match": "CLARK COUNTY JAIL (IN)", "replace": "CLARK COUNTY JAIL", "locality": "JEFFERSONVILLE"},
190-
{"match": "PRINCE EDWARD COUNTY (FARMVILLE)", "replace": "ICA - FARMVILLE", "locality": "FARMVILLE"},
191-
{"match": "PHELPS COUNTY JAIL (NE)", "replace": "PHELPS COUNTY JAIL", "locality": "HOLDREGE"},
192-
{
193-
"match": "WASHINGTON COUNTY JAIL (PURGATORY CORRECTIONAL FAC",
194-
"replace": "WASHINGTON COUNTY JAIL",
195-
"locality": "HURRICANE",
196-
},
197-
{"match": "ETOWAH COUNTY JAIL (ALABAMA)", "replace": "ETOWAH COUNTY JAIL", "locality": "GADSDEN"},
198-
{"match": "BURLEIGH COUNTY", "replace": "BURLEIGH COUNTY JAIL", "locality": "BISMARCK"},
199-
{"match": "NELSON COLEMAN CORRECTION", "replace": "NELSON COLEMAN CORRECTIONS CENTER", "locality": "KILLONA"},
200-
{
201-
"match": "CIMMARRON CORR FACILITY",
202-
"replace": "CIMARRON CORRECTIONAL FACILITY",
203-
"locality": "CUSHING",
204-
},
205-
{
206-
"match": "IAM SECURE ADULT DET. FACILITY",
207-
"replace": "IAH SECURE ADULT DET. FACILITY",
208-
"locality": "LIVINGSTON",
209-
},
210-
]
211-
cleaned = False
212-
for m in matches:
213-
if m["match"] == name and m["locality"] == locality:
214-
name = m["replace"]
215-
cleaned = True
216-
break
217-
return name, cleaned
218-
219-
220220
def repair_zip(zip_code: int, locality: str) -> Tuple[str, bool]:
221221
"""
222222
Excel does a cool thing where it strips leading 0s

schemas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,10 @@
118118
}
119119

120120
# enrichment print details
121-
enrichment_print_schema = {
121+
enrichment_print_schema: dict = {
122122
"osm_found": 0,
123123
"wiki_found": 0,
124124
"wikidata_found": 0,
125125
}
126126

127-
supported_output_types = ["csv", "json", "parquet", "xlsx"]
127+
supported_output_types: list = ["csv", "json", "parquet", "xlsx"]

0 commit comments

Comments
 (0)