Skip to content

Commit da853d7

Browse files
committed
track total population, security threat, and more inspection data. Also fix an AOR matching bug
Signed-off-by: John Seekins <john.seekins@spoileralert.com>
1 parent b1bf3a0 commit da853d7

File tree

6 files changed

+38
-4
lines changed

6 files changed

+38
-4
lines changed

ice_scrapers/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@
3636
"Last Final Rating",
3737
]
3838

39+
ice_inspection_types = {
40+
# found in https://www.ice.gov/foia/odo-facility-inspections
41+
"ODO": "Office of Detention Oversight",
42+
# found in https://ia803100.us.archive.org/16/items/6213032-ORSA-MOU-ICE/6213032-ORSA-MOU-ICE_text.pdf
43+
"ORSA": "Operational Review Self-Assessment",
44+
}
45+
3946
# extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats08292025.xlsx 2025-09-07
4047
ice_facility_types = {
4148
"BOP": {

ice_scrapers/field_offices.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _extract_single_office(element: BeautifulSoup, page_url: str) -> dict:
152152
office["email"] = email[0]["href"].split(":", 1)[1]
153153
detail_txt = details.text # type: ignore [union-attr]
154154
logger.debug("Detail text: %s", detail_txt)
155-
aor_match = re.search(r"Area of Responsibility:(.+)Email", detail_txt)
155+
aor_match = re.search(r"Area of Responsibility:(.+)\n?Email", detail_txt)
156156
if aor_match:
157157
office["aor"] = aor_match.group(1).strip().replace("\xa0", " ")
158158

ice_scrapers/spreadsheet_load.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
clean_street,
1313
facility_sheet_header,
1414
ice_facility_types,
15+
ice_inspection_types,
1516
repair_zip,
1617
repair_locality,
1718
)
@@ -99,10 +100,15 @@ def load_sheet(keep_sheet: bool = True) -> dict:
99100
details["address"]["postal_code"] = zcode
100101
details["address"]["street"] = street
101102
details["name"] = row["Name"]
103+
104+
# population statistics
102105
details["population"]["male"]["criminal"] = row["Male Crim"]
103106
details["population"]["male"]["non_criminal"] = row["Male Non-Crim"]
104107
details["population"]["female"]["criminal"] = row["Female Crim"]
105108
details["population"]["female"]["non_criminal"] = row["Female Non-Crim"]
109+
details["population"]["total"] = (
110+
row["Male Crim"] + row["Male Non-Crim"] + row["Female Crim"] + row["Female Non-Crim"]
111+
)
106112
if row["Male/Female"]:
107113
if "/" in row["Male/Female"]:
108114
details["population"]["female"]["allowed"] = True
@@ -117,6 +123,15 @@ def load_sheet(keep_sheet: bool = True) -> dict:
117123
"level_3": row["ICE Threat Level 3"],
118124
"none": row["No ICE Threat Level"],
119125
}
126+
"""
127+
extracted from
128+
Upon admission and periodically thereafter, detainees are categorized into a security level based on a variety of public safety factors, and are housed accordingly. Factors include prior convictions, threat risk, disciplinary record, special vulnerabilities, and special management concerns. Detainees are categorized into one of four classes of security risk: A/low, B/medium low, C/medium high, and D/high.
129+
"""
130+
details["population"]["security_threat"]["low"] = row["Level A"]
131+
details["population"]["security_threat"]["medium_low"] = row["Level B"]
132+
details["population"]["security_threat"]["medium_high"] = row["Level C"]
133+
details["population"]["security_threat"]["high"] = row["Level D"]
134+
120135
details["facility_type"] = {
121136
"id": row["Type Detailed"],
122137
"housing": {
@@ -130,6 +145,8 @@ def load_sheet(keep_sheet: bool = True) -> dict:
130145
details["facility_type"]["expanded_name"] = ft_details["expanded_name"]
131146
details["avg_stay_length"] = row["FY25 ALOS"]
132147
details["inspection"] = {
148+
# fall back to type code
149+
"last_type": ice_inspection_types.get(row["Last Inspection Type"], row["Last Inspection Type"]),
133150
"last_date": row["Last Inspection End Date"],
134151
"last_rating": row["Last Final Rating"],
135152
}

ice_scrapers/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,10 @@ def repair_zip(zip_code: int, locality: str) -> Tuple[str, bool]:
135135
"""
136136
zcode = str(zip_code)
137137
cleaned = False
138-
if len(zcode) == 4:
139-
zcode = f"0{zcode}"
138+
if len(zcode) < 5:
139+
# left-pad with zeros up to five characters
140+
zeros = "0" * (5 - len(zcode))
141+
zcode = f"{zeros}{zcode}"
140142
return zcode, cleaned
141143
matches = [
142144
{"match": "89512", "replace": "89506", "locality": "Reno"},

schemas.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@
7979
"level_3": 0,
8080
"none": 0,
8181
},
82+
"total": 0,
83+
"security_threat": {
84+
"low": 0,
85+
"medium_low": 0,
86+
"medium_high": 0,
87+
"high": 0,
88+
},
8289
},
8390
"facility_type": {
8491
"id": "",
@@ -90,6 +97,7 @@
9097
},
9198
},
9299
"inspection": {
100+
"last_type": "",
93101
"last_date": None,
94102
"last_rating": "",
95103
},

utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def _flatdict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
4040
"""flatten a nested dictionary for nicer printing to workbooks (excel/csv/etc.)"""
4141
items: list = []
4242
for k, v in d.items():
43-
new_key = parent_key + sep + str(k) if parent_key else str(k)
43+
new_key = f"{parent_key}{sep}{str(k)}" if parent_key else str(k)
4444
if isinstance(v, dict):
4545
items.extend(_flatdict(v, new_key, sep=sep).items())
4646
else:

0 commit comments

Comments
 (0)