From 3c21e5800b131bf06eb88f20f715511618323006 Mon Sep 17 00:00:00 2001
From: Sai
Date: Mon, 29 Oct 2018 14:05:09 +0100
Subject: [PATCH] add scripts to export MuckRock IDs & info to JSON

---
 .gitignore                     |  3 +++
 export_agencies.py             | 45 +++++++++++++++++++++++++++++++++++++++++++++
 export_exemptions.py           | 44 ++++++++++++++++++++++++++++++++++++++++++++
 export_jurisdictions.py        | 44 ++++++++++++++++++++++++++++++++++++++++++++
 export_muckrock_identifiers.py |  5 +++++
 5 files changed, 141 insertions(+)
 create mode 100755 export_agencies.py
 create mode 100755 export_exemptions.py
 create mode 100755 export_jurisdictions.py
 create mode 100755 export_muckrock_identifiers.py

diff --git a/.gitignore b/.gitignore
index 39f6eb9..f8dd0f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 *.pyc
 *.DS_Store
 .DS_Store
+agencies/
+exemptions/
+jurisdictions/
diff --git a/export_agencies.py b/export_agencies.py
new file mode 100755
index 0000000..58e5d63
--- /dev/null
+++ b/export_agencies.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python2
+# -- coding: utf-8 --
+"""Export MuckRock agencies to agencies/<id>.json, one file per record.
+
+All work happens at import time: pages are fetched by following each
+response's 'next' URL until it is empty.
+"""
+
+import utils
+import urllib, os, json, datetime, requests, urlparse
+import errno
+
+api_url = utils.API_URL
+token = utils.get_api_key()
+headers = utils.get_headers(token)
+
+page = 1
+next_url = api_url + "agency/?page=" + str(page)
+done_so_far = 0
+
+
+try:
+    os.mkdir('agencies')
+except OSError as e:
+    # Only "already exists" is harmless; any other failure would break
+    # every write below, so let it propagate.
+    if e.errno != errno.EEXIST:
+        raise
+    print 'dir exists'
+
+while next_url:
+    response = requests.get(next_url, headers=headers)
+    # Surface HTTP errors here rather than as a KeyError on 'results'.
+    response.raise_for_status()
+    agencies = response.json()
+    agency_data = agencies['results']
+    for agency in agency_data:
+        # 'with' closes the file even if the write raises.
+        with open('agencies/' + str(agency["id"]) + ".json", "w+") as text_file:
+            text_file.write(json.dumps(agency, sort_keys=True, indent=4, separators=(',', ': ')))
+
+    done_so_far = done_so_far + len(agency_data)
+    count = agencies['count']
+    print 'Getting agencies: %d of %d' % (done_so_far, count)
+    next_url = agencies['next']
diff --git a/export_exemptions.py b/export_exemptions.py
new file mode 100755
index 0000000..0d8f0f6
--- /dev/null
+++ b/export_exemptions.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python2
+# -- coding: utf-8 --
+"""Export MuckRock exemptions to exemptions/<id>.json, one file per record.
+
+All work happens at import time: pages are fetched by following each
+response's 'next' URL until it is empty.
+"""
+
+import utils
+import urllib, os, json, datetime, requests, urlparse
+import errno
+
+api_url = utils.API_URL
+token = utils.get_api_key()
+headers = utils.get_headers(token)
+
+page = 1
+next_url = api_url + "exemption/?page=" + str(page)
+done_so_far = 0
+
+try:
+    os.mkdir('exemptions')
+except OSError as e:
+    # Only "already exists" is harmless; any other failure would break
+    # every write below, so let it propagate.
+    if e.errno != errno.EEXIST:
+        raise
+    print 'dir exists'
+
+while next_url:
+    response = requests.get(next_url, headers=headers)
+    # Surface HTTP errors here rather than as a KeyError on 'results'.
+    response.raise_for_status()
+    exemptions = response.json()
+    exemption_data = exemptions['results']
+    for exemption in exemption_data:
+        # 'with' closes the file even if the write raises.
+        with open('exemptions/' + str(exemption["id"]) + ".json", "w+") as text_file:
+            text_file.write(json.dumps(exemption, sort_keys=True, indent=4, separators=(',', ': ')))
+
+    done_so_far = done_so_far + len(exemption_data)
+    count = exemptions['count']
+    print 'Getting exemptions: %d of %d' % (done_so_far, count)
+    next_url = exemptions['next']
diff --git a/export_jurisdictions.py b/export_jurisdictions.py
new file mode 100755
index 0000000..7e88857
--- /dev/null
+++ b/export_jurisdictions.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python2
+# -- coding: utf-8 --
+"""Export MuckRock jurisdictions to jurisdictions/<id>.json, one file per record.
+
+All work happens at import time: pages are fetched by following each
+response's 'next' URL until it is empty.
+"""
+
+import utils
+import urllib, os, json, datetime, requests, urlparse
+import errno
+
+api_url = utils.API_URL
+token = utils.get_api_key()
+headers = utils.get_headers(token)
+
+page = 1
+next_url = api_url + "jurisdiction/?page=" + str(page)
+done_so_far = 0
+
+try:
+    os.mkdir('jurisdictions')
+except OSError as e:
+    # Only "already exists" is harmless; any other failure would break
+    # every write below, so let it propagate.
+    if e.errno != errno.EEXIST:
+        raise
+    print 'dir exists'
+
+while next_url:
+    response = requests.get(next_url, headers=headers)
+    # Surface HTTP errors here rather than as a KeyError on 'results'.
+    response.raise_for_status()
+    jurisdictions = response.json()
+    jurisdiction_data = jurisdictions['results']
+    for jurisdiction in jurisdiction_data:
+        # 'with' closes the file even if the write raises.
+        with open('jurisdictions/' + str(jurisdiction["id"]) + ".json", "w+") as text_file:
+            text_file.write(json.dumps(jurisdiction, sort_keys=True, indent=4, separators=(',', ': ')))
+
+    done_so_far = done_so_far + len(jurisdiction_data)
+    count = jurisdictions['count']
+    print 'Getting jurisdictions: %d of %d' % (done_so_far, count)
+    next_url = jurisdictions['next']
diff --git a/export_muckrock_identifiers.py b/export_muckrock_identifiers.py
new file mode 100755
index 0000000..c2c8068
--- /dev/null
+++ b/export_muckrock_identifiers.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python2
+# -- coding: utf-8 --
+"""Run every MuckRock export; each imported module exports on import."""
+
+import export_agencies, export_exemptions, export_jurisdictions
\ No newline at end of file