From ebf116f34762ec4173cc1e0f912c3e88e700e2b5 Mon Sep 17 00:00:00 2001 From: Patrick Moore Date: Wed, 18 Jun 2025 01:50:45 -0400 Subject: [PATCH 1/8] feat: Add Overpass API integration for animal shelter data retrieval --- scripts/upload_animal_shelters.py | 42 +++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index 26414a3..e2adfaa 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -1,16 +1,21 @@ import argparse -import json import os import requests +# TODO: consider using OSMPythonTools instead of overpass +import overpass from tqdm import tqdm DEFAULT_OSM_DATA_FILE = "export.geojson" +# Search area must be the official name, e.g. "Germany" is not a valid area name in Overpass API +# Consider instead finding & using the code within the query itself, e.g. "ISO3166-1"="DE" +DEFAULT_OVERPASS_SEARCH_AREA = "Deutschland" def parse_args(): """Parse command-line arguments.""" - parser = argparse.ArgumentParser(description="Upload animal shelter data to the Notfellchen API.") + parser = argparse.ArgumentParser(description="Download animal shelter data from the Overpass API to the Notfellchen API.") parser.add_argument("--api-token", type=str, help="API token for authentication.") + parser.add_argument("--area", type=str, help="Area to search for animal shelters (default: Deutschland).") parser.add_argument("--instance", type=str, help="API instance URL.") parser.add_argument("--data-file", type=str, help="Path to the GeoJSON file containing (only) animal shelters.") return parser.parse_args() @@ -21,13 +26,15 @@ def get_config(): args = parse_args() api_token = args.api_token or os.getenv("NOTFELLCHEN_API_TOKEN") + # TODO: document new environment variable NOTFELLCHEN_AREA + area = args.area or os.getenv("NOTFELLCHEN_AREA", DEFAULT_OVERPASS_SEARCH_AREA) instance = args.instance or os.getenv("NOTFELLCHEN_INSTANCE") 
data_file = args.data_file or os.getenv("NOTFELLCHEN_DATA_FILE", DEFAULT_OSM_DATA_FILE) if not api_token or not instance: raise ValueError("API token and instance URL must be provided via environment variables or CLI arguments.") - return api_token, instance, data_file + return api_token, area, instance, data_file def get_or_none(data, key): @@ -68,16 +75,35 @@ def https(value): return None +# TODO: take note of new get_overpass_result function which does the bulk of the new overpass query work +def get_overpass_result(area): + """Build the Overpass query for fetching animal shelters in the specified area.""" + api = overpass.API() + result = api.get(f""" + // fetch area to search within + area[name="{area}"]->.searchArea; + // gather results + nwr["amenity"="animal_shelter"](area.searchArea); + """, verbosity="geom" + ) + return result + + def main(): - api_token, instance, data_file = get_config() + api_token, area, instance, data_file = get_config() + # Query shelters + overpass_result = get_overpass_result(area) + if overpass_result is None: + print("Error: get_overpass_result returned None") + return + print(f"Response type: {type(overpass_result)}") + print(f"Response content: {overpass_result}") + # Set headers and endpoint endpoint = f"{instance}/api/organizations/" h = {'Authorization': f'Token {api_token}', "content-type": "application/json"} - with open(data_file, encoding="utf8") as f: - d = json.load(f) - - for idx, tierheim in tqdm(enumerate(d["features"])): + for idx, tierheim in tqdm(enumerate(overpass_result["features"])): if "name" not in tierheim["properties"].keys() or "addr:city" not in tierheim["properties"].keys(): continue From c968b3965722566689141b0edbc4b1da675795f9 Mon Sep 17 00:00:00 2001 From: Patrick Moore Date: Wed, 18 Jun 2025 07:07:25 -0400 Subject: [PATCH 2/8] fix: Refactor Overpass query to use requests and osmtogeojson for animal shelter data retrieval --- scripts/upload_animal_shelters.py | 26 +++++++++++++++----------- 1 file 
changed, 15 insertions(+), 11 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index e2adfaa..41c2a79 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -1,8 +1,8 @@ import argparse import os import requests -# TODO: consider using OSMPythonTools instead of overpass -import overpass +# TODO: consider using OSMPythonTools instead of requests or overpass library +from osmtogeojson import osmtogeojson from tqdm import tqdm DEFAULT_OSM_DATA_FILE = "export.geojson" @@ -78,15 +78,19 @@ def https(value): # TODO: take note of new get_overpass_result function which does the bulk of the new overpass query work def get_overpass_result(area): """Build the Overpass query for fetching animal shelters in the specified area.""" - api = overpass.API() - result = api.get(f""" - // fetch area to search within - area[name="{area}"]->.searchArea; - // gather results - nwr["amenity"="animal_shelter"](area.searchArea); - """, verbosity="geom" - ) - return result + overpass_endpoint = "https://overpass-api.de/api/interpreter" + overpass_query = f""" + [out:json][timeout:25]; + area[name="{area}"]->.searchArea; + nwr["amenity"="animal_shelter"](area.searchArea); + out body; + >; + out skel qt; + """ + r = requests.get(overpass_endpoint, params={'data': overpass_query}) + if r.status_code == 200: + result = osmtogeojson.process_osm_json(r.json()) + return result def main(): From 7d264fe1311d7b980037c19a6cbe44e2953e8b81 Mon Sep 17 00:00:00 2001 From: Patrick Moore Date: Wed, 18 Jun 2025 10:21:28 -0400 Subject: [PATCH 3/8] fix: Update get_overpass_result to accept data_file parameter and save response to file --- scripts/upload_animal_shelters.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index 41c2a79..440895c 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -1,4 
+1,5 @@ import argparse +import json import os import requests # TODO: consider using OSMPythonTools instead of requests or overpass library @@ -76,7 +77,7 @@ def https(value): # TODO: take note of new get_overpass_result function which does the bulk of the new overpass query work -def get_overpass_result(area): +def get_overpass_result(area, data_file): """Build the Overpass query for fetching animal shelters in the specified area.""" overpass_endpoint = "https://overpass-api.de/api/interpreter" overpass_query = f""" @@ -89,14 +90,17 @@ def get_overpass_result(area): """ r = requests.get(overpass_endpoint, params={'data': overpass_query}) if r.status_code == 200: - result = osmtogeojson.process_osm_json(r.json()) + rjson = r.json() + with open(data_file, 'w', encoding='utf-8') as f: + json.dump(rjson, f, ensure_ascii=False) + result = osmtogeojson.process_osm_json(rjson) return result def main(): api_token, area, instance, data_file = get_config() # Query shelters - overpass_result = get_overpass_result(area) + overpass_result = get_overpass_result(area, data_file) if overpass_result is None: print("Error: get_overpass_result returned None") return From 53c0e8b3b8385c65773c73052489103903d39807 Mon Sep 17 00:00:00 2001 From: Patrick Moore Date: Thu, 19 Jun 2025 23:11:58 -0400 Subject: [PATCH 4/8] feat: Merge moan0s changes These changes may enhance animal shelter data handling with new add_if_available function and improved organization checks --- scripts/upload_animal_shelters.py | 89 ++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index 440895c..0f7be1b 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -1,6 +1,8 @@ import argparse import json import os +from types import SimpleNamespace + import requests # TODO: consider using OSMPythonTools instead of requests or overpass library from osmtogeojson import 
osmtogeojson @@ -97,6 +99,14 @@ def get_overpass_result(area, data_file): return result +def add_if_available(base_data, keys, result): + # Loads the data into the org if available + for key in keys: + if getattr(base_data, key) is not None: + result[key] = getattr(base_data, key) + return result + + def main(): api_token, area, instance, data_file = get_config() # Query shelters @@ -111,29 +121,80 @@ def main(): endpoint = f"{instance}/api/organizations/" h = {'Authorization': f'Token {api_token}', "content-type": "application/json"} - for idx, tierheim in tqdm(enumerate(overpass_result["features"])): + tierheime = overpass_result["features"] + for idx, tierheim in enumerate(tqdm(tierheime)): + # Check if data is low quality if "name" not in tierheim["properties"].keys() or "addr:city" not in tierheim["properties"].keys(): continue - data = {"name": tierheim["properties"]["name"], - "location_string": f"{get_or_none(tierheim, "addr:street")} {get_or_none(tierheim, "addr:housenumber")}, {get_or_none(tierheim, "addr:postcode")} {tierheim["properties"]["addr:city"]}", - "phone_number": choose(("contact:phone", "phone"), tierheim["properties"], replace=True), - "fediverse_profile": get_or_none(tierheim, "contact:mastodon"), - "facebook": https(add(get_or_none(tierheim, "contact:facebook"), "facebook")), - "instagram": https(add(get_or_none(tierheim, "contact:instagram"), "instagram")), - "website": https(choose(("contact:website", "website"), tierheim["properties"])), - "email": choose(("contact:email", "email"), tierheim["properties"]), - "description": get_or_none(tierheim, "opening_hours"), - "external_object_identifier": f"{tierheim["id"]}", - "external_source_identifier": "OSM" - } + # Load TH data in for easier accessing + th_data = SimpleNamespace( + name=tierheim["properties"]["name"], + email=choose(("contact:email", "email"), tierheim["properties"]), + phone_number=choose(("contact:phone", "phone"), tierheim["properties"], replace=True), + 
fediverse_profile=get_or_none(tierheim, "contact:mastodon"), + facebook=https(add(get_or_none(tierheim, "contact:facebook"), "facebook")), + instagram=https(add(get_or_none(tierheim, "contact:instagram"), "instagram")), + website=https(choose(("contact:website", "website"), tierheim["properties"])), + description=get_or_none(tierheim, "opening_hours"), + external_object_identifier=tierheim["id"], + EXTERNAL_SOURCE_IDENTIFIER="OSM", + ) - result = requests.post(endpoint, json=data, headers=h) + # Define here for later + optional_data = ["email", "phone_number", "website", "description", "fediverse_profile", "facebook", + "instagram"] + + # Check if rescue organization exits + search_data = {"external_source_identifier": "OSM", + "external_object_identifier": f"{tierheim["id"]}"} + search_result = requests.get(f"{instance}/api/organizations", json=search_data, headers=h) + if search_result.status_code == 200: + org_id = search_result.json()[0]["id"] + print(f"{th_data.name} already exists as ID {org_id}.") + org_patch_data = {"id": org_id, + "name": th_data.name} + + add_if_available(th_data, optional_data, org_patch_data) + + result = requests.patch(endpoint, json=org_patch_data, headers=h) + if result.status_code != 200: + print(f"Updating {tierheim['properties']['name']} failed:{result.status_code} {result.json()}") + exit() + continue + + location_data = { + "place_id": tierheim["id"], + "latitude": tierheim["geometry"]["coordinates"][0][0][0], + "longitude": tierheim["geometry"]["coordinates"][0][0][1], + "name": tierheim["properties"]["name"], + "city": tierheim["properties"]["addr:city"], + "housenumber": get_or_none(tierheim, "addr:housenumber"), + "postcode": get_or_none(tierheim, "addr:postcode"), + "street": get_or_none(tierheim, "addr:street"), + "countrycode": get_or_none(tierheim, "addr:country"), + } + + location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=h) + + if location_result.status_code != 201: + 
print(f"{idx} Location for {tierheim["properties"]["name"]}:{location_result.status_code} {location_result.json()} not created") + exit() + + org_data = {"name": tierheim["properties"]["name"], + "external_object_identifier": f"{tierheim["id"]}", + "external_source_identifier": "OSM" + } + + add_if_available(th_data, optional_data, org_data) + + result = requests.post(endpoint, json=org_data, headers=h) if result.status_code != 201: print(f"{idx} {tierheim["properties"]["name"]}:{result.status_code} {result.json()}") + if __name__ == "__main__": main() From 3b9f10dad7ace2fcb933b295c7e2475cc4f86149 Mon Sep 17 00:00:00 2001 From: moanos Date: Fri, 20 Jun 2025 08:03:40 +0200 Subject: [PATCH 5/8] fix: Make sure that location uses center, don't provide fediverse or description if empty and actually create and use location --- scripts/upload_animal_shelters.py | 121 ++++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 31 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index 0f7be1b..8486b61 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -41,6 +41,13 @@ def get_config(): def get_or_none(data, key): + if key in data["properties"].keys(): + return data["properties"][key] + else: + return None + + +def get_or_empty(data, key): if key in data["properties"].keys(): return data["properties"][key] else: @@ -78,6 +85,50 @@ def https(value): return None +def calc_coordinate_center(coordinates): + """ + Calculates the center as the arithmetic mean of the list of coordinates. + + Not perfect because earth is a sphere (citation needed) but good enough. 
+ """ + if not coordinates: + return None, None + + lon_sum = 0.0 + lat_sum = 0.0 + count = 0 + + for lon, lat in coordinates: + lon_sum += lon + lat_sum += lat + count += 1 + + return lon_sum / count, lat_sum / count + + +def get_center_coordinates(geometry): + """ + Given a GeoJSON geometry dict, return (longitude, latitude) + + If a shape, calculate the center, else return the point + """ + geom_type = geometry["type"] + coordinates = geometry["coordinates"] + + if geom_type == "Point": + return coordinates[0], coordinates[1] + + elif geom_type == "LineString": + return calc_coordinate_center(coordinates) + + elif geom_type == "Polygon": + outer_ring = coordinates[0] + return calc_coordinate_center(outer_ring) + + else: + raise ValueError(f"Unsupported geometry type: {geom_type}") + + # TODO: take note of new get_overpass_result function which does the bulk of the new overpass query work def get_overpass_result(area, data_file): """Build the Overpass query for fetching animal shelters in the specified area.""" @@ -107,6 +158,28 @@ def add_if_available(base_data, keys, result): return result +def create_location(tierheim, instance, headers): + location_data = { + "place_id": tierheim["id"], + "longitude": get_center_coordinates(tierheim["geometry"])[0], + "latitude": get_center_coordinates(tierheim["geometry"])[1], + "name": tierheim["properties"]["name"], + "city": tierheim["properties"]["addr:city"], + "housenumber": get_or_empty(tierheim, "addr:housenumber"), + "postcode": get_or_empty(tierheim, "addr:postcode"), + "street": get_or_empty(tierheim, "addr:street"), + "countrycode": get_or_empty(tierheim, "addr:country"), + } + + location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=headers) + + if location_result.status_code != 201: + print( + f"Location for {tierheim["properties"]["name"]}:{location_result.status_code} {location_result.json()} not created") + exit() + return location_result.json() + + def main(): api_token, 
area, instance, data_file = get_config() # Query shelters @@ -134,8 +207,8 @@ def main(): email=choose(("contact:email", "email"), tierheim["properties"]), phone_number=choose(("contact:phone", "phone"), tierheim["properties"], replace=True), fediverse_profile=get_or_none(tierheim, "contact:mastodon"), - facebook=https(add(get_or_none(tierheim, "contact:facebook"), "facebook")), - instagram=https(add(get_or_none(tierheim, "contact:instagram"), "instagram")), + facebook=https(add(get_or_empty(tierheim, "contact:facebook"), "facebook")), + instagram=https(add(get_or_empty(tierheim, "contact:instagram"), "instagram")), website=https(choose(("contact:website", "website"), tierheim["properties"])), description=get_or_none(tierheim, "opening_hours"), external_object_identifier=tierheim["id"], @@ -155,45 +228,31 @@ def main(): print(f"{th_data.name} already exists as ID {org_id}.") org_patch_data = {"id": org_id, "name": th_data.name} + if search_result.json()[0]["location"] is None: + location = create_location(tierheim, instance, h) + org_patch_data["location"] = location["id"] - add_if_available(th_data, optional_data, org_patch_data) + org_patch_data = add_if_available(th_data, optional_data, org_patch_data) result = requests.patch(endpoint, json=org_patch_data, headers=h) if result.status_code != 200: print(f"Updating {tierheim['properties']['name']} failed:{result.status_code} {result.json()}") exit() continue + else: + location = create_location(tierheim, instance, h) + org_data = {"name": tierheim["properties"]["name"], + "external_object_identifier": f"{tierheim["id"]}", + "external_source_identifier": "OSM", + "location": location["id"] + } - location_data = { - "place_id": tierheim["id"], - "latitude": tierheim["geometry"]["coordinates"][0][0][0], - "longitude": tierheim["geometry"]["coordinates"][0][0][1], - "name": tierheim["properties"]["name"], - "city": tierheim["properties"]["addr:city"], - "housenumber": get_or_none(tierheim, "addr:housenumber"), - 
"postcode": get_or_none(tierheim, "addr:postcode"), - "street": get_or_none(tierheim, "addr:street"), - "countrycode": get_or_none(tierheim, "addr:country"), - } + org_data = add_if_available(th_data, optional_data, org_data) - location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=h) - - if location_result.status_code != 201: - print(f"{idx} Location for {tierheim["properties"]["name"]}:{location_result.status_code} {location_result.json()} not created") - exit() - - org_data = {"name": tierheim["properties"]["name"], - "external_object_identifier": f"{tierheim["id"]}", - "external_source_identifier": "OSM" - } - - add_if_available(th_data, optional_data, org_data) - - result = requests.post(endpoint, json=org_data, headers=h) - - if result.status_code != 201: - print(f"{idx} {tierheim["properties"]["name"]}:{result.status_code} {result.json()}") + result = requests.post(endpoint, json=org_data, headers=h) + if result.status_code != 201: + print(f"{idx} {tierheim["properties"]["name"]}:{result.status_code} {result.json()}") if __name__ == "__main__": From b73f6db7b632096f49462902a8308ba652b7cc04 Mon Sep 17 00:00:00 2001 From: moanos Date: Fri, 20 Jun 2025 08:04:13 +0200 Subject: [PATCH 6/8] feat: dump data in geojson format --- scripts/upload_animal_shelters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index 8486b61..1345469 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -144,9 +144,9 @@ def get_overpass_result(area, data_file): r = requests.get(overpass_endpoint, params={'data': overpass_query}) if r.status_code == 200: rjson = r.json() - with open(data_file, 'w', encoding='utf-8') as f: - json.dump(rjson, f, ensure_ascii=False) result = osmtogeojson.process_osm_json(rjson) + with open(data_file, 'w', encoding='utf-8') as f: + json.dump(result, f, ensure_ascii=False) return result From 
abd34ec7cbdc683a0b2656748f3eea67227f1fb3 Mon Sep 17 00:00:00 2001 From: moanos Date: Fri, 20 Jun 2025 08:05:11 +0200 Subject: [PATCH 7/8] feat: allow using cached results --- scripts/upload_animal_shelters.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index 1345469..bfa4d6d 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -21,6 +21,7 @@ def parse_args(): parser.add_argument("--area", type=str, help="Area to search for animal shelters (default: Deutschland).") parser.add_argument("--instance", type=str, help="API instance URL.") parser.add_argument("--data-file", type=str, help="Path to the GeoJSON file containing (only) animal shelters.") + parser.add_argument("--use-cached", action='store_true', help="Use the stored GeoJSON file") return parser.parse_args() @@ -33,11 +34,12 @@ def get_config(): area = args.area or os.getenv("NOTFELLCHEN_AREA", DEFAULT_OVERPASS_SEARCH_AREA) instance = args.instance or os.getenv("NOTFELLCHEN_INSTANCE") data_file = args.data_file or os.getenv("NOTFELLCHEN_DATA_FILE", DEFAULT_OSM_DATA_FILE) + use_cached = args.use_cached or os.getenv("NOTFELLCHEN_USE_CACHED", False) if not api_token or not instance: raise ValueError("API token and instance URL must be provided via environment variables or CLI arguments.") - return api_token, area, instance, data_file + return api_token, area, instance, data_file, use_cached def get_or_none(data, key): @@ -181,14 +183,18 @@ def create_location(tierheim, instance, headers): def main(): - api_token, area, instance, data_file = get_config() - # Query shelters - overpass_result = get_overpass_result(area, data_file) - if overpass_result is None: - print("Error: get_overpass_result returned None") - return - print(f"Response type: {type(overpass_result)}") - print(f"Response content: {overpass_result}") + api_token, area, instance, data_file, 
use_cached = get_config() + if not use_cached: + # Query shelters + overpass_result = get_overpass_result(area, data_file) + if overpass_result is None: + print("Error: get_overpass_result returned None") + return + print(f"Response type: {type(overpass_result)}") + print(f"Response content: {overpass_result}") + else: + with open(data_file, 'r', encoding='utf-8') as f: + overpass_result = json.load(f) # Set headers and endpoint endpoint = f"{instance}/api/organizations/" From 5ee1e61eac5c6c0150051d604e8486fa167c877a Mon Sep 17 00:00:00 2001 From: moanos Date: Fri, 20 Jun 2025 08:05:34 +0200 Subject: [PATCH 8/8] fix: ensure that search is actually using search params --- scripts/upload_animal_shelters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py index bfa4d6d..cf95428 100644 --- a/scripts/upload_animal_shelters.py +++ b/scripts/upload_animal_shelters.py @@ -228,7 +228,7 @@ def main(): # Check if rescue organization exits search_data = {"external_source_identifier": "OSM", "external_object_identifier": f"{tierheim["id"]}"} - search_result = requests.get(f"{instance}/api/organizations", json=search_data, headers=h) + search_result = requests.get(f"{instance}/api/organizations", params=search_data, headers=h) if search_result.status_code == 200: org_id = search_result.json()[0]["id"] print(f"{th_data.name} already exists as ID {org_id}.")