Merge branch 'overpass-take-2' into develop
# Conflicts:
#	scripts/upload_animal_shelters.py
@@ -4,17 +4,25 @@ import os
 from types import SimpleNamespace
 
 import requests
+# TODO: consider using OSMPythonTools instead of requests or overpass library
+from osmtogeojson import osmtogeojson
 from tqdm import tqdm
 
 DEFAULT_OSM_DATA_FILE = "export.geojson"
+# Search area must be the official name, e.g. "Germany" is not a valid area name in Overpass API
+# Consider instead finding & using the code within the query itself, e.g. "ISO3166-1"="DE"
+DEFAULT_OVERPASS_SEARCH_AREA = "Deutschland"
 
 
 def parse_args():
     """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(description="Upload animal shelter data to the Notfellchen API.")
+    parser = argparse.ArgumentParser(
+        description="Download animal shelter data from the Overpass API to the Notfellchen API.")
     parser.add_argument("--api-token", type=str, help="API token for authentication.")
+    parser.add_argument("--area", type=str, help="Area to search for animal shelters (default: Deutschland).")
     parser.add_argument("--instance", type=str, help="API instance URL.")
     parser.add_argument("--data-file", type=str, help="Path to the GeoJSON file containing (only) animal shelters.")
+    parser.add_argument("--use-cached", action='store_true', help="Use the stored GeoJSON file")
     return parser.parse_args()
 
 
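Note (not part of this diff): the two comments above point at a limitation of selecting the Overpass search area by name. A minimal sketch of the ISO-code variant they suggest, reusing the same query body that get_overpass_result adds further down:

    # Hypothetical alternative: select the search area by country code rather than by name,
    # so the query no longer depends on the language of the "name" tag.
    overpass_query = """
    [out:json][timeout:25];
    area["ISO3166-1"="DE"][admin_level=2]->.searchArea;
    nwr["amenity"="animal_shelter"](area.searchArea);
    out body;
    >;
    out skel qt;
    """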
@@ -23,16 +31,26 @@ def get_config():
     args = parse_args()
 
     api_token = args.api_token or os.getenv("NOTFELLCHEN_API_TOKEN")
+    # TODO: document new environment variable NOTFELLCHEN_AREA
+    area = args.area or os.getenv("NOTFELLCHEN_AREA", DEFAULT_OVERPASS_SEARCH_AREA)
     instance = args.instance or os.getenv("NOTFELLCHEN_INSTANCE")
     data_file = args.data_file or os.getenv("NOTFELLCHEN_DATA_FILE", DEFAULT_OSM_DATA_FILE)
+    use_cached = args.use_cached or os.getenv("NOTFELLCHEN_USE_CACHED", False)
 
     if not api_token or not instance:
         raise ValueError("API token and instance URL must be provided via environment variables or CLI arguments.")
 
-    return api_token, instance, data_file
+    return api_token, area, instance, data_file, use_cached
 
 
 def get_or_none(data, key):
+    if key in data["properties"].keys():
+        return data["properties"][key]
+    else:
+        return None
+
+
+def get_or_empty(data, key):
     if key in data["properties"].keys():
         return data["properties"][key]
     else:
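Note (not part of this diff): os.getenv returns strings, so setting NOTFELLCHEN_USE_CACHED=false would still leave use_cached truthy. A sketch of stricter parsing, with env_flag as a hypothetical helper:

    def env_flag(name, default=False):
        # Interpret common "true"-ish spellings; anything else counts as disabled.
        value = os.getenv(name)
        if value is None:
            return default
        return value.strip().lower() in ("1", "true", "yes")

    use_cached = args.use_cached or env_flag("NOTFELLCHEN_USE_CACHED")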
@@ -70,6 +88,27 @@ def https(value):
     return None
 
 
+def calc_coordinate_center(coordinates):
+    """
+    Calculates the center as the arithmetic mean of the list of coordinates.
+
+    Not perfect because earth is a sphere (citation needed) but good enough.
+    """
+    if not coordinates:
+        return None, None
+
+    lon_sum = 0.0
+    lat_sum = 0.0
+    count = 0
+
+    for lon, lat in coordinates:
+        lon_sum += lon
+        lat_sum += lat
+        count += 1
+
+    return lon_sum / count, lat_sum / count
+
+
 def get_center_coordinates(geometry):
     """
     Given a GeoJSON geometry dict, return (longitude, latitude)
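The new calc_coordinate_center simply averages each axis. A short worked example with two hypothetical longitude/latitude pairs:

    coords = [(13.3777, 52.5163), (13.4050, 52.5200)]   # hypothetical points near Berlin
    # lon: (13.3777 + 13.4050) / 2 = 13.39135
    # lat: (52.5163 + 52.5200) / 2 = 52.51815
    calc_coordinate_center(coords)                       # -> (13.39135, 52.51815)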
@@ -93,25 +132,25 @@ def get_center_coordinates(geometry):
         raise ValueError(f"Unsupported geometry type: {geom_type}")
 
 
-def calc_coordinate_center(coordinates):
-    """
-    Calculates the center as the arithmetic mean of the list of coordinates.
-
-    Not perfect because earth is a sphere (citation needed) but good enough.
-    """
-    if not coordinates:
-        return None, None
-
-    lon_sum = 0.0
-    lat_sum = 0.0
-    count = 0
-
-    for lon, lat in coordinates:
-        lon_sum += lon
-        lat_sum += lat
-        count += 1
-
-    return lon_sum / count, lat_sum / count
+# TODO: take note of new get_overpass_result function which does the bulk of the new overpass query work
+def get_overpass_result(area, data_file):
+    """Build the Overpass query for fetching animal shelters in the specified area."""
+    overpass_endpoint = "https://overpass-api.de/api/interpreter"
+    overpass_query = f"""
+    [out:json][timeout:25];
+    area[name="{area}"]->.searchArea;
+    nwr["amenity"="animal_shelter"](area.searchArea);
+    out body;
+    >;
+    out skel qt;
+    """
+    r = requests.get(overpass_endpoint, params={'data': overpass_query})
+    if r.status_code == 200:
+        rjson = r.json()
+        result = osmtogeojson.process_osm_json(rjson)
+        with open(data_file, 'w', encoding='utf-8') as f:
+            json.dump(result, f, ensure_ascii=False)
+        return result
 
 
 def add_if_available(base_data, keys, result):
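A minimal usage sketch of the new helper (assumes network access; values are the module defaults above): it fetches every amenity=animal_shelter object in the area, converts the response to GeoJSON, writes it to data_file, and returns the FeatureCollection, or implicitly returns None when Overpass does not answer with HTTP 200.

    shelters = get_overpass_result(DEFAULT_OVERPASS_SEARCH_AREA, DEFAULT_OSM_DATA_FILE)
    if shelters is not None:
        print(f"{len(shelters['features'])} shelters found")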
@@ -122,23 +161,51 @@ def add_if_available(base_data, keys, result):
     return result
 
 
+def create_location(tierheim, instance, headers):
+    location_data = {
+        "place_id": tierheim["id"],
+        "longitude": get_center_coordinates(tierheim["geometry"])[0],
+        "latitude": get_center_coordinates(tierheim["geometry"])[1],
+        "name": tierheim["properties"]["name"],
+        "city": tierheim["properties"]["addr:city"],
+        "housenumber": get_or_empty(tierheim, "addr:housenumber"),
+        "postcode": get_or_empty(tierheim, "addr:postcode"),
+        "street": get_or_empty(tierheim, "addr:street"),
+        "countrycode": get_or_empty(tierheim, "addr:country"),
+    }
+
+    location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=headers)
+
+    if location_result.status_code != 201:
+        print(
+            f"Location for {tierheim["properties"]["name"]}:{location_result.status_code} {location_result.json()} not created")
+        exit()
+    return location_result.json()
+
+
 def main():
-    api_token, instance, data_file = get_config()
+    api_token, area, instance, data_file, use_cached = get_config()
+    if not use_cached:
+        # Query shelters
+        overpass_result = get_overpass_result(area, data_file)
+        if overpass_result is None:
+            print("Error: get_overpass_result returned None")
+            return
+        print(f"Response type: {type(overpass_result)}")
+        print(f"Response content: {overpass_result}")
+    else:
+        with open(data_file, 'r', encoding='utf-8') as f:
+            overpass_result = json.load(f)
 
     # Set headers and endpoint
     endpoint = f"{instance}/api/organizations/"
     h = {'Authorization': f'Token {api_token}', "content-type": "application/json"}
 
-    with open(data_file, encoding="utf8") as f:
-        d = json.load(f)
-
-    skipped_low_quality = 0
-
-    tierheime = d["features"]
-
-    for idx, tierheim in enumerate(tierheime):
+    tierheime = overpass_result["features"]
+
+    for idx, tierheim in enumerate(tqdm(tierheime)):
 
         # Check if data is low quality
         if "name" not in tierheim["properties"].keys() or "addr:city" not in tierheim["properties"].keys():
-            skipped_low_quality = skipped_low_quality + 1
             continue
 
         # Load TH data in for easier accessing
@@ -147,8 +214,8 @@ def main():
             email=choose(("contact:email", "email"), tierheim["properties"]),
             phone_number=choose(("contact:phone", "phone"), tierheim["properties"], replace=True),
             fediverse_profile=get_or_none(tierheim, "contact:mastodon"),
-            facebook=https(add(get_or_none(tierheim, "contact:facebook"), "facebook")),
-            instagram=https(add(get_or_none(tierheim, "contact:instagram"), "instagram")),
+            facebook=https(add(get_or_empty(tierheim, "contact:facebook"), "facebook")),
+            instagram=https(add(get_or_empty(tierheim, "contact:instagram"), "instagram")),
             website=https(choose(("contact:website", "website"), tierheim["properties"])),
             description=get_or_none(tierheim, "opening_hours"),
             external_object_identifier=tierheim["id"],
@@ -168,51 +235,31 @@ def main():
             print(f"{th_data.name} already exists as ID {org_id}.")
             org_patch_data = {"id": org_id,
                               "name": th_data.name}
-            add_if_available(th_data, optional_data, org_patch_data)
+            if search_result.json()[0]["location"] is None:
+                location = create_location(tierheim, instance, h)
+                org_patch_data["location"] = location["id"]
+
+            org_patch_data = add_if_available(th_data, optional_data, org_patch_data)
 
             result = requests.patch(endpoint, json=org_patch_data, headers=h)
             if result.status_code != 200:
                 print(f"Updating {tierheim['properties']['name']} failed:{result.status_code} {result.json()}")
                 exit()
             continue
 
-        location_data = {
-            "place_id": tierheim["id"],
-            "longitude": get_center_coordinates(tierheim["geometry"])[0],
-            "latitude": get_center_coordinates(tierheim["geometry"])[1],
-            "name": tierheim["properties"]["name"],
-            "city": tierheim["properties"]["addr:city"],
-            "housenumber": get_or_none(tierheim, "addr:housenumber"),
-            "postcode": get_or_none(tierheim, "addr:postcode"),
-            "street": get_or_none(tierheim, "addr:street"),
-            "countrycode": get_or_none(tierheim, "addr:country"),
-        }
-
-        location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=h)
-
-        if location_result.status_code != 201:
-            print(
-                f"{idx} Location for {tierheim["properties"]["name"]}:{location_result.status_code} {location_result.json()} not created")
-            exit()
-
-        org_data = {"name": tierheim["properties"]["name"],
-                    "location": location_result.json()["id"],
-                    "external_object_identifier": f"{tierheim["id"]}",
-                    "external_source_identifier": "OSM"
-                    }
-
-        add_if_available(th_data, optional_data, org_data)
-
-        result = requests.post(endpoint, json=org_data, headers=h)
-
-        if result.status_code != 201:
-            print(f"{idx} {tierheim["properties"]["name"]} failed:{result.status_code} {result.json()}")
-            exit(1)
         else:
-            print(f"{idx} - {json.loads(result.content)["id"]} {tierheim["properties"]["name"]} created")
-
-    print(f"{skipped_low_quality} datapoints skipped for low quality ({100*skipped_low_quality / len(tierheime):.2}%)")
+            location = create_location(tierheim, instance, h)
+            org_data = {"name": tierheim["properties"]["name"],
+                        "external_object_identifier": f"{tierheim["id"]}",
+                        "external_source_identifier": "OSM",
+                        "location": location["id"]
+                        }
+
+            org_data = add_if_available(th_data, optional_data, org_data)
+
+            result = requests.post(endpoint, json=org_data, headers=h)
+
+            if result.status_code != 201:
+                print(f"{idx} {tierheim["properties"]["name"]}:{result.status_code} {result.json()}")
 
 
 if __name__ == "__main__":
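For illustration (hypothetical values, not taken from the diff): for a shelter that does not exist yet, the new else branch builds the organization payload from create_location's response rather than from an inline location_data dict, roughly:

    location = create_location(tierheim, instance, h)       # POSTs to /api/locations/ and returns its JSON
    org_data = {
        "name": "Tierheim Beispielstadt",                    # hypothetical shelter name
        "external_object_identifier": "node/123456789",      # hypothetical OSM id
        "external_source_identifier": "OSM",
        "location": location["id"],
    }
    org_data = add_if_available(th_data, optional_data, org_data)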