Merge branch 'overpass-take-2' into develop
# Conflicts: # scripts/upload_animal_shelters.py
This commit is contained in:
		@@ -4,17 +4,25 @@ import os
 | 
			
		||||
from types import SimpleNamespace
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
# TODO: consider using OSMPythonTools instead of requests or overpass library
 | 
			
		||||
from osmtogeojson import osmtogeojson
 | 
			
		||||
from tqdm import tqdm
 | 
			
		||||
 | 
			
		||||
DEFAULT_OSM_DATA_FILE = "export.geojson"
 | 
			
		||||
# Search area must be the official name, e.g. "Germany" is not a valid area name in Overpass API
 | 
			
		||||
# Consider instead finding & using the code within the query itself, e.g. "ISO3166-1"="DE"
 | 
			
		||||
DEFAULT_OVERPASS_SEARCH_AREA = "Deutschland"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_args():
    """Parse command-line arguments.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. ``api_token``,
        ``area``, ``instance`` and ``data_file`` are strings, or None when the
        option is omitted; ``use_cached`` is True only when ``--use-cached``
        is passed.
    """
    # Construct the parser exactly once.
    parser = argparse.ArgumentParser(
        description="Download animal shelter data from the Overpass API to the Notfellchen API.")
    parser.add_argument("--api-token", type=str, help="API token for authentication.")
    parser.add_argument("--area", type=str, help="Area to search for animal shelters (default: Deutschland).")
    parser.add_argument("--instance", type=str, help="API instance URL.")
    parser.add_argument("--data-file", type=str, help="Path to the GeoJSON file containing (only) animal shelters.")
    parser.add_argument("--use-cached", action='store_true', help="Use the stored GeoJSON file")
    return parser.parse_args()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -23,16 +31,26 @@ def get_config():
 | 
			
		||||
    args = parse_args()
 | 
			
		||||
 | 
			
		||||
    api_token = args.api_token or os.getenv("NOTFELLCHEN_API_TOKEN")
 | 
			
		||||
    # TODO: document new environment variable NOTFELLCHEN_AREA
 | 
			
		||||
    area = args.area or os.getenv("NOTFELLCHEN_AREA", DEFAULT_OVERPASS_SEARCH_AREA)
 | 
			
		||||
    instance = args.instance or os.getenv("NOTFELLCHEN_INSTANCE")
 | 
			
		||||
    data_file = args.data_file or os.getenv("NOTFELLCHEN_DATA_FILE", DEFAULT_OSM_DATA_FILE)
 | 
			
		||||
    use_cached = args.use_cached or os.getenv("NOTFELLCHEN_USE_CACHED", False)
 | 
			
		||||
 | 
			
		||||
    if not api_token or not instance:
 | 
			
		||||
        raise ValueError("API token and instance URL must be provided via environment variables or CLI arguments.")
 | 
			
		||||
 | 
			
		||||
    return api_token, instance, data_file
 | 
			
		||||
    return api_token, area, instance, data_file, use_cached
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_or_none(data, key):
    """Return ``data["properties"][key]``, or None when the key is absent.

    ``data`` must have a ``"properties"`` mapping (a KeyError is raised
    otherwise, as before).
    """
    # A single lookup instead of a membership test followed by indexing.
    return data["properties"].get(key)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_or_empty(data, key):
 | 
			
		||||
    if key in data["properties"].keys():
 | 
			
		||||
        return data["properties"][key]
 | 
			
		||||
    else:
 | 
			
		||||
@@ -70,6 +88,27 @@ def https(value):
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def calc_coordinate_center(coordinates):
    """
    Calculates the center as the arithmetic mean of the list of coordinates.

    Not perfect because earth is a sphere (citation needed) but good enough.

    Each coordinate is a sequence starting with (longitude, latitude). Any
    further elements, such as the optional elevation of a GeoJSON position,
    are ignored.

    Returns (longitude, latitude), or (None, None) when there are no coordinates.
    """
    lon_sum = 0.0
    lat_sum = 0.0
    count = 0

    for position in coordinates or ():
        # Index instead of unpacking so positions with a third element do not raise.
        lon_sum += position[0]
        lat_sum += position[1]
        count += 1

    # Checking the count (not the truthiness of the input) also covers empty iterators.
    if count == 0:
        return None, None

    return lon_sum / count, lat_sum / count
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_center_coordinates(geometry):
 | 
			
		||||
    """
 | 
			
		||||
    Given a GeoJSON geometry dict, return (longitude, latitude)
 | 
			
		||||
@@ -93,25 +132,25 @@ def get_center_coordinates(geometry):
 | 
			
		||||
        raise ValueError(f"Unsupported geometry type: {geom_type}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def calc_coordinate_center(coordinates):
    """
    Calculates the center as the arithmetic mean of the list of coordinates.

    Not perfect because earth is a sphere (citation needed) but good enough.

    Each coordinate is a sequence starting with (longitude, latitude). Any
    further elements, such as the optional elevation of a GeoJSON position,
    are ignored.

    Returns (longitude, latitude), or (None, None) when there are no coordinates.
    """
    lon_sum = 0.0
    lat_sum = 0.0
    count = 0

    for position in coordinates or ():
        # Index instead of unpacking so positions with a third element do not raise.
        lon_sum += position[0]
        lat_sum += position[1]
        count += 1

    # Checking the count (not the truthiness of the input) also covers empty iterators.
    if count == 0:
        return None, None

    return lon_sum / count, lat_sum / count
 | 
			
		||||
# TODO: take note of new get_overpass_result function which does the bulk of the new overpass query work
 | 
			
		||||
def get_overpass_result(area, data_file):
    """Fetch animal shelters in the specified area from the Overpass API.

    Queries all nodes, ways and relations tagged ``amenity=animal_shelter``
    inside the area whose ``name`` equals ``area``, converts the response with
    ``osmtogeojson.process_osm_json`` and writes the converted result to
    ``data_file`` as JSON.

    Args:
        area: Name of the Overpass search area (e.g. "Deutschland").
        data_file: Path of the file the converted result is written to.

    Returns:
        The converted result, or None when the API does not answer with
        HTTP 200.

    Note:
        The request now uses a client-side timeout, so a stalled connection
        surfaces as an exception from ``requests`` instead of blocking forever.
    """
    overpass_endpoint = "https://overpass-api.de/api/interpreter"
    # Escape backslashes and double quotes so the area name cannot terminate
    # the quoted string and alter the query.
    escaped_area = area.replace("\\", "\\\\").replace('"', '\\"')
    overpass_query = f"""
        [out:json][timeout:25];
        area[name="{escaped_area}"]->.searchArea;
        nwr["amenity"="animal_shelter"](area.searchArea);
        out body;
        >;
        out skel qt;
        """
    # The query itself is limited to 25 s server-side; allow some slack for transfer.
    r = requests.get(overpass_endpoint, params={'data': overpass_query}, timeout=60)
    if r.status_code != 200:
        # Keep the "None on failure" contract, but say why instead of failing silently.
        print(f"Overpass request failed with status {r.status_code}")
        return None

    rjson = r.json()
    result = osmtogeojson.process_osm_json(rjson)
    with open(data_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False)
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def add_if_available(base_data, keys, result):
 | 
			
		||||
@@ -122,23 +161,51 @@ def add_if_available(base_data, keys, result):
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def create_location(tierheim, instance, headers):
    """Create a location for an animal shelter via the API.

    Args:
        tierheim: GeoJSON feature of the shelter; reads ``id``, ``geometry``
            and the ``name`` / ``addr:*`` entries of ``properties``.
        instance: Base URL of the API instance.
        headers: HTTP headers sent with the request.

    Returns:
        The decoded JSON body of the API response for the created location.

    Exits the program with status 1 when the API does not answer with HTTP 201.
    """
    name = tierheim["properties"]["name"]
    # Compute the center once instead of once per coordinate.
    center = get_center_coordinates(tierheim["geometry"])
    location_data = {
        "place_id": tierheim["id"],
        "longitude": center[0],
        "latitude": center[1],
        "name": name,
        "city": tierheim["properties"]["addr:city"],
        "housenumber": get_or_empty(tierheim, "addr:housenumber"),
        "postcode": get_or_empty(tierheim, "addr:postcode"),
        "street": get_or_empty(tierheim, "addr:street"),
        "countrycode": get_or_empty(tierheim, "addr:country"),
    }

    location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=headers)

    if location_result.status_code != 201:
        # Print the raw body: an error response is not guaranteed to be JSON,
        # and decoding it here could hide the actual failure.
        print(f"Location for {name}:{location_result.status_code} {location_result.text} not created")
        # Signal failure with a non-zero exit status.
        raise SystemExit(1)
    return location_result.json()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
    api_token, instance, data_file = get_config()
 | 
			
		||||
    api_token, area, instance, data_file, use_cached = get_config()
 | 
			
		||||
    if not use_cached:
 | 
			
		||||
        # Query shelters
 | 
			
		||||
        overpass_result = get_overpass_result(area, data_file)
 | 
			
		||||
        if overpass_result is None:
 | 
			
		||||
            print("Error: get_overpass_result returned None")
 | 
			
		||||
            return
 | 
			
		||||
        print(f"Response type: {type(overpass_result)}")
 | 
			
		||||
        print(f"Response content: {overpass_result}")
 | 
			
		||||
    else:
 | 
			
		||||
        with open(data_file, 'r', encoding='utf-8') as f:
 | 
			
		||||
            overpass_result = json.load(f)
 | 
			
		||||
 | 
			
		||||
    # Set headers and endpoint
 | 
			
		||||
    endpoint = f"{instance}/api/organizations/"
 | 
			
		||||
    h = {'Authorization': f'Token {api_token}', "content-type": "application/json"}
 | 
			
		||||
 | 
			
		||||
    with open(data_file, encoding="utf8") as f:
 | 
			
		||||
        d = json.load(f)
 | 
			
		||||
    tierheime = overpass_result["features"]
 | 
			
		||||
 | 
			
		||||
    skipped_low_quality = 0
 | 
			
		||||
 | 
			
		||||
    tierheime = d["features"]
 | 
			
		||||
 | 
			
		||||
    for idx, tierheim in enumerate(tierheime):
 | 
			
		||||
    for idx, tierheim in enumerate(tqdm(tierheime)):
 | 
			
		||||
        # Check if data is low quality
 | 
			
		||||
        if "name" not in tierheim["properties"].keys() or "addr:city" not in tierheim["properties"].keys():
 | 
			
		||||
            skipped_low_quality = skipped_low_quality + 1
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        # Load TH data in for easier accessing
 | 
			
		||||
@@ -147,8 +214,8 @@ def main():
 | 
			
		||||
            email=choose(("contact:email", "email"), tierheim["properties"]),
 | 
			
		||||
            phone_number=choose(("contact:phone", "phone"), tierheim["properties"], replace=True),
 | 
			
		||||
            fediverse_profile=get_or_none(tierheim, "contact:mastodon"),
 | 
			
		||||
            facebook=https(add(get_or_none(tierheim, "contact:facebook"), "facebook")),
 | 
			
		||||
            instagram=https(add(get_or_none(tierheim, "contact:instagram"), "instagram")),
 | 
			
		||||
            facebook=https(add(get_or_empty(tierheim, "contact:facebook"), "facebook")),
 | 
			
		||||
            instagram=https(add(get_or_empty(tierheim, "contact:instagram"), "instagram")),
 | 
			
		||||
            website=https(choose(("contact:website", "website"), tierheim["properties"])),
 | 
			
		||||
            description=get_or_none(tierheim, "opening_hours"),
 | 
			
		||||
            external_object_identifier=tierheim["id"],
 | 
			
		||||
@@ -168,51 +235,31 @@ def main():
 | 
			
		||||
            print(f"{th_data.name} already exists as ID {org_id}.")
 | 
			
		||||
            org_patch_data = {"id": org_id,
 | 
			
		||||
                              "name": th_data.name}
 | 
			
		||||
            if search_result.json()[0]["location"] is None:
 | 
			
		||||
                location = create_location(tierheim, instance, h)
 | 
			
		||||
                org_patch_data["location"] = location["id"]
 | 
			
		||||
 | 
			
		||||
            add_if_available(th_data, optional_data, org_patch_data)
 | 
			
		||||
            org_patch_data = add_if_available(th_data, optional_data, org_patch_data)
 | 
			
		||||
 | 
			
		||||
            result = requests.patch(endpoint, json=org_patch_data, headers=h)
 | 
			
		||||
            if result.status_code != 200:
 | 
			
		||||
                print(f"Updating {tierheim['properties']['name']} failed:{result.status_code} {result.json()}")
 | 
			
		||||
                exit()
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        location_data = {
 | 
			
		||||
            "place_id": tierheim["id"],
 | 
			
		||||
            "longitude": get_center_coordinates(tierheim["geometry"])[0],
 | 
			
		||||
            "latitude": get_center_coordinates(tierheim["geometry"])[1],
 | 
			
		||||
            "name": tierheim["properties"]["name"],
 | 
			
		||||
            "city": tierheim["properties"]["addr:city"],
 | 
			
		||||
            "housenumber": get_or_none(tierheim, "addr:housenumber"),
 | 
			
		||||
            "postcode": get_or_none(tierheim, "addr:postcode"),
 | 
			
		||||
            "street": get_or_none(tierheim, "addr:street"),
 | 
			
		||||
            "countrycode": get_or_none(tierheim, "addr:country"),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        location_result = requests.post(f"{instance}/api/locations/", json=location_data, headers=h)
 | 
			
		||||
 | 
			
		||||
        if location_result.status_code != 201:
 | 
			
		||||
            print(
 | 
			
		||||
                f"{idx} Location for {tierheim["properties"]["name"]}:{location_result.status_code} {location_result.json()} not created")
 | 
			
		||||
            exit()
 | 
			
		||||
 | 
			
		||||
        org_data = {"name": tierheim["properties"]["name"],
 | 
			
		||||
                    "location": location_result.json()["id"],
 | 
			
		||||
                    "external_object_identifier": f"{tierheim["id"]}",
 | 
			
		||||
                    "external_source_identifier": "OSM"
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
        add_if_available(th_data, optional_data, org_data)
 | 
			
		||||
 | 
			
		||||
        result = requests.post(endpoint, json=org_data, headers=h)
 | 
			
		||||
 | 
			
		||||
        if result.status_code != 201:
 | 
			
		||||
            print(f"{idx} {tierheim["properties"]["name"]} failed:{result.status_code} {result.json()}")
 | 
			
		||||
            exit(1)
 | 
			
		||||
        else:
 | 
			
		||||
            print(f"{idx} - {json.loads(result.content)["id"]} {tierheim["properties"]["name"]} created")
 | 
			
		||||
            location = create_location(tierheim, instance, h)
 | 
			
		||||
            org_data = {"name": tierheim["properties"]["name"],
 | 
			
		||||
                        "external_object_identifier": f"{tierheim["id"]}",
 | 
			
		||||
                        "external_source_identifier": "OSM",
 | 
			
		||||
                        "location": location["id"]
 | 
			
		||||
                        }
 | 
			
		||||
 | 
			
		||||
    print(f"{skipped_low_quality} datapoints skipped for low quality ({100*skipped_low_quality / len(tierheime):.2}%)")
 | 
			
		||||
            org_data = add_if_available(th_data, optional_data, org_data)
 | 
			
		||||
 | 
			
		||||
            result = requests.post(endpoint, json=org_data, headers=h)
 | 
			
		||||
 | 
			
		||||
            if result.status_code != 201:
 | 
			
		||||
                print(f"{idx} {tierheim["properties"]["name"]}:{result.status_code} {result.json()}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user