diff --git a/scripts/upload_animal_shelters.py b/scripts/upload_animal_shelters.py
index 2dabe57..26414a3 100644
--- a/scripts/upload_animal_shelters.py
+++ b/scripts/upload_animal_shelters.py
@@ -1,10 +1,11 @@
 import argparse
 import json
 import os
 
 import requests
+from tqdm import tqdm
 
-DEFAULT_OSM_DATA_FILE = "osm_data.geojson"
+DEFAULT_OSM_DATA_FILE = "export.geojson"
 
 
 def parse_args():
@@ -30,67 +31,117 @@ def get_config():
     return api_token, instance, data_file
 
 
-def load_osm_data(file_path):
-    """Load OSM data from a GeoJSON file."""
-    with open(file_path, "r", encoding="utf-8") as file:
-        data = json.load(file)
-    return data
-
-
-def load_osm_data(file_path):
-    #Load OSM data from a GeoJSON file.
-    with open(file_path, "r", encoding="utf-8") as file:
-        data = json.load(file)
-    return data
-
-
-def transform_osm_data(feature):
-    #Transform a single OSM feature into the API payload format
-    prop = feature.get("properties", {})
-    geometry = feature.get("geometry", {})
-
-    return {
-        "name": prop.get("name", "Unnamed Shelter"),
-        "phone": prop.get("phone"),
-        "website": prop.get("website"),
-        "opening_hours": prop.get("opening_hours"),
-        "email": prop.get("email"),
-        "location_string": f'{prop.get("addr:street", "")} {prop.get("addr:housenumber", "")} {prop.get("addr:postcode", "")} {prop.get("addr:city", "")}',
-        "external_object_id": prop.get("@id"),
-        "external_source_id": "OSM"
-    }
-
-
-def send_to_api(data, endpoint, headers):
-    # Send transformed data to the Notfellchen API.
-    response = requests.post(endpoint, headers=headers, json=data)
-    if response.status_code == 201:
-        print(f"Success: Shelter '{data['name']}' uploaded.")
-    elif response.status_code == 400:
-        print(f"Error: Shelter '{data['name']}' already exists or invalid data. {response.text}")
-    else:
-        print(f"Unexpected Error: {response.status_code} - {response.text}")
-        raise ConnectionError
+def get_or_none(data, key):
+    """Return the value of tag ``key`` from a GeoJSON feature's properties.
+
+    Despite the name, a missing tag yields "" rather than ``None``, so the
+    result can be passed straight to ``add``.
+    """
+    return data["properties"].get(key, "")
+
+
+def choose(keys, data, replace=False):
+    """Return the value of the first of ``keys`` that is present in ``data``.
+
+    With ``replace=True``, spaces, dashes and parentheses are stripped from
+    the value (used for phone numbers). Returns ``None`` if no key matches.
+    """
+    for key in keys:
+        if key in data:
+            value = data[key]
+            if replace:
+                for char in " -()":
+                    value = value.replace(char, "")
+            return value
+    return None
+
+
+def add(value, platform):
+    """Expand a bare account name into a profile URL on ``platform``.
+
+    Values that already contain the platform name (e.g. a full URL) are
+    returned unchanged; empty values yield ``None``.
+    """
+    if not value:
+        return None
+    if platform in value:
+        return value
+    return f"https://www.{platform}.com/{value}"
+
+
+def https(value):
+    """Return ``value`` as an https:// URL, or ``None`` for empty values."""
+    if not value:
+        return None
+    if value.startswith("https://"):
+        return value
+    # Upgrade an explicit http:// scheme. Only the prefix is tested, because
+    # "https" may also occur further inside a URL.
+    if value.startswith("http://"):
+        value = value[len("http://"):]
+    return f"https://{value}"
+
+
+def build_location_string(properties):
+    """Format address tags as "<street> <housenumber>, <postcode> <city>".
+
+    Tags that are missing or empty are skipped, so no stray spaces or commas
+    end up in the string.
+    """
+    street = " ".join(
+        properties[tag] for tag in ("addr:street", "addr:housenumber") if properties.get(tag)
+    )
+    city = " ".join(
+        properties[tag] for tag in ("addr:postcode", "addr:city") if properties.get(tag)
+    )
+    return ", ".join(part for part in (street, city) if part)
 
 
 def main():
     # Get configuration
     api_token, instance, data_file = get_config()
 
     # Set headers and endpoint
     endpoint = f"{instance}/api/organizations/"
     headers = {
         "Authorization": f"Token {api_token}",
         "Content-Type": "application/json"
     }
 
     # Step 1: Load OSM data
-    osm_data = load_osm_data(data_file)
+    with open(data_file, encoding="utf-8") as file:
+        osm_data = json.load(file)
 
     # Step 2: Process each shelter and send it to the API
-    for feature in osm_data.get("features", []):
-        shelter_data = transform_osm_data(feature)
-        send_to_api(shelter_data, endpoint, headers)
+    # tqdm wraps the feature list itself (not enumerate) so it can read the
+    # total and show real progress.
+    for idx, tierheim in enumerate(tqdm(osm_data.get("features", []))):
+        properties = tierheim.get("properties") or {}
+        # Entries without a name or a city are skipped.
+        if "name" not in properties or "addr:city" not in properties:
+            continue
+
+        data = {
+            "name": properties["name"],
+            "location_string": build_location_string(properties),
+            "phone_number": choose(("contact:phone", "phone"), properties, replace=True),
+            "fediverse_profile": get_or_none(tierheim, "contact:mastodon"),
+            "facebook": https(add(get_or_none(tierheim, "contact:facebook"), "facebook")),
+            "instagram": https(add(get_or_none(tierheim, "contact:instagram"), "instagram")),
+            "website": https(choose(("contact:website", "website"), properties)),
+            "email": choose(("contact:email", "email"), properties),
+            "description": get_or_none(tierheim, "opening_hours"),
+            "external_object_identifier": str(tierheim["id"]),
+            "external_source_identifier": "OSM",
+        }
+
+        # Bound the request so one stalled connection cannot hang the upload.
+        result = requests.post(endpoint, json=data, headers=headers, timeout=30)
+
+        if result.status_code != 201:
+            # Print the raw body: error responses are not guaranteed to be
+            # JSON, and result.json() would raise on them.
+            print(f"{idx} {properties['name']}:{result.status_code} {result.text}")
 
 
 if __name__ == "__main__":