""" This script downloads GBIF data using the gbif_dl library. Important: This script requires an internet connection. Information about the species used in the thesis: Specie: Corylus L. (hazelnut) taxonKey = 2875967 License: CC BY 4.0 Specie: Cynara Cardunculus L. (artichoke) taxonKey = 3112364 License: CC BY 4.0 """ import gbif_dl import json import os def get_gbif_data(): """ Download GBIF data for Corylus L. with CC BY 4.0 license. """ try: print("Configuring GBIF query") query = { "taxonKey": [3112364], # Taxon Key for the specified species "license": ["CC_BY_4_0"] # Filter only by CC BY 4.0 license } print("Generating download URLs") # Generate data URLs data_gen = gbif_dl.api.generate_urls( queries=query, label="taxonKey", nb_samples=8000, # The first iterations were with 100 images, just to test ) # Create directory CORRECTLY (without leading slash) dataset_dir = "dataset_gbif_artichoke" os.makedirs(dataset_dir, exist_ok=True) print(f"Directory '{dataset_dir}' created or verified") metadata_list = [] download_count = 0 print("Starting image download") # Iterate over every item for i, item in enumerate(data_gen, 1): try: print(f"Processing image {i}...") metadata_list.append(item) # Use the simplest working method gbif_dl.dl_async.download([item], root=dataset_dir) download_count += 1 print(f"Image {i} downloaded successfully") except Exception as e: print(f"Error in image {i}: {str(e)[:100]}...") continue # Save metadata print("Saving metadata...") metadata_file = os.path.join(dataset_dir, "metadata.json") with open(metadata_file, "w", encoding="utf-8") as f: json.dump(metadata_list, f, indent=2, ensure_ascii=False) print(f"Process completed:") print(f" Images downloaded: {download_count}") print(f" Metadata saved in: {metadata_file}") return download_count > 0 except Exception as e: print(f"Error: {e}") return False def main(): """ Main function of the script. """ print("STARTING GBIF DATA DOWNLOAD") print("=" * 50) # Execute download success = get_gbif_data() if success: print("\nProcess finished, please review the results in the' folder for the downloaded images") else: print("\n The process failed, please check the error messages above.") if __name__ == "__main__": main()