97 lines
2.7 KiB
Python
97 lines
2.7 KiB
Python
"""
This script downloads GBIF data using the gbif_dl library.
Important: This script requires an internet connection.

Information about the species used in the thesis:

Species: Corylus L. (hazelnut)
taxonKey = 2875967
License: CC BY 4.0

Species: Cynara cardunculus L. (artichoke)
taxonKey = 3112364
License: CC BY 4.0
"""
|
|
|
|
import gbif_dl
|
|
import json
|
|
import os
|
|
|
|
def get_gbif_data(
    taxon_key: int = 3112364,
    dataset_dir: str = "dataset_gbif_artichoke",
    nb_samples: int = 8000,
) -> bool:
    """
    Download GBIF images and their metadata for a single taxon.

    With the default arguments this targets taxonKey 3112364
    (Cynara cardunculus L., artichoke) restricted to CC BY 4.0 media and
    stores everything under ``dataset_gbif_artichoke``.

    Args:
        taxon_key: GBIF taxon key of the species to download.
        dataset_dir: Relative or absolute directory where the images and
            ``metadata.json`` are written. It is created if missing.
        nb_samples: Number of samples requested from the URL generator.

    Returns:
        True if at least one image was downloaded. False if nothing was
        downloaded or if an unexpected error aborted the run (the error
        is printed, not raised).
    """
    try:
        print("Configuring GBIF query")

        query = {
            "taxonKey": [taxon_key],  # Taxon key for the requested species
            "license": ["CC_BY_4_0"],  # Filter only by CC BY 4.0 license
        }

        print("Generating download URLs")

        # Generate data URLs
        data_gen = gbif_dl.api.generate_urls(
            queries=query,
            label="taxonKey",
            nb_samples=nb_samples,
        )

        # A relative path (no leading slash) keeps the dataset next to the
        # working directory instead of at the filesystem root.
        os.makedirs(dataset_dir, exist_ok=True)
        print(f"Directory '{dataset_dir}' created or verified")

        metadata_list = []
        download_count = 0

        print("Starting image download")

        # Items are downloaded one by one so that a failure on a single
        # image is reported and skipped without aborting the whole run.
        for i, item in enumerate(data_gen, 1):
            try:
                print(f"Processing image {i}...")
                # Metadata is recorded for every attempted item, including
                # those whose download fails below.
                metadata_list.append(item)

                gbif_dl.dl_async.download([item], root=dataset_dir)
                download_count += 1
                print(f"Image {i} downloaded successfully")
            except Exception as e:
                # Truncate long error messages to keep the log readable.
                print(f"Error in image {i}: {str(e)[:100]}...")

        # Save metadata
        print("Saving metadata...")
        metadata_file = os.path.join(dataset_dir, "metadata.json")
        with open(metadata_file, "w", encoding="utf-8") as f:
            json.dump(metadata_list, f, indent=2, ensure_ascii=False)

        print("Process completed:")
        print(f" Images downloaded: {download_count}")
        print(f" Metadata saved in: {metadata_file}")

        return download_count > 0

    except Exception as e:
        # Top-level boundary of the script: report the problem and signal
        # failure to the caller instead of crashing.
        print(f"Error: {e}")
        return False
|
|
|
|
def main():
    """
    Main function of the script.

    Prints a banner, runs the GBIF download and reports whether it
    succeeded based on the boolean returned by ``get_gbif_data``.
    """
    print("STARTING GBIF DATA DOWNLOAD")
    print("=" * 50)

    # Execute download
    success = get_gbif_data()

    if success:
        print("\nProcess finished, please review the results in the dataset folder for the downloaded images")
    else:
        print("\n The process failed, please check the error messages above.")
|
|
|
|
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()