# Phenology/Code/Unsupervised_learning/run_clustering.py
# 2025-11-25 11:30:37 +01:00
#
# 173 lines
# 5.1 KiB
# Python

# =============================================================================
# RUNNER SCRIPT - ADVANCED CLUSTERING
# =============================================================================
"""
Helper script to run advanced clustering analysis
with different configurations and datasets.
"""
import subprocess
import sys
import os
def print_menu():
    """Print the banner and the five numbered menu options to stdout."""
    rule = "=" * 80
    menu_lines = (
        "\n" + rule,
        " UNSUPERVISED CLUSTERING - NOCCIOLA",
        rule,
        "\nAvailable options:",
        " 1. Run full analysis (ResNet50 + K-Means + Hierarchical)",
        " 2. View previous results",
        " 3. Install necessary dependencies",
        " 4. Verify configuration",
        " 5. Exit",
        rule,
    )
    # A single newline-joined print emits the same text as one print per line.
    print("\n".join(menu_lines))
def install_dependencies(dependencies=None):
    """Install the packages this helper lists as necessary dependencies.

    Each package is installed with ``pip`` through the interpreter running
    this script (``sys.executable -m pip install <package>``).

    Args:
        dependencies: Optional iterable of pip requirement strings. When
            omitted, the built-in default list of packages is installed.

    Returns:
        True if every pip invocation exited with status 0, False if at least
        one package failed to install.
    """
    print("\nInstalling dependencies...")
    if dependencies is None:
        dependencies = [
            'tensorflow',
            'scikit-learn',
            'pandas',
            'numpy',
            'matplotlib',
            'seaborn',
            'tqdm',
            'umap-learn',
            'pillow',
        ]

    failed = []
    for dep in dependencies:
        print(f"Installing {dep}...")
        # check=False on purpose: one failing package must not abort the
        # remaining installs; failures are collected and reported at the end.
        completed = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', dep],
            check=False,
        )
        if completed.returncode != 0:
            failed.append(dep)

    if failed:
        # Previously success was reported even when pip had failed.
        print(f"Failed to install: {', '.join(failed)}")
        return False
    print("Dependencies installed")
    return True
# Default dataset locations checked by verify_configuration().
# NOTE(review): these point at the Carciofo dataset while the menu banner and
# view_results() refer to Nocciola - confirm which dataset is intended.
_DEFAULT_CSV_PATH = r'C:\Users\sof12\Desktop\ML\Datasets\Carciofo\Robo_GBIF\tags.csv'
_DEFAULT_IMAGES_DIR = r'C:\Users\sof12\Desktop\ML\Datasets\Carciofo\Robo_GBIF'
# File suffixes (compared case-insensitively) that count as images.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def verify_configuration(csv_path=_DEFAULT_CSV_PATH, images_dir=_DEFAULT_IMAGES_DIR):
    """Check that the inputs and runtime needed for the analysis are present.

    The checks run in order and stop at the first failure: the CSV file, the
    images directory (whose image files are counted recursively), and an
    importable TensorFlow. GPU availability is reported but never fails the
    check.

    Args:
        csv_path: Path of the CSV file that must exist.
        images_dir: Directory that must exist; images below it are counted.

    Returns:
        True if the CSV, the images directory and TensorFlow are all
        available, False otherwise.
    """
    print("\nVerifying configuration...")

    # isfile/isdir rather than exists: a directory in place of the CSV, or a
    # regular file in place of the images directory, must count as missing.
    if os.path.isfile(csv_path):
        print(f"CSV found: {csv_path}")
    else:
        print(f"CSV NOT found: {csv_path}")
        return False

    if not os.path.isdir(images_dir):
        print(f"Images directory NOT found: {images_dir}")
        return False
    img_count = sum(
        1
        for _root, _dirs, files in os.walk(images_dir)
        for name in files
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )
    print(f"Images directory found: {images_dir}")
    print(f" Images found: {img_count}")

    # Imported lazily so the menu itself works without TensorFlow installed.
    try:
        import tensorflow as tf
    except ImportError:
        print("TensorFlow NOT installed")
        return False
    print(f"TensorFlow installed: {tf.__version__}")

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        print(f"GPU available: {len(gpus)} GPU(s)")
    else:
        print("GPU not available, CPU will be used (slower)")
    return True
def run_clustering():
    """Run ``Clustering_Avanzado.py`` from this file's directory in a child interpreter.

    Returns:
        True when the child process exits with status 0, False when it exits
        with a non-zero status.
    """
    print("\nRunning clustering analysis...")
    print("This may take several minutes depending on the number of images...")

    here = os.path.dirname(__file__)
    target = os.path.join(here, 'Clustering_Avanzado.py')
    command = [sys.executable, target]

    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as err:
        print(f"\nError during execution: {err}")
        return False
    else:
        print("\nAnalysis completed successfully!")
        return True
# Default directory that view_results() inspects.
_DEFAULT_RESULTS_DIR = r'C:\Users\sof12\Desktop\ML\Datasets\Nocciola\GBIF\results_clustering_avanzado'


def view_results(output_dir=_DEFAULT_RESULTS_DIR):
    """List the files of a results directory and preview its main CSV.

    Prints every entry of ``output_dir`` with its size in MB. If the directory
    contains ``results_clustering.csv``, also prints the first 10 rows, the
    row count, the column names and, when a ``cluster_kmeans`` column exists,
    the number of rows per cluster.

    Args:
        output_dir: Directory holding the results to display.

    Returns:
        None. All information is written to stdout.
    """
    # isdir rather than exists: os.listdir raises NotADirectoryError when the
    # path exists but is a regular file.
    if not os.path.isdir(output_dir):
        print("\nNo previous results found. Please run the analysis first.")
        return

    print(f"\nResults directory: {output_dir}")
    print("\nFiles found:")
    for name in sorted(os.listdir(output_dir)):
        size_mb = os.path.getsize(os.path.join(output_dir, name)) / (1024 * 1024)
        print(f" - {name} ({size_mb:.2f} MB)")

    csv_path = os.path.join(output_dir, 'results_clustering.csv')
    if not os.path.isfile(csv_path):
        return

    # pandas is only needed for the preview, so it is imported lazily and its
    # absence must not crash the menu.
    try:
        import pandas as pd
    except ImportError:
        print("\npandas is not installed; cannot preview results_clustering.csv")
        return
    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError) as err:
        # pandas' EmptyDataError and ParserError are ValueError subclasses.
        print(f"\nCould not read results_clustering.csv: {err}")
        return

    print("\nPreview of results_clustering.csv:")
    print(df.head(10))
    print(f"\nTotal rows: {len(df)}")
    print(f"Columns: {list(df.columns)}")

    if 'cluster_kmeans' in df.columns:
        print("\nCluster distribution (K-Means):")
        print(df['cluster_kmeans'].value_counts().sort_index())
def main():
    """Drive the interactive menu until the user picks option 5 (exit)."""
    # Options that map straight onto a single call with no extra logic.
    simple_actions = {
        '2': view_results,
        '3': install_dependencies,
        '4': verify_configuration,
    }
    while True:
        print_menu()
        choice = input("\nSelect an option (1-5): ").strip()

        if choice == '5':
            print("\nGoodbye!")
            break

        if choice == '1':
            # The analysis is only started when the configuration check passes.
            if verify_configuration():
                run_clustering()
            else:
                print("\nThere are configuration issues. Please fix them before continuing.")
        elif choice in simple_actions:
            simple_actions[choice]()
        else:
            print("\nInvalid option. Please try again.")

        # Pause so the output stays visible before the menu is printed again.
        input("\nPress Enter to continue...")
# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()