# =============================================================================
# SCRIPT OF EXECUTION - ADVANCED CLUSTERING
# =============================================================================
"""
Helper script to run advanced clustering analysis
with different configurations and datasets.
"""

import os
import subprocess
import sys

# NOTE(review): the input paths below point at the "Carciofo" dataset while
# the results directory and the menu banner refer to "Nocciola". The values
# are kept exactly as they were; confirm which dataset is actually intended.
DEFAULT_CSV_PATH = r'C:\Users\sof12\Desktop\ML\Datasets\Carciofo\Robo_GBIF\tags.csv'
DEFAULT_IMAGES_DIR = r'C:\Users\sof12\Desktop\ML\Datasets\Carciofo\Robo_GBIF'
DEFAULT_OUTPUT_DIR = (
    r'C:\Users\sof12\Desktop\ML\Datasets\Nocciola\GBIF\results_clustering_avanzado'
)

# File name of the analysis script, expected next to this helper.
CLUSTERING_SCRIPT_NAME = 'Clustering_Avanzado.py'

# Extensions (compared in lower case) that count as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Packages installed by menu option 3.
DEFAULT_DEPENDENCIES = (
    'tensorflow',
    'scikit-learn',
    'pandas',
    'numpy',
    'matplotlib',
    'seaborn',
    'tqdm',
    'umap-learn',
    'pillow',
)


def print_menu():
    """Show options menu"""
    print("\n" + "=" * 80)
    print(" UNSUPERVISED CLUSTERING - NOCCIOLA")
    print("=" * 80)
    print("\nAvailable options:")
    print(" 1. Run full analysis (ResNet50 + K-Means + Hierarchical)")
    print(" 2. View previous results")
    print(" 3. Install necessary dependencies")
    print(" 4. Verify configuration")
    print(" 5. Exit")
    print("=" * 80)


def install_dependencies(dependencies=None):
    """Install necessary dependencies with pip.

    Each package is installed with its own ``pip install`` call so that one
    failing package does not prevent the others from being attempted.

    Args:
        dependencies: Iterable of package names. Defaults to
            ``DEFAULT_DEPENDENCIES`` when ``None``.

    Returns:
        True if every pip invocation exited with status 0, False otherwise.
    """
    packages = DEFAULT_DEPENDENCIES if dependencies is None else dependencies

    print("\nInstalling dependencies...")
    failed = []
    for dep in packages:
        print(f"Installing {dep}...")
        # check=False: keep going on failure, but remember what failed so the
        # final message does not claim success unconditionally.
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', dep],
            check=False,
        )
        if result.returncode != 0:
            failed.append(dep)

    if failed:
        print(f"Failed to install: {', '.join(failed)}")
        return False

    print("Dependencies installed")
    return True


def _count_images(images_dir):
    """Return the number of image files found recursively under images_dir."""
    return sum(
        1
        for _root, _dirs, files in os.walk(images_dir)
        for name in files
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )


def verify_configuration(csv_path=None, images_dir=None):
    """Verify that the configuration is correct.

    Checks, in order: the tags CSV is an existing file, the images directory
    is an existing directory (and reports how many images it holds), and
    TensorFlow can be imported. GPU availability is reported but is not
    required.

    Args:
        csv_path: Path to the tags CSV. Defaults to ``DEFAULT_CSV_PATH``.
        images_dir: Root directory of the images. Defaults to
            ``DEFAULT_IMAGES_DIR``.

    Returns:
        True if all checks pass, False at the first failing check.
    """
    if csv_path is None:
        csv_path = DEFAULT_CSV_PATH
    if images_dir is None:
        images_dir = DEFAULT_IMAGES_DIR

    print("\nVerifying configuration...")

    # Verify CSV (isfile: a directory with that name is not a valid CSV)
    if not os.path.isfile(csv_path):
        print(f"CSV NOT found: {csv_path}")
        return False
    print(f"CSV found: {csv_path}")

    # Verify images directory (isdir: os.walk on a plain file yields nothing
    # and would otherwise report a "found" directory with 0 images)
    if not os.path.isdir(images_dir):
        print(f"Images directory NOT found: {images_dir}")
        return False
    img_count = _count_images(images_dir)
    print(f"Images directory found: {images_dir}")
    print(f" Images found: {img_count}")

    # Verify TensorFlow (imported lazily: it is heavy and optional for the
    # other menu entries)
    try:
        import tensorflow as tf
    except ImportError:
        print("TensorFlow NOT installed")
        return False
    print(f"TensorFlow installed: {tf.__version__}")

    # Verify GPU
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        print(f"GPU available: {len(gpus)} GPU(s)")
    else:
        print("GPU not available, CPU will be used (slower)")

    return True


def run_clustering(script_path=None):
    """Run clustering analysis in a child Python process.

    Args:
        script_path: Path of the analysis script. Defaults to
            ``Clustering_Avanzado.py`` located next to this file.

    Returns:
        True if the script exited with status 0, False if it is missing or
        exited with a non-zero status.
    """
    if script_path is None:
        # abspath: __file__ may be relative to the current directory.
        script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            CLUSTERING_SCRIPT_NAME,
        )

    print("\nRunning clustering analysis...")
    print("This may take several minutes depending on the number of images...")

    if not os.path.isfile(script_path):
        print(f"\nError during execution: script not found: {script_path}")
        return False

    try:
        subprocess.run([sys.executable, script_path], check=True)
    except subprocess.CalledProcessError as e:
        print(f"\nError during execution: {e}")
        return False

    print("\nAnalysis completed successfully!")
    return True


def _preview_results_csv(csv_path):
    """Print a preview of the results CSV and its K-Means distribution."""
    try:
        import pandas as pd
    except ImportError:
        print("\npandas is not installed; cannot preview results_clustering.csv")
        return

    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError) as exc:
        # pandas' EmptyDataError and ParserError both derive from ValueError.
        print(f"\nCould not read results_clustering.csv: {exc}")
        return

    print("\nPreview of results_clustering.csv:")
    print(df.head(10))
    print(f"\nTotal rows: {len(df)}")
    print(f"Columns: {list(df.columns)}")

    # Show cluster distribution
    if 'cluster_kmeans' in df.columns:
        print("\nCluster distribution (K-Means):")
        print(df['cluster_kmeans'].value_counts().sort_index())


def view_results(output_dir=None):
    """View previous results.

    Lists every entry of the results directory with its size and, when
    ``results_clustering.csv`` is present, prints a preview of it.

    Args:
        output_dir: Directory holding the results. Defaults to
            ``DEFAULT_OUTPUT_DIR``.
    """
    if output_dir is None:
        output_dir = DEFAULT_OUTPUT_DIR

    if not os.path.isdir(output_dir):
        print("\nNo previous results found. Please run the analysis first.")
        return

    print(f"\nResults directory: {output_dir}")
    print("\nFiles found:")

    for name in sorted(os.listdir(output_dir)):
        size_mb = os.path.getsize(os.path.join(output_dir, name)) / (1024 * 1024)
        print(f"  - {name} ({size_mb:.2f} MB)")

    # Show main CSV if it exists
    csv_path = os.path.join(output_dir, 'results_clustering.csv')
    if os.path.exists(csv_path):
        _preview_results_csv(csv_path)


def _prompt(message):
    """Read one line from the user; return None on EOF or Ctrl+C."""
    try:
        return input(message)
    except (EOFError, KeyboardInterrupt):
        return None


def _run_full_analysis():
    """Run the analysis only when the configuration check passes."""
    if verify_configuration():
        run_clustering()
    else:
        print("\nThere are configuration issues. Please fix them before continuing.")


def main():
    """Main function: show the menu and dispatch until the user exits."""
    actions = {
        '1': _run_full_analysis,
        '2': view_results,
        '3': install_dependencies,
        '4': verify_configuration,
    }

    while True:
        print_menu()
        choice = _prompt("\nSelect an option (1-5): ")

        # Closed stdin or Ctrl+C is treated like choosing "Exit" instead of
        # ending the program with a traceback.
        if choice is None or choice.strip() == '5':
            print("\nGoodbye!")
            break

        action = actions.get(choice.strip())
        if action is None:
            print("\nInvalid option. Please try again.")
        else:
            action()

        if _prompt("\nPress Enter to continue...") is None:
            print("\nGoodbye!")
            break


if __name__ == '__main__':
    main()