173 lines
5.1 KiB
Python
173 lines
5.1 KiB
Python
# =============================================================================
# EXECUTION SCRIPT - ADVANCED CLUSTERING
# =============================================================================
|
|
"""
|
|
Helper script to run advanced clustering analysis
|
|
with different configurations and datasets.
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
|
|
def print_menu():
    """Print the interactive menu: a banner followed by the five numbered options."""
    rule = "=" * 80
    menu_lines = (
        "\n" + rule,
        " UNSUPERVISED CLUSTERING - NOCCIOLA",
        rule,
        "\nAvailable options:",
        " 1. Run full analysis (ResNet50 + K-Means + Hierarchical)",
        " 2. View previous results",
        " 3. Install necessary dependencies",
        " 4. Verify configuration",
        " 5. Exit",
        rule,
    )
    # One print per entry so the emitted text matches line for line.
    for entry in menu_lines:
        print(entry)
|
|
|
|
def install_dependencies():
    """Install the packages used by the clustering analysis, one pip call each.

    Every package is attempted even when an earlier install fails
    (best effort). The closing message names the packages whose pip call
    exited with a non-zero status instead of unconditionally reporting
    success.

    Returns:
        None. Progress and the final summary are printed to stdout.
    """
    print("\nInstalling dependencies...")

    dependencies = [
        'tensorflow',
        'scikit-learn',
        'pandas',
        'numpy',
        'matplotlib',
        'seaborn',
        'tqdm',
        'umap-learn',
        'pillow',
    ]

    failed = []
    for dep in dependencies:
        print(f"Installing {dep}...")
        # check=False so one failing package does not abort the remaining
        # installs; the exit status is inspected explicitly instead.
        outcome = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', dep],
            check=False,
        )
        if outcome.returncode != 0:
            failed.append(dep)

    if failed:
        print(f"Failed to install: {', '.join(failed)}")
    else:
        print("Dependencies installed")
|
|
|
|
def verify_configuration(csv_path=None, images_dir=None):
    """Check that the dataset inputs and TensorFlow are available.

    Args:
        csv_path: Path of the tags CSV file. When None, the script's
            built-in default location is used.
        images_dir: Directory that is walked recursively to count files
            ending in .jpg, .jpeg or .png (case-insensitive). When None,
            the script's built-in default location is used.

    Returns:
        True when the CSV is an existing file, the images directory is an
        existing directory and TensorFlow can be imported; False as soon
        as one of these checks fails. A missing GPU only prints a notice.
    """
    if csv_path is None:
        csv_path = r'C:\Users\sof12\Desktop\ML\Datasets\Carciofo\Robo_GBIF\tags.csv'
    if images_dir is None:
        images_dir = r'C:\Users\sof12\Desktop\ML\Datasets\Carciofo\Robo_GBIF'

    print("\nVerifying configuration...")

    # Verify CSV: isfile rather than exists, so a directory at this path
    # is not mistaken for the CSV.
    if not os.path.isfile(csv_path):
        print(f"CSV NOT found: {csv_path}")
        return False
    print(f"CSV found: {csv_path}")

    # Verify images directory: isdir rather than exists, so a regular file
    # is rejected instead of being silently walked as an empty tree.
    if not os.path.isdir(images_dir):
        print(f"Images directory NOT found: {images_dir}")
        return False

    image_suffixes = ('.jpg', '.jpeg', '.png')
    img_count = 0
    for _root, _dirs, files in os.walk(images_dir):
        img_count += sum(1 for name in files if name.lower().endswith(image_suffixes))
    print(f"Images directory found: {images_dir}")
    print(f" Images found: {img_count}")

    # Verify TensorFlow; imported here so the rest of the menu works
    # without it installed.
    try:
        import tensorflow as tf
    except ImportError:
        print("TensorFlow NOT installed")
        return False
    print(f"TensorFlow installed: {tf.__version__}")

    # Verify GPU (informational only; does not affect the result).
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        print(f"GPU available: {len(gpus)} GPU(s)")
    else:
        print("GPU not available, CPU will be used (slower)")

    return True
|
|
|
|
def run_clustering():
    """Launch Clustering_Avanzado.py (located next to this file) in a child interpreter.

    Returns:
        True when the child process exits with status 0, False when it
        exits with a non-zero status.
    """
    print("\nRunning clustering analysis...")
    print("This may take several minutes depending on the number of images...")

    here = os.path.dirname(__file__)
    script_path = os.path.join(here, 'Clustering_Avanzado.py')
    command = [sys.executable, script_path]

    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"\nError during execution: {e}")
        return False
    else:
        print("\nAnalysis completed successfully!")
        return True
|
|
|
|
def view_results(output_dir=None):
    """List the files of a previous run and preview its main CSV.

    Args:
        output_dir: Directory holding the results. When None, the script's
            built-in default location is used.

    Prints every entry of the directory (sorted by name) with its size in
    MB. If 'results_clustering.csv' is present, prints its first 10 rows,
    its row count and column names, and, when a 'cluster_kmeans' column
    exists, the number of rows per cluster.

    Returns:
        None. Everything is reported on stdout.
    """
    if output_dir is None:
        output_dir = r'C:\Users\sof12\Desktop\ML\Datasets\Nocciola\GBIF\results_clustering_avanzado'

    # isdir rather than exists: a regular file at this path would make
    # os.listdir raise below.
    if not os.path.isdir(output_dir):
        print("\nNo previous results found. Please run the analysis first.")
        return

    print(f"\nResults directory: {output_dir}")
    print("\nFiles found:")

    for name in sorted(os.listdir(output_dir)):
        size_mb = os.path.getsize(os.path.join(output_dir, name)) / (1024 * 1024)
        print(f" - {name} ({size_mb:.2f} MB)")

    # Show main CSV if it exists
    csv_path = os.path.join(output_dir, 'results_clustering.csv')
    if not os.path.isfile(csv_path):
        return

    # pandas is imported lazily; report its absence instead of crashing
    # the menu with a traceback.
    try:
        import pandas as pd
    except ImportError:
        print("\npandas NOT installed, cannot preview results_clustering.csv")
        return

    df = pd.read_csv(csv_path)
    print("\nPreview of results_clustering.csv:")
    print(df.head(10))
    print(f"\nTotal rows: {len(df)}")
    print(f"Columns: {list(df.columns)}")

    # Show cluster distribution
    if 'cluster_kmeans' in df.columns:
        print("\nCluster distribution (K-Means):")
        print(df['cluster_kmeans'].value_counts().sort_index())
|
|
|
|
def main():
    """Run the interactive menu loop until the user chooses to exit.

    Each iteration prints the menu, reads a choice and dispatches it:
    1 verifies the configuration and, if that succeeds, runs the
    clustering; 2 shows previous results; 3 installs dependencies;
    4 verifies the configuration; 5 exits. Any other input prints an
    error. After every non-exit choice the user is asked to press Enter.

    A closed stdin (EOFError) or Ctrl+C (KeyboardInterrupt) at either
    prompt ends the loop cleanly instead of surfacing a traceback.
    """
    while True:
        print_menu()
        try:
            choice = input("\nSelect an option (1-5): ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nGoodbye!")
            break

        if choice == '1':
            # Only start the (long) analysis when the inputs check out.
            if verify_configuration():
                run_clustering()
            else:
                print("\nThere are configuration issues. Please fix them before continuing.")
        elif choice == '2':
            view_results()
        elif choice == '3':
            install_dependencies()
        elif choice == '4':
            verify_configuration()
        elif choice == '5':
            print("\nGoodbye!")
            break
        else:
            print("\nInvalid option. Please try again.")

        try:
            input("\nPress Enter to continue...")
        except (EOFError, KeyboardInterrupt):
            print("\nGoodbye!")
            break
|
|
|
|
# Start the interactive menu only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|