import os
import sys
import subprocess
import glob
import json
import datetime
import shlex
import re
import time
import argparse

# --- CONFIGURATION ---

# Interpreter command used to launch every pipeline script.
# get_python_command() splits this value with shlex, so it may carry extra
# arguments; an empty value falls back to the interpreter running this script.
TARGET_PYTHON = "python3.9"

# Base URL prepended to each XML file name when update_robots_txt() writes
# the "Sitemap:" lines.
DOMAIN_BASE = "https://walmart.uscareers.co.com"

# File names (relative paths, so they resolve against the working directory)
STATE_FILE = "automation_state.json"
LOG_FILE = "automation.log"
ROBOTS_FILE = "robots.txt"

# Set to True to delete intermediate files like all-schemas.ndjson to save disk/cache
CLEANUP_TEMP_FILES = True

# Improver Settings
# One list of extra command-line flags per mode; main() appends the selected
# list to the improver script's arguments and moves to the next mode each run.
IMPROVER_MODES = [
    [], # (A) Plain: no extra flags
    ["--enable_salary_adjustment"], # (B) Salary adjustment flag only
    ["--enable_salary_adjustment", "--no-change-in-title", "--seed", "52"] # (C) Salary adjustment, no-title-change flag and a fixed seed
]

def get_python_command():
    """Return the interpreter invocation as an argv-style list.

    A non-empty TARGET_PYTHON is tokenised with shlex, so it may contain
    extra arguments. Otherwise the interpreter running this script is used.
    """
    if not TARGET_PYTHON:
        return [sys.executable]
    return shlex.split(TARGET_PYTHON)

def load_state():
    """Load the rotation state persisted by the previous run.

    Returns:
        dict: Always contains "last_schema_index", "last_improver_num" and
        "last_mode_index". Keys absent from the file (or the whole file, on
        a first run) are filled with the defaults 0, 0 and -1. Any extra
        keys found in the file are preserved.

    Raises:
        ValueError: If the file is not valid JSON (json.JSONDecodeError is
            a ValueError subclass) or its top-level value is not an object.
    """
    state = {
        "last_schema_index": 0,
        "last_improver_num": 0,
        "last_mode_index": -1,
    }

    # Open directly rather than checking existence first: a missing file is
    # the normal first-run case, and this avoids a check-then-open race.
    try:
        with open(STATE_FILE, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        return state

    if not isinstance(loaded, dict):
        raise ValueError(
            f"{STATE_FILE} must contain a JSON object, "
            f"got {type(loaded).__name__}"
        )

    # Overlay stored values on the defaults so a partial file still yields
    # every key the caller reads.
    state.update(loaded)
    return state

def save_state(state):
    """Persist the rotation state to STATE_FILE as JSON.

    The data is written to a sibling temporary file first and then moved
    over STATE_FILE with os.replace. An interruption during the write
    therefore cannot leave a truncated state file for the next run to read.

    Args:
        state: JSON-serialisable mapping to store.
    """
    tmp_path = f"{STATE_FILE}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(state, f)
    # os.replace overwrites an existing destination on every platform.
    os.replace(tmp_path, STATE_FILE)

def log_work(message):
    """Append a timestamped entry to LOG_FILE and echo it to stdout.

    Args:
        message: Text to record. It is prefixed with the local time formatted
            as "[YYYY-MM-DD HH:MM:SS] ".
    """
    now = datetime.datetime.now()
    entry = f"[{now.strftime('%Y-%m-%d %H:%M:%S')}] {message}"

    with open(LOG_FILE, 'a', encoding='utf-8') as log_handle:
        log_handle.write(entry + "\n")

    print(entry)

def run_command(script_name, args=None):
    """Run a pipeline script with the configured interpreter and wait for it.

    The child inherits this process's stdin/stdout/stderr (nothing is
    captured), so its output appears directly in the terminal. It runs with
    PYTHONIOENCODING forced to "utf-8".

    Args:
        script_name: Path of the Python script to execute.
        args: Optional iterable of string arguments appended after the
            script name. Defaults to no arguments.

    Returns:
        int: The child's exit code. This is always 0, because any other exit
        code terminates the whole run.

    Raises:
        SystemExit: With code 1 when the child exits with a non-zero status.
    """
    # None sentinel instead of a shared mutable default. Copying into a list
    # also lets callers pass tuples or other iterables.
    extra_args = [] if args is None else list(args)
    full_cmd = get_python_command() + [script_name] + extra_args

    # Display string only; the command itself is run as an argv list.
    cmd_str = " ".join(full_cmd)
    print(f"Running: {cmd_str}")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "utf-8"

    result = subprocess.run(
        full_cmd,
        env=env
    )

    if result.returncode != 0:
        log_work(f"ERROR running command: {cmd_str}")
        log_work(f"Exit Code: {result.returncode}")
        sys.exit(1)

    return result.returncode

def cleanup_file(filename):
    """Delete an intermediate file on a best-effort basis.

    Does nothing when CLEANUP_TEMP_FILES is falsy or the file does not
    exist. A failure during removal (or while logging it) is reported as a
    printed warning and never propagated.

    Args:
        filename: Path of the file to remove.
    """
    if not CLEANUP_TEMP_FILES:
        return
    if not os.path.exists(filename):
        return

    try:
        os.remove(filename)
        log_work(f"Cleaned up temporary file: {filename}")
    except Exception as err:
        print(f"Warning: Could not delete {filename}: {err}")

def update_robots_txt():
    """Rewrite ROBOTS_FILE with an allow-all policy and one Sitemap line per XML file.

    Every "*.xml" file in the current directory is listed as
    "<DOMAIN_BASE>/<file name>", except feed.xml (compared case-insensitively).
    File names are consumed lazily from glob.iglob and written as they are
    produced, so the full list is never held in memory.
    """
    # Lazy pipeline: nothing is scanned until the loop below pulls from it.
    sitemap_names = (
        name for name in glob.iglob("*.xml") if name.lower() != "feed.xml"
    )

    written = 0
    with open(ROBOTS_FILE, 'w', encoding='utf-8') as robots:
        robots.write("User-agent: *\nAllow: /\n")

        for written, name in enumerate(sitemap_names, start=1):
            robots.write(f"Sitemap: {DOMAIN_BASE}/{name}\n")

    log_work(f"Updated {ROBOTS_FILE} with {written} sitemaps (excluded feed.xml).")

def get_available_improvers():
    """List the improver numbers available in the current directory.

    A file counts only when its whole name has the form
    "10-<digits>-improver.py". Anchoring the match keeps names such as
    "10-old-10-7-improver.py" (which the glob also returns) from contributing
    a number taken from the tail of the name.

    Returns:
        list[int]: Distinct improver numbers in ascending order. The list is
        empty when no matching file exists.
    """
    name_pattern = re.compile(r"10-(\d+)-improver\.py")
    matches = (
        name_pattern.fullmatch(name) for name in glob.glob("10-*-improver.py")
    )
    # A set removes duplicates such as "10-05-..." and "10-5-...", which both
    # map to the integer 5.
    return sorted({int(m.group(1)) for m in matches if m})

def validate_dependencies(required_files):
    """Abort the run if any required file is missing.

    Each missing path is written through log_work, so the log file records
    exactly which files were absent and not only that something was missing.

    Args:
        required_files: Iterable of paths that must exist.

    Raises:
        SystemExit: With code 1 when at least one path does not exist.
    """
    missing = [path for path in required_files if not os.path.exists(path)]
    if not missing:
        return

    log_work("CRITICAL: Missing required files:")
    for path in missing:
        log_work(f"  - {path}")
    log_work("Aborting run to prevent mid-process failure.")
    sys.exit(1)

def parse_arguments():
    """Build the command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace: Holds the boolean attribute ``force_old``, which
        is True only when ``--force-old`` was supplied.
    """
    cli = argparse.ArgumentParser(description="Master automation script.")
    cli.add_argument(
        "--force-old",
        help="Use locations-old.csv for 1pMain.",
        action="store_true",
    )
    parsed = cli.parse_args()
    return parsed

def main():
    """Run one pipeline iteration and advance the rotation state.

    Each run picks the next schema file, improver script and improver mode
    after the ones recorded in the state file, wrapping around at the end of
    each sequence. It then checks that every required file exists and runs,
    in order: multi-geo003.py, the improver, 1pMain.py, sitemap-2-003.py and
    the robots.txt update. The new state is saved only after every step has
    succeeded, so a failed run is retried with the same selection.

    Exits with code 1 on a missing dependency, a failed step or an
    unexpected error, and with code 130 on Ctrl+C.
    """
    start_time = time.time()

    try:
        # --- 0. HANDLE ARGUMENTS ---
        args = parse_arguments()

        locations_file = "locations-geo.csv"
        # 1pMain.py is always run without extra arguments. Force-old mode
        # only swaps the locations file and still generates everything.
        main_gen_args = []

        if args.force_old:
            log_work("!!! FORCE OLD MODE ACTIVATED !!!")
            locations_file = "locations-old.csv"

        state = load_state()

        # --- 1. DETERMINE NEXT SCHEMA ---
        # .get with first-run defaults so a partial state file cannot raise.
        next_schema_idx = state.get("last_schema_index", 0) + 1
        schema_file = f"schemas-{next_schema_idx}.ndjson"

        # Fallback logic: running past the last schema file wraps to the first.
        if not os.path.exists(schema_file):
            log_work(f"Schema file {schema_file} not found. Restarting loop from schemas-1.")
            next_schema_idx = 1
            schema_file = f"schemas-{next_schema_idx}.ndjson"

        # --- 2. DETERMINE NEXT IMPROVER (DYNAMIC) ---
        available_improvers = get_available_improvers()

        if not available_improvers:
            log_work("CRITICAL: No improver scripts (10-*-improver.py) found.")
            sys.exit(1)

        last_improver_num = state.get("last_improver_num", 0)
        # First number above the last one used; wrap to the smallest when
        # the last one used was the highest available.
        next_improver_num = next(
            (num for num in available_improvers if num > last_improver_num),
            available_improvers[0],
        )

        improver_script = f"10-{next_improver_num}-improver.py"

        # --- 3. DETERMINE NEXT MODE ---
        next_mode_idx = state.get("last_mode_index", -1) + 1
        # Bound by the actual number of modes so that editing IMPROVER_MODES
        # needs no change here. Any out-of-range value restarts at mode 0.
        if not 0 <= next_mode_idx < len(IMPROVER_MODES):
            next_mode_idx = 0
        current_mode_flags = IMPROVER_MODES[next_mode_idx]

        # --- 4. PRE-FLIGHT CHECKS ---
        # Check EVERYTHING before running ANYTHING
        log_work("Performing pre-flight dependency checks...")
        required = [
            schema_file,
            locations_file,
            "multi-geo003.py",
            improver_script,
            "1pMain.py",
            "sitemap-2-003.py"
        ]
        validate_dependencies(required)
        log_work("Checks passed.")

        # --- START EXECUTION ---
        log_work("--------------------------------------------------")
        log_work(f"Config: Schema={schema_file} | Improver={improver_script} | Mode={next_mode_idx}")

        # --- STEP A: Multi-Geo ---
        run_command("multi-geo003.py", [
            "--schemas", schema_file,
            "--locations", locations_file,
            "--output", "all-schemas.ndjson"
        ])

        # --- STEP B: Improver ---
        improver_args = ["-i", "all-schemas.ndjson", "-o", "schema.ndjson"] + current_mode_flags
        run_command(improver_script, improver_args)

        # The improver has consumed the intermediate file; drop it now.
        cleanup_file("all-schemas.ndjson")

        # --- STEP C: Main Generator ---
        run_command("1pMain.py", main_gen_args)

        # --- STEP D: Sitemap ---
        run_command("sitemap-2-003.py")

        # --- STEP E: Robots.txt ---
        update_robots_txt()

        # --- SAVE STATE ---
        new_state = {
            "last_schema_index": next_schema_idx,
            "last_improver_num": next_improver_num,
            "last_mode_index": next_mode_idx
        }
        save_state(new_state)

        elapsed = time.time() - start_time
        log_work(f"Run Complete. Time taken: {elapsed:.2f} seconds.")

    except KeyboardInterrupt:
        log_work("\nProcess interrupted by user (Ctrl+C). Exiting safely.")
        sys.exit(130)
    except Exception as e:
        # Top-level boundary: SystemExit from the helpers is not an Exception
        # subclass, so their exit codes pass through untouched.
        log_work(f"Unexpected Error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()