# -*- coding: utf-8 -*-
import time
import json
import requests
import os
import re  # Regex matching for sitemap URL filtering
from oauth2client.service_account import ServiceAccountCredentials
import httplib2
from bs4 import BeautifulSoup
from tqdm import tqdm
from colorama import Fore, Style, init
from datetime import datetime, timedelta
import sys
import signal # For graceful shutdown

# Initialize colorama
init(autoreset=True)

# --- Configuration ---
SCOPES = ["https://www.googleapis.com/auth/indexing"]
ENDPOINT = "https://indexing.googleapis.com/v3/urlNotifications:publish"
KEYS_DIR = "keys" # Directory containing .json key files
SITEMAP_FILE = "sitemap.txt" # File listing sitemap URLs
FAILED_KEYS_LOG_FILE = "failed_log.txt" # Log file for keys encountering permission/load issues
KEY_COOLDOWN_HOURS = 24 # Hours before retrying an exhausted key
FETCH_TIMEOUT_SECONDS = 30 # Timeout for fetching sitemaps
# Note: there is no cycle sleep or internal loop; the script runs once per invocation and cron handles scheduling.
# --- State Files ---
PROCESSED_URLS_FILE = "processed_urls.txt"
STATE_FILE = "script_state.json" # Stores exhausted keys and last key index
# --- End Configuration ---
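# Illustrative layout (file and directory names below are placeholders, not requirements):
#
#   keys/
#       project-a.json          <- one service-account key per file
#       project-b.json
#   sitemap.txt                 <- one sitemap URL per line, '#' lines are comments
#
# The script does a single pass and exits; scheduling is left to cron.
# A hypothetical crontab entry running it every 6 hours could look like:
#
#   0 */6 * * * cd /path/to/tool && python3 index_tool.py >> index_tool.log 2>&1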

# --- Global variable to signal shutdown ---
shutdown_requested = False

def signal_handler(sig, frame):
    """Handles Ctrl+C or SIGTERM for graceful shutdown."""
    global shutdown_requested
    if shutdown_requested: # Second signal
         print(f"\n{Fore.RED}Forcing exit immediately! State might not be saved.{Style.RESET_ALL}")
         sys.exit(1)
    print(f"\n{Fore.YELLOW}Shutdown requested (Signal: {sig}). Finishing current step and saving state... Press Ctrl+C again to force exit.{Style.RESET_ALL}")
    shutdown_requested = True

# Register signal handlers
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)


# ---------------------------------------------------------------
# Banner Function
# ---------------------------------------------------------------
def print_banner():
    """Prints the tool's banner."""
    banner = r"""
   ____                       _
  / ___|_ __ _   _ _ __ (_)_ __
 | |  _| '__| | | | '_ \| | '_ \
 | |_| | |  | |_| | | | | | | | |
  \____|_|   \__,_|_| |_|_|_| |_|

 Google Indexing Tool - v7.0 (Cron-Friendly)
"""
    print(Fore.MAGENTA + banner + Style.RESET_ALL)
    print(f"{Fore.CYAN}--- State Files: {PROCESSED_URLS_FILE}, {STATE_FILE} ---{Style.RESET_ALL}")
    print(f"{Fore.CYAN}--- Failed Key Log: {FAILED_KEYS_LOG_FILE} ---{Style.RESET_ALL}")


# ---------------------------------------------------------------
# Load/Save State Functions
# ---------------------------------------------------------------
def save_state(processed_set, exhausted_dict, key_idx):
    """Saves the current state to files."""
    print(f"{Fore.CYAN}Attempting to save state...{Style.RESET_ALL}")
    try:
        # Save processed URLs
        with open(PROCESSED_URLS_FILE, "w", encoding="utf-8") as f:
            for url in sorted(list(processed_set)): # Sort for consistency
                f.write(url + "\n")

        # Save exhausted keys and last index
        exhausted_serializable = {k: v.isoformat() for k, v in exhausted_dict.items()}
        state_data = {
            "exhausted_keys": exhausted_serializable,
            "last_key_index": key_idx
            }
        with open(STATE_FILE, "w", encoding="utf-8") as f:
            json.dump(state_data, f, indent=4)

        print(f"{Fore.GREEN}State saved successfully ({len(processed_set)} URLs, {len(exhausted_dict)} keys).{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}Error saving state: {e}{Style.RESET_ALL}")

def load_state():
    """Loads state from files. Returns (processed_set, exhausted_dict, key_idx)"""
    processed_set = set()
    exhausted_dict = {}
    key_idx = -1
    print(f"{Fore.CYAN}Attempting to load previous state...{Style.RESET_ALL}")

    # Load processed URLs
    try:
        if os.path.exists(PROCESSED_URLS_FILE):
            with open(PROCESSED_URLS_FILE, "r", encoding="utf-8") as f:
                processed_set = {line.strip() for line in f if line.strip()}
            print(f"{Fore.BLUE}  Loaded {len(processed_set)} URLs from {PROCESSED_URLS_FILE}{Style.RESET_ALL}")
        else:
             print(f"{Fore.YELLOW}  {PROCESSED_URLS_FILE} not found. Starting with empty processed URL set.{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}  Error loading {PROCESSED_URLS_FILE}: {e}. Starting fresh.{Style.RESET_ALL}")
        processed_set = set()

    # Load exhausted keys and last index
    try:
        if os.path.exists(STATE_FILE):
            with open(STATE_FILE, "r", encoding="utf-8") as f:
                state_data = json.load(f)
                exhausted_serializable = state_data.get("exhausted_keys", {})
                # Normalize keys (paths) when loading
                exhausted_dict = {os.path.normpath(k): datetime.fromisoformat(v) for k, v in exhausted_serializable.items()}
                key_idx = state_data.get("last_key_index", -1)
            print(f"{Fore.BLUE}  Loaded {len(exhausted_dict)} exhausted key states and last index ({key_idx}) from {STATE_FILE}{Style.RESET_ALL}")
        else:
             print(f"{Fore.YELLOW}  {STATE_FILE} not found. Starting with no exhausted keys and default index.{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}  Error loading {STATE_FILE}: {e}. Starting fresh.{Style.RESET_ALL}")
        exhausted_dict = {}
        key_idx = -1

    return processed_set, exhausted_dict, key_idx

# ---------------------------------------------------------------
# Load Credentials
# ---------------------------------------------------------------
def load_credentials(json_key_file):
    """Load Google Service Account credentials."""
    try:
        http_obj = httplib2.Http(timeout=20) # 20 second timeout
        credentials = ServiceAccountCredentials.from_json_keyfile_name(json_key_file, scopes=SCOPES)
        http = credentials.authorize(http_obj)
        return http
    except Exception as e:
        print(f"{Fore.RED}Error loading credentials from {os.path.basename(json_key_file)}: {e}{Style.RESET_ALL}")
        return None

# ---------------------------------------------------------------
# Find JSON Key Files
# ---------------------------------------------------------------
def get_json_key_files(directory):
    """Return a sorted, normalized list of .json file paths."""
    if not os.path.isdir(directory):
        print(f"{Fore.RED}Keys directory '{directory}' not found.{Style.RESET_ALL}")
        return []
    keys = [
        os.path.normpath(os.path.join(directory, f))
        for f in os.listdir(directory)
        if f.endswith(".json")
    ]
    keys.sort()
    return keys

# ---------------------------------------------------------------
# Load Sitemap URLs from File
# ---------------------------------------------------------------
def load_sitemaps_from_file(sitemap_file):
    """Load sitemap URLs from the specified text file."""
    try:
        with open(sitemap_file, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip() and not line.strip().startswith('#')]
    except FileNotFoundError:
        print(f"{Fore.RED}Error: {sitemap_file} not found. Please create this file.{Style.RESET_ALL}")
        return []

# ---------------------------------------------------------------
# Log Keys with Permission Issues
# ---------------------------------------------------------------
def log_permission_issue_key(key_file):
    """Logs the base name of a key file to FAILED_KEYS_LOG_FILE"""
    try:
        with open(FAILED_KEYS_LOG_FILE, "a", encoding="utf-8") as f:
            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            f.write(f"{timestamp} - {os.path.basename(key_file)}\n")
    except Exception as e:
        print(f"{Fore.RED}Error writing to {FAILED_KEYS_LOG_FILE}: {e}{Style.RESET_ALL}")

# ---------------------------------------------------------------
# Switch to Next Available Key (v7 - no long wait, just exit)
# ---------------------------------------------------------------
def switch_to_next_key(json_key_files, current_key_index, exhausted_keys):
    """
    Moves to the next key that isn't currently exhausted or failed to load.
    v7 CHANGE: Returns (None, None) immediately if all keys exhausted —
    does NOT wait 24h. Cron will re-run the script later.
    """
    global shutdown_requested
    if shutdown_requested: return None, None

    num_keys = len(json_key_files)

    if num_keys == 0:
         print(f"{Fore.RED}No keys available to switch to.{Style.RESET_ALL}")
         return None, None

    checked_indices = set()
    next_index = current_key_index

    while len(checked_indices) < num_keys:
        if shutdown_requested: return None, None
        next_index = (next_index + 1) % num_keys
        checked_indices.add(next_index)

        key_file = json_key_files[next_index] # Already normalized path

        # Check if this key is temporarily exhausted
        if key_file in exhausted_keys:
            exhaustion_time = exhausted_keys[key_file]
            if datetime.now() - exhaustion_time <= timedelta(hours=KEY_COOLDOWN_HOURS):
                continue # Still exhausted
            else:
                print(f"{Fore.CYAN}Key {os.path.basename(key_file)} cooldown finished ({KEY_COOLDOWN_HOURS}h). Removing from exhausted list.{Style.RESET_ALL}")
                exhausted_keys.pop(key_file, None)

        # --- Key is not currently exhausted, try to load it ---
        print(f"{Fore.BLUE}Attempting to switch to API key: {os.path.basename(key_file)} (Index: {next_index}){Style.RESET_ALL}")
        http = load_credentials(key_file)
        if http:
            print(f"{Fore.GREEN}Successfully switched to API key: {os.path.basename(key_file)}{Style.RESET_ALL}")
            return next_index, http
        else:
            print(f"{Fore.RED}Failed to load credentials for {os.path.basename(key_file)}. Marking as exhausted and logging.{Style.RESET_ALL}")
            log_permission_issue_key(key_file)
            exhausted_keys[key_file] = datetime.now()
            continue

    # --- v7 CHANGE: All keys exhausted — exit immediately instead of waiting ---
    print(
        f"{Fore.RED}❌ All {num_keys} API keys are currently exhausted. "
        f"Exiting — cron will re-run after cooldown.{Style.RESET_ALL}"
    )
    return None, None

# ---------------------------------------------------------------
# Fetch Sitemap URLs
# ---------------------------------------------------------------
def fetch_sitemap_urls(sitemap_urls_list):
    """
    Recursively fetches and parses sitemaps (including index files).
    Returns a list of unique page URLs found.
    """
    global shutdown_requested
    all_page_urls = set()
    processed_sitemaps = set()
    sitemaps_to_fetch = list(sitemap_urls_list)

    pbar_sitemap = tqdm(total=len(sitemaps_to_fetch), desc="Fetching Sitemaps", unit="sitemap", leave=False)

    while sitemaps_to_fetch:
        if shutdown_requested: break

        sitemap_url = sitemaps_to_fetch.pop(0)
        pbar_sitemap.set_description(f"Fetching {os.path.basename(sitemap_url)[:30]}..")

        if not sitemap_url or sitemap_url in processed_sitemaps:
            pbar_sitemap.update(1)
            continue
        processed_sitemaps.add(sitemap_url)

        try:
            headers = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'}
            response = requests.get(sitemap_url, timeout=FETCH_TIMEOUT_SECONDS, headers=headers)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"\n{Fore.RED}Error fetching {sitemap_url}: {e}{Style.RESET_ALL}")
            pbar_sitemap.update(1)
            continue

        try:
             content_type = response.headers.get('Content-Type', '').lower()
             parser = 'lxml-xml' if 'xml' in content_type else 'lxml'
             try:
                 soup = BeautifulSoup(response.content, parser)
             except Exception:
                 soup = BeautifulSoup(response.content, "html.parser")

             new_sitemaps_found = 0
             for tag in soup.find_all(["url", "sitemap"]):
                loc = tag.find("loc")
                if loc and loc.text:
                    found_url = loc.text.strip()
                    if tag.name == 'sitemap':
                        if found_url not in processed_sitemaps and found_url not in sitemaps_to_fetch:
                            sitemaps_to_fetch.append(found_url)
                            new_sitemaps_found +=1
                    elif tag.name == 'url':
                        # --- URL filtering: skip CDN assets, pagination pages, and index pages ---
                        found_url_lower = found_url.lower()
                        skip_domains = ["cdn.shopify.com"]

                        # Detect pagination pages ("page-<number>.html") and directory index pages ("index.html")
                        is_pagination = re.search(r'page-\d+\.html', found_url_lower)
                        is_index = found_url_lower.endswith('index.html')
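                        # e.g. ".../blog/page-3.html" (pagination) and ".../index.html" are
                        # skipped, as is anything served from cdn.shopify.com.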

                        if (not any(skip_domain in found_url_lower for skip_domain in skip_domains) and
                            not is_pagination and
                            not is_index):
                            all_page_urls.add(found_url)

             if new_sitemaps_found > 0:
                  pbar_sitemap.total = len(processed_sitemaps) + len(sitemaps_to_fetch)
                  pbar_sitemap.set_postfix_str(f"+{new_sitemaps_found} nested")
             else:
                  pbar_sitemap.set_postfix_str("")

        except Exception as e:
            print(f"\n{Fore.RED}Error parsing content from {sitemap_url}: {e}{Style.RESET_ALL}")
        finally:
             pbar_sitemap.update(1)

    pbar_sitemap.close()
    unique_urls_list = list(all_page_urls)
    print(f"\n{Fore.CYAN}Total unique page URLs found across all sitemaps: {len(unique_urls_list)}{Style.RESET_ALL}")
    return unique_urls_list


# ---------------------------------------------------------------
# Index URLs via API - With Live URL/Key Output (v7)
# ---------------------------------------------------------------
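# Each notification is a POST to ENDPOINT with a body like
#   {"url": "https://example.com/some-page", "type": "URL_UPDATED"}
# A successful publish returns JSON along these lines (abbreviated, illustrative):
#   {"urlNotificationMetadata": {"url": "https://example.com/some-page",
#                                "latestUpdate": {"type": "URL_UPDATED", "notifyTime": "..."}}}
# Error responses use the standard Google API error envelope
# ({"error": {"code": ..., "status": ..., "message": ...}}), which the branches below key off.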
def index_urls(urls, http, json_key_files, current_key_index, exhausted_keys):
    """
    Sends URLs to the Indexing API. Handles errors, switches keys, logs issues.
    Provides real-time stats and prints current URL/Key being attempted.
    Returns tuple: (indexed_count, error_count, current_key_index, http_object)
    """
    global shutdown_requested
    batch_indexed_count = 0
    batch_error_count = 0
    urls_to_process = list(urls)
    batch_total = len(urls_to_process)

    postfix_data = {"key": "", "status": "", "progress": ""}
    pbar = tqdm(total=batch_total, desc="Indexing Batch", ncols=110, unit="URL", leave=True, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]')

    def _update_progress_postfix():
        """Helper to update the progress part of the postfix."""
        processed_count = pbar.n
        remaining_count = batch_total - processed_count
        percentage = (processed_count / batch_total * 100) if batch_total > 0 else 0
        postfix_data["progress"] = f"Ok:{batch_indexed_count}, Err:{batch_error_count}, Left:{remaining_count} ({percentage:.1f}%)"
        pbar.set_postfix(postfix_data, refresh=False)

    while urls_to_process:
        if shutdown_requested:
            print(f"\n{Fore.YELLOW}Shutdown requested during indexing. Stopping batch.{Style.RESET_ALL}")
            break

        if not http:
            print(f"\n{Fore.RED}No valid API key. Cannot process remaining {len(urls_to_process)} URLs.{Style.RESET_ALL}")
            batch_error_count += len(urls_to_process)
            _update_progress_postfix()
            urls_to_process.clear()
            break

        url = urls_to_process[0]
        current_key_file = json_key_files[current_key_index]

        # --- Print current URL and Key ---
        print(f"\n{Fore.WHITE}{'-'*70}{Style.RESET_ALL}")
        print(f"{Fore.CYAN}Processing URL:{Style.RESET_ALL} {url}")
        print(f"{Fore.BLUE}Using Key:     {Style.RESET_ALL} {os.path.basename(current_key_file)} (Index: {current_key_index})")
        # --- End Print ---

        postfix_data["key"] = f"Key:{os.path.basename(current_key_file)[:15]}"
        postfix_data["status"] = "Sending.."
        _update_progress_postfix()
        pbar.set_postfix(postfix_data, refresh=True)

        body = {"url": url, "type": "URL_UPDATED"}
        response, content = None, None
        needs_key_switch = False
        url_processed_or_skipped = False

        try:
            response, content = http.request(
                ENDPOINT, method="POST", body=json.dumps(body),
                headers={'Content-Type': 'application/json'}
            )
            if response and response.status >= 300:
                try:
                    decoded_content = content.decode() if content else "No Content"
                except Exception:
                    decoded_content = str(content)
                # If the body is a structured Google API error, let the JSON handling
                # below deal with it; otherwise treat it as a network/server failure.
                is_json_error = False
                try:
                    if content and json.loads(content.decode()).get("error"):
                        is_json_error = True
                except Exception:
                    pass
                if not is_json_error:
                    print(f"{Fore.RED}└─ Network/Server Error: Status {response.status}. Content: {decoded_content[:100]}{Style.RESET_ALL}")
                    exhausted_keys[current_key_file] = datetime.now()
                    needs_key_switch = True
                    batch_error_count += 1
                    postfix_data["status"] = f"{Fore.RED}Err: Net {response.status}{Style.RESET_ALL}"

        except Exception as e:
            print(f"{Fore.RED}└─ HTTP Lib Error: {e}{Style.RESET_ALL}")
            exhausted_keys[current_key_file] = datetime.now()
            needs_key_switch = True
            batch_error_count += 1
            postfix_data["status"] = f"{Fore.RED}Err: HTTP Lib{Style.RESET_ALL}"

        result = None
        if not needs_key_switch and content:
            try:
                result = json.loads(content.decode())
            except (json.JSONDecodeError, UnicodeDecodeError):
                print(f"{Fore.RED}└─ Error: Non-JSON response. Status: {response.status if response else 'N/A'}. Content: {content[:100]!r}..{Style.RESET_ALL}")
                batch_error_count += 1
                postfix_data["status"] = f"{Fore.RED}Err: Non-JSON{Style.RESET_ALL}"
                url_processed_or_skipped = True

        # --- Handle API Responses ---
        if not needs_key_switch and result:
            if "error" in result:
                error_details = result.get("error", {})
                code = error_details.get("code")
                status = error_details.get("status", "UNKNOWN")
                message = error_details.get("message", "N/A")
                error_summary = f"{status[:15]} ({code})"

                if code == 403 or status == "PERMISSION_DENIED":
                    print(f"{Fore.YELLOW}└─ Error: Permission Denied ({error_summary}). Logging & exhausting: {message[:60]}{Style.RESET_ALL}")
                    log_permission_issue_key(current_key_file)
                    exhausted_keys[current_key_file] = datetime.now()
                    needs_key_switch = True
                    batch_error_count += 1
                    postfix_data["status"] = f"{Fore.YELLOW}Err: Perm Denied{Style.RESET_ALL}"

                elif code == 429 or status == "RESOURCE_EXHAUSTED":
                    print(f"{Fore.YELLOW}└─ Error: Quota Exceeded ({error_summary}). Exhausting: {message[:60]}{Style.RESET_ALL}")
                    exhausted_keys[current_key_file] = datetime.now()
                    needs_key_switch = True
                    batch_error_count += 1
                    postfix_data["status"] = f"{Fore.YELLOW}Err: Quota{Style.RESET_ALL}"

                elif code == 400 or status == "INVALID_ARGUMENT":
                     print(f"{Fore.RED}└─ Error: Invalid URL? ({error_summary}). Skipping: {message[:60]}{Style.RESET_ALL}")
                     batch_error_count += 1
                     postfix_data["status"] = f"{Fore.RED}Err: Invalid URL{Style.RESET_ALL}"
                     url_processed_or_skipped = True

                else: # Other API errors
                    print(f"{Fore.RED}└─ Error: API Error ({error_summary}). Skipping: {message[:60]}{Style.RESET_ALL}")
                    batch_error_count += 1
                    postfix_data["status"] = f"{Fore.RED}Err: API {code}{Style.RESET_ALL}"
                    url_processed_or_skipped = True

            # --- Handle Success ---
            elif response and 200 <= response.status < 300:
                 batch_indexed_count += 1
                 indexed_url = result.get("urlNotificationMetadata", {}).get("url", url)
                 display_url = indexed_url if len(indexed_url) < 30 else indexed_url[:27] + "..."
                 print(f"{Fore.GREEN}└─ Success!{Style.RESET_ALL}")
                 postfix_data["status"] = f"{Fore.GREEN}OK: {display_url}{Style.RESET_ALL}"
                 url_processed_or_skipped = True

            # --- Handle Unexpected intermediate Status ---
            else:
                 print(f"{Fore.YELLOW}└─ Warning: Unexpected Status {response.status if response else 'N/A'}. Content: {content[:100]}{Style.RESET_ALL}")
                 batch_error_count += 1
                 postfix_data["status"] = f"{Fore.YELLOW}Err: Unexp. {response.status if response else '?'}{Style.RESET_ALL}"
                 url_processed_or_skipped = True

        # --- Post-processing for the URL ---
        if needs_key_switch:
             _update_progress_postfix()
             pbar.set_postfix(postfix_data, refresh=True)
             postfix_data["status"] = f"{Fore.CYAN}Switching...{Style.RESET_ALL}"
             pbar.set_postfix(postfix_data, refresh=True)
             time.sleep(0.1)
             print(f"{Fore.CYAN}   Switching key...{Style.RESET_ALL}")
             switched_index, new_http = switch_to_next_key(json_key_files, current_key_index, exhausted_keys)
             if new_http:
                 http = new_http
                 current_key_index = switched_index
                 # Loop will retry same URL, printing 'Processing...' again
             else:
                 http = None
                 postfix_data["status"] = f"{Fore.RED}No Keys Left{Style.RESET_ALL}"
                 # Error count for remaining URLs handled when loop breaks

        elif url_processed_or_skipped:
             urls_to_process.pop(0)
             _update_progress_postfix()
             pbar.update(1)
             pbar.set_postfix(postfix_data, refresh=True)

        else:
             # Safety net
             print(f"\n{Fore.MAGENTA}Logic Issue: URL {url} neither processed nor needing switch? Skipping.{Style.RESET_ALL}")
             batch_error_count += 1
             urls_to_process.pop(0)
             _update_progress_postfix()
             pbar.update(1)

    pbar.close()
    print(f"{Fore.WHITE}{'-'*70}{Style.RESET_ALL}") # Final separator for batch
    return batch_indexed_count, batch_error_count, current_key_index, http


# ---------------------------------------------------------------
# Main Script Execution (v7 - Single Run, No Loop)
# ---------------------------------------------------------------
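# Exit codes (matching the sys.exit() calls below):
#   0 - run completed normally (or there was nothing to do)
#   1 - fatal setup problem (no key files / no loadable key) or keyboard interrupt
#   2 - all API keys became exhausted mid-run; cron simply retries on the next schedule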
def main():
    global shutdown_requested

    print_banner()

    # --- Load Keys ---
    json_key_files = get_json_key_files(KEYS_DIR)
    if not json_key_files:
        print(f"{Fore.RED}No JSON key files found in '{KEYS_DIR}'. Exiting.{Style.RESET_ALL}")
        sys.exit(1)
    print(f"{Fore.CYAN}Found {len(json_key_files)} key file(s) in '{KEYS_DIR}'.{Style.RESET_ALL}")

    # --- Load State ---
    processed_urls_in_session, exhausted_keys, current_key_index = load_state()
    http = None

    # --- Initial Key Acquisition ---
    if not shutdown_requested:
        print(f"\n{Fore.BLUE}Acquiring initial API key...{Style.RESET_ALL}")
        current_key_index, http = switch_to_next_key(json_key_files, current_key_index, exhausted_keys)
        if not http:
            print(f"{Fore.RED}Could not load any initial API key (all exhausted or broken). Exiting.{Style.RESET_ALL}")
            save_state(processed_urls_in_session, exhausted_keys, current_key_index if current_key_index is not None else -1)
            sys.exit(1)

    # --- Load Sitemaps ---
    sitemap_urls_from_file = load_sitemaps_from_file(SITEMAP_FILE)
    if not sitemap_urls_from_file:
        print(f"{Fore.YELLOW}No sitemap URLs found in {SITEMAP_FILE}. Nothing to do.{Style.RESET_ALL}")
        save_state(processed_urls_in_session, exhausted_keys, current_key_index)
        sys.exit(0)

    # --- Single Run (no while-loop, no cycle sleep) ---
    overall_indexed_count = 0
    overall_error_count = 0
    exit_code = 0

    try:
        run_start_time = time.time()
        print(f"\n{Fore.CYAN}{'-'*20} Run Starting ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')}) {'-'*20}{Style.RESET_ALL}")

        # --- Fetch URLs ---
        if not shutdown_requested:
            print(f"{Fore.CYAN}Fetching URLs from sitemaps...{Style.RESET_ALL}")
            all_fetched_urls = fetch_sitemap_urls(sitemap_urls_from_file)
        else:
            all_fetched_urls = []

        # --- Filter URLs ---
        if not shutdown_requested and all_fetched_urls:
            new_urls_to_process_raw = [url for url in all_fetched_urls if url not in processed_urls_in_session]
            new_urls_to_process = list(dict.fromkeys(new_urls_to_process_raw))
            if len(new_urls_to_process_raw) != len(new_urls_to_process):
                 print(f"{Fore.YELLOW}Removed {len(new_urls_to_process_raw) - len(new_urls_to_process)} duplicate URLs.{Style.RESET_ALL}")

            if not new_urls_to_process:
                print(f"{Fore.GREEN}✅ All {len(all_fetched_urls)} fetched URLs already processed. Nothing to do.{Style.RESET_ALL}")
            else:
                print(f"{Fore.YELLOW}Processing {len(new_urls_to_process)} new URLs (out of {len(all_fetched_urls)} fetched).{Style.RESET_ALL}")

                # --- Index New URLs ---
                if not shutdown_requested:
                    indexed_count, error_count, current_key_index, http = index_urls(
                        new_urls_to_process, http, json_key_files,
                        current_key_index, exhausted_keys
                    )

                    # --- Update State ---
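                    # Note: the whole batch is marked as processed, including URLs that
                    # errored or were never attempted because keys ran out, so they will
                    # not be retried on later runs unless removed from the file.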
                    processed_urls_in_session.update(new_urls_to_process)
                    overall_indexed_count += indexed_count
                    overall_error_count += error_count

                    # Set exit code based on outcome
                    if not http:
                        # All keys exhausted mid-run
                        exit_code = 2
        elif not all_fetched_urls and not shutdown_requested:
            print(f"{Fore.YELLOW}No URLs fetched from sitemaps.{Style.RESET_ALL}")

        # --- Run Summary ---
        run_duration = time.time() - run_start_time
        print(f"\n{Fore.CYAN}--- Run Summary (Duration: {run_duration:.2f}s) ---{Style.RESET_ALL}")
        print(f"{Fore.GREEN}  Indexed: {overall_indexed_count}{Style.RESET_ALL}")
        print(f"{Fore.RED}  Errors:  {overall_error_count}{Style.RESET_ALL}")
        print(f"{Fore.WHITE}  Total URLs in processed list: {len(processed_urls_in_session)}{Style.RESET_ALL}")
        if http and current_key_index is not None and 0 <= current_key_index < len(json_key_files):
            active_key_name = os.path.basename(json_key_files[current_key_index])
            print(f"{Fore.BLUE}  Last active API Key: Index {current_key_index} ({active_key_name}){Style.RESET_ALL}")
        else:
            print(f"{Fore.RED}  No API Key active at end of run.{Style.RESET_ALL}")
        print("-" * 60)

    except KeyboardInterrupt:
         print(f"\n{Fore.RED}Keyboard Interrupt detected!{Style.RESET_ALL}")
         shutdown_requested = True
         exit_code = 1
    finally:
        # --- Save Final State ---
        print(f"\n{Fore.CYAN}Saving final state...{Style.RESET_ALL}")
        final_key_index = current_key_index if (http and current_key_index is not None and 0 <= current_key_index < len(json_key_files)) else -1
        save_state(processed_urls_in_session, exhausted_keys, final_key_index)
        print_final_summary(overall_indexed_count, overall_error_count, processed_urls_in_session)
        print(f"Exiting with code {exit_code}.")
        sys.exit(exit_code)

# ---------------------------------------------------------------
# Final Summary Function
# ---------------------------------------------------------------
def print_final_summary(indexed, errors, processed_set):
     """Prints a summary at the end of the script execution."""
     print(f"\n{Fore.CYAN}{'='*60}{Style.RESET_ALL}")
     print(f"{Fore.CYAN}--- Final Script Summary ---{Style.RESET_ALL}")
     print(f"Total URLs Successfully Indexed: {indexed}")
     print(f"Total Errors Encountered: {errors}")
     print(f"Total unique URLs in processed list ({PROCESSED_URLS_FILE}): {len(processed_set)}")
     try:
          if os.path.exists(FAILED_KEYS_LOG_FILE):
               with open(FAILED_KEYS_LOG_FILE, "r", encoding="utf-8") as f:
                    logged_keys = {line.split(' - ')[1].strip() for line in f if ' - ' in line}
               if logged_keys:
                    print(f"{Fore.YELLOW}Keys with permission/load issues (in {FAILED_KEYS_LOG_FILE}): {', '.join(sorted(list(logged_keys)))}{Style.RESET_ALL}")
     except Exception as e:
          print(f"{Fore.RED}Could not read log file {FAILED_KEYS_LOG_FILE}: {e}{Style.RESET_ALL}")
     print(f"{Fore.CYAN}{'='*60}{Style.RESET_ALL}")


if __name__ == "__main__":
    # Ensure paths are normalized for consistency
    PROCESSED_URLS_FILE = os.path.normpath(PROCESSED_URLS_FILE)
    STATE_FILE = os.path.normpath(STATE_FILE)
    FAILED_KEYS_LOG_FILE = os.path.normpath(FAILED_KEYS_LOG_FILE)
    KEYS_DIR = os.path.normpath(KEYS_DIR)
    SITEMAP_FILE = os.path.normpath(SITEMAP_FILE)

    main()