#!/usr/bin/env python3
"""
brain.py — GSC Daily (walmart.uscareers.co.com)
==========================================
  1. Fetch robots.txt → parse all sitemaps
  2. Submit all sitemaps + feed.xml + robots.txt
  3. Inspect home URL

Called by daily.py — no arguments needed, everything hardcoded.

Requirements:
    pip install google-api-python-client google-auth google-auth-httplib2 requests
"""

import logging
import sys
import time
import requests
from datetime import datetime
from pathlib import Path

from google.oauth2 import service_account
from google.auth.transport.requests import Request as AuthRequest
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


# ╔═════════════════════════════════════════════════════════════╗
# ║  HARDCODED CONFIG                                          ║
# ╚═════════════════════════════════════════════════════════════╝
SITE_URL = "https://walmart.uscareers.co.com"  # property URL; also used to build robots.txt/feed.xml URLs
GSC_CREDS = "gsc-bot.json"  # path to the service-account JSON key (loaded by get_credentials)
# ─────────────────────────────────────────────────────────────

# OAuth scopes requested for the service account. Note: only the
# webmasters scope is exercised by this script; indexing is requested
# but no Indexing API calls are made here.
SCOPES = [
    "https://www.googleapis.com/auth/webmasters",
    "https://www.googleapis.com/auth/indexing",
]

LOG_DIR = "gsc_logs"  # per-run log files are written here (created on demand)
MAX_RETRIES = 5  # max retry attempts in retry_on_rate_limit
BASE_DELAY = 2  # seconds; doubled each retry (exponential backoff)
DELAY_BETWEEN_SITEMAPS = 0.8  # seconds to pause between sitemap submissions

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
def setup_logging() -> Path:
    """Configure root logging to a timestamped file plus stdout.

    The log file is named from the site URL (slashes flattened to
    underscores) and the current timestamp, and lives under LOG_DIR.

    Returns:
        Path of the log file created for this run.
    """
    log_dir = Path(LOG_DIR)
    log_dir.mkdir(exist_ok=True)

    # e.g. "walmart.uscareers.co.com_20240101_120000.log"
    slug = SITE_URL.replace("https://", "").replace("/", "_").rstrip("_")
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = log_dir / f"{slug}_{stamp}.log"

    handlers = [
        logging.FileHandler(log_path, encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ]
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s  %(message)s",
        datefmt="%H:%M:%S",
        handlers=handlers,
    )
    return log_path


# ---------------------------------------------------------------------------
# Auth
# ---------------------------------------------------------------------------
def get_credentials():
    """Load service-account credentials from GSC_CREDS with the configured scopes."""
    key_file = GSC_CREDS
    return service_account.Credentials.from_service_account_file(
        key_file,
        scopes=SCOPES,
    )


def get_service(creds):
    """Build and return the Search Console v1 API client for *creds*."""
    service = build("searchconsole", "v1", credentials=creds)
    return service


# ---------------------------------------------------------------------------
# Retry with exponential backoff
# ---------------------------------------------------------------------------
def retry_on_rate_limit(func, max_retries=MAX_RETRIES):
    """Call *func* with exponential-backoff retries on rate limits and network errors.

    Retries HttpError 429/403 and requests-level network failures up to
    *max_retries* times, doubling the delay each attempt (BASE_DELAY base).
    Any other exception fails immediately.

    Returns:
        (True, result) on success, or (False, exception) on final failure.
    """
    attempt = 0
    while attempt <= max_retries:
        try:
            return True, func()
        except HttpError as err:
            # 403 is retried too: GSC surfaces quota exhaustion as 403.
            retriable = err.resp.status in (429, 403)
            if not retriable or attempt >= max_retries:
                return False, err
            delay = BASE_DELAY * (2 ** attempt)
            logging.warning(f"    Rate limited ({err.resp.status}). Retry {attempt+1}/{max_retries} in {delay}s...")
            time.sleep(delay)
        except requests.exceptions.RequestException as err:
            if attempt >= max_retries:
                return False, err
            delay = BASE_DELAY * (2 ** attempt)
            logging.warning(f"    Network error. Retry {attempt+1}/{max_retries} in {delay}s...")
            time.sleep(delay)
        except Exception as err:
            # Anything unexpected is not worth retrying.
            return False, err
        attempt += 1
    return False, Exception("Max retries exceeded")


# ---------------------------------------------------------------------------
# Step 1: Fetch robots.txt → parse sitemaps
# ---------------------------------------------------------------------------
def fetch_sitemaps_from_robots() -> list[str]:
    """Download the site's robots.txt and return every `Sitemap:` URL it lists.

    Returns an empty list when robots.txt cannot be fetched (logged as an
    error rather than raised, so the caller can continue with feed.xml).
    """
    robots_url = SITE_URL + "/robots.txt"
    logging.info(f"[*] Fetching {robots_url}")

    try:
        response = requests.get(
            robots_url,
            timeout=15,
            headers={"User-Agent": "Mozilla/5.0 (GSC-Setup-Tool)"},
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        logging.error(f"[!] Could not fetch robots.txt: {exc}")
        return []

    found: list[str] = []
    for raw_line in response.text.splitlines():
        entry = raw_line.strip()
        # Directive names are case-insensitive per the robots.txt convention.
        if not entry.lower().startswith("sitemap:"):
            continue
        pieces = entry.split(None, 1)
        if len(pieces) == 2 and pieces[1].strip():
            found.append(pieces[1].strip())

    logging.info(f"[+] Found {len(found)} sitemap(s) in robots.txt")
    return found


# ---------------------------------------------------------------------------
# Step 2: Submit sitemaps + feed.xml + robots.txt
# ---------------------------------------------------------------------------
def submit_sitemap(service, gsc_url: str, sitemap_url: str) -> bool:
    """Submit one sitemap URL to Search Console; return True on success.

    Failures are logged (after retry_on_rate_limit exhausts retries)
    rather than raised.
    """
    def _submit():
        service.sitemaps().submit(siteUrl=gsc_url, feedpath=sitemap_url).execute()
        return True

    succeeded, outcome = retry_on_rate_limit(_submit)
    if not succeeded:
        logging.error(f"    [!] Failed: {sitemap_url} — {outcome}")
    return succeeded


def submit_all_sitemaps(service, gsc_url: str, sitemaps: list[str]):
    """Submit each URL in *sitemaps*, logging progress and a final tally.

    A short pause (DELAY_BETWEEN_SITEMAPS) follows every submission to
    stay friendly to the API quota.
    """
    count = len(sitemaps)
    logging.info(f"[*] Submitting {count} item(s)...")

    submitted = failed = 0
    for position, url in enumerate(sitemaps, start=1):
        logging.info(f"    [{position}/{count}] {url}")
        if submit_sitemap(service, gsc_url, url):
            submitted += 1
        else:
            failed += 1
        time.sleep(DELAY_BETWEEN_SITEMAPS)

    logging.info(f"[+] Results: {submitted} submitted, {failed} failed")


# ---------------------------------------------------------------------------
# Step 3: Inspect home URL
# ---------------------------------------------------------------------------
def inspect_home(service, gsc_url: str):
    """Run a URL Inspection on the home page and log the key index fields.

    Failures (after retries) are logged and swallowed; nothing is returned.
    """
    logging.info(f"[*] Inspecting: {gsc_url}")

    def _inspect():
        payload = {"inspectionUrl": gsc_url, "siteUrl": gsc_url}
        return service.urlInspection().index().inspect(body=payload).execute()

    succeeded, response = retry_on_rate_limit(_inspect)
    if not succeeded:
        logging.error(f"[!] Inspection failed: {response}")
        return

    inspection = response.get("inspectionResult", {})
    index_status = inspection.get("indexStatusResult", {})

    logging.info("[+] Inspection Result:")
    # Label column is padded to 18 chars so values line up in the log.
    for label, key in (
        ("Verdict", "verdict"),
        ("Coverage State", "coverageState"),
        ("Crawl Timestamp", "crawlTimestamp"),
        ("Indexing State", "indexingState"),
        ("robots.txt State", "robotsTxtState"),
        ("Page Fetch State", "pageFetchState"),
    ):
        logging.info(f"    {label + ':':<18}{index_status.get(key, 'N/A')}")

    mobile = inspection.get("mobileUsabilityResult", {})
    if mobile:
        logging.info(f"    Mobile Usability: {mobile.get('verdict', 'N/A')}")

    rich = inspection.get("richResultsResult", {})
    if rich:
        logging.info(f"    Rich Results:     {rich.get('verdict', 'N/A')}")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Daily GSC routine for SITE_URL.

    Steps:
        1. Parse all `Sitemap:` entries from the site's robots.txt.
        2. Submit them (plus feed.xml) to Search Console.
        3. Run a URL Inspection on the home page.

    No arguments; everything is driven by the hardcoded config above.
    """
    gsc_url = SITE_URL + "/"
    feed_url = SITE_URL + "/feed.xml"

    log_file = setup_logging()

    logging.info("=" * 60)
    logging.info(f"  brain.py — {SITE_URL}")
    logging.info(f"  {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logging.info("=" * 60)

    creds = get_credentials()
    service = get_service(creds)

    # ── Step 1: Fetch sitemaps from robots.txt ──
    logging.info("\n── Fetch Sitemaps ──")
    sitemaps = fetch_sitemaps_from_robots()

    # Always submit feed.xml even if robots.txt omits it (or wasn't fetchable).
    if feed_url not in sitemaps:
        sitemaps.append(feed_url)
        # Fix: was an f-string with no placeholders (ruff F541).
        logging.info("[+] Added: feed.xml")

    # ── Step 2: Submit everything ──
    logging.info("\n── Submit Sitemaps ──")
    if sitemaps:
        submit_all_sitemaps(service, gsc_url, sitemaps)
    else:
        logging.info("[!] Nothing to submit")

    # ── Step 3: Inspect home ──
    logging.info("\n── Inspect Home ──")
    inspect_home(service, gsc_url)

    logging.info("\n" + "=" * 60)
    logging.info(f"  Done! Log: {log_file}")
    logging.info("=" * 60)


if __name__ == "__main__":
    main()