#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Location-Centric JobPosting Optimizer
=====================================

This script is a new version dedicated to optimizing JobPosting NDJSON
with a primary focus on geo-targeting and location-based SEO.

What's new:
1) Geo-Focused Title Generation
   - A completely new set of title templates that prioritize {city}, {state},
     and location-based urgency.
   - Titles are built to attract local candidates (e.g., "City Job: Role",
     "Hiring in City: Role").

2) Geo-Targeted Description Summary
   - The prepended SEO summary is now location-first.
   - New templates introduce the role, company, and salary all within
     the context of the specific {city}.

3) Geo-Contextual Enrichment
   - The AI enrichment layer now adds sentences specifically linking the
     role, company, and industry to the local area.

NEW IN THIS VERSION:
- Added a `--slightly-improve-title` flag. This provides a "light-touch"
  mode that ONLY appends geo-information (city, state, urgency) to the
  existing title and leaves ALL other fields (description, salary, etc.)
  in the record unmodified.
  (This mode is now more "intelligent", prioritizing new information
  and avoiding redundancy).

- NEW: Job Type Theming
  - The script now detects if a job is Full-Time, Part-Time, or Flexible.
  - It uses completely separate sets of templates (for both title and description)
    to theme the job posting around "careers" (for FT) or "flexibility" (for PT).

- NEW: Concise Title Templates
  - Title templates are now shorter and more focused.
  - Removed verbose placeholders like {company} and {salary} from titles.
  - Reduced default max title length.

Usage
-----
Full Optimization:
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson

Title-Tweak Only Mode:
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson --slightly-improve-title

No Title Change (but full description/etc. optimization):
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson --no-change-in-title
"""

import argparse
import json
import random
import re
import logging
import locale
import hashlib
from tqdm import tqdm
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, timezone

# ==========================
# Locale initialization
# ==========================
try:
    # Prefer the user's environment locale for number formatting.
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    try:
        locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
    except locale.Error:
        # "C.UTF-8" is itself unavailable on some platforms (notably Windows
        # and older macOS); previously that raised locale.Error and aborted
        # the whole script at import time. Degrade to the universal "C" locale.
        try:
            locale.setlocale(locale.LC_ALL, "C.UTF-8")
        except locale.Error:
            locale.setlocale(locale.LC_ALL, "C")
        logging.warning("Locale setting failed. Using a basic fallback locale. Number formatting might be basic.")

# ==========================
# Constants & Defaults
# ==========================
MAX_TITLE_LEN_BASE = 65 # --- ADJUSTED from 70
# Fallback logo used when a record carries none (overridable via --logo_cdn).
DESIRED_DEFAULT_FALLBACK_LOGO_URL = "https://walmart.uscareers.co.com/logo.png"
DEFAULT_CURRENCY = "USD"
CONTACT_CTA = ''  # Optional CTA footer

# Weekly-hour assumptions used when converting between salary units.
DEFAULT_FULL_TIME_HOURS_PER_WEEK = 40
DEFAULT_PART_TIME_HOURS_PER_WEEK = 23
WEEKS_PER_YEAR = 52
MONTHS_PER_YEAR = 12

# Multipliers drawn at random when --enable_salary_adjustment is on.
SALARY_ADJUSTMENT_FACTORS = [1.0, 0.92, 0.91, 0.88, 0.85, 1.05, 1.08, 1.10, 1.12, 1.15]
# Three extra 1.0 entries bias the draw toward "no change".
WEIGHTED_SALARY_ADJUSTMENT_FACTORS = [1.0, 1.0, 1.0] + SALARY_ADJUSTMENT_FACTORS

# Raw unitText -> human-readable display unit.
SALARY_UNIT_MAP = {
    'HOUR': 'Hour', 'HOURLY': 'Hour', 'DAY': 'Day', 'DAILY': 'Day',
    'WEEK': 'Week', 'WEEKLY': 'Week', 'MONTH': 'Month', 'MONTHLY': 'Month',
    'YEAR': 'Year', 'ANNUAL': 'Year', 'ANNUALLY': 'Year', 'PROJECT': 'Project'
}
# Raw unitText -> canonical schema unit.
# NOTE(review): DAY/DAILY normalize to PROJECT here but map to 'Day' in
# SALARY_UNIT_MAP above — confirm this asymmetry is intentional.
NORMALIZED_SALARY_UNITS = {
    "HOUR": "HOUR", "HOURLY": "HOUR", "DAY": "PROJECT", "DAILY": "PROJECT",
    "WEEK": "WEEK", "WEEKLY": "WEEK", "MONTH": "MONTH", "MONTHLY": "MONTH",
    "YEAR": "YEAR", "ANNUAL": "YEAR", "ANNUALLY": "YEAR", "PROJECT": "PROJECT"
}

# Inclusive monthsOfExperience ranges classifying experience levels.
EXPERIENCE_LEVEL_MONTHS = {
    "Entry-Level": (0, 12),
    "Mid-Level": (13, 60),
    "Senior-Level": (61, float('inf'))
}
# Keyword fallbacks used when monthsOfExperience is absent or unparsable.
EXPERIENCE_LEVEL_KEYWORDS = {
    "Entry-Level": ["entry level", "no experience", "graduate", "junior", "trainee", "intern"],
    "Mid-Level": ["mid level", "intermediate", "associate", "experienced"],
    "Senior-Level": ["senior", "lead", "principal", "expert", "staff level", "manager"]
}

# Day windows used to tag a posting as "new" or "closing soon".
RECENT_POST_DAYS = 2
CLOSING_SOON_DAYS = 7

# ==================================================
# NEW: Geo-Centric Templates
# ==================================================
# Headline options for the prepended SEO summary; placeholders are filled
# via str.format with the record's city/state values.
GEO_HEADER_TEMPLATES = [
    "Local Job Overview: {city}", "Position in {city}, {state}",
    "Opportunity in {city}", "Role Based in {city}", "{city} Role Highlights",
    "Your Next Step in {city}"
]
# Sub-section headings, grouped by their position within the summary block.
SUB_HEADER_TEMPLATES = {
    "primary": [
        "Key Information", "Fast Facts", "Role Essentials", "At a Glance",
        "Primary Details", "Snapshot"
    ],
    "secondary": [
        "Compensation & Benefits", "What You Get", "Pay & Perks", "Rewards",
        "Salary and Benefits"
    ],
    "tertiary": [
        "Core Focus", "What You'll Do", "Responsibilities", "Day-to-Day",
        "Position Scope"
    ]
}

# Phrases describing the local worksite; some contain a {city} placeholder.
LOCAL_AREA_DESCRIPTORS = [
    "a vibrant local hub", "a key business district", "our growing {city} office",
    "a central {city} location", "our main {city} branch"
]

# ==================================================
# NEW: Job-Type Themed Description Templates
# ==================================================
# Three parallel template families keyed by employment type
# ("FULL_TIME" / "PART_TIME" / "FLEXIBLE"). Each family currently carries a
# single 'informational' tone whose sub-keys (intro / company / location /
# salary / experience_* / urgency_* / focus / benefits / value_props) hold
# interchangeable sentence variants filled via str.format. The three
# families are kept structurally identical so a sentence can be picked by
# the same sub-key regardless of which family was selected.
GEO_TONE_TEMPLATES = {
    "FULL_TIME": {
        'informational': {
            'intro': [
                "<strong>Full-Time Role in {city}:</strong> {role_for_summary}",
                "<strong>Position:</strong> {role_for_summary} (Full-Time, Based in {city})",
                "<strong>Job Title:</strong> {role_for_summary} (Full-Time Career)"
            ],
            'company': [
                "<strong>Employer:</strong> {company_for_summary} (Hiring in {city})",
                "<strong>Hiring Organization:</strong> {company_for_summary}",
                "<strong>Company:</strong> {company_for_summary}"
            ],
            'location': [
                "<strong>Location:</strong> {city}, {state}",
                "<strong>Based in:</strong> {city}, {state} ({local_area_info})",
                "<strong>Worksite:</strong> {city}, {state}"
            ],
            'salary': [
                "<strong>Compensation:</strong> {salary_primary_display}{converted_salary_suffix}",
                "<strong>Pay:</strong> {salary_primary_display}{converted_salary_suffix}",
                "<strong>Salary:</strong> {salary_primary_display}{converted_salary_suffix}"
            ],
            'experience_entry': [
                "<strong>Experience:</strong> Entry-Level (Full Training in {city}).",
                "<strong>Required Experience:</strong> Entry-Level.",
                "<strong>Career Level:</strong> Junior / Trainee."
            ],
            'experience_mid': [
                "<strong>Experience:</strong> Mid-Level.",
                "<strong>Required Experience:</strong> Associate / Experienced.",
                "<strong>Career Level:</strong> Intermediate."
            ],
            'experience_senior': [
                "<strong>Experience:</strong> Senior-Level.",
                "<strong>Required Experience:</strong> Proven expertise.",
                "<strong>Career Level:</strong> Senior / Lead."
            ],
            'urgency_new': [
                "<strong>Status:</strong> New {city} Full-Time listing (actively interviewing).",
                "<strong>Posted:</strong> Recently.",
                "<strong>Urgency:</strong> Immediate consideration in {city}."
            ],
            'urgency_closing': [
                "<strong>Status:</strong> Closing soon.",
                "<strong>Urgency:</strong> Final applications accepted.",
                "<strong>Deadline:</strong> Approaching."
            ],
            'focus': [
                "Apply your <strong>{primary_skill}</strong> skills at our <strong>{city}</strong> location.",
                "This role centers on <strong>{primary_skill}</strong> within the <strong>{industry_display}</strong> space in {city}.",
                "Core objectives involve <strong>{primary_skill}</strong> in <strong>{industry_display}</strong>."
            ],
            'benefits': [
                "A comprehensive benefits package is included.",
                "Eligible team members receive full benefits.",
                "This role offers a competitive full-time benefits package."
            ],
            'value_props': [
                "Clear career pathways at our {city} office.",
                "Impactful work with supportive local leadership.",
                "A stable, long-term position with modern tooling.",
                "Join a growing team in {city} and build your career."
            ]
        }
    },
    "PART_TIME": {
        'informational': {
            'intro': [
                "<strong>Part-Time Job in {city}:</strong> {role_for_summary}",
                "<strong>Position:</strong> {role_for_summary} (Part-Time, Based in {city})",
                "<strong>Job Title:</strong> {role_for_summary} (Flexible Hours)"
            ],
            'company': [
                "<strong>Employer:</strong> {company_for_summary} (Hiring for PT roles in {city})",
                "<strong>Hiring Organization:</strong> {company_for_summary}",
                "<strong>Company:</strong> {company_for_summary}"
            ],
            'location': [
                "<strong>Location:</strong> {city}, {state}",
                "<strong>Based in:</strong> {city}, {state} ({local_area_info})",
                "<strong>Worksite:</strong> {city}, {state}"
            ],
            'salary': [
                "<strong>Compensation:</strong> {salary_primary_display}{converted_salary_suffix}",
                "<strong>Pay:</strong> {salary_primary_display}{converted_salary_suffix}",
                "<strong>Salary:</strong> {salary_primary_display}{converted_salary_suffix}"
            ],
            'experience_entry': [
                "<strong>Experience:</strong> Entry-Level (Training in {city}).",
                "<strong>Required Experience:</strong> No experience necessary.",
                "<strong>Career Level:</strong> Junior / Trainee."
            ],
            'experience_mid': [
                "<strong>Experience:</strong> Mid-Level.",
                "<strong>Required Experience:</strong> Associate / Experienced.",
                "<strong>Career Level:</strong> Intermediate."
            ],
            'experience_senior': [
                "<strong>Experience:</strong> Senior-Level.",
                "<strong>Required Experience:</strong> Proven expertise.",
                "<strong>Career Level:</strong> Senior / Lead."
            ],
            'urgency_new': [
                "<strong>Status:</strong> New Part-Time listing (actively interviewing).",
                "<strong>Posted:</strong> Recently.",
                "<strong>Urgency:</strong> Immediate consideration in {city}."
            ],
            'urgency_closing': [
                "<strong>Status:</strong> Closing soon.",
                "<strong>Urgency:</strong> Final applications for PT role accepted.",
                "<strong>Deadline:</strong> Approaching."
            ],
            'focus': [
                "Apply your <strong>{primary_skill}</strong> skills at our <strong>{city}</strong> location.",
                "This role centers on <strong>{primary_skill}</strong> within the <strong>{industry_display}</strong> space in {city}.",
                "Core objectives involve <strong>{primary_skill}</strong> in <strong>{industry_display}</strong>."
            ],
            'benefits': [
                "A competitive benefits package is available.",
                "Eligible part-time team members receive benefits.",
                "This role offers part-time benefits."
            ],
            'value_props': [
                "Excellent work-life balance in {city}.",
                "Flexible scheduling to fit your needs.",
                "Great supplemental income opportunity.",
                "Stable part-time work with a friendly {city} team."
            ]
        }
    },
    "FLEXIBLE": {
        'informational': {
            'intro': [
                "<strong>Flexible Role in {city}:</strong> {role_for_summary}",
                "<strong>Position:</strong> {role_for_summary} (Flexible/Contract, {city})",
                "<strong>Job Title:</strong> {role_for_summary} (Gig/Contract)"
            ],
            'company': [
                "<strong>Employer:</strong> {company_for_summary} (Hiring in {city})",
                "<strong>Hiring Organization:</strong> {company_for_summary}",
                "<strong>Company:</strong> {company_for_summary}"
            ],
            'location': [
                "<strong>Location:</strong> {city}, {state}",
                "<strong>Based in:</strong> {city}, {state} ({local_area_info})",
                "<strong>Worksite:</strong> {city}, {state}"
            ],
            'salary': [
                "<strong>Compensation:</strong> {salary_primary_display}{converted_salary_suffix}",
                "<strong>Pay:</strong> {salary_primary_display}{converted_salary_suffix}",
                "<strong>Salary:</strong> {salary_primary_display}{converted_salary_suffix}"
            ],
            'experience_entry': [
                "<strong>Experience:</strong> Open to all levels.",
                "<strong>Required Experience:</strong> Entry-Level.",
                "<strong>Career Level:</strong> Trainee."
            ],
            'experience_mid': [
                "<strong>Experience:</strong> Mid-Level.",
                "<strong>Required Experience:</strong> Associate / Experienced.",
                "<strong>Career Level:</strong> Intermediate."
            ],
            'experience_senior': [
                "<strong>Experience:</strong> Senior-Level.",
                "<strong>Required Experience:</strong> Proven expertise.",
                "<strong>Career Level:</strong> Senior / Lead."
            ],
            'urgency_new': [
                "<strong>Status:</strong> New {city} listing (actively interviewing).",
                "<strong>Posted:</strong> Recently.",
                "<strong>Urgency:</strong> Immediate consideration in {city}."
            ],
            'urgency_closing': [
                "<strong>Status:</strong> Closing soon.",
                "<strong>Urgency:</strong> Final applications accepted.",
                "<strong>Deadline:</strong> Approaching."
            ],
            'focus': [
                "Apply your <strong>{primary_skill}</strong> skills at our <strong>{city}</strong> location.",
                "This role centers on <strong>{primary_skill}</strong> within the <strong>{industry_display}</strong> space in {city}.",
                "Core objectives involve <strong>{primary_skill}</strong> in <strong>{industry_display}</strong>."
            ],
            'benefits': [
                "A competitive compensation package is offered.",
                "Benefits may be available based on employment type.",
                "This role offers competitive pay."
            ],
            'value_props': [
                "Flexible contract/gig opportunity in {city}.",
                "Set your own schedule.",
                "Project-based work with clear objectives.",
                "Join our {city} team for this flexible role."
            ]
        }
    }
}


# Interchangeable tag words used when composing titles.
HIRING_SYNS = [
    "Hiring", "Immediate Start", "Now Hiring", "Apply Today", "Urgent Hire",
    "Join Team", "Recruiting", "Apply Now"
]
ENTRY_SYNS = ["Entry Level", "No Experience", "Junior Role", "Graduate Role", "Training Provided"]
# Urgency tags: one set for recently posted jobs, one for jobs about to close.
URGENCY_TAGS_NEW_TITLE = ["New", "Just Posted", "Recent Job"]
URGENCY_TAGS_CLOSING_TITLE = ["Apply Soon", "Hiring Now", "Urgent"]

# ==================================================
# NEW: Job-Type Themed Title Templates (CONCISE)
# ==================================================
# Title formats per employment-type theme. Placeholders ({city}, {state},
# {role}, {urgency_tag}, {hiring}, {experience_tag}, {job_type}) are
# substituted via str.format when the title is generated.
GEO_FOCUSED_TITLE_TEMPLATES = {
    "FULL_TIME": [
        "{city} Full-Time: {role}",
        "{role} (Full-Time) - {city}",
        "{role} Career - {city}, {state}",
        "{urgency_tag}: {role} FT ({city})",
        "{role} (FT) - {city}, {state}",
        "{city} Career: {role}",
        "{role} | Full-Time | {city}",
        "{hiring}: {role} (Full-Time, {city})",
        "{city} {role} (Full-Time)",
        "{experience_tag} {role} (FT) - {city}"
    ],
    "PART_TIME": [
        "{city} Part-Time: {role}",
        "{role} (PT) - {city}",
        "{city} PT Opening: {role}",
        "Flexible PT {role} ({city})",
        "{role} (Part-Time) - {city}, {state}",
        "{urgency_tag}: {role} PT ({city})",
        "{role} | Part-Time | {city}",
        "{hiring}: {role} (Part-Time, {city})",
        "{city} {role} (Part-Time)",
        "{experience_tag} {role} (PT) - {city}"
    ],
    "FLEXIBLE": [
        "{city} Contract: {role}",
        "{role} (Gig) - {city}",
        "Flexible {role} ({city})",
        "{role} (Contract) - {city}, {state}",
        "{urgency_tag}: {role} (Contract, {city})",
        "{role} | Flexible | {city}",
        "{city} {job_type} Role: {role}",
        "{hiring}: {role} (Flexible, {city})",
        "{city} {role} (Gig)",
        "{experience_tag} {role} (Contract) - {city}"
    ]
}
# --- END TEMPLATE CUSTOMIZATION ---


# Keywords sprinkled into generated copy for search visibility.
JOB_SEO_KEYWORDS = [
    "hiring", "apply now", "urgent", "career", "benefits", "salary", "immediate"
]

# Canonical section name -> heading phrases used to recognize that section
# inside an existing job-description body.
SECTION_KEYWORDS_MAP = {
    "responsibilities": ["key responsibilities", "responsibilities", "your role", "what you'll do", "duties", "main duties", "primary accountabilities"],
    "requirements": ["requirements", "qualifications", "essential skills", "your qualifications", "must-have qualifications", "to succeed you'll need", "your profile"],
    "skills": ["skills", "technical skills", "soft skills", "key competencies"],
    "benefits": ["benefits", "perks", "what we offer", "why join us"],
    "experience": ["experience", "professional background", "experience level"],
    "incentives": ["incentives", "incentive compensation", "bonus", "commission"],
    "workhours": ["work hours", "hours of work", "schedule"]
}

# ==========================
# Argparse
# ==========================

def parse_args():
    """Build and parse the command-line interface for the optimizer."""
    parser = argparse.ArgumentParser(
        description="Optimize Local JobPosting NDJSON with a strong location-centric focus.")

    # I/O and general knobs.
    parser.add_argument('-i', '--input', default='all-schemas.ndjson', help='Input NDJSON file')
    parser.add_argument('-o', '--output', default='schema.ndjson', help='Output NDJSON file')
    parser.add_argument('--seed', type=int, default=None, help='Seed for reproducible randomness')
    parser.add_argument('--logo_cdn', default=DESIRED_DEFAULT_FALLBACK_LOGO_URL, help='Default fallback logo URL')
    parser.add_argument('--currency', default=DEFAULT_CURRENCY, help='Default currency')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable debug logging')

    # Salary-conversion assumptions.
    parser.add_argument('--full_time_hours', type=int, default=DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                        help="Standard hours for full-time salary conversions.")
    parser.add_argument('--part_time_hours', type=int, default=DEFAULT_PART_TIME_HOURS_PER_WEEK,
                        help="Standard hours for part-time salary conversions.")
    parser.add_argument('--enable_salary_adjustment', action='store_true',
                        help='Enable dynamic salary value adjustments (random increase/decrease/no change).')

    # Title handling is exclusive: either keep the title or only tweak it.
    title_mode = parser.add_mutually_exclusive_group()
    title_mode.add_argument('--no-change-in-title', action='store_true',
                            help='Do not improve the job title; keep the original (still optimizes description, etc.).')
    title_mode.add_argument('--slightly-improve-title', action='store_true',
                            help='Apply only minor, geo-focused improvements to the original title and leave all other fields unchanged.')

    return parser.parse_args()

# ==========================
# Small utilities
# (No changes in this section)
# ==========================

def title_case(s: str) -> str:
    """Capitalize each whitespace-separated word, leaving multi-letter all-caps tokens (acronyms) intact."""
    if not s:
        return ""
    result = []
    for word in s.split():
        if word.isupper() and len(word) > 1:
            result.append(word)  # keep acronyms like "IT" or "USA" as-is
        else:
            result.append(word.capitalize())
    return " ".join(result)

def to_dhaka_offset(dt: str) -> str:
    """Re-express an ISO-8601 timestamp in the Dhaka offset (UTC+6).

    Returns "" for falsy input and the original string when parsing fails.
    """
    if not dt:
        return ""
    try:
        # 'Z' suffix is not accepted by fromisoformat on older Pythons; swap it first.
        moment = datetime.fromisoformat(dt.replace('Z', '+00:00'))
        return moment.astimezone(timezone(timedelta(hours=6))).isoformat()
    except ValueError:
        logging.warning(f"Invalid date format for Dhaka offset: {dt}. Returning original.")
        return dt

def to_midnight(dt_str: str) -> str:
    """Clamp a timestamp to the end of its day (23:59:59), preserving its timezone.

    NOTE(review): despite the name, this yields end-of-day rather than 00:00 —
    presumably so `validThrough` covers the whole final day; confirm with callers.
    Naive inputs are assigned a UTC+6 offset.
    """
    if not dt_str:
        return ""
    try:
        parsed = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        tz = parsed.tzinfo or timezone(timedelta(hours=6))
        end_of_day = parsed.astimezone(tz).replace(hour=23, minute=59, second=59, microsecond=0)
        return end_of_day.isoformat()
    except (IndexError, ValueError) as e:
        logging.warning(f"Date parse error for to_midnight: {dt_str}. Error: {e}. Returning original.")
        return dt_str

def normalize_url(url: str) -> str:
    """Canonicalize a URL: strip query/fragment, force https, and add a
    trailing slash unless the last path segment looks like a file name."""
    if not url:
        return ""
    # Drop query string and fragment.
    base = url.split('?', 1)[0].split('#', 1)[0]
    # Normalize the scheme to https.
    if base.startswith('//'):
        base = 'https://' + base[2:]
    elif base.startswith('http://'):
        base = 'https://' + base[7:]
    elif not base.startswith('https://'):
        base = 'https://' + base.lstrip('/')
    # Append "/" unless the final segment carries a 2-5 char extension.
    last_segment = base.split('/')[-1]
    if not base.endswith('/') and not re.search(r'\.\w{2,5}(?:/)?$', last_segment):
        base += '/'
    return base

def get_currency_symbol(currency_code: str) -> str:
    """Map an ISO currency code to a display symbol.

    Unknown codes fall back to "CODE " (code plus a space); falsy input yields "$".
    """
    known = {
        "USD": "$", "EUR": "€", "GBP": "£", "JPY": "¥",
        "CAD": "CA$", "AUD": "A$", "INR": "₹", "BDT": "৳",
    }
    fallback = (str(currency_code) + " ") if currency_code else "$"
    return known.get(str(currency_code).upper(), fallback)

# ==========================
# Intelligence helpers
# ==========================

def get_primary_skill(skills_value) -> str:
    """Return the first individual skill from a skills field (str, list, or other),
    or "" when nothing usable can be extracted."""
    if not skills_value:
        return ""

    # Flatten whatever shape we got into a single delimited string.
    if isinstance(skills_value, str):
        flat = skills_value
    elif isinstance(skills_value, list):
        collected = []
        for entry in skills_value:
            if isinstance(entry, str):
                if entry.strip():
                    collected.append(entry.strip())
            elif entry is not None:
                try:
                    collected.append(str(entry).strip())
                except Exception:
                    pass
        flat = ", ".join(collected)
    else:
        try:
            flat = str(skills_value).strip()
        except Exception:
            return ""

    if not flat:
        return ""
    # Skills may be delimited by commas, semicolons, or slashes; take the
    # first non-empty token.
    try:
        return next((tok.strip() for tok in re.split(r'[,;/]', flat) if tok.strip()), "")
    except TypeError:
        return ""


def clean_text_to_list(text_content: str) -> list[str]:
    """Extract readable text lines from HTML.

    Preference order: <li> items, then <p> blocks, then the flattened text
    of the whole fragment. Empty entries are dropped.
    """
    if not text_content:
        return []
    soup = BeautifulSoup(text_content, 'html.parser')

    bullets = [li.get_text(separator=' ', strip=True)
               for li in soup.find_all('li') if li.get_text(strip=True)]
    if bullets:
        return bullets

    paragraphs = [p.get_text(separator=' ', strip=True)
                  for p in soup.find_all('p') if p.get_text(strip=True)]
    if paragraphs:
        return paragraphs

    flat = soup.get_text(strip=True)
    return [flat] if flat else []


def get_location_details(rec: dict) -> tuple[str, str, str]:
    """Extract (city, state, country) strings from a JobPosting's jobLocation.

    Accepts jobLocation as either a single Place dict or a list of them
    (only the first entry is used). Falls back to the region name as the
    city when only the region is given, and to "US" when the country is
    missing or not a plain string. Always returns three strings.
    """
    job_loc_data = rec.get('jobLocation')

    # Normalize the single-dict and list-of-dicts shapes to one Place dict
    # (previously the two branches duplicated the extraction logic).
    place = None
    if isinstance(job_loc_data, dict):
        place = job_loc_data
    elif isinstance(job_loc_data, list) and job_loc_data and isinstance(job_loc_data[0], dict):
        place = job_loc_data[0]

    city, state, country = "", "", "US"
    if place is not None:
        address_data = place.get('address')
        if isinstance(address_data, dict):
            city = address_data.get('addressLocality', '')
            state = address_data.get('addressRegion', '')
            raw_country = address_data.get('addressCountry')
            if isinstance(raw_country, str):
                country = raw_country

    if not city and state:
        # A region-level geo tag beats no geo tag at all.
        city = state
    elif not city and not state:
        logging.debug(f"Job ID {rec.get('@id', 'Unknown')}: Missing addressLocality and addressRegion.")
    return str(city), str(state), str(country)


# --- UPDATED FUNCTION ---
def get_employment_types_info(rec: dict) -> dict:
    """Normalize employmentType into display names, schema values, and a
    theming template key.

    Bug fix: normalization previously went through title_case(), whose
    str.capitalize() lowercases everything after the first letter — yielding
    "Full-time"/"Part-time" — so the "Full-Time"/"Part-Time" comparisons
    below could never match and every record was themed "FLEXIBLE". The
    canonicalizer now capitalizes each hyphen segment as well.
    """

    def _canonical(raw: str) -> str:
        # "FULL_TIME" / "full time" / "Part-Time" -> "Full-Time" / "Part-Time".
        norm = raw.replace('_', '-').strip().lower().replace(" time", "-time")
        return " ".join(
            "-".join(seg.capitalize() for seg in word.split('-'))
            for word in norm.split()
        )

    et_input = rec.get('employmentType')
    normalized_types = []
    if isinstance(et_input, list):
        normalized_types = [_canonical(t) for t in et_input if isinstance(t, str) and t.strip()]
    elif isinstance(et_input, str) and et_input.strip():
        normalized_types = [_canonical(et_input)]

    unique_types = sorted(set(normalized_types))

    # Pick which template family themes the posting; mixed or unknown
    # employment types stay on the generic "FLEXIBLE" family.
    template_key = "FLEXIBLE"
    if unique_types == ["Full-Time"]:
        template_key = "FULL_TIME"
    elif unique_types == ["Part-Time"]:
        template_key = "PART_TIME"

    chosen_for_title, chosen_for_description = "Flexible", "Flexible"
    if not unique_types:
        schema_list = ["OTHER"]
    else:
        is_full_time = "Full-Time" in unique_types
        is_part_time = "Part-Time" in unique_types
        if is_full_time and is_part_time:
            chosen_for_title, chosen_for_description = "Full/Part-Time", "Full-Time"
        elif len(unique_types) == 1:
            chosen_for_title, chosen_for_description = unique_types[0], unique_types[0]
        else:
            # Prioritize FT or PT if they exist, otherwise pick one
            chosen = "Full-Time" if is_full_time else ("Part-Time" if is_part_time else random.choice(unique_types))
            chosen_for_title, chosen_for_description = chosen, chosen

        schema_list = [t.upper().replace('-', '_') for t in unique_types]

    return {
        'all_available_display': unique_types or ["Flexible"],
        'title_display': chosen_for_title,
        'chosen_for_description': chosen_for_description,
        'schema_list': schema_list,
        'template_key': template_key
    }
# --- END UPDATED FUNCTION ---


def get_industries_info(rec: dict) -> dict:
    """Collect unique, display-ready industry names from rec['industry'] (str or list)."""

    def _display(raw: str) -> str:
        # Same transform as the module-level title_case(), applied after '&' -> 'and'.
        cleaned = raw.replace('&', 'and').strip()
        return " ".join(
            w if (w.isupper() and len(w) > 1) else w.capitalize()
            for w in cleaned.split()
        )

    source = rec.get('industry')
    names = []
    if isinstance(source, list):
        names = [_display(item) for item in source if isinstance(item, str) and item.strip()]
    elif isinstance(source, str) and source.strip():
        names = [_display(source)]

    unique_industries = sorted(set(names))
    if not unique_industries:
        return {'display_list': [], 'title_display': "", 'schema_list': []}
    return {
        'display_list': unique_industries,
        'title_display': random.choice(unique_industries),
        'schema_list': unique_industries
    }


def get_experience_level_info(rec: dict) -> dict:
    """Classify required experience as Entry/Mid/Senior.

    Uses `monthsOfExperience` when parsable; otherwise keyword-matches the
    description text. `experienceRequirements` may be a schema.org dict or —
    new — a plain descriptive string (previously strings were silently
    ignored). Also fixed: an explicit null `description` no longer crashes
    on `.lower()`.

    Returns {"title_tag": "Entry"|"Mid"|"Senior"|"", "description_tag": full
    level name or "", "months": parsed months or None}.
    """
    exp_req = rec.get("experienceRequirements", {})
    # Schema.org also allows a bare text value; treat it as a description.
    if isinstance(exp_req, str):
        exp_req = {"description": exp_req}

    months_exp = None
    level_tag = ""
    description_tag = ""
    if isinstance(exp_req, dict):
        months_str = exp_req.get("monthsOfExperience")
        # `or ""` guards against an explicit null (or non-string) description.
        desc_str = str(exp_req.get("description") or "").lower()
        if months_str is not None:
            try:
                months_exp = int(months_str)
            except (ValueError, TypeError):
                pass
        if months_exp is not None:
            # Month count wins over any keyword hints.
            for level, (min_m, max_m) in EXPERIENCE_LEVEL_MONTHS.items():
                if min_m <= months_exp <= max_m:
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        else:
            for level, keywords in EXPERIENCE_LEVEL_KEYWORDS.items():
                if any(kw in desc_str for kw in keywords):
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        # Last resort: generic entry-level synonyms in the description.
        if not level_tag and any(syn.lower() in desc_str for syn in ENTRY_SYNS):
            description_tag, level_tag = "Entry-Level", "Entry"
    return {"title_tag": level_tag, "description_tag": description_tag, "months": months_exp}


def get_job_urgency_tags(date_posted_str: str, valid_through_str: str, rec_id: str) -> dict:
    """Derive urgency tags from a posting's publish/expiry dates.

    Returns {"title_tag": str, "description_key": "new" | "closing" | None}.
    When both windows apply, the closing-soon tag (evaluated second) wins.
    """
    urgency = {"title_tag": "", "description_key": None}
    now_utc = datetime.now(timezone.utc)

    def _as_utc(ts: str):
        return datetime.fromisoformat(ts.replace('Z', '+00:00')).astimezone(timezone.utc)

    if date_posted_str:
        try:
            posted_dt = _as_utc(date_posted_str)
            if (now_utc - posted_dt).days <= RECENT_POST_DAYS:
                urgency.update({"title_tag": random.choice(URGENCY_TAGS_NEW_TITLE), "description_key": "new"})
        except ValueError as e:
            logging.debug(f"JID {rec_id}: Err parsing datePosted '{date_posted_str}': {e}")

    if valid_through_str:
        try:
            valid_dt = _as_utc(valid_through_str)
            if timedelta(days=0) <= (valid_dt - now_utc) <= timedelta(days=CLOSING_SOON_DAYS):
                urgency.update({"title_tag": random.choice(URGENCY_TAGS_CLOSING_TITLE), "description_key": "closing"})
        except ValueError as e:
            logging.debug(f"JID {rec_id}: Err parsing validThrough '{valid_through_str}': {e}")

    return urgency


def to_k_notation(num_val: float, currency_symbol: str) -> str:
    """Render a salary figure compactly: 50000 -> "$50k", 1500 -> "$1.5k", 950 -> "$950"."""
    if abs(num_val) < 1000:
        return f"{currency_symbol}{int(num_val)}"
    formatted = f"{currency_symbol}{num_val / 1000.0:.1f}k"
    # Drop a redundant ".0" (e.g. "$50.0k" -> "$50k").
    return formatted.replace(".0k", "k")


def format_salary_details(rec: dict, currency_symbol: str = "$", enable_dynamic_adjustment: bool = False,
                          chosen_emp_type: str = "Full-Time", full_time_hours: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                          part_time_hours: int = DEFAULT_PART_TIME_HOURS_PER_WEEK) -> dict:
    """Parse a record's baseSalary and format it for display, with unit conversions.

    Args:
        rec: JobPosting record; reads rec['baseSalary'] and its nested 'value'.
        currency_symbol: Symbol prefixed to each formatted amount.
        enable_dynamic_adjustment: When True, min/max are multiplied by a factor
            drawn from WEIGHTED_SALARY_ADJUSTMENT_FACTORS (swapped back if the
            adjustment inverts the range).
        chosen_emp_type: Only used to pick hours/week for HOUR conversions
            (part-time hours when the string contains "part-time").
        full_time_hours: Hours/week used to annualize full-time hourly pay.
        part_time_hours: Hours/week used to annualize part-time hourly pay.

    Returns:
        dict with keys primary_display, is_negotiable, conversions,
        adjusted_factor, and — when numeric values exist — primary_raw_min,
        primary_raw_max, primary_unit_normalized, converted_raw.
    """
    # FIX: validate container types *before* calling .get() on them.
    # Previously `base.get('value', {})` executed ahead of the isinstance
    # check, so a non-dict baseSalary (e.g. a plain string) raised
    # AttributeError instead of being treated as missing salary data.
    base = rec.get('baseSalary', {})
    if not isinstance(base, dict):
        base = {}
    val_obj = base.get('value', {})
    if not isinstance(val_obj, dict):
        val_obj = {}

    # Values may live either on the nested QuantitativeValue or on the
    # MonetaryAmount itself; prefer the nested form.
    minv_raw = val_obj.get('minValue', base.get('minValue'))
    maxv_raw = val_obj.get('maxValue', base.get('maxValue'))
    unit_raw = str(val_obj.get('unitText', base.get('unitText', ''))).upper()
    # Unknown units fall back to "PROJECT", which is excluded from conversions.
    primary_unit_normalized = NORMALIZED_SALARY_UNITS.get(unit_raw, "PROJECT")

    def parse_salary_value(s_val):
        # Returns a float, the sentinel string "Negotiable", or None (unparseable).
        if s_val is None:
            return None
        if isinstance(s_val, (int, float)):
            return float(s_val)
        if isinstance(s_val, str):
            s_val_cleaned = str(s_val).replace(currency_symbol, '').replace(',', '').strip()
            if "negotiable" in s_val_cleaned.lower() or not s_val_cleaned:
                return "Negotiable"
            try:
                return float(s_val_cleaned)
            except ValueError:
                return None
        return None

    min_val_num = parse_salary_value(minv_raw)
    max_val_num = parse_salary_value(maxv_raw)

    if min_val_num == "Negotiable" or max_val_num == "Negotiable":
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}, "adjusted_factor": 1.0}
    if min_val_num is None and max_val_num is None:
        return {"primary_display": "", "is_negotiable": False, "conversions": {}, "adjusted_factor": 1.0}

    adjustment_factor = 1.0
    if enable_dynamic_adjustment:
        adjustment_factor = random.choice(WEIGHTED_SALARY_ADJUSTMENT_FACTORS)
        if isinstance(min_val_num, (int, float)):
            min_val_num *= adjustment_factor
        if isinstance(max_val_num, (int, float)):
            max_val_num *= adjustment_factor
        # Keep min <= max even if the adjustment inverted the range.
        if isinstance(min_val_num, (int, float)) and isinstance(max_val_num, (int, float)) and min_val_num > max_val_num:
            min_val_num, max_val_num = max_val_num, min_val_num

    # Midpoint (or the single known bound) drives the unit conversions below.
    primary_value_for_conversion, primary_display_val = None, "Error"
    if min_val_num is not None and max_val_num is not None:
        primary_value_for_conversion = (min_val_num + max_val_num) / 2.0
        primary_display_val = f"{to_k_notation(min_val_num, currency_symbol)}-{to_k_notation(max_val_num, currency_symbol)}"
    elif max_val_num is not None:
        primary_value_for_conversion, primary_display_val = max_val_num, f"Up to {to_k_notation(max_val_num, currency_symbol)}"
    elif min_val_num is not None:
        primary_value_for_conversion, primary_display_val = min_val_num, to_k_notation(min_val_num, currency_symbol)
    else:
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}, "adjusted_factor": adjustment_factor}

    primary_unit_display = SALARY_UNIT_MAP.get(primary_unit_normalized, "")
    primary_salary_str = f"{primary_display_val}{'/' + primary_unit_display if primary_unit_display and primary_unit_display != 'Project' else ''}"

    conversions, converted_values_num = {}, {}
    hours_per_week = part_time_hours if "part-time" in chosen_emp_type.lower() else full_time_hours

    if primary_value_for_conversion is not None and primary_unit_normalized != "PROJECT":
        # Annualize first, then derive the other units from the annual figure.
        annual_equiv = None
        if primary_unit_normalized == "HOUR":
            annual_equiv = primary_value_for_conversion * hours_per_week * WEEKS_PER_YEAR
        elif primary_unit_normalized == "WEEK":
            annual_equiv = primary_value_for_conversion * WEEKS_PER_YEAR
        elif primary_unit_normalized == "MONTH":
            annual_equiv = primary_value_for_conversion * MONTHS_PER_YEAR
        elif primary_unit_normalized == "YEAR":
            annual_equiv = primary_value_for_conversion

        if annual_equiv is not None:
            converted_values_num.update({
                "YEAR": annual_equiv,
                "MONTH": annual_equiv / MONTHS_PER_YEAR,
                "WEEK": annual_equiv / WEEKS_PER_YEAR
            })
            if hours_per_week > 0:
                converted_values_num["HOUR"] = (annual_equiv / WEEKS_PER_YEAR) / hours_per_week
            for unit, val in converted_values_num.items():
                if unit != primary_unit_normalized:
                    conversions[unit] = f"{to_k_notation(val, currency_symbol)}/{SALARY_UNIT_MAP.get(unit, '')}"

    return {
        "primary_display": primary_salary_str,
        "primary_raw_min": min_val_num,
        "primary_raw_max": max_val_num,
        "primary_unit_normalized": primary_unit_normalized,
        "is_negotiable": False,
        "conversions": conversions,
        "converted_raw": converted_values_num,
        "adjusted_factor": adjustment_factor
    }

# ==========================
# Content assembly & enrichment
# (Refactored for Geo-Focus)
# ==========================

def clean_role_and_company(original_title: str, org_name_from_ho: str) -> tuple[str, str]:
    """Split a raw posting title into a (role, company) pair.

    Gender tags like "(m/f/d)" and a trailing "job(s)" word are stripped from
    the title. If no organization name is supplied, a company is inferred from
    a "role at/for/with Company" pattern (company must be 2-5 words). Any
    occurrence of the company name inside the role is removed. Falls back to
    "A Leading Local Company" / "Associate" when either part ends up empty.
    """
    company = str(org_name_from_ho or "").strip()
    role = re.sub(r'\s*\(.*?[mfvdix].*?\)\s*', '', str(original_title), flags=re.IGNORECASE).strip()
    role = re.sub(r"\s+jobs?\b", "", role, flags=re.IGNORECASE).strip()
    final_role = role

    if not company:
        for prep in ("at", "for", "with"):
            m = re.search(rf"^(.*?)\s+{re.escape(prep)}\s+([\w\s.,'&()-]+)$", role, flags=re.IGNORECASE)
            if m and 2 <= len(m.group(2).split()) <= 5:
                final_role = m.group(1).strip()
                company = m.group(2).strip()
                break

    if company:
        # Drop a duplicated company mention inside the role text.
        final_role = re.sub(rf"\s*\b{re.escape(company)}\b", "", final_role, flags=re.IGNORECASE).strip(" -|,")

    company = company or "A Leading Local Company"
    final_role = final_role or "Associate"
    return final_role.strip(), company.strip()


def geo_context_enrichment(html_block: str, role: str, company: str, primary_skill: str, city: str, state: str, industry_display: str) -> str:
    """Minimal enrichment layer to add location-specific context.

    Applies a controlled synonym-substitution pass over the HTML, then appends
    three geo-context <li> "stitches" to the first <ul> (or a new <ul> if none
    exists). Returns the inner HTML of the enriched document.

    NOTE(review): `role` and `state` beyond the stitches are accepted for
    interface stability; the substitutions are regex-based over raw markup,
    so a synonym could in principle match inside tag attributes.
    """
    soup = BeautifulSoup(html_block or "", 'html.parser')
    text = soup.decode_contents() if html_block else ""

    # Controlled synonym map
    synonyms = {
        r"\bcompany\b": "organization",
        r"\bemployees\b": "team members",
        r"\bsalary\b": "compensation",
        r"\bjob\b": "role",
        r"\bexperience\b": "background",
        r"\bbenefits\b": "perks"
    }
    for pat, repl in synonyms.items():
        text = re.sub(pat, repl, text, flags=re.IGNORECASE)

    # FIX: re-parse the substituted markup. Previously `text` was computed
    # but never used — the stitches were appended to the original `soup`,
    # so the synonym pass had no effect on the returned HTML.
    soup = BeautifulSoup(text, 'html.parser')

    # NEW: Geo-Context stitches
    stitches = [
        f"<li>This <strong>{city}</strong>-based role is an excellent opportunity for professionals skilled in <strong>{primary_skill or 'relevant skills'}</strong>.</li>",
        f"<li>Our <strong>{company}</strong> team in {city}, {state} is growing.</li>",
        f"<li>Benefit from working in <strong>{city}</strong>, a key hub for the <strong>{industry_display}</strong> industry.</li>"
    ]

    # Insert stitches under first UL; else create a fresh list
    ul = soup.find('ul')
    if ul:
        for item in stitches:
            ul.append(BeautifulSoup(item, 'html.parser'))
    else:
        ul_new = BeautifulSoup("<ul></ul>", 'html.parser')
        for item in stitches:
            ul_new.ul.append(BeautifulSoup(item, 'html.parser'))
        soup.append(ul_new)

    return soup.decode_contents()


# --- UPDATED FUNCTION ---
def create_geo_targeted_summary(rec: dict, primary_skill: str, salary_details: dict, job_urgency: dict, exp_level_info: dict, industries_info: dict, emp_types_info: dict) -> str:
    """
    NEW: Builds an SEO summary with a strong location-first emphasis.
    NOW: Uses Job-Type Themed templates.

    Assembles an HTML block (header + bullet lists or paragraphs, chosen from
    three random layouts), fills it from the tone-template bank matching the
    job's employment-type key, then runs it through geo_context_enrichment.
    Output is RNG-dependent; callers seed `random` per-record for stability.

    Returns:
        str: inner HTML of the assembled summary (no surrounding wrapper tag).
    """
    role_for_summary, company_for_summary = clean_role_and_company(rec.get('title',''), rec.get('hiringOrganization',{}).get('name',''))
    city, state, _ = get_location_details(rec)

    # --- NEW: Select template bank based on job type ---
    # Falls back to the 'FLEXIBLE' bank when the key is missing or unknown.
    emp_type_key = emp_types_info.get('template_key', 'FLEXIBLE')
    tone_bank = GEO_TONE_TEMPLATES.get(emp_type_key, GEO_TONE_TEMPLATES['FLEXIBLE'])
    active = tone_bank['informational']
    # --- END NEW ---

    salary_primary_display = salary_details.get("primary_display", "Negotiable") if salary_details else "Negotiable"
    converted_salary_suffix = ""
    if salary_details and salary_details.get("conversions"):
        # Prefer the annual conversion; otherwise pick any conversion at random.
        best_conv = salary_details["conversions"].get("YEAR") or random.choice(list(salary_details["conversions"].values()))
        converted_salary_suffix = f" (approx. {best_conv})"

    # Substitution values shared by all phrase templates below.
    data = {
        "role_for_summary": title_case(role_for_summary),
        "company_for_summary": company_for_summary,
        "city": city,
        "state": state,
        "local_area_info": random.choice(LOCAL_AREA_DESCRIPTORS).format(city=city),
        "salary_primary_display": salary_primary_display,
        "converted_salary_suffix": converted_salary_suffix,
        "primary_skill": primary_skill or "your professional skills",
        "industry_display": (industries_info['display_list'][0] if industries_info and industries_info.get('display_list') else "a dynamic")
    }

    def phrase(key):
        # Pick one template from the active bank for `key` and fill it in.
        return random.choice(active[key]).format(**data)

    job_details = [phrase('intro'), phrase('company'), phrase('location')]

    # description_key is "new" or "closing" -> template keys 'urgency_new'/'urgency_closing'.
    urgency_key = job_urgency.get("description_key")
    if urgency_key:
        job_details.append(phrase(f'urgency_{urgency_key}'))

    exp_level = exp_level_info.get("description_tag")
    if exp_level == "Entry-Level":
        job_details.append(phrase('experience_entry'))
    elif exp_level == "Mid-Level":
        job_details.append(phrase('experience_mid'))
    elif exp_level == "Senior-Level":
        job_details.append(phrase('experience_senior'))

    comp_benefits = [phrase('salary'), f"<strong>Benefits:</strong> {random.choice(active['benefits'])}"]
    value_props = [f"<em>{random.choice(active['value_props']).format(city=city)}</em>"]
    role_focus = [random.choice(active['focus']).format(**data)]

    summary_html = f"<h3>{random.choice(GEO_HEADER_TEMPLATES).format(city=city, state=state)}</h3>"
    # Three layouts: (1) three themed <ul> sections, (2) one shuffled <ul>,
    # (3) paragraph form with a small tertiary header.
    structure_choice = random.randint(1, 3)
    if structure_choice == 1:
        random.shuffle(job_details)
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['primary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in job_details) +
            "</ul>"
        )
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['secondary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in (comp_benefits + value_props)) +
            "</ul>"
        )
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['tertiary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in role_focus) +
            "</ul>"
        )
    elif structure_choice == 2:
        all_parts = job_details + comp_benefits + value_props + role_focus
        random.shuffle(all_parts)
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['primary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in all_parts) +
            "</ul>"
        )
    else:
        summary_html += f"<p>{' '.join(job_details)}</p>"
        summary_html += f"<p>{' '.join(comp_benefits + value_props)}</p>"
        summary_html += f"<h5>{random.choice(SUB_HEADER_TEMPLATES['tertiary'])}</h5><p>{role_focus[0]}</p>"

    # Enrich with stitched context and synonyms
    summary_html = geo_context_enrichment(
        summary_html,
        role=title_case(role_for_summary),
        company=company_for_summary,
        primary_skill=primary_skill,
        city=city,
        state=state,
        industry_display=data["industry_display"]
    )

    # Round-trip through the parser normalizes the markup before returning.
    return BeautifulSoup(summary_html, 'html.parser').decode_contents().strip()
# --- END UPDATED FUNCTION ---

# ==========================
# Title building & SEO post-processing
# (Refactored for Geo-Focus)
# ==========================

def enforce_length(title: str, max_len: int) -> str:
    """Trim a title to at most max_len characters, preferring graceful cuts.

    Strategy, in order: keep as-is if it fits; drop a trailing parenthesized
    segment; drop whole words from the right; as a last resort hard-truncate
    a single unbroken token and append "...".
    """
    if len(title) <= max_len:
        return title.strip()
    without_trailing_parens = re.sub(r'\s*\([^)]*\)\s*$', '', title).strip()
    if len(without_trailing_parens) <= max_len:
        return without_trailing_parens
    while len(title) > max_len:
        head, sep, _tail = title.rpartition(' ')
        if not sep:
            # No spaces left to split on: hard truncation with ellipsis.
            return title[:max_len-3].strip() + "..."
        title = head
    return title.strip(" -|,( ")


def enrich_title_for_seo(title: str) -> str:
    """Append one missing, high-intent keyword if space allows; avoid stuffing.

    At most one keyword from JOB_SEO_KEYWORDS is appended, and only when the
    current title is 60 characters or fewer (leaving headroom in SERPs).
    """
    lowered = title.lower()
    if len(title) <= 60:  # keep margin
        for kw in JOB_SEO_KEYWORDS:
            if kw.lower() not in lowered:
                return f"{title} | {kw.title()}"
    return title


# --- UPDATED FUNCTION ---
def generate_location_focused_title(rec: dict, primary_skill: str, salary_details: dict, job_urgency:dict,
                                    exp_level_info:dict, emp_types_info:dict, industries_info:dict,
                                    dynamic_max_len:int) -> str:
    """
    NEW: Generates SEO title using the GEO_FOCUSED_TITLE_TEMPLATES list.
    NOW: Uses Job-Type Themed, *CONCISE* templates.

    Picks a template from the bank matching the job-type key, retries up to
    10 times to find one whose placeholders are all backed by data, formats
    it, and scrubs leftover separators before capping at dynamic_max_len.
    Output is RNG-dependent; callers seed `random` per-record.
    """
    ho_name = rec.get('hiringOrganization', {}).get('name', '')
    cleaned_role, company_name = clean_role_and_company(rec.get('title', rec.get('name','')), ho_name)
    city, state, _ = get_location_details(rec)

    # Salary, company, and industry are no longer used in title templates
    # to keep them short, but we'll leave the code here.
    salary_fmt = ""
    if salary_details and salary_details.get("primary_display") and not salary_details.get("is_negotiable"):
        salary_fmt = salary_details["primary_display"]
        prim_unit = salary_details["primary_unit_normalized"]
        conv = salary_details.get("conversions", {})
        # Hourly salaries with a known annual conversion display as "$X/hr".
        if prim_unit == "HOUR" and "YEAR" in conv:
            salary_fmt = f"{salary_details['primary_display'].split('/')[0]}/hr"

    # Full substitution map; templates may reference any subset of these keys.
    parts = {
        "role": title_case(cleaned_role),
        "company": title_case(company_name), # No longer in default templates
        "city": title_case(city),
        "state": state.upper(),
        "job_type": emp_types_info.get('title_display', ""), # e.g., "Full-Time" or "Full/Part-Time"
        "hiring": random.choice(HIRING_SYNS),
        "urgency_tag": job_urgency.get('title_tag', ''),
        "experience_tag": exp_level_info.get('title_tag', ''),
        "salary_compact": salary_fmt if "Negotiable" not in salary_fmt else "", # No longer in default templates
        "skill1": title_case(primary_skill),
        "industry": industries_info.get('title_display', "") # No longer in default templates
    }

    # --- NEW: Select template bank based on job type ---
    emp_type_key = emp_types_info.get('template_key', 'FLEXIBLE')
    template_list = GEO_FOCUSED_TITLE_TEMPLATES.get(emp_type_key, GEO_FOCUSED_TITLE_TEMPLATES['FLEXIBLE'])
    tmpl = random.choice(template_list)
    # --- END NEW ---
    
    # Filter out empty parts to avoid "Role in ()"
    parts_filled = {k: v for k, v in parts.items() if v}
    
    # Simple check to find a template that fits the available data
    # This is a basic way to avoid templates with many missing keys
    # Loop condition: retry while tmpl references any placeholder whose value
    # is empty. After 5 attempts, accept a template that contains at least one
    # randomly probed known placeholder (best-effort, not a guarantee).
    attempts = 0
    while any(f"{{{k}}}" in tmpl for k in parts.keys() if k not in parts_filled) and attempts < 10:
        # --- NEW: Get template from the correct list ---
        tmpl = random.choice(template_list)
        # --- END NEW ---
        attempts += 1
        if attempts > 5: # Be less strict if we cant find a perfect match
             if "{" + random.choice(list(parts.keys())) + "}" in tmpl:
                 break # Just pick one

    # Missing data formats as "" and the scrubbing below removes the debris
    # (empty parens/brackets, doubled dashes/bullets, stray separators).
    title = tmpl.format(**parts)
    title = re.sub(r'\s{2,}', ' ', title).strip()
    title = re.sub(r'\s*([-|(),:•—])\s*', r'\1', title)
    title = title.replace('()', '').replace('[]', '').strip(" -|,: •—")
    title = re.sub(r'\s*-\s*-\s*', '-', title) # Clean up double dashes
    title = re.sub(r'\s*•\s*•\s*', '•', title) # Clean up double bullets
    title = title.strip(" -|,: •—")

    # Note: enrich_title_for_seo is removed to keep titles shorter
    # title = enrich_title_for_seo(title) 
    return enforce_length(title, dynamic_max_len)
# --- END UPDATED FUNCTION ---


def generate_slight_title_improvement(original_title: str, city: str, state: str, urgency_tag: str, max_len: int) -> str:
    """
    Applies a minor, additive improvement to the original job title,
    as requested for the --slightly-improve-title mode.
    Uses a dynamic, *prioritized* template list for more intelligent variety.
    (This function is NOT themed by job type, as it's a minimal-change operation)

    Args:
        original_title: Existing title, used verbatim as the {title} part.
        city: City name to append if not already present in the title.
        state: State/region; its upper-cased form is used as {state}.
        urgency_tag: Optional urgency phrase (may be empty).
        max_len: Hard cap for the result (enforced via enforce_length).

    Returns:
        str: the augmented title, or the cleaned original when no template
        adds new information within max_len.
    """
    title = original_title.strip()
    
    # 1. More gentle cleanup: just remove trailing separators
    cleaned_title = title.strip(" -|,: •—")
    
    title_lower = cleaned_title.lower()
    
    # 2. Check what information is *already* present
    has_city = city and city.lower() in title_lower
    # Check for state abbreviation, avoiding spaces (e.g., "NY" in "JobNY")
    # (heuristic: strips everything but uppercase letters, then substring-tests)
    has_state = state and state.upper() in re.sub(r'[^A-Z]', '', title)
    has_urgency = urgency_tag and urgency_tag.lower() in title_lower
    
    # 3. Define prioritized template buckets
    # {title} is the cleaned original title
    template_buckets = {
        'urgent_geo': [
            "{urgency_tag}: {title} ({city})",
            "{title} - {city}, {state} ({urgency_tag})",
            "{title} ({city}) - {urgency_tag}",
        ],
        'geo': [
            "{title} - {city}, {state}",
            "{title} | {city}, {state}",
            "{title} in {city}",
            "{city} Opening: {title}",
            "{title} ({city})",
        ],
        'urgent': [
            "{title} ({urgency_tag})",
            "{title} - {urgency_tag}",
            "{urgency_tag}: {title}",
            "{title} | {urgency_tag}",
        ],
        'state_only': [
             "{title} ({state} Opening)",
             "{title} - {state}",
        ]
    }

    parts = {
        "title": cleaned_title,
        "city": title_case(city),
        "state": state.upper(),
        "urgency_tag": urgency_tag,
    }

    # 4. Define which buckets to try, in order of priority
    priority_order = []
    
    # Build priority list based on *new* information we can add
    can_add_geo = city and not has_city
    can_add_urgency = urgency_tag and not has_urgency
    # Only use state-only if city isn't available/used
    can_add_state = state and not has_state and not can_add_geo 
    
    if can_add_urgency and can_add_geo:
        priority_order.append('urgent_geo')
    
    if can_add_geo:
        priority_order.append('geo')
        
    if can_add_urgency:
        priority_order.append('urgent')
        
    if can_add_state:
        priority_order.append('state_only')

    # 5. Iterate through prioritized buckets
    for bucket_name in priority_order:
        templates = template_buckets[bucket_name]
        random.shuffle(templates) # Shuffle templates *within* the priority bucket
        
        for tmpl in templates:
            # Check if all placeholders for *this* template are available
            placeholders = re.findall(r'\{([^{}]+)\}', tmpl)
            if not all(parts.get(p) for p in placeholders if p != 'title'): # 'title' is always present
                continue # Skip template if data is missing (e.g., missing {state} for a {city}, {state} template)
            
            new_title = tmpl.format(**parts)
            
            # Clean up potential formatting issues
            new_title = re.sub(r'\s{2,}', ' ', new_title).strip()
            new_title = re.sub(r'\s*([-|(),:•—])\s*', r'\1', new_title)
            new_title = new_title.replace('()', '').replace('[]', '').strip(" -|,: •—")
            
            # First template that fits wins; enforce_length is a final safety net.
            if len(new_title) <= max_len:
                return enforce_length(new_title, max_len) # Found a good one that fits
    
    # 6. If no template was applied, return the cleaned original title
    return enforce_length(cleaned_title, max_len)


# ==========================
# Description assembler
# (Refactored for Geo-Focus)
# ==========================

# --- UPDATED FUNCTION ---
def assemble_location_focused_description(html_str: str, rec: dict, primary_skill: str = "", salary_details:dict = None,
                                          job_urgency:dict=None, exp_level_info:dict=None, industries_info:dict=None,
                                          emp_types_info:dict=None) -> str: # <-- NEW ARG
    """
    NEW: Assembles description using the geo-targeted summary function.
    NOW: Passes emp_types_info to the summary function for theming.

    The geo summary is prepended to the existing description (separated by
    <hr><br>), or replaces it entirely when the original is empty or the
    "no description provided" placeholder. A contact CTA paragraph is
    appended when CONTACT_CTA is non-empty.
    """
    existing_html = html_str or ""
    stripped = existing_html.strip()
    # Treat the known placeholder text the same as an empty description.
    original_was_empty = not stripped or stripped.lower() in ["<p>no description provided.</p>"]

    # Use the new geo-targeted summary function
    seo_summary_html = create_geo_targeted_summary(
        rec, primary_skill, salary_details, 
        job_urgency, exp_level_info, industries_info,
        emp_types_info # <-- Pass arg
    )

    if original_was_empty:
        final_description = seo_summary_html
    else:
        final_description = seo_summary_html + "<hr><br>" + existing_html

    if CONTACT_CTA:
        final_description += f"<p>{CONTACT_CTA}</p>"
    return final_description
# --- END UPDATED FUNCTION ---


# ==========================
# Hiring org normalization
# (No changes in this section)
# ==========================

def normalize_hiring_org(org: dict, logo_cdn: str) -> dict:
    """Normalize a hiringOrganization dict in place.

    Ensures 'sameAs' carries an https:// scheme and that 'logo' is a valid
    http(s) URL, substituting logo_cdn otherwise. Non-dict input yields {}.
    """
    if not isinstance(org, dict):
        return {}
    same_as = org.get('sameAs', '')
    if isinstance(same_as, str) and same_as and not same_as.startswith(('http://', 'https://')):
        org['sameAs'] = 'https://' + same_as.lstrip('/')
    logo = org.get('logo')
    logo_is_valid = isinstance(logo, str) and logo.startswith(('http://', 'https://'))
    if not logo_is_valid:
        org['logo'] = logo_cdn
    return org


# ==========================
# Core pipeline
# (Refactored for Geo-Focus)
# ==========================
ARGS = None  # Parsed CLI namespace; populated by main() before the pipeline runs.

def rewrite_geotargeted_job_records(in_path: str, out_path: str, seed: int=None, logo_cdn: str = DESIRED_DEFAULT_FALLBACK_LOGO_URL,
                                    default_currency_arg: str = DEFAULT_CURRENCY, enable_salary_adj_arg: bool = False,
                                    full_time_hours_arg: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                                    part_time_hours_arg: int = DEFAULT_PART_TIME_HOURS_PER_WEEK,
                                    no_change_in_title_arg: bool = False,
                                    slightly_improve_title_arg: bool = False):
    """Read JobPosting NDJSON from in_path, optimize each record, write NDJSON to out_path.

    Modes:
      - slightly_improve_title_arg: ONLY the title is augmented (geo/urgency);
        all other fields are written back unchanged.
      - no_change_in_title_arg: full optimization, but the original title is kept.
      - default: geo-targeted description summary, themed title, plus salary,
        employment-type, industry, URL, and hiringOrganization normalization.

    Each record re-seeds `random` with hash(jid + seed), so output is
    reproducible per record for a fixed seed. Bad JSON lines are skipped with
    a warning; file-level errors are logged and the function returns.
    """
    logging.info(f"Optimizing Geo-Targeted Jobs: {in_path} -> {out_path} with seed {seed}")
    if no_change_in_title_arg:
        logging.info("Running with --no-change-in-title. Original titles will be preserved.")
    if slightly_improve_title_arg:
        logging.info("Running with --slightly-improve-title. ONLY titles will be modified; all other fields preserved.")

    # Adaptive stats (Contextual Behavior Simulation)
    stats = {"total": 0, "title_len_sum": 0, "desc_len_sum": 0}

    try:
        # First pass just counts non-blank lines so tqdm can show progress.
        with open(in_path, 'r', encoding='utf-8') as fin_check:
            num_lines = sum(1 for line in fin_check if line.strip())
        if num_lines == 0:
            logging.warning(f"Input file '{in_path}' is empty.")
            open(out_path, 'w').close()
            return

        with open(in_path, 'r', encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
            for line in tqdm(fin, total=num_lines, desc="Processing Geo-Targeted Jobs"):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError as e:
                    logging.warning(f"Bad JSON: {e}. Line: {line[:70]}...")
                    continue
                if not isinstance(rec, dict):
                    logging.warning("Line is not a JSON object, skipping.")
                    continue

                # Stable per-record identity: @id, url, or a hash of the raw line.
                jid = rec.get('@id') or rec.get('url') or hashlib.sha256(line.encode()).hexdigest()
                random.seed(hash(str(jid) + str(seed)))

                # --- NEW LOGIC BRANCH for --slightly-improve-title ---
                if slightly_improve_title_arg:
                    original_title = rec.get('title', rec.get('name', ''))
                    if not original_title:
                        fout.write(line) # Write original line if no title
                        continue

                    # Get JUST enough info for the slight improvement
                    city, state, _ = get_location_details(rec)
                    job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)
                    
                    # Calculate dynamic max length (copied from existing logic)
                    dynamic_max_len = MAX_TITLE_LEN_BASE
                    if stats["total"] >= max(10, num_lines // 2):
                        avg_title_len = (stats["title_len_sum"] / max(stats["total"], 1))
                        if avg_title_len < 52: dynamic_max_len = min(84, MAX_TITLE_LEN_BASE + 10)
                        elif avg_title_len > 72: dynamic_max_len = max(60, MAX_TITLE_LEN_BASE - 5)

                    # Generate the new title using the new "slight improvement" function
                    new_title = generate_slight_title_improvement(
                        original_title, city, state, 
                        job_urgency.get('title_tag', ''), 
                        dynamic_max_len
                    )
                    
                    rec['title'] = new_title # Update the title
                    
                    # Stats tracking (minimal)
                    stats["total"] += 1
                    stats["title_len_sum"] += len(new_title)
                    # DO NOT track description length or other changes

                    fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")
                    continue # IMPORTANT: Skip all other processing
                # --- END OF NEW LOGIC BRANCH ---


                # --- FULL PROCESSING LOGIC (only runs if --slightly-improve-title is OFF) ---

                # Extract & Normalize
                primary_skill = get_primary_skill(rec.get('skills',''))
                emp_types_info = get_employment_types_info(rec) # <-- Contains the new 'template_key'
                industries_info = get_industries_info(rec)
                exp_level_info = get_experience_level_info(rec)

                if rec.get('datePosted'):
                    rec['datePosted'] = to_dhaka_offset(rec['datePosted'])
                if rec.get('validThrough'):
                    rec['validThrough'] = to_midnight(rec['validThrough'])

                job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)
                # NOTE(review): assumes baseSalary is a dict here; a non-dict
                # value would raise AttributeError on .get — confirm upstream data.
                curr_code = rec.get('baseSalary', {}).get('currency', default_currency_arg) or default_currency_arg
                curr_symbol = get_currency_symbol(curr_code)
                salary_details = format_salary_details(
                    rec, curr_symbol, enable_salary_adj_arg,
                    emp_types_info['chosen_for_description'],
                    full_time_hours_arg, part_time_hours_arg
                )

                # --- UPDATED CALL ---
                # Description (prepend geo-targeted semantic SEO summary)
                rec['description'] = assemble_location_focused_description(
                    rec.get('description',''), rec, primary_skill,
                    salary_details, job_urgency, exp_level_info, industries_info,
                    emp_types_info # <-- Pass new arg for theming
                )
                # --- END UPDATED CALL ---

                # Title (with adaptive max length)
                dynamic_max_len = MAX_TITLE_LEN_BASE
                # Light adaptive tuning after half the dataset processed
                if stats["total"] >= max(10, num_lines // 2):
                    avg_title_len = (stats["title_len_sum"] / max(stats["total"], 1))
                    if avg_title_len < 52:
                        dynamic_max_len = min(84, MAX_TITLE_LEN_BASE + 10)
                    elif avg_title_len > 72:
                        dynamic_max_len = max(60, MAX_TITLE_LEN_BASE - 5)

                if no_change_in_title_arg:
                    logging.debug(f"JID {jid}: Keeping original title.")
                else:
                    # This call now automatically uses the themed templates
                    # because emp_types_info contains the 'template_key'
                    rec['title'] = generate_location_focused_title(
                        rec, primary_skill, salary_details, job_urgency,
                        exp_level_info, emp_types_info, industries_info,
                        dynamic_max_len
                    )

                # Employment & Industry for schema
                rec['employmentType'] = emp_types_info['schema_list'] or None
                rec['industry'] = industries_info['schema_list'] or None

                # URL normalization or generation
                if rec.get('url'):
                    rec['url'] = normalize_url(rec['url'])
                else:
                    # Canonical URL fallback from title
                    # NOTE(review): fallback domain is hard-coded; confirm this
                    # is the intended canonical host for generated URLs.
                    slug = re.sub(r'[^a-z0-9]+', '-', rec['title'].lower()).strip('-') if rec.get('title') else hashlib.sha1(jid.encode()).hexdigest()[:10]
                    rec['url'] = normalize_url(f"https://walmart.uscareers.co.com/jobs/{slug}/")

                # Hiring Organization
                ho = rec.get('hiringOrganization')
                if isinstance(ho, dict):
                    rec['hiringOrganization'] = normalize_hiring_org(ho, logo_cdn)
                elif isinstance(ho, str) and ho.strip():
                    rec['hiringOrganization'] = normalize_hiring_org({"@type":"Organization", "name":ho.strip()}, logo_cdn)
                else:
                    _, cname = clean_role_and_company(rec.get('title',''), None)
                    rec['hiringOrganization'] = normalize_hiring_org({"@type":"Organization", "name":cname}, logo_cdn)

                # Base salary cleanup
                bs = rec.get('baseSalary', {})
                if isinstance(bs, dict):
                    bs.setdefault('@type','MonetaryAmount')
                    bs['currency'] = curr_code
                    v = bs.get('value', {})
                    if not isinstance(v, dict):
                        v = {}
                    v.setdefault('@type','QuantitativeValue')
                    if salary_details.get("is_negotiable"):
                        v.update({'description': "Negotiable", 'minValue': None, 'maxValue': None, 'unitText': None})
                    elif salary_details.get("primary_raw_min") is not None or salary_details.get("primary_raw_max") is not None:
                        v.update({
                            'minValue': str(salary_details.get("primary_raw_min")),
                            'maxValue': str(salary_details.get("primary_raw_max")),
                            'unitText': salary_details.get("primary_unit_normalized", "PROJECT").upper(),
                        })
                    bs['value'] = v
                    rec['baseSalary'] = bs

                # Remove null description field in baseSalary.value if present
                if rec.get('baseSalary', {}).get('value', {}).get('description') is None:
                    try:
                        del rec['baseSalary']['value']['description']
                    except KeyError:
                        pass

                # Schema baseline
                rec.update({'@context':'http://schema.org', '@type':'JobPosting'})

                # Clean empty fields (top-level None values only)
                for k in list(rec.keys()):
                    if rec[k] is None:
                        del rec[k]

                # Stats tracking
                stats["total"] += 1
                stats["title_len_sum"] += len(rec.get('title', ''))
                stats["desc_len_sum"] += len(BeautifulSoup(rec.get('description',''), 'html.parser').get_text())

                fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")

        # After run, log adaptive insight
        if stats["total"]:
            avg_t = stats["title_len_sum"] / stats["total"]
            avg_d = stats["desc_len_sum"] / stats["total"]
            logging.info(f"Adaptive tuning summary: avg title length = {avg_t:.1f}, avg description length = {avg_d:.1f} chars, n={stats['total']}")

    except FileNotFoundError:
        logging.error(f"Input file '{in_path}' not found.")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)
    logging.info(f"Processing complete. Output: {out_path}")


# ==========================
# Entrypoint
# ==========================

def main():
    """CLI entrypoint: parse arguments, configure logging, run the pipeline."""
    global ARGS
    ARGS = parse_args()
    logging.basicConfig(
        level=logging.DEBUG if ARGS.verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(filename)s:%(lineno)d - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    # Call the new main processing function
    rewrite_geotargeted_job_records(
        ARGS.input, ARGS.output, ARGS.seed, ARGS.logo_cdn, ARGS.currency,
        ARGS.enable_salary_adjustment, ARGS.full_time_hours, ARGS.part_time_hours,
        ARGS.no_change_in_title,
        ARGS.slightly_improve_title,
    )

# Only run the pipeline when executed as a script, not on import.
if __name__ == "__main__":
    main()

