#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Location-Centric JobPosting Optimizer
=====================================

This script is a new version dedicated to optimizing JobPosting NDJSON
with a primary focus on geo-targeting and location-based SEO.

What's new:
1) Geo-Focused Title Generation
   - A completely new set of title templates that prioritize {city}, {state},
     and location-based urgency.
   - Titles are built to attract local candidates (e.g., "City Job: Role",
     "Hiring in City: Role").

2) Geo-Targeted Description Summary
   - The prepended SEO summary is now location-first.
   - New templates introduce the role, company, and salary all within
     the context of the specific {city}.

3) Geo-Contextual Enrichment
   - The AI enrichment layer now adds sentences specifically linking the
     role, company, and industry to the local area.

NEW IN THIS VERSION:
- Added a `--slightly-improve-title` flag. This provides a "light-touch"
  mode that ONLY appends geo-information (city, state, urgency) to the
  existing title and leaves ALL other fields (description, salary, etc.)
  in the record unmodified.
  (This mode is now more dynamic, using a shuffled list of templates).

Usage
-----
Full Optimization:
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson

Title-Tweak Only Mode:
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson --slightly-improve-title

No Title Change (but full description/etc. optimization):
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson --no-change-in-title
"""

import argparse
import json
import random
import re
import logging
import locale
import hashlib
from tqdm import tqdm
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, timezone

# ==========================
# Locale initialization
# ==========================
try:
    # Prefer the user's environment locale for number formatting.
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    try:
        locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
    except locale.Error:
        try:
            locale.setlocale(locale.LC_ALL, "C.UTF-8")
        except locale.Error:
            # Fix: "C.UTF-8" is not present on every platform either; the
            # plain "C" locale is guaranteed to exist, so fall back to it
            # instead of crashing at import time.
            locale.setlocale(locale.LC_ALL, "C")
        logging.warning("Locale setting failed. Using 'C.UTF-8'. Number formatting might be basic.")

# ==========================
# Constants & Defaults
# ==========================
MAX_TITLE_LEN_BASE = 70  # Soft character budget for generated titles.
DESIRED_DEFAULT_FALLBACK_LOGO_URL = "https://walmart.uscareers.co.com/logo.png"  # Used when a record has no logo.
DEFAULT_CURRENCY = "USD"
CONTACT_CTA = ''  # Optional CTA footer

# Weekly-hour assumptions used when converting between salary units.
DEFAULT_FULL_TIME_HOURS_PER_WEEK = 40
DEFAULT_PART_TIME_HOURS_PER_WEEK = 23
WEEKS_PER_YEAR = 52
MONTHS_PER_YEAR = 12

# Multipliers applied when --enable_salary_adjustment is set; the weighted
# list repeats 1.0 three times so "no change" is the most likely draw.
SALARY_ADJUSTMENT_FACTORS = [1.0, 0.92, 0.91, 0.88, 0.85, 1.05, 1.08, 1.10, 1.12, 1.15]
WEIGHTED_SALARY_ADJUSTMENT_FACTORS = [1.0, 1.0, 1.0] + SALARY_ADJUSTMENT_FACTORS

# Raw unitText -> human-readable display form (e.g. "HOURLY" -> "Hour").
SALARY_UNIT_MAP = {
    'HOUR': 'Hour', 'HOURLY': 'Hour', 'DAY': 'Day', 'DAILY': 'Day',
    'WEEK': 'Week', 'WEEKLY': 'Week', 'MONTH': 'Month', 'MONTHLY': 'Month',
    'YEAR': 'Year', 'ANNUAL': 'Year', 'ANNUALLY': 'Year', 'PROJECT': 'Project'
}
# Raw unitText -> canonical unit used for annualization/conversion.
# NOTE(review): DAY/DAILY map to PROJECT (excluded from conversions) —
# confirm this is intentional rather than a missing "DAY" bucket.
NORMALIZED_SALARY_UNITS = {
    "HOUR": "HOUR", "HOURLY": "HOUR", "DAY": "PROJECT", "DAILY": "PROJECT",
    "WEEK": "WEEK", "WEEKLY": "WEEK", "MONTH": "MONTH", "MONTHLY": "MONTH",
    "YEAR": "YEAR", "ANNUAL": "YEAR", "ANNUALLY": "YEAR", "PROJECT": "PROJECT"
}

# Inclusive month ranges used to classify experienceRequirements.monthsOfExperience.
EXPERIENCE_LEVEL_MONTHS = {
    "Entry-Level": (0, 12),
    "Mid-Level": (13, 60),
    "Senior-Level": (61, float('inf'))
}
# Keyword fallback used when monthsOfExperience is absent or unparseable.
EXPERIENCE_LEVEL_KEYWORDS = {
    "Entry-Level": ["entry level", "no experience", "graduate", "junior", "trainee", "intern"],
    "Mid-Level": ["mid level", "intermediate", "associate", "experienced"],
    "Senior-Level": ["senior", "lead", "principal", "expert", "staff level", "manager"]
}

# Windows (in days) that trigger "new posting" / "closing soon" urgency tags.
RECENT_POST_DAYS = 2
CLOSING_SOON_DAYS = 7

# ==================================================
# NEW: Geo-Centric Templates
# ==================================================
# Header/sub-header phrase pools; one entry is drawn at random per record.
GEO_HEADER_TEMPLATES = [
    "Local Job Overview: {city}", "Position in {city}, {state}",
    "Opportunity in {city}", "Role Based in {city}", "{city} Role Highlights",
    "Your Next Step in {city}"
]
SUB_HEADER_TEMPLATES = {
    "primary": [
        "Key Information", "Fast Facts", "Role Essentials", "At a Glance",
        "Primary Details", "Snapshot"
    ],
    "secondary": [
        "Compensation & Benefits", "What You Get", "Pay & Perks", "Rewards",
        "Salary and Benefits"
    ],
    "tertiary": [
        "Core Focus", "What You'll Do", "Responsibilities", "Day-to-Day",
        "Position Scope"
    ]
}

# Fillers for the {local_area_info} placeholder; some embed {city} themselves
# and are resolved with str.format(city=...) before use.
LOCAL_AREA_DESCRIPTORS = [
    "a vibrant local hub", "a key business district", "our growing {city} office",
    "a central {city} location", "our main {city} branch"
]

# Sentence pools keyed by summary slot. Each entry is an HTML fragment with
# str.format() placeholders filled in by create_geo_targeted_summary().
GEO_TONE_TEMPLATES = {
    'informational': {
        'intro': [
            "<strong>Role in {city}:</strong> {role_for_summary}",
            "<strong>Position:</strong> {role_for_summary} (Based in {city})",
            "<strong>Job Title:</strong> {role_for_summary}"
        ],
        'company': [
            "<strong>Employer:</strong> {company_for_summary} (Hiring in {city})",
            "<strong>Hiring Organization:</strong> {company_for_summary}",
            "<strong>Company:</strong> {company_for_summary}"
        ],
        'location': [
            "<strong>Location:</strong> {city}, {state}",
            "<strong>Based in:</strong> {city}, {state} ({local_area_info})",
            "<strong>Worksite:</strong> {city}, {state}"
        ],
        'salary': [
            "<strong>Compensation:</strong> {salary_primary_display}{converted_salary_suffix}",
            "<strong>Pay:</strong> {salary_primary_display}{converted_salary_suffix}",
            "<strong>Salary:</strong> {salary_primary_display}{converted_salary_suffix}"
        ],
        'experience_entry': [
            "<strong>Experience:</strong> Entry-Level (Training in {city}).",
            "<strong>Required Experience:</strong> Entry-Level.",
            "<strong>Career Level:</strong> Junior / Trainee."
        ],
        'experience_mid': [
            "<strong>Experience:</strong> Mid-Level.",
            "<strong>Required Experience:</strong> Associate / Experienced.",
            "<strong>Career Level:</strong> Intermediate."
        ],
        'experience_senior': [
            "<strong>Experience:</strong> Senior-Level.",
            "<strong>Required Experience:</strong> Proven expertise.",
            "<strong>Career Level:</strong> Senior / Lead."
        ],
        'urgency_new': [
            "<strong>Status:</strong> New {city} listing (actively interviewing).",
            "<strong>Posted:</strong> Recently.",
            "<strong>Urgency:</strong> Immediate consideration in {city}."
        ],
        'urgency_closing': [
            "<strong>Status:</strong> Closing soon.",
            "<strong>Urgency:</strong> Final applications accepted.",
            "<strong>Deadline:</strong> Approaching."
        ],
        'focus': [
            "Apply your <strong>{primary_skill}</strong> skills at our <strong>{city}</strong> location.",
            "This role centers on <strong>{primary_skill}</strong> within the <strong>{industry_display}</strong> space in {city}.",
            "Core objectives involve <strong>{primary_skill}</strong> in <strong>{industry_display}</strong>."
        ],
        'benefits': [
            "A comprehensive benefits package is included.",
            "Eligible team members receive standard benefits.",
            "This role offers a competitive benefits package."
        ],
        'value_props': [
            "Clear growth pathways at our {city} office.",
            "Impactful work with supportive local leadership.",
            "Stable workload with modern tooling and processes."
        ]
    }
}

# Synonym/tag pools used when assembling titles.
HIRING_SYNS = [
    "Hiring", "Immediate Start", "Now Hiring", "Apply Today", "Urgent Hire",
    "Join Team", "Recruiting", "Apply Now"
]
ENTRY_SYNS = ["Entry Level", "No Experience", "Junior Role", "Graduate Role", "Training Provided"]
URGENCY_TAGS_NEW_TITLE = ["New", "Just Posted", "Recent Job"]
URGENCY_TAGS_CLOSING_TITLE = ["Apply Soon", "Hiring Now", "Urgent"]

# NEW: Dynamic templates for --slightly-improve-title
# Light-touch variants that only append geo/urgency info to an existing title.
SLIGHT_TITLE_IMPROVEMENT_TEMPLATES = [
    "{title} - {city}, {state}",
    "{title} ({city})",
    "{title} | {city}, {state}",
    "{title} ({urgency_tag})",
    "{title} - {urgency_tag}",
    "{urgency_tag}: {title} ({city})",
    "{title} in {city}",
    "{title} | {urgency_tag}",
    "{title} ({state} Opening)",
    "{city} Opening: {title}",
    "{urgency_tag}: {title}"
]

# NEW: Geo-Focused Title Templates
# Full-rewrite templates; placeholders are filled from the parsed record.
GEO_FOCUSED_TITLE_TEMPLATES = [
    "{city} Job: {role} ({job_type})",
    "{city}, {state}: {role} at {company}",
    "{city}-based {role} Opening",
    "Work in {city}: {role}",
    "{city} {experience_tag} {role}",
    "{state} Career: {role} in {city}",
    "{city} {role} with {salary_compact} Pay",
    "{urgency_tag}: {role} in {city}",
    "{hiring} in {city}: {role}",
    "Immediate Opening in {city} for a {role}",
    "{city} {role} ({salary_compact})",
    "{company} Needs {role} in {city}",
    "Join {company} in {city} as a {role}",
    "{company} ({city}) Hiring: {role}",
    "{role} - {city}, {state}",
    "{role} Position in {city}",
    "{role} ({industry}) Vacancy in {city}",
    "{role} ({salary_compact}) - {city}, {state}",
    "{role} Needed - {city} Area",
    "{hiring}: {role} in {city}, {state}",
    "{role} | {city}, {state} | {company}",
    "{city} {role} ({job_type}) - {company}",
    "{experience_tag} {role} - {city}",
    "{company} is hiring a {role} in {city}",
    "{salary_compact} {role} job in {city}",
    "{urgency_tag} Opening: {role} ({city})",
    "{city} {job_type} {role}",
    "Apply Now: {role} in {city}",
    "{city} Career: {role} at {company}",
    "{role} • {city}, {state} • {company}",
    "{city}: {role} ({job_type}, {salary_compact})",
    "{urgency_tag} {role} @ {company} in {city}",
    "{company} seeks {role} in {city}"
]

# Generic keyword pool for SEO sprinkling.
JOB_SEO_KEYWORDS = [
    "hiring", "apply now", "urgent", "career", "benefits", "salary", "immediate"
]

# Canonical section name -> heading variants used to detect sections in
# scraped description HTML.
SECTION_KEYWORDS_MAP = {
    "responsibilities": ["key responsibilities", "responsibilities", "your role", "what you'll do", "duties", "main duties", "primary accountabilities"],
    "requirements": ["requirements", "qualifications", "essential skills", "your qualifications", "must-have qualifications", "to succeed you'll need", "your profile"],
    "skills": ["skills", "technical skills", "soft skills", "key competencies"],
    "benefits": ["benefits", "perks", "what we offer", "why join us"],
    "experience": ["experience", "professional background", "experience level"],
    "incentives": ["incentives", "incentive compensation", "bonus", "commission"],
    "workhours": ["work hours", "hours of work", "schedule"]
}

# ==========================
# Argparse
# ==========================

def parse_args():
    """Build and evaluate the command-line interface for the optimizer."""
    parser = argparse.ArgumentParser(description="Optimize Local JobPosting NDJSON with a strong location-centric focus.")
    parser.add_argument('-i', '--input', default='all-schemas.ndjson', help='Input NDJSON file')
    parser.add_argument('-o', '--output', default='schema.ndjson', help='Output NDJSON file')
    parser.add_argument('--seed', type=int, default=None, help='Seed for reproducible randomness')
    parser.add_argument('--logo_cdn', default=DESIRED_DEFAULT_FALLBACK_LOGO_URL, help='Default fallback logo URL')
    parser.add_argument('--currency', default=DEFAULT_CURRENCY, help='Default currency')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable debug logging')
    parser.add_argument('--full_time_hours', type=int, default=DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                        help="Standard hours for full-time salary conversions.")
    parser.add_argument('--part_time_hours', type=int, default=DEFAULT_PART_TIME_HOURS_PER_WEEK,
                        help="Standard hours for part-time salary conversions.")
    parser.add_argument('--enable_salary_adjustment', action='store_true',
                        help='Enable dynamic salary value adjustments (random increase/decrease/no change).')

    # The two title modes cannot be combined: either keep the title as-is,
    # or apply only the light geo tweaks to it.
    title_mode = parser.add_mutually_exclusive_group()
    title_mode.add_argument('--no-change-in-title', action='store_true',
                            help='Do not improve the job title; keep the original (still optimizes description, etc.).')
    title_mode.add_argument('--slightly-improve-title', action='store_true',
                            help='Apply only minor, geo-focused improvements to the original title and leave all other fields unchanged.')

    return parser.parse_args()

# ==========================
# Small utilities
# (No changes in this section)
# ==========================

def title_case(s: str) -> str:
    """Capitalize each whitespace-separated word, leaving multi-letter
    all-caps tokens (acronyms like "NASA") untouched.

    Note: str.capitalize() lowercases everything after the first letter,
    so mixed-case tokens like "iOS" become "Ios".
    """
    if not s:
        return ""
    result = []
    for word in s.split():
        is_acronym = word.isupper() and len(word) > 1
        result.append(word if is_acronym else word.capitalize())
    return " ".join(result)

def to_dhaka_offset(dt: str) -> str:
    """Re-express an ISO-8601 timestamp in UTC+6 (Dhaka time).

    A trailing 'Z' is treated as UTC. Returns "" for empty input and the
    original string unchanged when it cannot be parsed.
    """
    if not dt:
        return ""
    dhaka = timezone(timedelta(hours=6))
    try:
        parsed = datetime.fromisoformat(dt.replace('Z', '+00:00'))
        return parsed.astimezone(dhaka).isoformat()
    except ValueError:
        logging.warning(f"Invalid date format for Dhaka offset: {dt}. Returning original.")
        return dt

def to_midnight(dt_str: str) -> str:
    """Pin a timestamp to the end of its day (23:59:59) in its own timezone.

    Despite the name, the result is 23:59:59, not 00:00:00. Timezone-naive
    inputs pick up UTC+6 as the target zone; astimezone() on a naive
    datetime interprets it as process-local time — NOTE(review): confirm
    that mixing local interpretation with a +6 target is intended.
    Returns "" for empty input, and the original string if unparseable.
    """
    if not dt_str:
        return ""
    try:
        parsed = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        zone = parsed.tzinfo or timezone(timedelta(hours=6))
        end_of_day = parsed.astimezone(zone).replace(hour=23, minute=59, second=59, microsecond=0)
        return end_of_day.isoformat()
    except (IndexError, ValueError) as e:
        logging.warning(f"Date parse error for to_midnight: {dt_str}. Error: {e}. Returning original.")
        return dt_str

def normalize_url(url: str) -> str:
    """Canonicalize a URL: strip query/fragment, force https, and add a
    trailing slash unless the last path segment looks like a file name
    (has a 2-5 character extension)."""
    if not url:
        return ""
    # Drop the query string and fragment.
    url = url.split('?', 1)[0].split('#', 1)[0]
    # Force an https scheme.
    if url.startswith('//'):
        url = 'https://' + url[2:]
    elif url.startswith('http://'):
        url = 'https://' + url[len('http://'):]
    elif not url.startswith('https://'):
        url = 'https://' + url.lstrip('/')
    # Trailing slash for directory-style URLs only.
    last_segment = url.split('/')[-1]
    looks_like_file = re.search(r'\.\w{2,5}(?:/)?$', last_segment)
    if not looks_like_file and not url.endswith('/'):
        url += '/'
    return url

def get_currency_symbol(currency_code: str) -> str:
    """Map an ISO currency code (case-insensitive) to its display symbol.

    Unknown truthy codes fall back to the code itself plus a trailing
    space; falsy input falls back to "$".
    """
    known = {"USD": "$", "EUR": "€", "GBP": "£", "JPY": "¥", "CAD": "CA$", "AUD": "A$", "INR": "₹", "BDT": "৳"}
    fallback = (str(currency_code) + " ") if currency_code else "$"
    return known.get(str(currency_code).upper(), fallback)

# ==========================
# Intelligence helpers
# (No changes in this section, logic is sound)
# ==========================

def get_primary_skill(skills_value) -> str:
    """Return the first non-empty skill token from a skills value.

    Accepts a delimited string (",", ";" or "/" separators), a list of
    strings/stringifiable items, or any other stringifiable object.
    Returns "" when nothing usable is found.
    """
    if not skills_value:
        return ""
    if isinstance(skills_value, str):
        flattened = skills_value
    elif isinstance(skills_value, list):
        tokens = []
        for entry in skills_value:
            if isinstance(entry, str):
                if entry.strip():
                    tokens.append(entry.strip())
            elif entry is not None:
                # Non-string items are stringified best-effort.
                try:
                    tokens.append(str(entry).strip())
                except Exception:
                    pass
        flattened = ", ".join(tokens)
    else:
        try:
            flattened = str(skills_value).strip()
        except Exception:
            return ""
    if not flattened:
        return ""
    try:
        for piece in re.split(r'[,;/]', flattened):
            if piece.strip():
                return piece.strip()
        return ""
    except TypeError:
        return ""


def clean_text_to_list(text_content: str) -> list[str]:
    """Flatten an HTML fragment into a list of text items.

    Preference order: <li> texts, then <p> texts, then the whole
    fragment's text as a single item. Returns [] for empty input.
    """
    if not text_content:
        return []
    soup = BeautifulSoup(text_content, 'html.parser')

    bullets = [node.get_text(separator=' ', strip=True)
               for node in soup.find_all('li') if node.get_text(strip=True)]
    if bullets:
        return bullets

    paragraphs = [node.get_text(separator=' ', strip=True)
                  for node in soup.find_all('p') if node.get_text(strip=True)]
    if paragraphs:
        return paragraphs

    whole = soup.get_text(strip=True)
    return [whole] if whole else []


def get_location_details(rec: dict) -> tuple[str, str, str]:
    """Extract (city, state, country) strings from a record's jobLocation.

    jobLocation may be a single schema.org Place dict or a list of them;
    only the first list entry is consulted. A missing city falls back to
    the state; a missing or non-string country falls back to "US".

    Improvement: the dict and list branches previously duplicated the
    identical address-extraction logic; a list is now reduced to its
    first element and handled by one shared path.
    """
    loc = rec.get('jobLocation')
    city, state, country = "", "", "US"
    if isinstance(loc, list):
        loc = loc[0] if loc else None
    if isinstance(loc, dict):
        address = loc.get('address')
        if isinstance(address, dict):
            city = address.get('addressLocality', '')
            state = address.get('addressRegion', '')
            raw_country = address.get('addressCountry')
            # Only accept string countries; anything else keeps the default.
            if isinstance(raw_country, str):
                country = raw_country
    if not city and state:
        city = state
    elif not city and not state:
        logging.debug(f"Job ID {rec.get('@id', 'Unknown')}: Missing addressLocality and addressRegion.")
    return str(city), str(state), str(country)


def get_employment_types_info(rec: dict) -> dict:
    """Normalize a record's employmentType into display and schema.org forms.

    Bug fixed: the old pipeline funneled values through title_case(),
    whose str.capitalize() lowercases everything after the first letter,
    producing "Full-time"/"Part-time". The checks below compare against
    "Full-Time"/"Part-Time", so they could never match. Hyphenated parts
    are now capitalized individually, yielding the intended forms.

    Returns a dict with:
        all_available_display:  every normalized type (or ["Flexible"]).
        title_display:          the type chosen for title text.
        chosen_for_description: the type chosen for description text.
        schema_list:            UPPER_SNAKE forms for schema.org output
                                (["OTHER"] when nothing usable was found).
    """

    def _normalize(raw: str) -> str:
        # "FULL_TIME" / "full time" / "full-time" all become "Full-Time".
        lowered = raw.replace('_', '-').strip().lower().replace(" time", "-Time")
        return " ".join(
            "-".join(part.capitalize() for part in word.split('-'))
            for word in lowered.split()
        )

    et_input = rec.get('employmentType')
    raw_items = et_input if isinstance(et_input, list) else [et_input]
    normalized_types = [
        _normalize(item) for item in raw_items
        if isinstance(item, str) and item.strip()
    ]

    unique_types = sorted(set(normalized_types))
    chosen_for_title, chosen_for_description = "Flexible", "Flexible"

    if not unique_types:
        schema_list = ["OTHER"]
    else:
        is_full_time = "Full-Time" in unique_types
        is_part_time = "Part-Time" in unique_types
        if is_full_time and is_part_time:
            # Both present: advertise the combination, describe as full-time.
            chosen_for_title, chosen_for_description = "Full/Part-Time", "Full-Time"
        elif len(unique_types) == 1:
            chosen_for_title, chosen_for_description = unique_types[0], unique_types[0]
        else:
            chosen = "Full-Time" if is_full_time else ("Part-Time" if is_part_time else random.choice(unique_types))
            chosen_for_title, chosen_for_description = chosen, chosen
        schema_list = [t.upper().replace('-', '_') for t in unique_types]

    return {
        'all_available_display': unique_types or ["Flexible"],
        'title_display': chosen_for_title,
        'chosen_for_description': chosen_for_description,
        'schema_list': schema_list
    }


def get_industries_info(rec: dict) -> dict:
    """Normalize a record's industry field for display and schema output.

    Accepts a string or a list of strings; '&' is expanded to 'and' and
    each entry is title-cased. Returns the sorted unique display names,
    one randomly chosen name for title use, and the schema list (same as
    the display list). Unusable input yields all-empty values.
    """
    raw = rec.get('industry')
    if isinstance(raw, str):
        candidates = [raw]
    elif isinstance(raw, list):
        candidates = raw
    else:
        candidates = []

    cleaned = [
        title_case(entry.replace('&', 'and').strip())
        for entry in candidates
        if isinstance(entry, str) and entry.strip()
    ]
    unique_industries = sorted(set(cleaned))

    if not unique_industries:
        return {'display_list': [], 'title_display': "", 'schema_list': []}
    return {
        'display_list': unique_industries,
        'title_display': random.choice(unique_industries),
        'schema_list': unique_industries
    }


def get_experience_level_info(rec: dict) -> dict:
    """Classify a record's required experience level.

    Prefers a numeric monthsOfExperience (matched against the inclusive
    EXPERIENCE_LEVEL_MONTHS ranges); falls back to keyword matching on the
    description, then to the ENTRY_SYNS synonym list.

    Fixes:
    - A literal None (or other non-string) stored under "description"
      previously crashed on .lower(); it is now coerced via str().
    - monthsOfExperience given as a float-like string ("12.0") previously
      failed int() and was silently dropped; it is now parsed via float.

    Returns {"title_tag": short tag ("Entry"/"Mid"/"Senior" or ""),
             "description_tag": full level name or "",
             "months": parsed months or None}.
    """
    exp_req = rec.get("experienceRequirements", {})
    months_exp = None
    level_tag = ""
    description_tag = ""
    if isinstance(exp_req, dict):
        months_str = exp_req.get("monthsOfExperience")
        # Coerce to str so a None/non-string description cannot crash .lower().
        desc_str = str(exp_req.get("description") or "").lower()
        if months_str is not None:
            try:
                # Accept "12", "12.0", 12.5, etc.; truncation matches int() on floats.
                months_exp = int(float(months_str))
            except (ValueError, TypeError):
                pass
        if months_exp is not None:
            for level, (min_m, max_m) in EXPERIENCE_LEVEL_MONTHS.items():
                if min_m <= months_exp <= max_m:
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        else:
            for level, keywords in EXPERIENCE_LEVEL_KEYWORDS.items():
                if any(kw in desc_str for kw in keywords):
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        # Last resort: generic entry-level synonyms in the description.
        if not level_tag and any(syn.lower() in desc_str for syn in ENTRY_SYNS):
            description_tag, level_tag = "Entry-Level", "Entry"
    return {"title_tag": level_tag, "description_tag": description_tag, "months": months_exp}


def get_job_urgency_tags(date_posted_str: str, valid_through_str: str, rec_id: str) -> dict:
    """Derive urgency tags from posting/expiry dates.

    A posting within RECENT_POST_DAYS is tagged "new"; one expiring within
    CLOSING_SOON_DAYS is tagged "closing" (and overrides "new", since it is
    evaluated second). Unparseable dates are logged at debug level and
    ignored. Title tags are drawn randomly from the matching pool.
    """
    result = {"title_tag": "", "description_key": None}
    now = datetime.now(timezone.utc)

    def _as_utc(raw):
        # Treat trailing 'Z' as UTC, then normalize to UTC.
        return datetime.fromisoformat(raw.replace('Z', '+00:00')).astimezone(timezone.utc)

    if date_posted_str:
        try:
            posted = _as_utc(date_posted_str)
        except ValueError as e:
            logging.debug(f"JID {rec_id}: Err parsing datePosted '{date_posted_str}': {e}")
        else:
            if (now - posted).days <= RECENT_POST_DAYS:
                result["title_tag"] = random.choice(URGENCY_TAGS_NEW_TITLE)
                result["description_key"] = "new"

    if valid_through_str:
        try:
            closes = _as_utc(valid_through_str)
        except ValueError as e:
            logging.debug(f"JID {rec_id}: Err parsing validThrough '{valid_through_str}': {e}")
        else:
            if timedelta(days=0) <= (closes - now) <= timedelta(days=CLOSING_SOON_DAYS):
                result["title_tag"] = random.choice(URGENCY_TAGS_CLOSING_TITLE)
                result["description_key"] = "closing"

    return result


def to_k_notation(num_val: float, currency_symbol: str) -> str:
    """Format a number compactly: values >= 1000 in magnitude become
    "X.Yk" (with a trailing ".0" dropped); smaller values are truncated
    to an integer."""
    if abs(num_val) < 1000:
        return f"{currency_symbol}{int(num_val)}"
    thousands = num_val / 1000.0
    compact = f"{thousands:.1f}k".replace(".0k", "k")
    return f"{currency_symbol}{compact}"


def format_salary_details(rec: dict, currency_symbol: str = "$", enable_dynamic_adjustment: bool = False,
                          chosen_emp_type: str = "Full-Time", full_time_hours: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                          part_time_hours: int = DEFAULT_PART_TIME_HOURS_PER_WEEK) -> dict:
    """Parse a record's baseSalary and build display strings plus unit conversions.

    Returns a dict always containing primary_display, is_negotiable,
    conversions and adjusted_factor; successful numeric parses also carry
    primary_raw_min/max, primary_unit_normalized and converted_raw.

    Bug fixed: `base.get('value', {})` was previously called BEFORE the
    isinstance guard on `base`, so a non-dict baseSalary (e.g. a bare
    string in the NDJSON) raised AttributeError. The guards now run
    before any .get() call.
    """
    base = rec.get('baseSalary', {})
    if not isinstance(base, dict):
        base = {}
    val_obj = base.get('value', {})
    if not isinstance(val_obj, dict):
        val_obj = {}

    # min/max/unit may live on the MonetaryAmount value object or directly
    # on baseSalary; the value object wins.
    minv_raw = val_obj.get('minValue', base.get('minValue'))
    maxv_raw = val_obj.get('maxValue', base.get('maxValue'))
    unit_raw = str(val_obj.get('unitText', base.get('unitText', ''))).upper()
    primary_unit_normalized = NORMALIZED_SALARY_UNITS.get(unit_raw, "PROJECT")

    def parse_salary_value(s_val):
        # Returns float, the sentinel string "Negotiable", or None (unparseable).
        if s_val is None:
            return None
        if isinstance(s_val, (int, float)):
            return float(s_val)
        if isinstance(s_val, str):
            s_val_cleaned = str(s_val).replace(currency_symbol, '').replace(',', '').strip()
            if "negotiable" in s_val_cleaned.lower() or not s_val_cleaned:
                return "Negotiable"
            try:
                return float(s_val_cleaned)
            except ValueError:
                return None
        return None

    min_val_num = parse_salary_value(minv_raw)
    max_val_num = parse_salary_value(maxv_raw)

    if min_val_num == "Negotiable" or max_val_num == "Negotiable":
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}, "adjusted_factor": 1.0}
    if min_val_num is None and max_val_num is None:
        return {"primary_display": "", "is_negotiable": False, "conversions": {}, "adjusted_factor": 1.0}

    # Optional random adjustment; re-order min/max if scaling flipped them.
    adjustment_factor = 1.0
    if enable_dynamic_adjustment:
        adjustment_factor = random.choice(WEIGHTED_SALARY_ADJUSTMENT_FACTORS)
        if isinstance(min_val_num, (int, float)):
            min_val_num *= adjustment_factor
        if isinstance(max_val_num, (int, float)):
            max_val_num *= adjustment_factor
        if isinstance(min_val_num, (int, float)) and isinstance(max_val_num, (int, float)) and min_val_num > max_val_num:
            min_val_num, max_val_num = max_val_num, min_val_num

    # Headline display string and the midpoint used for unit conversions.
    primary_value_for_conversion, primary_display_val = None, "Error"
    if min_val_num is not None and max_val_num is not None:
        primary_value_for_conversion = (min_val_num + max_val_num) / 2.0
        primary_display_val = f"{to_k_notation(min_val_num, currency_symbol)}-{to_k_notation(max_val_num, currency_symbol)}"
    elif max_val_num is not None:
        primary_value_for_conversion, primary_display_val = max_val_num, f"Up to {to_k_notation(max_val_num, currency_symbol)}"
    elif min_val_num is not None:
        primary_value_for_conversion, primary_display_val = min_val_num, to_k_notation(min_val_num, currency_symbol)
    else:
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}, "adjusted_factor": adjustment_factor}

    primary_unit_display = SALARY_UNIT_MAP.get(primary_unit_normalized, "")
    primary_salary_str = f"{primary_display_val}{'/' + primary_unit_display if primary_unit_display and primary_unit_display != 'Project' else ''}"

    conversions, converted_values_num = {}, {}
    # Hourly assumptions depend on whether the chosen type is part-time.
    hours_per_week = part_time_hours if "part-time" in chosen_emp_type.lower() else full_time_hours

    # Annualize first, then derive the other units from the annual figure.
    if primary_value_for_conversion is not None and primary_unit_normalized != "PROJECT":
        annual_equiv = None
        if primary_unit_normalized == "HOUR":
            annual_equiv = primary_value_for_conversion * hours_per_week * WEEKS_PER_YEAR
        elif primary_unit_normalized == "WEEK":
            annual_equiv = primary_value_for_conversion * WEEKS_PER_YEAR
        elif primary_unit_normalized == "MONTH":
            annual_equiv = primary_value_for_conversion * MONTHS_PER_YEAR
        elif primary_unit_normalized == "YEAR":
            annual_equiv = primary_value_for_conversion

        if annual_equiv is not None:
            converted_values_num.update({
                "YEAR": annual_equiv,
                "MONTH": annual_equiv / MONTHS_PER_YEAR,
                "WEEK": annual_equiv / WEEKS_PER_YEAR
            })
            if hours_per_week > 0:
                converted_values_num["HOUR"] = (annual_equiv / WEEKS_PER_YEAR) / hours_per_week
            for unit, val in converted_values_num.items():
                if unit != primary_unit_normalized:
                    conversions[unit] = f"{to_k_notation(val, currency_symbol)}/{SALARY_UNIT_MAP.get(unit, '')}"

    return {
        "primary_display": primary_salary_str,
        "primary_raw_min": min_val_num,
        "primary_raw_max": max_val_num,
        "primary_unit_normalized": primary_unit_normalized,
        "is_negotiable": False,
        "conversions": conversions,
        "converted_raw": converted_values_num,
        "adjusted_factor": adjustment_factor
    }

# ==========================
# Content assembly & enrichment
# (Refactored for Geo-Focus)
# ==========================

def clean_role_and_company(original_title: str, org_name_from_ho: str) -> tuple[str, str]:
    """Split a raw job title into (role, company).

    Strips gender-marker parentheticals like "(m/f/d)" (note: any
    parenthetical containing one of m/f/v/d/i/x matches) and a trailing
    "job(s)". When no organization name is supplied, tries to pull one
    out of the title via "at"/"for"/"with"; finally falls back to
    "A Leading Local Company" / "Associate".
    """
    company = str(org_name_from_ho or "").strip()
    role = re.sub(r'\s*\(.*?[mfvdix].*?\)\s*', '', str(original_title), flags=re.IGNORECASE).strip()
    role = re.sub(r"\s+jobs?\b", "", role, flags=re.IGNORECASE).strip()
    final_role = role

    if not company:
        # Look for "<role> at|for|with <company>" with a 2-5 word company.
        for preposition in ("at", "for", "with"):
            hit = re.search(rf"^(.*?)\s+{re.escape(preposition)}\s+([\w\s.,'&()-]+)$", role, flags=re.IGNORECASE)
            if hit and 2 <= len(hit.group(2).split()) <= 5:
                company = hit.group(2).strip()
                final_role = hit.group(1).strip()
                break

    if company:
        # Remove a redundant company mention from the role text.
        final_role = re.sub(rf"\s*\b{re.escape(company)}\b", "", final_role, flags=re.IGNORECASE).strip(" -|,")

    if not company:
        company = "A Leading Local Company"
    if not final_role:
        final_role = "Associate"
    return final_role.strip(), company.strip()


def geo_context_enrichment(html_block: str, role: str, company: str, primary_skill: str, city: str, state: str, industry_display: str) -> str:
    """Minimal enrichment layer to add location-specific context.

    Applies a controlled synonym map to the markup, then appends three
    geo-context bullet points (to the first <ul>, or to a new list).

    Bug fixed: the synonym-substituted text was previously computed and
    then discarded — the stitches were appended to the soup parsed from
    the ORIGINAL markup, so no synonym ever reached the output. The soup
    is now rebuilt from the substituted text.
    """
    soup = BeautifulSoup(html_block or "", 'html.parser')
    text = soup.decode_contents() if html_block else ""

    # Controlled synonym map
    synonyms = {
        r"\bcompany\b": "organization",
        r"\bemployees\b": "team members",
        r"\bsalary\b": "compensation",
        r"\bjob\b": "role",
        r"\bexperience\b": "background",
        r"\bbenefits\b": "perks"
    }
    # NOTE(review): substitutions run over raw markup, so these words inside
    # tag attributes would also be rewritten — confirm acceptable for this data.
    for pat, repl in synonyms.items():
        text = re.sub(pat, repl, text, flags=re.IGNORECASE)

    # Re-parse so the synonym replacements are reflected in the output.
    soup = BeautifulSoup(text, 'html.parser')

    # NEW: Geo-Context stitches
    stitches = [
        f"<li>This <strong>{city}</strong>-based role is an excellent opportunity for professionals skilled in <strong>{primary_skill or 'relevant skills'}</strong>.</li>",
        f"<li>Our <strong>{company}</strong> team in {city}, {state} is growing.</li>",
        f"<li>Benefit from working in <strong>{city}</strong>, a key hub for the <strong>{industry_display}</strong> industry.</li>"
    ]

    # Insert stitches under first UL; else create a fresh list
    ul = soup.find('ul')
    if ul:
        for item in stitches:
            ul.append(BeautifulSoup(item, 'html.parser'))
    else:
        ul_new = BeautifulSoup("<ul></ul>", 'html.parser')
        for item in stitches:
            ul_new.ul.append(BeautifulSoup(item, 'html.parser'))
        soup.append(ul_new)

    return soup.decode_contents()


def create_geo_targeted_summary(rec: dict, primary_skill: str, salary_details: dict, job_urgency: dict, exp_level_info: dict, industries_info: dict) -> str:
    """Build a location-first HTML SEO summary for a single JobPosting record.

    Fills phrase templates from GEO_TONE_TEMPLATES['informational'] with
    role/company/location data pulled from *rec*, arranges them into one of
    three randomly chosen HTML layouts, then runs the result through
    geo_context_enrichment(). Random choices are deterministic per record
    because the caller seeds `random` from the record id.

    Args:
        rec: Raw JobPosting dict; reads 'title' and 'hiringOrganization'.
        primary_skill: Headline skill string; may be empty.
        salary_details: Output of format_salary_details(), or None/falsy.
        job_urgency: Urgency dict; 'description_key' selects an urgency phrase.
        exp_level_info: Experience dict; 'description_tag' selects a phrase.
        industries_info: Industry dict; first 'display_list' entry is used.

    Returns:
        Inner-HTML string (no outer wrapper element), whitespace-stripped.
    """
    role_for_summary, company_for_summary = clean_role_and_company(rec.get('title',''), rec.get('hiringOrganization',{}).get('name',''))
    city, state, _ = get_location_details(rec)

    active = GEO_TONE_TEMPLATES['informational']
    salary_primary_display = salary_details.get("primary_display", "Negotiable") if salary_details else "Negotiable"
    converted_salary_suffix = ""
    if salary_details and salary_details.get("conversions"):
        # Prefer the annualized figure; otherwise pick any available conversion.
        best_conv = salary_details["conversions"].get("YEAR") or random.choice(list(salary_details["conversions"].values()))
        converted_salary_suffix = f" (approx. {best_conv})"

    # Shared substitution payload for every phrase template below.
    data = {
        "role_for_summary": title_case(role_for_summary),
        "company_for_summary": company_for_summary,
        "city": city,
        "state": state,
        "local_area_info": random.choice(LOCAL_AREA_DESCRIPTORS).format(city=city),
        "salary_primary_display": salary_primary_display,
        "converted_salary_suffix": converted_salary_suffix,
        "primary_skill": primary_skill or "your professional skills",
        "industry_display": (industries_info['display_list'][0] if industries_info and industries_info.get('display_list') else "a dynamic")
    }

    def phrase(key):
        # Pick one template for the given tone key and fill it from `data`.
        return random.choice(active[key]).format(**data)

    job_details = [phrase('intro'), phrase('company'), phrase('location')]

    # Optional urgency sentence, keyed e.g. 'urgency_<description_key>'.
    urgency_key = job_urgency.get("description_key")
    if urgency_key:
        job_details.append(phrase(f'urgency_{urgency_key}'))

    # Optional experience-level sentence (three recognized tags only).
    exp_level = exp_level_info.get("description_tag")
    if exp_level == "Entry-Level":
        job_details.append(phrase('experience_entry'))
    elif exp_level == "Mid-Level":
        job_details.append(phrase('experience_mid'))
    elif exp_level == "Senior-Level":
        job_details.append(phrase('experience_senior'))

    comp_benefits = [phrase('salary'), f"<strong>Benefits:</strong> {random.choice(active['benefits'])}"]
    value_props = [f"<em>{random.choice(active['value_props']).format(city=city)}</em>"]
    role_focus = [random.choice(active['focus']).format(**data)]

    summary_html = f"<h3>{random.choice(GEO_HEADER_TEMPLATES).format(city=city, state=state)}</h3>"
    # Three layouts: (1) three headed lists, (2) one shuffled list, (3) paragraphs.
    structure_choice = random.randint(1, 3)
    if structure_choice == 1:
        random.shuffle(job_details)
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['primary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in job_details) +
            "</ul>"
        )
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['secondary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in (comp_benefits + value_props)) +
            "</ul>"
        )
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['tertiary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in role_focus) +
            "</ul>"
        )
    elif structure_choice == 2:
        all_parts = job_details + comp_benefits + value_props + role_focus
        random.shuffle(all_parts)
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['primary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in all_parts) +
            "</ul>"
        )
    else:
        summary_html += f"<p>{' '.join(job_details)}</p>"
        summary_html += f"<p>{' '.join(comp_benefits + value_props)}</p>"
        summary_html += f"<h5>{random.choice(SUB_HEADER_TEMPLATES['tertiary'])}</h5><p>{role_focus[0]}</p>"

    # Enrich with stitched context and synonyms
    summary_html = geo_context_enrichment(
        summary_html,
        role=title_case(role_for_summary),
        company=company_for_summary,
        primary_skill=primary_skill,
        city=city,
        state=state,
        industry_display=data["industry_display"]
    )

    return BeautifulSoup(summary_html, 'html.parser').decode_contents().strip()


# ==========================
# Title building & SEO post-processing
# (Refactored for Geo-Focus)
# ==========================

def enforce_length(title: str, max_len: int) -> str:
    """Trim *title* to at most *max_len* characters, preferring clean cuts.

    Strategy, in order: return as-is when it fits; drop a trailing
    parenthetical (e.g. " (Remote)"); drop whole words from the right;
    as a last resort hard-truncate a single over-long word with "...".

    Args:
        title: Candidate title string.
        max_len: Maximum allowed length in characters.

    Returns:
        A string of length <= max_len (for max_len >= 3), stripped of
        trailing separator punctuation.
    """
    if len(title) <= max_len:
        return title.strip()
    shortened = re.sub(r'\s*\([^)]*\)\s*$', '', title).strip()
    if len(shortened) <= max_len:
        return shortened
    # BUGFIX/perf: continue from the parenthetical-stripped form. The old
    # code restarted from the full title and re-trimmed the parenthetical
    # word by word before reaching the same result.
    title = shortened
    while len(title) > max_len:
        parts = title.rsplit(' ', 1)
        if len(parts) > 1:
            title = parts[0]
        else:
            # Single over-long word: hard-truncate with an ellipsis.
            # max(0, ...) guards the degenerate max_len < 3 case, where the
            # old negative slice kept almost the whole word.
            return title[:max(0, max_len - 3)].strip() + "..."
    return title.strip(" -|,( ")


def enrich_title_for_seo(title: str) -> str:
    """Append one missing, high-intent keyword if space allows; avoid stuffing."""
    # Titles already past the 60-char margin are never extended.
    if len(title) > 60:
        return title
    lowered = title.lower()
    for keyword in JOB_SEO_KEYWORDS:
        # First keyword not yet present (case-insensitive) wins.
        if keyword.lower() not in lowered:
            return f"{title} | {keyword.title()}"
    return title


def generate_location_focused_title(rec: dict, primary_skill: str, salary_details: dict, job_urgency:dict,
                                    exp_level_info:dict, emp_types_info:dict, industries_info:dict,
                                    dynamic_max_len:int) -> str:
    """Generate a geo-focused SEO title from GEO_FOCUSED_TITLE_TEMPLATES.

    Builds a parts dict (role, company, city, salary, tags, ...), retries the
    template draw a few times to favor templates whose placeholders are all
    populated, formats, cleans up separator artifacts, then appends an SEO
    keyword and enforces *dynamic_max_len*.

    Args:
        rec: JobPosting dict; reads 'title'/'name' and 'hiringOrganization'.
        primary_skill: Headline skill string.
        salary_details: Output of format_salary_details(), or None/falsy.
        job_urgency / exp_level_info / emp_types_info / industries_info:
            Tag dicts produced by the extraction helpers.
        dynamic_max_len: Adaptive maximum title length.

    Returns:
        The final title string, at most *dynamic_max_len* characters.
    """
    ho_name = rec.get('hiringOrganization', {}).get('name', '')
    cleaned_role, company_name = clean_role_and_company(rec.get('title', rec.get('name','')), ho_name)
    city, state, _ = get_location_details(rec)

    # Compact salary display; hourly figures are shown as "<min>/hr" when an
    # annual conversion exists.
    salary_fmt = ""
    if salary_details and salary_details.get("primary_display") and not salary_details.get("is_negotiable"):
        salary_fmt = salary_details["primary_display"]
        prim_unit = salary_details["primary_unit_normalized"]
        conv = salary_details.get("conversions", {})
        if prim_unit == "HOUR" and "YEAR" in conv:
            salary_fmt = f"{salary_details['primary_display'].split('/')[0]}/hr"

    parts = {
        "role": title_case(cleaned_role),
        "company": title_case(company_name),
        "city": title_case(city),
        "state": state.upper(),
        "job_type": emp_types_info.get('title_display', ""),
        "hiring": random.choice(HIRING_SYNS),
        "urgency_tag": job_urgency.get('title_tag', ''),
        "experience_tag": exp_level_info.get('title_tag', ''),
        "salary_compact": salary_fmt if "Negotiable" not in salary_fmt else "",
        "skill1": title_case(primary_skill),
        "industry": industries_info.get('title_display', "")
    }

    # CRITICAL CHANGE: Use the new geo-focused template list
    tmpl = random.choice(GEO_FOCUSED_TITLE_TEMPLATES)

    # Filter out empty parts to avoid "Role in ()"
    # NOTE(review): parts_filled only steers template selection below; the
    # final tmpl.format(**parts) still substitutes empty strings and relies
    # on the cleanup regexes to remove the resulting artifacts.
    parts_filled = {k: v for k, v in parts.items() if v}

    # Simple check to find a template that fits the available data
    # This is a basic way to avoid templates with many missing keys
    # NOTE(review): after 5 attempts the loop breaks as soon as a randomly
    # chosen placeholder appears in tmpl — i.e. it effectively gives up and
    # accepts a template with missing keys; confirm this looseness is intended.
    attempts = 0
    while any(f"{{{k}}}" in tmpl for k in parts.keys() if k not in parts_filled) and attempts < 10:
        tmpl = random.choice(GEO_FOCUSED_TITLE_TEMPLATES)
        attempts += 1
        if attempts > 5: # Be less strict if we cant find a perfect match
             if "{" + random.choice(list(parts.keys())) + "}" in tmpl:
                 break # Just pick one

    title = tmpl.format(**parts)
    # Collapse whitespace, tighten separators, and strip leftover artifacts
    # produced by empty placeholders.
    title = re.sub(r'\s{2,}', ' ', title).strip()
    title = re.sub(r'\s*([-|(),:•—])\s*', r'\1', title)
    title = title.replace('()', '').replace('[]', '').strip(" -|,: •—")
    title = re.sub(r'\s*-\s*-\s*', '-', title) # Clean up double dashes
    title = re.sub(r'\s*•\s*•\s*', '•', title) # Clean up double bullets
    title = title.strip(" -|,: •—")

    title = enrich_title_for_seo(title)
    return enforce_length(title, dynamic_max_len)


# --- UPDATED FUNCTION for --slightly-improve-title ---
def generate_slight_title_improvement(original_title: str, city: str, state: str, urgency_tag: str, max_len: int) -> str:
    """Light-touch title enhancement for the --slightly-improve-title mode.

    Strips any pre-existing geo/parenthetical suffix from the title, then
    tries shuffled templates from SLIGHT_TITLE_IMPROVEMENT_TEMPLATES and
    returns the first fully-populated candidate that fits within *max_len*.
    Falls back to the cleaned original title when nothing fits.
    """
    cleaned = original_title.strip()
    # Strip additions that may already be present so we never duplicate them.
    cleaned = re.sub(r'\s*-\s*[\w\s]+,\s*\w+$', '', cleaned, flags=re.IGNORECASE)  # drop " - City, ST"
    cleaned = re.sub(r'\s*\([^)]*\)$', '', cleaned).strip()                        # drop trailing "(...)"
    cleaned = cleaned.strip(" -|,: •—")

    parts = {
        "title": cleaned,
        "city": title_case(city),
        "state": state.upper(),
        "urgency_tag": urgency_tag,
    }
    # Keys with a truthy value — only these can satisfy a placeholder.
    available = {key for key, value in parts.items() if value}

    # Keep only templates whose placeholders are all satisfiable
    # ('title' is always considered present).
    usable = [
        tmpl for tmpl in SLIGHT_TITLE_IMPROVEMENT_TEMPLATES
        if all(ph in available
               for ph in re.findall(r'\{([^{}]+)\}', tmpl)
               if ph != 'title')
    ]
    if not usable:
        return enforce_length(cleaned, max_len)

    # Shuffle for variety across records (seeded per record by the caller).
    random.shuffle(usable)

    for tmpl in usable:
        candidate = tmpl.format(**parts)
        # Normalize whitespace and tighten separator punctuation.
        candidate = re.sub(r'\s{2,}', ' ', candidate).strip()
        candidate = re.sub(r'\s*([-|(),:•—])\s*', r'\1', candidate)
        candidate = candidate.replace('()', '').replace('[]', '').strip(" -|,: •—")
        if len(candidate) <= max_len:
            return enforce_length(candidate, max_len)

    # Every candidate was too long: keep the cleaned original.
    return enforce_length(cleaned, max_len)
# --- END UPDATED FUNCTION ---


# ==========================
# Description assembler
# (Refactored for Geo-Focus)
# ==========================

def assemble_location_focused_description(html_str: str, rec: dict, primary_skill: str = "", salary_details:dict = None,
                                          job_urgency:dict=None, exp_level_info:dict=None, industries_info:dict=None) -> str:
    """Prepend the geo-targeted SEO summary to a record's description.

    When the original description is empty (or the known placeholder text),
    the summary stands alone; otherwise the two are joined with a separator.
    A contact CTA paragraph is appended when CONTACT_CTA is configured.
    """
    body = html_str or ""
    is_placeholder = body.strip().lower() in ["<p>no description provided.</p>"]
    has_no_content = not body.strip() or is_placeholder

    summary = create_geo_targeted_summary(rec, primary_skill, salary_details, job_urgency, exp_level_info, industries_info)

    if has_no_content:
        result = summary
    else:
        result = summary + "<hr><br>" + body

    if CONTACT_CTA:
        result += f"<p>{CONTACT_CTA}</p>"
    return result


# ==========================
# Hiring org normalization
# (No changes in this section)
# ==========================

def normalize_hiring_org(org: dict, logo_cdn: str) -> dict:
    """Normalize a hiringOrganization dict in place.

    Ensures 'sameAs' carries an https:// scheme and that 'logo' is an
    absolute URL, substituting *logo_cdn* otherwise. Non-dict input yields
    an empty dict.
    """
    if not isinstance(org, dict):
        return {}
    same_as = org.get('sameAs', '')
    missing_scheme = (
        bool(same_as)
        and isinstance(same_as, str)
        and not same_as.startswith(('http://', 'https://'))
    )
    if missing_scheme:
        org['sameAs'] = 'https://' + same_as.lstrip('/')
    logo = org.get('logo')
    logo_is_absolute = isinstance(logo, str) and logo.startswith(('http://', 'https://'))
    if not logo_is_absolute:
        org['logo'] = logo_cdn
    return org


# ==========================
# Core pipeline
# (Refactored for Geo-Focus)
# ==========================
# Parsed CLI namespace; populated once in main() before the pipeline runs.
ARGS = None

def rewrite_geotargeted_job_records(in_path: str, out_path: str, seed: int=None, logo_cdn: str = DESIRED_DEFAULT_FALLBACK_LOGO_URL,
                                    default_currency_arg: str = DEFAULT_CURRENCY, enable_salary_adj_arg: bool = False,
                                    full_time_hours_arg: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                                    part_time_hours_arg: int = DEFAULT_PART_TIME_HOURS_PER_WEEK,
                                    no_change_in_title_arg: bool = False,
                                    slightly_improve_title_arg: bool = False): # <-- NEW ARG
    """Stream-rewrite a JobPosting NDJSON file with geo-focused SEO changes.

    Reads *in_path* line by line, rewrites each JSON record, and writes one
    JSON object per line (keys sorted) to *out_path*.

    Mode switches:
      - slightly_improve_title_arg: ONLY the title is modified (geo suffix
        appended); every other field passes through untouched.
      - no_change_in_title_arg: full description/salary/schema optimization
        but the original title is preserved.
      - neither flag: full optimization including a regenerated title.

    Args:
        in_path: Input NDJSON path.
        out_path: Output NDJSON path (created/truncated).
        seed: Mixed with each record id to seed `random` per record.
        logo_cdn: Fallback hiringOrganization logo URL.
        default_currency_arg: Currency code used when a record has none.
        enable_salary_adj_arg: Forwarded to format_salary_details().
        full_time_hours_arg / part_time_hours_arg: Weekly hours used for
            salary unit conversions.
        no_change_in_title_arg / slightly_improve_title_arg: See above.

    Errors are logged rather than raised: bad JSON lines are skipped; a
    missing input file or unexpected exception aborts the run after logging.
    """
    logging.info(f"Optimizing Geo-Targeted Jobs: {in_path} -> {out_path} with seed {seed}")
    if no_change_in_title_arg:
        logging.info("Running with --no-change-in-title. Original titles will be preserved.")
    # --- NEW LOGGING ---
    if slightly_improve_title_arg:
        logging.info("Running with --slightly-improve-title. ONLY titles will be modified; all other fields preserved.")
    # --- END NEW ---

    # Adaptive stats (Contextual Behavior Simulation)
    # title_len_sum/desc_len_sum feed the dynamic_max_len tuning below.
    stats = {"total": 0, "title_len_sum": 0, "desc_len_sum": 0}

    try:
        # First pass: count non-blank lines so tqdm can show real progress.
        with open(in_path, 'r', encoding='utf-8') as fin_check:
            num_lines = sum(1 for line in fin_check if line.strip())
        if num_lines == 0:
            logging.warning(f"Input file '{in_path}' is empty.")
            open(out_path, 'w').close()
            return

        with open(in_path, 'r', encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
            for line in tqdm(fin, total=num_lines, desc="Processing Geo-Targeted Jobs"):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError as e:
                    logging.warning(f"Bad JSON: {e}. Line: {line[:70]}...")
                    continue
                if not isinstance(rec, dict):
                    logging.warning("Line is not a JSON object, skipping.")
                    continue

                # Stable-ish per-record id: @id, url, or a digest of the raw line.
                jid = rec.get('@id') or rec.get('url') or hashlib.sha256(line.encode()).hexdigest()
                # NOTE(review): hash() is salted per process (PYTHONHASHSEED),
                # so despite the explicit seed this is NOT reproducible across
                # runs — consider hashlib for a stable per-record seed.
                random.seed(hash(str(jid) + str(seed)))

                # --- NEW LOGIC BRANCH for --slightly-improve-title ---
                if slightly_improve_title_arg:
                    original_title = rec.get('title', rec.get('name', ''))
                    if not original_title:
                        fout.write(line) # Write original line if no title
                        continue

                    # Get JUST enough info for the slight improvement
                    city, state, _ = get_location_details(rec)
                    job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)

                    # Calculate dynamic max length (copied from existing logic)
                    dynamic_max_len = MAX_TITLE_LEN_BASE
                    if stats["total"] >= max(10, num_lines // 2):
                        avg_title_len = (stats["title_len_sum"] / max(stats["total"], 1))
                        if avg_title_len < 52: dynamic_max_len = min(84, MAX_TITLE_LEN_BASE + 10)
                        elif avg_title_len > 72: dynamic_max_len = max(60, MAX_TITLE_LEN_BASE - 5)

                    # Generate the new title using the new "slight improvement" function
                    new_title = generate_slight_title_improvement(
                        original_title, city, state, 
                        job_urgency.get('title_tag', ''), 
                        dynamic_max_len
                    )

                    rec['title'] = new_title # Update the title

                    # Stats tracking (minimal)
                    stats["total"] += 1
                    stats["title_len_sum"] += len(new_title)
                    # DO NOT track description length or other changes

                    fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")
                    continue # IMPORTANT: Skip all other processing
                # --- END OF NEW LOGIC BRANCH ---


                # --- FULL PROCESSING LOGIC (only runs if --slightly-improve-title is OFF) ---

                # Extract & Normalize
                primary_skill = get_primary_skill(rec.get('skills',''))
                emp_types_info = get_employment_types_info(rec)
                industries_info = get_industries_info(rec)
                exp_level_info = get_experience_level_info(rec)

                # Normalize timestamps before computing urgency tags from them.
                if rec.get('datePosted'):
                    rec['datePosted'] = to_dhaka_offset(rec['datePosted'])
                if rec.get('validThrough'):
                    rec['validThrough'] = to_midnight(rec['validThrough'])

                job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)
                curr_code = rec.get('baseSalary', {}).get('currency', default_currency_arg) or default_currency_arg
                curr_symbol = get_currency_symbol(curr_code)
                # NOTE(review): the baseSalary cleanup below calls
                # salary_details.get(...) unguarded — verify
                # format_salary_details() can never return None/falsy here.
                salary_details = format_salary_details(
                    rec, curr_symbol, enable_salary_adj_arg,
                    emp_types_info['chosen_for_description'],
                    full_time_hours_arg, part_time_hours_arg
                )

                # Description (prepend geo-targeted semantic SEO summary)
                # USE NEW FUNCTION
                rec['description'] = assemble_location_focused_description(
                    rec.get('description',''), rec, primary_skill,
                    salary_details, job_urgency, exp_level_info, industries_info
                )

                # Title (with adaptive max length)
                dynamic_max_len = MAX_TITLE_LEN_BASE
                # Light adaptive tuning after half the dataset processed
                if stats["total"] >= max(10, num_lines // 2):
                    avg_title_len = (stats["title_len_sum"] / max(stats["total"], 1))
                    if avg_title_len < 52:
                        dynamic_max_len = min(84, MAX_TITLE_LEN_BASE + 10)
                    elif avg_title_len > 72:
                        dynamic_max_len = max(60, MAX_TITLE_LEN_BASE - 5)

                if no_change_in_title_arg:
                    logging.debug(f"JID {jid}: Keeping original title.")
                else:
                    # This 'else' is now "full improvement", since the other two cases
                    # (no_change and slightly_improve) are handled.
                    rec['title'] = generate_location_focused_title(
                        rec, primary_skill, salary_details, job_urgency,
                        exp_level_info, emp_types_info, industries_info,
                        dynamic_max_len
                    )

                # Employment & Industry for schema
                rec['employmentType'] = emp_types_info['schema_list'] or None
                rec['industry'] = industries_info['schema_list'] or None

                # URL normalization or generation
                if rec.get('url'):
                    rec['url'] = normalize_url(rec['url'])
                else:
                    # Canonical URL fallback from title
                    # NOTE(review): hard-coded fallback domain below — confirm
                    # this is the intended canonical host for generated URLs.
                    slug = re.sub(r'[^a-z0-9]+', '-', rec['title'].lower()).strip('-') if rec.get('title') else hashlib.sha1(jid.encode()).hexdigest()[:10]
                    rec['url'] = normalize_url(f"https://walmart.uscareers.co.com/jobs/{slug}/")

                # Hiring Organization
                ho = rec.get('hiringOrganization')
                if isinstance(ho, dict):
                    rec['hiringOrganization'] = normalize_hiring_org(ho, logo_cdn)
                elif isinstance(ho, str) and ho.strip():
                    rec['hiringOrganization'] = normalize_hiring_org({"@type":"Organization", "name":ho.strip()}, logo_cdn)
                else:
                    # No org at all: derive a company name from the title.
                    _, cname = clean_role_and_company(rec.get('title',''), None)
                    rec['hiringOrganization'] = normalize_hiring_org({"@type":"Organization", "name":cname}, logo_cdn)

                # Base salary cleanup
                bs = rec.get('baseSalary', {})
                if isinstance(bs, dict):
                    bs.setdefault('@type','MonetaryAmount')
                    bs['currency'] = curr_code
                    v = bs.get('value', {})
                    if not isinstance(v, dict):
                        v = {}
                    v.setdefault('@type','QuantitativeValue')
                    if salary_details.get("is_negotiable"):
                        v.update({'description': "Negotiable", 'minValue': None, 'maxValue': None, 'unitText': None})
                    elif salary_details.get("primary_raw_min") is not None or salary_details.get("primary_raw_max") is not None:
                        v.update({
                            'minValue': str(salary_details.get("primary_raw_min")),
                            'maxValue': str(salary_details.get("primary_raw_max")),
                            'unitText': salary_details.get("primary_unit_normalized", "PROJECT").upper(),
                        })
                    bs['value'] = v
                    rec['baseSalary'] = bs

                # Remove null description field in baseSalary.value if present
                if rec.get('baseSalary', {}).get('value', {}).get('description') is None:
                    try:
                        del rec['baseSalary']['value']['description']
                    except KeyError:
                        pass

                # Schema baseline
                rec.update({'@context':'http://schema.org', '@type':'JobPosting'})

                # Clean empty fields
                for k in list(rec.keys()):
                    if rec[k] is None:
                        del rec[k]

                # Stats tracking
                stats["total"] += 1
                stats["title_len_sum"] += len(rec.get('title', ''))
                stats["desc_len_sum"] += len(BeautifulSoup(rec.get('description',''), 'html.parser').get_text())

                fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")

        # After run, log adaptive insight
        if stats["total"]:
            avg_t = stats["title_len_sum"] / stats["total"]
            avg_d = stats["desc_len_sum"] / stats["total"]
            logging.info(f"Adaptive tuning summary: avg title length = {avg_t:.1f}, avg description length = {avg_d:.1f} chars, n={stats['total']}")

    except FileNotFoundError:
        logging.error(f"Input file '{in_path}' not found.")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)
    logging.info(f"Processing complete. Output: {out_path}")


# ==========================
# Entrypoint
# ==========================

def main():
    """CLI entry point: parse arguments, configure logging, run the pipeline."""
    global ARGS
    ARGS = parse_args()

    level = logging.DEBUG if ARGS.verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname)s] %(filename)s:%(lineno)d - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Hand everything off to the geo-targeted processing pipeline.
    rewrite_geotargeted_job_records(
        ARGS.input,
        ARGS.output,
        ARGS.seed,
        ARGS.logo_cdn,
        ARGS.currency,
        ARGS.enable_salary_adjustment,
        ARGS.full_time_hours,
        ARGS.part_time_hours,
        ARGS.no_change_in_title,
        ARGS.slightly_improve_title,  # light-touch title-only mode
    )

# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()

