#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Location-Centric JobPosting Optimizer
=====================================

This script is a new version dedicated to optimizing JobPosting NDJSON
with a primary focus on geo-targeting and location-based SEO.

What's new:
1) Geo-Focused Title Generation
   - A completely new set of title templates that prioritize {city}, {state},
     and location-based urgency.
   - Titles are built to attract local candidates (e.g., "City Job: Role",
     "Hiring in City: Role").

2) Geo-Targeted Description Summary
   - The prepended SEO summary is now location-first.
   - New templates introduce the role, company, and salary all within
     the context of the specific {city}.

3) Geo-Contextual Enrichment
   - The AI enrichment layer now adds sentences specifically linking the
     role, company, and industry to the local area.

NEW IN THIS VERSION:
- Added a `--slightly-improve-title` flag. This provides a "light-touch"
  mode that ONLY appends geo-information (city, state, urgency) to the
  existing title and leaves ALL other fields (description, salary, etc.)
  in the record unmodified.
  (This mode is now more "intelligent", prioritizing new information
  and avoiding redundancy).

Usage
-----
Full Optimization:
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson

Title-Tweak Only Mode:
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson --slightly-improve-title

No Title Change (but full description/etc. optimization):
python3 location_centric_schema_improver.py -i input.ndjson -o output.ndjson --no-change-in-title
"""

import argparse
import json
import random
import re
import logging
import locale
import hashlib
from tqdm import tqdm
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, timezone

# ==========================
# Locale initialization
# ==========================
# Initialize the locale for number formatting. Fall back progressively and
# never let a missing locale abort the script at import time.
try:
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    try:
        locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
    except locale.Error:
        # BUG FIX: "C.UTF-8" is not available on every platform (e.g. some
        # Windows builds); previously this call could raise and crash the
        # whole script during import. Keep the default "C" locale instead.
        try:
            locale.setlocale(locale.LC_ALL, "C.UTF-8")
        except locale.Error:
            pass
        logging.warning("Locale setting failed. Using 'C.UTF-8'. Number formatting might be basic.")

# ==========================
# Constants & Defaults
# ==========================
# --- Output / branding defaults ---
MAX_TITLE_LEN_BASE = 70
DESIRED_DEFAULT_FALLBACK_LOGO_URL = "https://walmart.uscareers.co.com/logo.png"
DEFAULT_CURRENCY = "USD"
CONTACT_CTA = ''  # Optional CTA footer

# --- Assumptions used for salary unit conversions ---
DEFAULT_FULL_TIME_HOURS_PER_WEEK = 40
DEFAULT_PART_TIME_HOURS_PER_WEEK = 23
WEEKS_PER_YEAR = 52
MONTHS_PER_YEAR = 12

# Multipliers for the optional randomized salary adjustment. The weighted
# list prepends three extra 1.0 entries to bias the draw toward "no change".
SALARY_ADJUSTMENT_FACTORS = [1.0, 0.92, 0.91, 0.88, 0.85, 1.05, 1.08, 1.10, 1.12, 1.15]
WEIGHTED_SALARY_ADJUSTMENT_FACTORS = [1.0, 1.0, 1.0] + SALARY_ADJUSTMENT_FACTORS

# Display names for salary units (raw unitText -> human-readable label).
SALARY_UNIT_MAP = {
    'HOUR': 'Hour', 'HOURLY': 'Hour', 'DAY': 'Day', 'DAILY': 'Day',
    'WEEK': 'Week', 'WEEKLY': 'Week', 'MONTH': 'Month', 'MONTHLY': 'Month',
    'YEAR': 'Year', 'ANNUAL': 'Year', 'ANNUALLY': 'Year', 'PROJECT': 'Project'
}
# Canonical unit codes used by the conversion math in format_salary_details.
# NOTE(review): DAY/DAILY map to PROJECT, which excludes daily rates from the
# annualized conversions — looks deliberate, but confirm that is intended.
NORMALIZED_SALARY_UNITS = {
    "HOUR": "HOUR", "HOURLY": "HOUR", "DAY": "PROJECT", "DAILY": "PROJECT",
    "WEEK": "WEEK", "WEEKLY": "WEEK", "MONTH": "MONTH", "MONTHLY": "MONTH",
    "YEAR": "YEAR", "ANNUAL": "YEAR", "ANNUALLY": "YEAR", "PROJECT": "PROJECT"
}

# Experience-level classification: inclusive month ranges first, then the
# keyword fallbacks below when monthsOfExperience is absent.
EXPERIENCE_LEVEL_MONTHS = {
    "Entry-Level": (0, 12),
    "Mid-Level": (13, 60),
    "Senior-Level": (61, float('inf'))
}
EXPERIENCE_LEVEL_KEYWORDS = {
    "Entry-Level": ["entry level", "no experience", "graduate", "junior", "trainee", "intern"],
    "Mid-Level": ["mid level", "intermediate", "associate", "experienced"],
    "Senior-Level": ["senior", "lead", "principal", "expert", "staff level", "manager"]
}

# Urgency windows (days): posted within RECENT_POST_DAYS -> "new" tag;
# expiring within CLOSING_SOON_DAYS -> "closing soon" tag.
RECENT_POST_DAYS = 2
CLOSING_SOON_DAYS = 7

# ==================================================
# NEW: Geo-Centric Templates
# ==================================================
# Headline templates for the geo-focused summary; {city}/{state} are filled
# via str.format.
GEO_HEADER_TEMPLATES = [
    "Local Job Overview: {city}", "Position in {city}, {state}",
    "Opportunity in {city}", "Role Based in {city}", "{city} Role Highlights",
    "Your Next Step in {city}"
]
# Sub-section headings, grouped by their position within the summary block.
SUB_HEADER_TEMPLATES = {
    "primary": [
        "Key Information", "Fast Facts", "Role Essentials", "At a Glance",
        "Primary Details", "Snapshot"
    ],
    "secondary": [
        "Compensation & Benefits", "What You Get", "Pay & Perks", "Rewards",
        "Salary and Benefits"
    ],
    "tertiary": [
        "Core Focus", "What You'll Do", "Responsibilities", "Day-to-Day",
        "Position Scope"
    ]
}

# Worksite descriptors; some entries contain a {city} placeholder.
LOCAL_AREA_DESCRIPTORS = [
    "a vibrant local hub", "a key business district", "our growing {city} office",
    "a central {city} location", "our main {city} branch"
]

# Sentence templates for the geo-targeted summary, keyed by tone (only the
# 'informational' tone exists today) and then by summary slot. Placeholders
# ({city}, {role_for_summary}, {salary_primary_display}, ...) are filled via
# str.format in create_geo_targeted_summary.
GEO_TONE_TEMPLATES = {
    'informational': {
        'intro': [
            "<strong>Role in {city}:</strong> {role_for_summary}",
            "<strong>Position:</strong> {role_for_summary} (Based in {city})",
            "<strong>Job Title:</strong> {role_for_summary}"
        ],
        'company': [
            "<strong>Employer:</strong> {company_for_summary} (Hiring in {city})",
            "<strong>Hiring Organization:</strong> {company_for_summary}",
            "<strong>Company:</strong> {company_for_summary}"
        ],
        'location': [
            "<strong>Location:</strong> {city}, {state}",
            "<strong>Based in:</strong> {city}, {state} ({local_area_info})",
            "<strong>Worksite:</strong> {city}, {state}"
        ],
        'salary': [
            "<strong>Compensation:</strong> {salary_primary_display}{converted_salary_suffix}",
            "<strong>Pay:</strong> {salary_primary_display}{converted_salary_suffix}",
            "<strong>Salary:</strong> {salary_primary_display}{converted_salary_suffix}"
        ],
        # Experience slots, chosen by the description_tag from
        # get_experience_level_info.
        'experience_entry': [
            "<strong>Experience:</strong> Entry-Level (Training in {city}).",
            "<strong>Required Experience:</strong> Entry-Level.",
            "<strong>Career Level:</strong> Junior / Trainee."
        ],
        'experience_mid': [
            "<strong>Experience:</strong> Mid-Level.",
            "<strong>Required Experience:</strong> Associate / Experienced.",
            "<strong>Career Level:</strong> Intermediate."
        ],
        'experience_senior': [
            "<strong>Experience:</strong> Senior-Level.",
            "<strong>Required Experience:</strong> Proven expertise.",
            "<strong>Career Level:</strong> Senior / Lead."
        ],
        # Urgency slots, keyed by the description_key from get_job_urgency_tags.
        'urgency_new': [
            "<strong>Status:</strong> New {city} listing (actively interviewing).",
            "<strong>Posted:</strong> Recently.",
            "<strong>Urgency:</strong> Immediate consideration in {city}."
        ],
        'urgency_closing': [
            "<strong>Status:</strong> Closing soon.",
            "<strong>Urgency:</strong> Final applications accepted.",
            "<strong>Deadline:</strong> Approaching."
        ],
        'focus': [
            "Apply your <strong>{primary_skill}</strong> skills at our <strong>{city}</strong> location.",
            "This role centers on <strong>{primary_skill}</strong> within the <strong>{industry_display}</strong> space in {city}.",
            "Core objectives involve <strong>{primary_skill}</strong> in <strong>{industry_display}</strong>."
        ],
        'benefits': [
            "A comprehensive benefits package is included.",
            "Eligible team members receive standard benefits.",
            "This role offers a competitive benefits package."
        ],
        'value_props': [
            "Clear growth pathways at our {city} office.",
            "Impactful work with supportive local leadership.",
            "Stable workload with modern tooling and processes."
        ]
    }
}

# Synonyms injected into titles via the {hiring} placeholder.
HIRING_SYNS = [
    "Hiring", "Immediate Start", "Now Hiring", "Apply Today", "Urgent Hire",
    "Join Team", "Recruiting", "Apply Now"
]
# Entry-level phrases; also matched (lowercased) against experience
# descriptions in get_experience_level_info.
ENTRY_SYNS = ["Entry Level", "No Experience", "Junior Role", "Graduate Role", "Training Provided"]
# Title tags applied by get_job_urgency_tags for recent / expiring postings.
URGENCY_TAGS_NEW_TITLE = ["New", "Just Posted", "Recent Job"]
URGENCY_TAGS_CLOSING_TITLE = ["Apply Soon", "Hiring Now", "Urgent"]

# (REMOVED) SLIGHT_TITLE_IMPROVEMENT_TEMPLATES - Logic is now inside the function

# NEW: Geo-Focused Title Templates
# Each template mixes a subset of: {city}, {state}, {role}, {company},
# {job_type}, {salary_compact}, {experience_tag}, {urgency_tag}, {hiring},
# {industry}. Filled via str.format by the title builder.
GEO_FOCUSED_TITLE_TEMPLATES = [
    "{city} Job: {role} ({job_type})",
    "{city}, {state}: {role} at {company}",
    "{city}-based {role} Opening",
    "Work in {city}: {role}",
    "{city} {experience_tag} {role}",
    "{state} Career: {role} in {city}",
    "{city} {role} with {salary_compact} Pay",
    "{urgency_tag}: {role} in {city}",
    "{hiring} in {city}: {role}",
    "Immediate Opening in {city} for a {role}",
    "{city} {role} ({salary_compact})",
    "{company} Needs {role} in {city}",
    "Join {company} in {city} as a {role}",
    "{company} ({city}) Hiring: {role}",
    "{role} - {city}, {state}",
    "{role} Position in {city}",
    "{role} ({industry}) Vacancy in {city}",
    "{role} ({salary_compact}) - {city}, {state}",
    "{role} Needed - {city} Area",
    "{hiring}: {role} in {city}, {state}",
    "{role} | {city}, {state} | {company}",
    "{city} {role} ({job_type}) - {company}",
    "{experience_tag} {role} - {city}",
    "{company} is hiring a {role} in {city}",
    "{salary_compact} {role} job in {city}",
    "{urgency_tag} Opening: {role} ({city})",
    "{city} {job_type} {role}",
    "Apply Now: {role} in {city}",
    "{city} Career: {role} at {company}",
    "{role} • {city}, {state} • {company}",
    "{city}: {role} ({job_type}, {salary_compact})",
    "{urgency_tag} {role} @ {company} in {city}",
    "{company} seeks {role} in {city}"
]

# Generic SEO keywords available to the content builders.
JOB_SEO_KEYWORDS = [
    "hiring", "apply now", "urgent", "career", "benefits", "salary", "immediate"
]

# Canonical section name -> heading phrases used to recognize sections in
# source descriptions.
SECTION_KEYWORDS_MAP = {
    "responsibilities": ["key responsibilities", "responsibilities", "your role", "what you'll do", "duties", "main duties", "primary accountabilities"],
    "requirements": ["requirements", "qualifications", "essential skills", "your qualifications", "must-have qualifications", "to succeed you'll need", "your profile"],
    "skills": ["skills", "technical skills", "soft skills", "key competencies"],
    "benefits": ["benefits", "perks", "what we offer", "why join us"],
    "experience": ["experience", "professional background", "experience level"],
    "incentives": ["incentives", "incentive compensation", "bonus", "commission"],
    "workhours": ["work hours", "hours of work", "schedule"]
}

# ==========================
# Argparse
# ==========================

def parse_args():
    """Build and parse the command-line interface for the optimizer.

    The two title-related flags are mutually exclusive: either keep the
    original title untouched, or apply only the light geo-focused tweak.
    """
    parser = argparse.ArgumentParser(description="Optimize Local JobPosting NDJSON with a strong location-centric focus.")

    # I/O and general behavior.
    parser.add_argument('-i', '--input', default='all-schemas.ndjson', help='Input NDJSON file')
    parser.add_argument('-o', '--output', default='schema.ndjson', help='Output NDJSON file')
    parser.add_argument('--seed', type=int, default=None, help='Seed for reproducible randomness')
    parser.add_argument('--logo_cdn', default=DESIRED_DEFAULT_FALLBACK_LOGO_URL, help='Default fallback logo URL')
    parser.add_argument('--currency', default=DEFAULT_CURRENCY, help='Default currency')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable debug logging')

    # Salary-conversion knobs.
    parser.add_argument('--full_time_hours', type=int, default=DEFAULT_FULL_TIME_HOURS_PER_WEEK, help="Standard hours for full-time salary conversions.")
    parser.add_argument('--part_time_hours', type=int, default=DEFAULT_PART_TIME_HOURS_PER_WEEK, help="Standard hours for part-time salary conversions.")
    parser.add_argument('--enable_salary_adjustment', action='store_true', help='Enable dynamic salary value adjustments (random increase/decrease/no change).')

    # Title handling modes (mutually exclusive).
    title_mode = parser.add_mutually_exclusive_group()
    title_mode.add_argument('--no-change-in-title', action='store_true', help='Do not improve the job title; keep the original (still optimizes description, etc.).')
    title_mode.add_argument('--slightly-improve-title', action='store_true', help='Apply only minor, geo-focused improvements to the original title and leave all other fields unchanged.')

    return parser.parse_args()

# ==========================
# Small utilities
# (No changes in this section)
# ==========================

def title_case(s: str) -> str:
    """Capitalize each whitespace-separated word, preserving multi-letter
    acronyms (e.g. "NASA") as-is. Returns "" for empty input."""
    if not s:
        return ""
    result = []
    for word in s.split():
        if word.isupper() and len(word) > 1:
            result.append(word)  # keep acronyms untouched
        else:
            result.append(word.capitalize())
    return " ".join(result)

def to_dhaka_offset(dt: str) -> str:
    """Re-render an ISO-8601 timestamp in the Asia/Dhaka offset (UTC+6).

    Returns "" for empty input and the original string when parsing fails.
    """
    if not dt:
        return ""
    dhaka = timezone(timedelta(hours=6))
    try:
        parsed = datetime.fromisoformat(dt.replace('Z', '+00:00'))
    except ValueError:
        logging.warning(f"Invalid date format for Dhaka offset: {dt}. Returning original.")
        return dt
    return parsed.astimezone(dhaka).isoformat()

def to_midnight(dt_str: str) -> str:
    """Clamp a timestamp to the end of its day (23:59:59) in its own zone.

    Despite the name, the result is end-of-day, not 00:00. Naive inputs are
    rendered in UTC+6 (astimezone interprets them as system local time).
    Returns "" for empty input and the original string when parsing fails.
    """
    if not dt_str:
        return ""
    try:
        parsed = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        tz = parsed.tzinfo if parsed.tzinfo else timezone(timedelta(hours=6))
        end_of_day = parsed.astimezone(tz).replace(hour=23, minute=59, second=59, microsecond=0)
        return end_of_day.isoformat()
    except (IndexError, ValueError) as e:
        logging.warning(f"Date parse error for to_midnight: {dt_str}. Error: {e}. Returning original.")
        return dt_str

def normalize_url(url: str) -> str:
    """Canonicalize a URL: strip query/fragment, force https, and append a
    trailing slash unless the last path segment looks like a file name."""
    if not url:
        return ""
    # Drop query string and fragment first.
    url = url.split('?', 1)[0].split('#', 1)[0]
    # Force the https scheme.
    if url.startswith('//'):
        url = 'https://' + url[2:]
    elif url.startswith('http://'):
        url = 'https://' + url[7:]
    elif not url.startswith('https://'):
        url = 'https://' + url.lstrip('/')
    # Directory-style paths (no 2-5 char extension) get a trailing slash.
    last_segment = url.split('/')[-1]
    if not url.endswith('/') and not re.search(r'\.\w{2,5}(?:/)?$', last_segment):
        url += '/'
    return url

def get_currency_symbol(currency_code: str) -> str:
    """Map an ISO currency code to its display symbol.

    Unknown truthy codes become "CODE " (with a trailing space); falsy input
    falls back to "$".
    """
    known = {"USD": "$", "EUR": "€", "GBP": "£", "JPY": "¥", "CAD": "CA$", "AUD": "A$", "INR": "₹", "BDT": "৳"}
    code = str(currency_code).upper()
    if code in known:
        return known[code]
    return (str(currency_code) + " ") if currency_code else "$"

# ==========================
# Intelligence helpers
# (No changes in this section, logic is sound)
# ==========================

def get_primary_skill(skills_value) -> str:
    """Return the first individual skill from a string or list of skills.

    Strings are split on comma/semicolon/slash; lists are flattened first
    (non-string items are stringified, None is skipped). Returns "" when no
    skill can be extracted.
    """
    if not skills_value:
        return ""
    # Normalize the input to a single comma-joined string.
    if isinstance(skills_value, str):
        joined = skills_value
    elif isinstance(skills_value, list):
        parts = []
        for entry in skills_value:
            if isinstance(entry, str):
                if entry.strip():
                    parts.append(entry.strip())
            elif entry is not None:
                try:
                    parts.append(str(entry).strip())
                except Exception:
                    pass
        joined = ", ".join(parts)
    else:
        try:
            joined = str(skills_value).strip()
        except Exception:
            return ""
    if not joined:
        return ""
    # First non-empty token wins.
    try:
        for token in re.split(r'[,;/]', joined):
            if token.strip():
                return token.strip()
        return ""
    except TypeError:
        return ""


def clean_text_to_list(text_content: str) -> list[str]:
    """Extract plain-text items from an HTML fragment.

    Preference order: non-empty <li> texts, then non-empty <p> texts, then
    the whole document text as a single item. Returns [] for empty input.
    """
    if not text_content:
        return []
    soup = BeautifulSoup(text_content, 'html.parser')
    bullets = [li.get_text(separator=' ', strip=True) for li in soup.find_all('li') if li.get_text(strip=True)]
    if bullets:
        return bullets
    paragraphs = [p.get_text(separator=' ', strip=True) for p in soup.find_all('p') if p.get_text(strip=True)]
    if paragraphs:
        return paragraphs
    whole = soup.get_text(strip=True)
    return [whole] if whole else []


def get_location_details(rec: dict) -> tuple[str, str, str]:
    """Extract (city, state, country) from a JobPosting's jobLocation.

    Accepts either a single Place dict or a list of them (first entry wins).
    When only addressRegion is present, the state doubles as the city.
    Country defaults to "US" when addressCountry is missing or not a string.
    """
    loc = rec.get('jobLocation')
    if isinstance(loc, list) and loc:
        loc = loc[0]

    city, state, country = "", "", "US"
    if isinstance(loc, dict):
        addr = loc.get('address')
        if isinstance(addr, dict):
            city = addr.get('addressLocality', '')
            state = addr.get('addressRegion', '')
            raw_country = addr.get('addressCountry')
            if isinstance(raw_country, str):
                country = raw_country

    if not city and state:
        city = state
    elif not city and not state:
        logging.debug(f"Job ID {rec.get('@id', 'Unknown')}: Missing addressLocality and addressRegion.")
    return str(city), str(state), str(country)


def get_employment_types_info(rec: dict) -> dict:
    """Normalize employmentType into display strings and a schema.org list.

    Returns a dict with the deduped display list, the type picked for the
    title, the type picked for description text, and the UPPER_SNAKE list
    for the schema output ("OTHER" when nothing usable is present).
    """
    raw = rec.get('employmentType')
    if isinstance(raw, str):
        candidates = [raw]
    elif isinstance(raw, list):
        candidates = raw
    else:
        candidates = []

    normalized = []
    for entry in candidates:
        if isinstance(entry, str) and entry.strip():
            lowered = entry.replace('_', '-').strip().lower()
            normalized.append(title_case(lowered.replace(" time", "-Time")))

    unique_types = sorted(set(normalized))
    title_choice = description_choice = "Flexible"

    if not unique_types:
        schema_list = ["OTHER"]
    else:
        has_full = "Full-Time" in unique_types
        has_part = "Part-Time" in unique_types
        if has_full and has_part:
            title_choice, description_choice = "Full/Part-Time", "Full-Time"
        elif len(unique_types) == 1:
            title_choice = description_choice = unique_types[0]
        else:
            # Prefer full-time, then part-time, else pick one at random.
            picked = "Full-Time" if has_full else ("Part-Time" if has_part else random.choice(unique_types))
            title_choice = description_choice = picked
        schema_list = [t.upper().replace('-', '_') for t in unique_types]

    return {
        'all_available_display': unique_types or ["Flexible"],
        'title_display': title_choice,
        'chosen_for_description': description_choice,
        'schema_list': schema_list
    }


def get_industries_info(rec: dict) -> dict:
    """Normalize the industry field ('&' -> 'and', Title Case, deduped,
    sorted). Returns empty display/schema lists when nothing usable exists;
    title_display is a random pick from the unique values otherwise."""
    raw = rec.get('industry')
    if isinstance(raw, list):
        values = raw
    elif isinstance(raw, str):
        values = [raw]
    else:
        values = []

    cleaned = [
        title_case(v.replace('&', 'and').strip())
        for v in values
        if isinstance(v, str) and v.strip()
    ]
    unique_industries = sorted(set(cleaned))

    if not unique_industries:
        return {'display_list': [], 'title_display': "", 'schema_list': []}
    return {
        'display_list': unique_industries,
        'title_display': random.choice(unique_industries),
        'schema_list': unique_industries
    }


def get_experience_level_info(rec: dict) -> dict:
    """Classify experienceRequirements into Entry/Mid/Senior level tags.

    Explicit monthsOfExperience (matched against EXPERIENCE_LEVEL_MONTHS)
    wins over keyword matching on the free-text description; ENTRY_SYNS is
    a last-resort fallback. Returns {"title_tag", "description_tag", "months"}
    with empty strings / None when nothing can be determined.
    """
    exp_req = rec.get("experienceRequirements", {})
    months_exp = None
    level_tag = ""
    description_tag = ""
    if isinstance(exp_req, dict):
        months_str = exp_req.get("monthsOfExperience")
        # BUG FIX: description may be present but None (or another non-string
        # type); the previous code called .lower() on it directly and raised
        # AttributeError. Coerce safely before lowering.
        desc_raw = exp_req.get("description", "")
        desc_str = desc_raw.lower() if isinstance(desc_raw, str) else ""
        if months_str is not None:
            try:
                months_exp = int(months_str)
            except (ValueError, TypeError):
                pass
        if months_exp is not None:
            # Explicit month counts take precedence over keyword heuristics.
            for level, (min_m, max_m) in EXPERIENCE_LEVEL_MONTHS.items():
                if min_m <= months_exp <= max_m:
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        else:
            for level, keywords in EXPERIENCE_LEVEL_KEYWORDS.items():
                if any(kw in desc_str for kw in keywords):
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        # Last resort: title-style entry-level synonyms ("No Experience", ...).
        if not level_tag and any(syn.lower() in desc_str for syn in ENTRY_SYNS):
            description_tag, level_tag = "Entry-Level", "Entry"
    return {"title_tag": level_tag, "description_tag": description_tag, "months": months_exp}


def get_job_urgency_tags(date_posted_str: str, valid_through_str: str, rec_id: str) -> dict:
    """Derive urgency tags from the posting and expiry dates.

    A posting within RECENT_POST_DAYS is tagged "new"; one expiring within
    CLOSING_SOON_DAYS is tagged "closing" (closing overwrites new when both
    apply). Unparseable dates are logged at debug level and ignored.
    """
    result = {"title_tag": "", "description_key": None}
    now_utc = datetime.now(timezone.utc)

    def _parse_utc(raw):
        # Accept trailing 'Z' and normalize to an aware UTC datetime.
        return datetime.fromisoformat(raw.replace('Z', '+00:00')).astimezone(timezone.utc)

    if date_posted_str:
        try:
            posted_dt = _parse_utc(date_posted_str)
        except ValueError as e:
            logging.debug(f"JID {rec_id}: Err parsing datePosted '{date_posted_str}': {e}")
        else:
            if (now_utc - posted_dt).days <= RECENT_POST_DAYS:
                result.update({"title_tag": random.choice(URGENCY_TAGS_NEW_TITLE), "description_key": "new"})

    if valid_through_str:
        try:
            valid_dt = _parse_utc(valid_through_str)
        except ValueError as e:
            logging.debug(f"JID {rec_id}: Err parsing validThrough '{valid_through_str}': {e}")
        else:
            if timedelta(days=0) <= (valid_dt - now_utc) <= timedelta(days=CLOSING_SOON_DAYS):
                result.update({"title_tag": random.choice(URGENCY_TAGS_CLOSING_TITLE), "description_key": "closing"})

    return result


def to_k_notation(num_val: float, currency_symbol: str) -> str:
    """Render a salary figure compactly: 50000 -> "$50k", 1500 -> "$1.5k",
    999 -> "$999" (values under 1000 are truncated to an int)."""
    if abs(num_val) < 1000:
        return f"{currency_symbol}{int(num_val)}"
    thousands = num_val / 1000.0
    compact = f"{currency_symbol}{thousands:.1f}k"
    # Drop a trailing ".0" so round thousands read as "$50k", not "$50.0k".
    return compact.replace(".0k", "k")


def format_salary_details(rec: dict, currency_symbol: str = "$", enable_dynamic_adjustment: bool = False,
                          chosen_emp_type: str = "Full-Time", full_time_hours: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                          part_time_hours: int = DEFAULT_PART_TIME_HOURS_PER_WEEK) -> dict:
    """Parse a JobPosting's baseSalary into display strings and conversions.

    Returns a dict with:
      - primary_display: compact string (e.g. "$40k-$55k/Hour"), "Negotiable",
        or "" when no usable values exist
      - is_negotiable: True when the salary is the textual "negotiable"
      - conversions: {unit: "value/Unit"} equivalents for non-primary units
      - converted_raw / primary_raw_min / primary_raw_max: numeric values
      - adjusted_factor: multiplier applied when dynamic adjustment is on
    """
    base = rec.get('baseSalary', {})
    # BUG FIX: validate baseSalary's type BEFORE dereferencing it. The old
    # code called base.get('value') first, so a non-dict baseSalary (string,
    # list, number) raised AttributeError instead of being treated as absent.
    if not isinstance(base, dict):
        base = {}
    val_obj = base.get('value', {})
    if not isinstance(val_obj, dict):
        val_obj = {}

    # Values may live on the nested MonetaryAmount or directly on baseSalary.
    minv_raw = val_obj.get('minValue', base.get('minValue'))
    maxv_raw = val_obj.get('maxValue', base.get('maxValue'))
    unit_raw = str(val_obj.get('unitText', base.get('unitText', ''))).upper()
    primary_unit_normalized = NORMALIZED_SALARY_UNITS.get(unit_raw, "PROJECT")

    def parse_salary_value(s_val):
        # Accept numbers, numeric strings (symbols/commas stripped), or the
        # sentinel "Negotiable" for textual/empty salary strings.
        if s_val is None:
            return None
        if isinstance(s_val, (int, float)):
            return float(s_val)
        if isinstance(s_val, str):
            s_val_cleaned = str(s_val).replace(currency_symbol, '').replace(',', '').strip()
            if "negotiable" in s_val_cleaned.lower() or not s_val_cleaned:
                return "Negotiable"
            try:
                return float(s_val_cleaned)
            except ValueError:
                return None
        return None

    min_val_num = parse_salary_value(minv_raw)
    max_val_num = parse_salary_value(maxv_raw)

    if min_val_num == "Negotiable" or max_val_num == "Negotiable":
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}, "adjusted_factor": 1.0}
    if min_val_num is None and max_val_num is None:
        return {"primary_display": "", "is_negotiable": False, "conversions": {}, "adjusted_factor": 1.0}

    # Optional randomized adjustment (weighted toward "no change").
    adjustment_factor = 1.0
    if enable_dynamic_adjustment:
        adjustment_factor = random.choice(WEIGHTED_SALARY_ADJUSTMENT_FACTORS)
        if isinstance(min_val_num, (int, float)):
            min_val_num *= adjustment_factor
        if isinstance(max_val_num, (int, float)):
            max_val_num *= adjustment_factor
        # Keep the range ordered even after adjustment.
        if isinstance(min_val_num, (int, float)) and isinstance(max_val_num, (int, float)) and min_val_num > max_val_num:
            min_val_num, max_val_num = max_val_num, min_val_num

    # Build the primary display and pick the value used for conversions
    # (midpoint when both ends exist).
    primary_value_for_conversion, primary_display_val = None, "Error"
    if min_val_num is not None and max_val_num is not None:
        primary_value_for_conversion = (min_val_num + max_val_num) / 2.0
        primary_display_val = f"{to_k_notation(min_val_num, currency_symbol)}-{to_k_notation(max_val_num, currency_symbol)}"
    elif max_val_num is not None:
        primary_value_for_conversion, primary_display_val = max_val_num, f"Up to {to_k_notation(max_val_num, currency_symbol)}"
    elif min_val_num is not None:
        primary_value_for_conversion, primary_display_val = min_val_num, to_k_notation(min_val_num, currency_symbol)
    else:
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}, "adjusted_factor": adjustment_factor}

    # "/Unit" suffix, omitted for project-based pay.
    primary_unit_display = SALARY_UNIT_MAP.get(primary_unit_normalized, "")
    primary_salary_str = f"{primary_display_val}{'/' + primary_unit_display if primary_unit_display and primary_unit_display != 'Project' else ''}"

    conversions, converted_values_num = {}, {}
    hours_per_week = part_time_hours if "part-time" in chosen_emp_type.lower() else full_time_hours

    # Convert the primary value into yearly/monthly/weekly/hourly equivalents.
    if primary_value_for_conversion is not None and primary_unit_normalized != "PROJECT":
        annual_equiv = None
        if primary_unit_normalized == "HOUR":
            annual_equiv = primary_value_for_conversion * hours_per_week * WEEKS_PER_YEAR
        elif primary_unit_normalized == "WEEK":
            annual_equiv = primary_value_for_conversion * WEEKS_PER_YEAR
        elif primary_unit_normalized == "MONTH":
            annual_equiv = primary_value_for_conversion * MONTHS_PER_YEAR
        elif primary_unit_normalized == "YEAR":
            annual_equiv = primary_value_for_conversion

        if annual_equiv is not None:
            converted_values_num.update({
                "YEAR": annual_equiv,
                "MONTH": annual_equiv / MONTHS_PER_YEAR,
                "WEEK": annual_equiv / WEEKS_PER_YEAR
            })
            if hours_per_week > 0:
                converted_values_num["HOUR"] = (annual_equiv / WEEKS_PER_YEAR) / hours_per_week
            for unit, val in converted_values_num.items():
                if unit != primary_unit_normalized:
                    conversions[unit] = f"{to_k_notation(val, currency_symbol)}/{SALARY_UNIT_MAP.get(unit, '')}"

    return {
        "primary_display": primary_salary_str,
        "primary_raw_min": min_val_num,
        "primary_raw_max": max_val_num,
        "primary_unit_normalized": primary_unit_normalized,
        "is_negotiable": False,
        "conversions": conversions,
        "converted_raw": converted_values_num,
        "adjusted_factor": adjustment_factor
    }

# ==========================
# Content assembly & enrichment
# (Refactored for Geo-Focus)
# ==========================

def clean_role_and_company(original_title: str, org_name_from_ho: str) -> tuple[str, str]:
    """Split a raw job title into (role, company).

    Strips "(m/f/d)"-style parentheticals and trailing "job(s)" from the
    title. When no organization name is supplied, tries to peel the company
    off the title via "at"/"for"/"with" (2-5 word company names only), then
    removes the company name from the role. Falls back to "Associate" /
    "A Leading Local Company" when either part comes up empty.
    """
    company = str(org_name_from_ho or "").strip()

    role = str(original_title)
    role = re.sub(r'\s*\(.*?[mfvdix].*?\)\s*', '', role, flags=re.IGNORECASE).strip()
    role = re.sub(r"\s+jobs?\b", "", role, flags=re.IGNORECASE).strip()
    final_role = role

    if not company:
        for prep in ("at", "for", "with"):
            hit = re.search(rf"^(.*?)\s+{re.escape(prep)}\s+([\w\s.,'&()-]+)$", role, flags=re.IGNORECASE)
            if hit and 2 <= len(hit.group(2).split()) <= 5:
                final_role = hit.group(1).strip()
                company = hit.group(2).strip()
                break

    if company:
        # Remove a duplicated company name from the role text.
        final_role = re.sub(rf"\s*\b{re.escape(company)}\b", "", final_role, flags=re.IGNORECASE).strip(" -|,")

    if not company:
        company = "A Leading Local Company"
    if not final_role:
        final_role = "Associate"
    return final_role.strip(), company.strip()


def geo_context_enrichment(html_block: str, role: str, company: str, primary_skill: str, city: str, state: str, industry_display: str) -> str:
    """Minimal enrichment layer to add location-specific context.

    Applies a controlled synonym rewrite over the HTML, then appends
    geo-context bullet points to the first <ul> (creating one when absent).
    `role` is currently unused but kept for interface stability.
    """
    soup = BeautifulSoup(html_block or "", 'html.parser')
    text = soup.decode_contents() if html_block else ""

    # Controlled synonym map
    synonyms = {
        r"\bcompany\b": "organization",
        r"\bemployees\b": "team members",
        r"\bsalary\b": "compensation",
        r"\bjob\b": "role",
        r"\bexperience\b": "background",
        r"\bbenefits\b": "perks"
    }
    for pat, repl in synonyms.items():
        text = re.sub(pat, repl, text, flags=re.IGNORECASE)

    # BUG FIX: re-parse the rewritten markup. Previously the substitutions
    # were applied to `text`, but the function kept appending to (and
    # returning) the original `soup`, silently discarding every replacement.
    # NOTE(review): the regexes run over raw markup, so matching words inside
    # tag attributes would also be rewritten — confirm inputs are plain HTML
    # body content.
    soup = BeautifulSoup(text, 'html.parser')

    # NEW: Geo-Context stitches
    stitches = [
        f"<li>This <strong>{city}</strong>-based role is an excellent opportunity for professionals skilled in <strong>{primary_skill or 'relevant skills'}</strong>.</li>",
        f"<li>Our <strong>{company}</strong> team in {city}, {state} is growing.</li>",
        f"<li>Benefit from working in <strong>{city}</strong>, a key hub for the <strong>{industry_display}</strong> industry.</li>"
    ]

    # Insert stitches under first UL; else create a fresh list
    ul = soup.find('ul')
    if ul:
        for item in stitches:
            ul.append(BeautifulSoup(item, 'html.parser'))
    else:
        ul_new = BeautifulSoup("<ul></ul>", 'html.parser')
        for item in stitches:
            ul_new.ul.append(BeautifulSoup(item, 'html.parser'))
        soup.append(ul_new)

    return soup.decode_contents()


def create_geo_targeted_summary(rec: dict, primary_skill: str, salary_details: dict, job_urgency: dict, exp_level_info: dict, industries_info: dict) -> str:
    """Build an HTML SEO summary with a strong location-first emphasis.

    Produces a header plus one of three randomly chosen layout structures
    (grouped lists, one shuffled list, or paragraphs), then runs the result
    through `geo_context_enrichment`. All `random.*` calls are deterministic
    for a given per-record seed set by the caller.

    Args:
        rec: The JobPosting record (uses 'title' and 'hiringOrganization').
        primary_skill: Headline skill, may be empty.
        salary_details: Output of `format_salary_details`, or None.
        job_urgency: Output of `get_job_urgency_tags`, or None.
        exp_level_info: Output of `get_experience_level_info`, or None.
        industries_info: Output of `get_industries_info`, or None.

    Returns:
        A cleaned HTML fragment string.
    """
    # ROBUSTNESS FIX: callers (e.g. assemble_location_focused_description)
    # default these to None; guard so the .get() calls below cannot raise
    # AttributeError. salary_details/industries_info were already guarded.
    job_urgency = job_urgency or {}
    exp_level_info = exp_level_info or {}

    role_for_summary, company_for_summary = clean_role_and_company(rec.get('title',''), rec.get('hiringOrganization',{}).get('name',''))
    city, state, _ = get_location_details(rec)

    active = GEO_TONE_TEMPLATES['informational']
    salary_primary_display = salary_details.get("primary_display", "Negotiable") if salary_details else "Negotiable"
    converted_salary_suffix = ""
    if salary_details and salary_details.get("conversions"):
        # Prefer the annualized conversion; otherwise pick any available one.
        best_conv = salary_details["conversions"].get("YEAR") or random.choice(list(salary_details["conversions"].values()))
        converted_salary_suffix = f" (approx. {best_conv})"

    # Shared substitution data for every template phrase below.
    data = {
        "role_for_summary": title_case(role_for_summary),
        "company_for_summary": company_for_summary,
        "city": city,
        "state": state,
        "local_area_info": random.choice(LOCAL_AREA_DESCRIPTORS).format(city=city),
        "salary_primary_display": salary_primary_display,
        "converted_salary_suffix": converted_salary_suffix,
        "primary_skill": primary_skill or "your professional skills",
        "industry_display": (industries_info['display_list'][0] if industries_info and industries_info.get('display_list') else "a dynamic")
    }

    def phrase(key):
        # Pick a random template for this phrase key and fill it in.
        return random.choice(active[key]).format(**data)

    job_details = [phrase('intro'), phrase('company'), phrase('location')]

    urgency_key = job_urgency.get("description_key")
    if urgency_key:
        job_details.append(phrase(f'urgency_{urgency_key}'))

    exp_level = exp_level_info.get("description_tag")
    if exp_level == "Entry-Level":
        job_details.append(phrase('experience_entry'))
    elif exp_level == "Mid-Level":
        job_details.append(phrase('experience_mid'))
    elif exp_level == "Senior-Level":
        job_details.append(phrase('experience_senior'))

    comp_benefits = [phrase('salary'), f"<strong>Benefits:</strong> {random.choice(active['benefits'])}"]
    value_props = [f"<em>{random.choice(active['value_props']).format(city=city)}</em>"]
    role_focus = [random.choice(active['focus']).format(**data)]

    summary_html = f"<h3>{random.choice(GEO_HEADER_TEMPLATES).format(city=city, state=state)}</h3>"
    # Three alternative page structures keep generated summaries varied.
    structure_choice = random.randint(1, 3)
    if structure_choice == 1:
        # Structure 1: three themed sub-sections, details shuffled.
        random.shuffle(job_details)
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['primary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in job_details) +
            "</ul>"
        )
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['secondary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in (comp_benefits + value_props)) +
            "</ul>"
        )
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['tertiary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in role_focus) +
            "</ul>"
        )
    elif structure_choice == 2:
        # Structure 2: one flat, fully shuffled bullet list.
        all_parts = job_details + comp_benefits + value_props + role_focus
        random.shuffle(all_parts)
        summary_html += (
            f"<h4>{random.choice(SUB_HEADER_TEMPLATES['primary'])}</h4><ul>" +
            "".join(f"<li>{p}</li>" for p in all_parts) +
            "</ul>"
        )
    else:
        # Structure 3: prose paragraphs instead of lists.
        summary_html += f"<p>{' '.join(job_details)}</p>"
        summary_html += f"<p>{' '.join(comp_benefits + value_props)}</p>"
        summary_html += f"<h5>{random.choice(SUB_HEADER_TEMPLATES['tertiary'])}</h5><p>{role_focus[0]}</p>"

    # Enrich with stitched context and synonyms
    summary_html = geo_context_enrichment(
        summary_html,
        role=title_case(role_for_summary),
        company=company_for_summary,
        primary_skill=primary_skill,
        city=city,
        state=state,
        industry_display=data["industry_display"]
    )

    return BeautifulSoup(summary_html, 'html.parser').decode_contents().strip()


# ==========================
# Title building & SEO post-processing
# (Refactored for Geo-Focus)
# ==========================

def enforce_length(title: str, max_len: int) -> str:
    """Trim *title* to at most *max_len* characters.

    Strategy, in order: return as-is if it already fits; drop a trailing
    parenthetical group; drop whole words from the right; as a last resort
    hard-truncate a single long word with an ellipsis.
    """
    if len(title) <= max_len:
        return title.strip()
    # A trailing "(...)" group is usually decorative — drop it first.
    shortened = re.sub(r'\s*\([^)]*\)\s*$', '', title).strip()
    if len(shortened) <= max_len:
        return shortened
    # BUG FIX: continue trimming from the paren-stripped text rather than the
    # original title, so the already-removed parenthetical words are not
    # pointlessly re-trimmed one by one.
    title = shortened
    while len(title) > max_len:
        parts = title.rsplit(' ', 1)
        if len(parts) > 1:
            title = parts[0]
        else:
            # Single over-long word: hard truncate with ellipsis.
            return title[:max_len-3].strip() + "..."
    return title.strip(" -|,( ")


def enrich_title_for_seo(title: str) -> str:
    """Append one missing, high-intent keyword if space allows; avoid stuffing."""
    if len(title) > 60:  # keep margin — no room to add anything safely
        return title
    lowered = title.lower()
    for keyword in JOB_SEO_KEYWORDS:
        if keyword.lower() in lowered:
            continue
        # First keyword not already present wins; append exactly one.
        return f"{title} | {keyword.title()}"
    return title


def generate_location_focused_title(rec: dict, primary_skill: str, salary_details: dict, job_urgency:dict,
                                    exp_level_info:dict, emp_types_info:dict, industries_info:dict,
                                    dynamic_max_len:int) -> str:
    """Generate an SEO title using the GEO_FOCUSED_TITLE_TEMPLATES list.

    Picks a random geo-focused template, retries a few times to find one
    whose placeholders are all covered by available data, fills it in,
    cleans up separators, appends one SEO keyword if space allows, and
    enforces `dynamic_max_len`.
    """
    ho_name = rec.get('hiringOrganization', {}).get('name', '')
    cleaned_role, company_name = clean_role_and_company(rec.get('title', rec.get('name','')), ho_name)
    city, state, _ = get_location_details(rec)

    # Build a compact salary string, preferring "$X/hr" when an hourly rate
    # has a yearly conversion available.
    salary_fmt = ""
    if salary_details and salary_details.get("primary_display") and not salary_details.get("is_negotiable"):
        salary_fmt = salary_details["primary_display"]
        prim_unit = salary_details["primary_unit_normalized"]
        conv = salary_details.get("conversions", {})
        if prim_unit == "HOUR" and "YEAR" in conv:
            salary_fmt = f"{salary_details['primary_display'].split('/')[0]}/hr"

    # Candidate substitution values; empty strings mean "data unavailable".
    parts = {
        "role": title_case(cleaned_role),
        "company": title_case(company_name),
        "city": title_case(city),
        "state": state.upper(),
        "job_type": emp_types_info.get('title_display', ""),
        "hiring": random.choice(HIRING_SYNS),
        "urgency_tag": job_urgency.get('title_tag', ''),
        "experience_tag": exp_level_info.get('title_tag', ''),
        "salary_compact": salary_fmt if "Negotiable" not in salary_fmt else "",
        "skill1": title_case(primary_skill),
        "industry": industries_info.get('title_display', "")
    }

    # CRITICAL CHANGE: Use the new geo-focused template list
    tmpl = random.choice(GEO_FOCUSED_TITLE_TEMPLATES)

    # Filter out empty parts to avoid "Role in ()"
    parts_filled = {k: v for k, v in parts.items() if v}

    # Re-roll the template while it references a placeholder whose value is
    # empty; give up after 10 tries. After 5 tries the check loosens: accept
    # any template containing at least one *random* placeholder name.
    # NOTE(review): the loosened check tests a randomly chosen key rather
    # than any filled key — looks intentional ("be less strict") but verify.
    attempts = 0
    while any(f"{{{k}}}" in tmpl for k in parts.keys() if k not in parts_filled) and attempts < 10:
        tmpl = random.choice(GEO_FOCUSED_TITLE_TEMPLATES)
        attempts += 1
        if attempts > 5: # Be less strict if we cant find a perfect match
             if "{" + random.choice(list(parts.keys())) + "}" in tmpl:
                 break # Just pick one

    # Fill the template, then scrub artifacts left by empty values:
    # repeated whitespace, spaces around separators, empty bracket pairs,
    # doubled dashes/bullets, and stray leading/trailing separators.
    title = tmpl.format(**parts)
    title = re.sub(r'\s{2,}', ' ', title).strip()
    title = re.sub(r'\s*([-|(),:•—])\s*', r'\1', title)
    title = title.replace('()', '').replace('[]', '').strip(" -|,: •—")
    title = re.sub(r'\s*-\s*-\s*', '-', title) # Clean up double dashes
    title = re.sub(r'\s*•\s*•\s*', '•', title) # Clean up double bullets
    title = title.strip(" -|,: •—")

    title = enrich_title_for_seo(title)
    return enforce_length(title, dynamic_max_len)


# --- UPDATED FUNCTION for --slightly-improve-title ---
def generate_slight_title_improvement(original_title: str, city: str, state: str, urgency_tag: str, max_len: int) -> str:
    """Apply a minor, additive improvement to the original job title
    (the --slightly-improve-title mode).

    Only appends geo/urgency information that is not already present in the
    title, trying prioritized template buckets (urgent+geo, geo, urgent,
    state-only) until one fits within `max_len`. Falls back to the cleaned
    original title if nothing applies.

    Args:
        original_title: The record's current title.
        city: City display name (may be empty).
        state: State/region abbreviation (may be empty).
        urgency_tag: Urgency label such as an "Urgent"-style tag (may be empty).
        max_len: Hard cap on the returned title length.

    Returns:
        The (possibly) improved title, length-enforced.
    """
    title = original_title.strip()

    # 1. More gentle cleanup: just remove trailing separators
    cleaned_title = title.strip(" -|,: •—")

    title_lower = cleaned_title.lower()

    # 2. Check what information is *already* present
    has_city = city and city.lower() in title_lower
    # Check for state abbreviation, avoiding spaces (e.g., "NY" in "JobNY")
    # NOTE(review): stripping every non-uppercase char can create false
    # positives — "NY" matches the residue of e.g. "New York Nanny" even when
    # the abbreviation itself is absent. Confirm this is acceptable.
    has_state = state and state.upper() in re.sub(r'[^A-Z]', '', title)
    has_urgency = urgency_tag and urgency_tag.lower() in title_lower

    # 3. Define prioritized template buckets
    # {title} is the cleaned original title
    template_buckets = {
        'urgent_geo': [
            "{urgency_tag}: {title} ({city})",
            "{title} - {city}, {state} ({urgency_tag})",
            "{title} ({city}) - {urgency_tag}",
        ],
        'geo': [
            "{title} - {city}, {state}",
            "{title} | {city}, {state}",
            "{title} in {city}",
            "{city} Opening: {title}",
            "{title} ({city})",
        ],
        'urgent': [
            "{title} ({urgency_tag})",
            "{title} - {urgency_tag}",
            "{urgency_tag}: {title}",
            "{title} | {urgency_tag}",
        ],
        'state_only': [
             "{title} ({state} Opening)",
             "{title} - {state}",
        ]
    }

    # Substitution values; empty strings disqualify templates needing them.
    parts = {
        "title": cleaned_title,
        "city": title_case(city),
        "state": state.upper(),
        "urgency_tag": urgency_tag,
    }

    # 4. Define which buckets to try, in order of priority
    priority_order = []

    # Build priority list based on *new* information we can add
    can_add_geo = city and not has_city
    can_add_urgency = urgency_tag and not has_urgency
    # Only use state-only if city isn't available/used
    can_add_state = state and not has_state and not can_add_geo 

    if can_add_urgency and can_add_geo:
        priority_order.append('urgent_geo')

    if can_add_geo:
        priority_order.append('geo')

    if can_add_urgency:
        priority_order.append('urgent')

    if can_add_state:
        priority_order.append('state_only')

    # 5. Iterate through prioritized buckets
    for bucket_name in priority_order:
        templates = template_buckets[bucket_name]
        random.shuffle(templates) # Shuffle templates *within* the priority bucket

        for tmpl in templates:
            # Check if all placeholders for *this* template are available
            placeholders = re.findall(r'\{([^{}]+)\}', tmpl)
            if not all(parts.get(p) for p in placeholders if p != 'title'): # 'title' is always present
                continue # Skip template if data is missing (e.g., missing {state} for a {city}, {state} template)

            new_title = tmpl.format(**parts)

            # Clean up potential formatting issues
            # NOTE(review): the separator regex removes the spaces *around*
            # "-", "|" etc., producing e.g. "Role-City,NY" — this matches the
            # full title generator's behavior, so it appears intentional.
            new_title = re.sub(r'\s{2,}', ' ', new_title).strip()
            new_title = re.sub(r'\s*([-|(),:•—])\s*', r'\1', new_title)
            new_title = new_title.replace('()', '').replace('[]', '').strip(" -|,: •—")

            if len(new_title) <= max_len:
                return enforce_length(new_title, max_len) # Found a good one that fits

    # 6. If no template was applied, return the cleaned original title
    return enforce_length(cleaned_title, max_len)
# --- END UPDATED FUNCTION ---


# ==========================
# Description assembler
# (Refactored for Geo-Focus)
# ==========================

def assemble_location_focused_description(html_str: str, rec: dict, primary_skill: str = "", salary_details:dict = None,
                                          job_urgency:dict=None, exp_level_info:dict=None, industries_info:dict=None) -> str:
    """Assemble the final description: the geo-targeted SEO summary first,
    then (when present) the original HTML body, then an optional contact CTA.
    """
    body = html_str or ""
    stripped = body.strip()
    # Treat an empty body or the known placeholder as "no real content".
    is_placeholder = stripped.lower() in ["<p>no description provided.</p>"]
    has_real_content = bool(stripped) and not is_placeholder

    # Location-first SEO summary is always prepended.
    summary = create_geo_targeted_summary(rec, primary_skill, salary_details, job_urgency, exp_level_info, industries_info)

    if has_real_content:
        result = summary + "<hr><br>" + body
    else:
        result = summary

    if CONTACT_CTA:
        result += f"<p>{CONTACT_CTA}</p>"
    return result


# ==========================
# Hiring org normalization
# (No changes in this section)
# ==========================

def normalize_hiring_org(org: dict, logo_cdn: str) -> dict:
    """Normalize a hiringOrganization dict in place.

    Ensures `sameAs` carries an https:// scheme when it is a bare host/path,
    and replaces any non-URL `logo` value with the CDN fallback. Non-dict
    input yields an empty dict.
    """
    if not isinstance(org, dict):
        return {}
    same_as = org.get('sameAs', '')
    if isinstance(same_as, str) and same_as and not same_as.startswith(('http://', 'https://')):
        org['sameAs'] = 'https://' + same_as.lstrip('/')
    logo = org.get('logo')
    logo_is_url = isinstance(logo, str) and logo.startswith(('http://', 'https://'))
    if not logo_is_url:
        org['logo'] = logo_cdn
    return org


# ==========================
# Core pipeline
# (Refactored for Geo-Focus)
# ==========================
# Parsed CLI namespace; populated in main(). Held at module level so helpers
# could consult flags without threading them through every call.
ARGS = None

def rewrite_geotargeted_job_records(in_path: str, out_path: str, seed: int=None, logo_cdn: str = DESIRED_DEFAULT_FALLBACK_LOGO_URL,
                                    default_currency_arg: str = DEFAULT_CURRENCY, enable_salary_adj_arg: bool = False,
                                    full_time_hours_arg: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                                    part_time_hours_arg: int = DEFAULT_PART_TIME_HOURS_PER_WEEK,
                                    no_change_in_title_arg: bool = False,
                                    slightly_improve_title_arg: bool = False):
    """Rewrite a JobPosting NDJSON file with geo-targeted SEO optimizations.

    Reads `in_path` line by line (one JSON object per line) and writes the
    optimized records to `out_path`. Three modes:

    * default                    - full optimization (title, description,
                                   salary, org, URL, schema baseline).
    * no_change_in_title_arg     - full optimization, original title kept.
    * slightly_improve_title_arg - ONLY the title is tweaked (geo/urgency
                                   appended); all other fields preserved.

    Malformed lines (bad JSON / non-objects) are logged and skipped.
    Per-record randomness is seeded deterministically from the record id and
    the `seed` argument so runs are reproducible.
    """
    logging.info(f"Optimizing Geo-Targeted Jobs: {in_path} -> {out_path} with seed {seed}")
    if no_change_in_title_arg:
        logging.info("Running with --no-change-in-title. Original titles will be preserved.")
    if slightly_improve_title_arg:
        logging.info("Running with --slightly-improve-title. ONLY titles will be modified; all other fields preserved.")

    # Adaptive stats (Contextual Behavior Simulation)
    stats = {"total": 0, "title_len_sum": 0, "desc_len_sum": 0}

    try:
        # First pass: count non-blank lines for the progress bar / adaptive tuning.
        with open(in_path, 'r', encoding='utf-8') as fin_check:
            num_lines = sum(1 for line in fin_check if line.strip())
        if num_lines == 0:
            logging.warning(f"Input file '{in_path}' is empty.")
            open(out_path, 'w').close()
            return

        with open(in_path, 'r', encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
            for line in tqdm(fin, total=num_lines, desc="Processing Geo-Targeted Jobs"):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError as e:
                    logging.warning(f"Bad JSON: {e}. Line: {line[:70]}...")
                    continue
                if not isinstance(rec, dict):
                    logging.warning("Line is not a JSON object, skipping.")
                    continue

                jid = rec.get('@id') or rec.get('url') or hashlib.sha256(line.encode()).hexdigest()
                # BUG FIX: previously seeded with random.seed(hash(str(jid) +
                # str(seed))). Python salts str hashing per process
                # (PYTHONHASHSEED), so output was NOT reproducible across runs
                # even with a fixed --seed. Use a stable digest instead.
                seed_material = hashlib.sha256((str(jid) + str(seed)).encode('utf-8')).hexdigest()
                random.seed(int(seed_material, 16))

                # --- Light-touch branch for --slightly-improve-title ---
                if slightly_improve_title_arg:
                    original_title = rec.get('title', rec.get('name', ''))
                    if not original_title:
                        fout.write(line) # Write original line if no title
                        continue

                    # Get JUST enough info for the slight improvement
                    city, state, _ = get_location_details(rec)
                    job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)

                    # Adaptive max length (same tuning rule as the full path).
                    dynamic_max_len = MAX_TITLE_LEN_BASE
                    if stats["total"] >= max(10, num_lines // 2):
                        avg_title_len = (stats["title_len_sum"] / max(stats["total"], 1))
                        if avg_title_len < 52: dynamic_max_len = min(84, MAX_TITLE_LEN_BASE + 10)
                        elif avg_title_len > 72: dynamic_max_len = max(60, MAX_TITLE_LEN_BASE - 5)

                    new_title = generate_slight_title_improvement(
                        original_title, city, state, 
                        job_urgency.get('title_tag', ''), 
                        dynamic_max_len
                    )

                    rec['title'] = new_title # Update the title

                    # Stats tracking (titles only; descriptions untouched here).
                    stats["total"] += 1
                    stats["title_len_sum"] += len(new_title)

                    fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")
                    continue # IMPORTANT: Skip all other processing
                # --- End light-touch branch ---

                # --- FULL PROCESSING LOGIC (only runs if --slightly-improve-title is OFF) ---

                # Extract & Normalize
                primary_skill = get_primary_skill(rec.get('skills',''))
                emp_types_info = get_employment_types_info(rec)
                industries_info = get_industries_info(rec)
                exp_level_info = get_experience_level_info(rec)

                if rec.get('datePosted'):
                    rec['datePosted'] = to_dhaka_offset(rec['datePosted'])
                if rec.get('validThrough'):
                    rec['validThrough'] = to_midnight(rec['validThrough'])

                job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)
                curr_code = rec.get('baseSalary', {}).get('currency', default_currency_arg) or default_currency_arg
                curr_symbol = get_currency_symbol(curr_code)
                salary_details = format_salary_details(
                    rec, curr_symbol, enable_salary_adj_arg,
                    emp_types_info['chosen_for_description'],
                    full_time_hours_arg, part_time_hours_arg
                )

                # Description: prepend the geo-targeted semantic SEO summary.
                rec['description'] = assemble_location_focused_description(
                    rec.get('description',''), rec, primary_skill,
                    salary_details, job_urgency, exp_level_info, industries_info
                )

                # Title (with adaptive max length)
                dynamic_max_len = MAX_TITLE_LEN_BASE
                # Light adaptive tuning after half the dataset processed
                if stats["total"] >= max(10, num_lines // 2):
                    avg_title_len = (stats["title_len_sum"] / max(stats["total"], 1))
                    if avg_title_len < 52:
                        dynamic_max_len = min(84, MAX_TITLE_LEN_BASE + 10)
                    elif avg_title_len > 72:
                        dynamic_max_len = max(60, MAX_TITLE_LEN_BASE - 5)

                if no_change_in_title_arg:
                    logging.debug(f"JID {jid}: Keeping original title.")
                else:
                    # Full title improvement (the other two modes are handled
                    # above / by the flag check).
                    rec['title'] = generate_location_focused_title(
                        rec, primary_skill, salary_details, job_urgency,
                        exp_level_info, emp_types_info, industries_info,
                        dynamic_max_len
                    )

                # Employment & Industry for schema
                rec['employmentType'] = emp_types_info['schema_list'] or None
                rec['industry'] = industries_info['schema_list'] or None

                # URL normalization or generation
                if rec.get('url'):
                    rec['url'] = normalize_url(rec['url'])
                else:
                    # Canonical URL fallback: slugified title, else a stable
                    # short hash of the record id.
                    slug = re.sub(r'[^a-z0-9]+', '-', rec['title'].lower()).strip('-') if rec.get('title') else hashlib.sha1(jid.encode()).hexdigest()[:10]
                    rec['url'] = normalize_url(f"https://walmart.uscareers.co.com/jobs/{slug}/")

                # Hiring Organization: accept dict, bare string, or derive
                # the name from the title as a last resort.
                ho = rec.get('hiringOrganization')
                if isinstance(ho, dict):
                    rec['hiringOrganization'] = normalize_hiring_org(ho, logo_cdn)
                elif isinstance(ho, str) and ho.strip():
                    rec['hiringOrganization'] = normalize_hiring_org({"@type":"Organization", "name":ho.strip()}, logo_cdn)
                else:
                    _, cname = clean_role_and_company(rec.get('title',''), None)
                    rec['hiringOrganization'] = normalize_hiring_org({"@type":"Organization", "name":cname}, logo_cdn)

                # Base salary cleanup
                bs = rec.get('baseSalary', {})
                if isinstance(bs, dict):
                    bs.setdefault('@type','MonetaryAmount')
                    bs['currency'] = curr_code
                    v = bs.get('value', {})
                    if not isinstance(v, dict):
                        v = {}
                    v.setdefault('@type','QuantitativeValue')
                    # NOTE(review): assumes format_salary_details always
                    # returns a dict (never None) — verify upstream.
                    if salary_details.get("is_negotiable"):
                        v.update({'description': "Negotiable", 'minValue': None, 'maxValue': None, 'unitText': None})
                    elif salary_details.get("primary_raw_min") is not None or salary_details.get("primary_raw_max") is not None:
                        v.update({
                            'minValue': str(salary_details.get("primary_raw_min")),
                            'maxValue': str(salary_details.get("primary_raw_max")),
                            'unitText': salary_details.get("primary_unit_normalized", "PROJECT").upper(),
                        })
                    bs['value'] = v
                    rec['baseSalary'] = bs

                # Remove null description field in baseSalary.value if present
                if rec.get('baseSalary', {}).get('value', {}).get('description') is None:
                    try:
                        del rec['baseSalary']['value']['description']
                    except KeyError:
                        pass

                # Schema baseline
                rec.update({'@context':'http://schema.org', '@type':'JobPosting'})

                # Clean empty fields
                for k in list(rec.keys()):
                    if rec[k] is None:
                        del rec[k]

                # Stats tracking
                stats["total"] += 1
                stats["title_len_sum"] += len(rec.get('title', ''))
                stats["desc_len_sum"] += len(BeautifulSoup(rec.get('description',''), 'html.parser').get_text())

                fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")

        # After run, log adaptive insight
        if stats["total"]:
            avg_t = stats["title_len_sum"] / stats["total"]
            avg_d = stats["desc_len_sum"] / stats["total"]
            logging.info(f"Adaptive tuning summary: avg title length = {avg_t:.1f}, avg description length = {avg_d:.1f} chars, n={stats['total']}")

    except FileNotFoundError:
        logging.error(f"Input file '{in_path}' not found.")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)
    logging.info(f"Processing complete. Output: {out_path}")


# ==========================
# Entrypoint
# ==========================

def main():
    """CLI entry point: parse args, configure logging, run the pipeline."""
    global ARGS
    ARGS = parse_args()
    logging.basicConfig(
        level=(logging.DEBUG if ARGS.verbose else logging.INFO),
        format="%(asctime)s [%(levelname)s] %(filename)s:%(lineno)d - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    # Dispatch to the geo-targeted pipeline with every CLI option.
    rewrite_geotargeted_job_records(
        ARGS.input,
        ARGS.output,
        ARGS.seed,
        ARGS.logo_cdn,
        ARGS.currency,
        ARGS.enable_salary_adjustment,
        ARGS.full_time_hours,
        ARGS.part_time_hours,
        ARGS.no_change_in_title,
        ARGS.slightly_improve_title,
    )

# Standard script entry guard: only run the pipeline when executed directly.
if __name__ == "__main__":
    main()

