#!/usr/bin/env python3
"""Simple watcher for JSONL event inbox.

Behavior:
- Reads inbox/events.jsonl and processes any event whose event_id has not yet been processed.
- Appends Agent responses to outbox/results.jsonl as JSON lines.

This script is safe to run in the sandbox only. It does not use network or external packages.
"""
import os, json, time, sys
import shutil
from pathlib import Path

# Repository root: this script is expected to live in <root>/scripts/.
BASE = Path(__file__).resolve().parents[1]
# Incoming events, one JSON object per line.
INBOX = BASE / 'inbox' / 'events.jsonl'
# Processing results, one JSON object per line (appended by append_result).
OUTBOX = BASE / 'outbox' / 'results.jsonl'
# Optional pipeline configuration; selects parser_mode in process_all_once.
CONFIG = BASE / 'config' / 'pipeline_config.json'
# retry and dead letter config
MAX_AGENT_RETRIES = int(os.environ.get('MAX_AGENT_RETRIES', '2'))  # max error retries per event_id
RETRIES_PATH = BASE / 'outbox' / 'retries.json'  # persisted {event_id: retry_count} map
DEAD_LETTER = BASE / 'outbox' / 'dead_letter.jsonl'  # events that exhausted their retries
LOCK_DIR = BASE / 'tmp'
LOCK_PATH = LOCK_DIR / 'watcher.lock'  # single-run lock file (see __main__ guard)

def load_processed_ids():
    """Return the set of event_ids that are terminal and must not be re-run.

    An event is terminal when the outbox holds either a fully successful
    result (status=='success' with a truthy dashboard_updated) or an error
    that was moved to the dead-letter queue. Plain errors are NOT terminal,
    which allows them to be retried on the next pass.
    """
    done = set()
    if not OUTBOX.exists():
        return done
    with OUTBOX.open(encoding='utf-8') as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                rec = json.loads(raw)
            except Exception:
                # tolerate corrupt lines; they simply don't mark anything done
                continue
            if not isinstance(rec, dict) or 'event_id' not in rec:
                continue
            status = rec.get('status')
            finished = (status == 'success' and rec.get('dashboard_updated')) or (
                status == 'error' and rec.get('error_reason') == 'moved_to_dead_letter'
            )
            if finished:
                done.add(str(rec.get('event_id')))
    return done

def _load_retries():
    """Return the persisted retry-count map ({event_id: count}); {} on any error."""
    try:
        if not RETRIES_PATH.exists():
            return {}
        return json.loads(RETRIES_PATH.read_text(encoding='utf-8'))
    except Exception:
        # best-effort: a missing/corrupt file just resets all counters
        return {}

def _save_retries(d):
    """Best-effort persist of the retry-count map as JSON; failures are swallowed."""
    try:
        RETRIES_PATH.parent.mkdir(parents=True, exist_ok=True)
        with RETRIES_PATH.open('w', encoding='utf-8') as fh:
            fh.write(json.dumps(d, ensure_ascii=False))
    except Exception:
        pass

def _append_dead_letter(evt, reason=None):
    """Best-effort append of a failed event to the dead-letter JSONL file.

    Each entry records the event payload, the failure reason, and a
    Unix timestamp; any write failure is silently ignored.
    """
    record = {'event': evt, 'reason': reason, 'timestamp': time.time()}
    try:
        DEAD_LETTER.parent.mkdir(parents=True, exist_ok=True)
        with DEAD_LETTER.open('a', encoding='utf-8') as fh:
            fh.write(json.dumps(record, ensure_ascii=False) + '\n')
    except Exception:
        pass


def append_result(res):
    """Append one processing result to OUTBOX as a single JSON line.

    Three safeguards run before the write:
    1. Idempotency — if the outbox already holds a final (dashboard_updated)
       record for this event_id, nothing is appended.
    2. Retry accounting — error results bump a persisted per-event counter;
       once the counter exceeds MAX_AGENT_RETRIES the event is recorded in
       the dead-letter file and the result is rewritten as a terminal
       'moved_to_dead_letter' error so it will not be retried again.
    3. Serialization safety — the result must round-trip through JSON;
       otherwise a controlled error entry is written instead.
    """
    OUTBOX.parent.mkdir(parents=True, exist_ok=True)

    # 1) idempotency scan (best-effort: any failure falls through to append)
    try:
        eid = str(res.get('event_id'))
        if OUTBOX.exists():
            with OUTBOX.open(encoding='utf-8') as fh:
                for raw in fh:
                    if not raw.strip():
                        continue
                    try:
                        prior = json.loads(raw)
                    except Exception:
                        continue
                    if str(prior.get('event_id')) == eid and prior.get('dashboard_updated'):
                        # already have a final result for this event
                        print('Skipping duplicate outbox result for', eid)
                        return
    except Exception:
        pass

    # 2) retry accounting for error results
    try:
        if res.get('status') == 'error' and res.get('error_reason'):
            retries = _load_retries()
            eid = str(res.get('event_id'))
            attempts = int(retries.get(eid, 0)) + 1
            retries[eid] = attempts
            if attempts > MAX_AGENT_RETRIES:
                # retries exhausted: record in dead letter, mark result terminal
                _append_dead_letter({'event_id': eid}, res.get('error_reason'))
                res['error_reason'] = 'moved_to_dead_letter'
            _save_retries(retries)
    except Exception:
        pass

    # 3) serialize, falling back to a controlled error entry on failure
    try:
        txt = json.dumps(res, ensure_ascii=False)
        json.loads(txt)  # round-trip sanity check
    except Exception as e:
        err = {'status':'error','dashboard_updated':False,'event_id': res.get('event_id'),'items':[],'totals':{'calories':0,'protein_g':0},'user_message_he':'','error_reason': f'outbox_serialization_failed: {str(e)}'}
        txt = json.dumps(err, ensure_ascii=False)
    with OUTBOX.open('a', encoding='utf-8') as fh:
        fh.write(txt + '\n')

def _strip_ansi(s):
    import re
    return re.sub(r'\x1B[@-_][0-?]*[ -/]*[@-~]', '', s or '')


def extract_agent_json(stdout, stderr):
    """Robustly extract a JSON object from agent stdout or stderr.

    The agent CLI may emit plain JSON, JSON wrapped in a
    ``{"payloads": [{"text": "<json>"}]}`` envelope, or JSON surrounded by
    log noise and ANSI colour codes. Strategy, in order:
      1. parse the whole (ANSI-stripped) stdout, unwrapping a payloads
         envelope when present;
      2. scan stdout then stderr for embedded JSON objects, skipping any
         non-JSON prefix between candidates (the previous implementation
         stopped at the first decode failure, so JSON preceded by log
         lines was never found);
      3. prefer payload-wrapped candidates, then dicts that look like an
         agent reply ('status' plus 'items'/'totals'), then any dict with
         both 'items' and 'totals'.

    Returns a dict, or None when nothing usable is found.
    """
    import re
    ansi = re.compile(r'\x1B[@-_][0-?]*[ -/]*[@-~]')
    decoder = json.JSONDecoder()

    def _unwrap(obj):
        # Return the inner dict of a payloads envelope, or None.
        payloads = obj.get('payloads') or []
        first = payloads[0] if payloads else None
        if isinstance(first, dict) and first.get('text'):
            try:
                inner = json.loads(first.get('text'))
            except Exception:
                return None
            if isinstance(inner, dict):
                return inner
        return None

    s = ansi.sub('', stdout or '')
    # 1) whole-stdout parse
    try:
        obj = json.loads(s)
    except Exception:
        obj = None
    if obj is not None:
        if isinstance(obj, dict) and 'payloads' in obj:
            inner = _unwrap(obj)
            if inner is not None:
                return inner
        return obj

    # 2) scan both streams for embedded JSON objects, tolerating noise
    def _scan(text):
        found = []
        pos = text.find('{')
        while pos != -1:
            try:
                cand, end = decoder.raw_decode(text, pos)
            except ValueError:
                # not valid JSON here; try the next opening brace
                pos = text.find('{', pos + 1)
                continue
            found.append(cand)
            pos = text.find('{', end)
        return found

    candidates = _scan(s) + _scan(ansi.sub('', stderr or ''))

    # 3a) prefer a payload-wrapped candidate
    for c in candidates:
        if isinstance(c, dict) and 'payloads' in c:
            inner = _unwrap(c)
            if inner is not None:
                return inner
    # 3b) then a direct agent-shaped dict
    for c in candidates:
        if isinstance(c, dict) and 'status' in c and ('items' in c or 'totals' in c):
            return c
    # 3c) fallback: first dict with items+totals
    for c in candidates:
        if isinstance(c, dict) and 'items' in c and 'totals' in c:
            return c
    return None


def process_all_once():
    """Run one pass over the inbox, processing every non-terminal event.

    Flow:
    - Collect inbox events whose event_id is not already terminal
      (per load_processed_ids; malformed JSON lines are skipped).
    - Load the optional pipeline config; its parser.parser_mode selects
      the processing path (default 'stub').
    - 'openclaw_agent' mode: send the event to the openclaw agent CLI
      wrapped in a DRY_RUN message, validate the structured JSON reply,
      then deterministically write CSV rows and an outbox result here.
      Invalid or missing replies produce controlled error results.
    - Any other mode: delegate to process_food_event.process_event.
    """
    if not INBOX.exists():
        print('No inbox file:', INBOX)
        return
    processed = load_processed_ids()
    to_process = []
    # gather candidate events; bad lines are logged and skipped, not fatal
    with INBOX.open(encoding='utf-8') as f:
        for line in f:
            line=line.strip()
            if not line: continue
            try:
                evt=json.loads(line)
            except Exception as e:
                print('invalid json line, skipping')
                continue
            eid=str(evt.get('event_id'))
            if eid in processed:
                # already processed
                continue
            to_process.append(evt)

    if not to_process:
        print('No new events to process')
        return

    # load config if present
    cfg = {}
    try:
        if CONFIG.exists():
            cfg = json.loads(CONFIG.read_text(encoding='utf-8'))
    except Exception:
        cfg = {}

    # import processing helpers for deterministic CSV writes
    # (scripts/ is added to sys.path so the flat import works when run directly)
    scripts_path = str((BASE / 'scripts').resolve())
    if scripts_path not in sys.path:
        sys.path.insert(0, scripts_path)
    try:
        from process_food_event import append_log, update_daily_summary_for
    except Exception:
        from scripts.process_food_event import append_log, update_daily_summary_for

    parser_mode = cfg.get('parser', {}).get('parser_mode', 'stub')

    for evt in to_process:
        # normalize fields: some senders use 'user_text'
        if 'raw_text' not in evt and 'user_text' in evt:
            evt['raw_text'] = evt.get('user_text')
        eid = evt.get('event_id')
        print('Processing event', eid, 'mode=', parser_mode)

        # openclaw_agent mode: dispatch to agent CLI and validate structured JSON response
        if parser_mode == 'openclaw_agent':
            # build DRY_RUN_FOOD_PARSE_TEST wrapper message instructing the
            # agent to parse only (no files/CSV/dashboard/Telegram writes)
            event_json = json.dumps(evt, ensure_ascii=False)
            wrapper = (
                'DRY_RUN_FOOD_PARSE_TEST\n'
                'Instructions:\n'
                '- Parse the food event below.\n'
                '- Return strict JSON only.\n'
                '- Do not write files.\n'
                '- Do not update CSV.\n'
                '- Do not update dashboard.\n'
                '- Do not send Telegram.\n'
                '- Do not provide medical advice.\n'
                '- Nutrition values must be for the exact input quantity.\n'
                "- nutrition_values_for_input_quantity must be present and true for every item.\n"
                "- If parsing succeeds, status must be \"success\".\n"
                "- dashboard_updated must be false.\n"
                "- error_reason must be null.\n"
                "- If parsing fails, status must be \"error\".\n"
                "- Do not include markdown or explanations.\n\n"
                "Required JSON response schema:\n"
                "{\n  \"status\": \"success\",\n  \"dashboard_updated\": false,\n  \"event_id\": \"<event_id>\",\n  \"items\": [\n    {\n      \"food_name\": \"...\",\n      \"quantity\": 0,\n      \"unit\": \"...\",\n      \"calories\": 0,\n      \"protein_g\": 0,\n      \"carbs_g\": 0,\n      \"fat_g\": 0,\n      \"nutrition_values_for_input_quantity\": true,\n      \"confidence\": \"high/medium/low\"\n    }\n  ],\n  \"totals\": {\n    \"calories\": 0,\n    \"protein_g\": 0\n  },\n  \"user_message_he\": \"...\",\n  \"error_reason\": null\n}\n\n"
            )
            message = wrapper + '\nFood event:\n' + event_json
            # resolve binaries respecting service Environment or PATH
            TIMEOUT_BIN = shutil.which('timeout') or '/usr/bin/timeout'
            OPENCLAW_BIN = os.environ.get('OPENCLAW_BIN') or shutil.which('openclaw')
            if not OPENCLAW_BIN:
                res = {'status':'error','dashboard_updated':False,'event_id':eid,'items':[],'totals':{'calories':0,'protein_g':0},'user_message_he':'','error_reason':'openclaw_binary_not_found'}
                append_result(res)
                print('Wrote result for', eid)
                continue
            # outer 'timeout 45s' kills a hung CLI; inner --timeout 30 is the agent's own budget
            cmd = [TIMEOUT_BIN, '45s', OPENCLAW_BIN, 'agent', '--agent', 'main', '--message', message, '--json', '--timeout', '30']
            try:
                import subprocess
                proc = subprocess.run(cmd, capture_output=True, text=True)
                stdout = proc.stdout.strip()
                stderr = proc.stderr.strip()
            except Exception as e:
                res = {'status':'error','dashboard_updated':False,'event_id':eid,'items':[],'totals':{'calories':0,'protein_g':0},'user_message_he':'','error_reason':f'agent_call_failed: {str(e)}'}
                append_result(res)
                print('Wrote result for', eid)
                continue

            # robustly extract agent JSON from stdout/stderr
            try:
                agent_json = extract_agent_json(stdout, stderr)
            except Exception as e:
                agent_json = None
            if not agent_json:
                # record controlled error, include short samples in log
                sample_out = (stdout or '')[:1000]
                sample_err = (stderr or '')[:1000]
                res = {'status':'error','dashboard_updated':False,'event_id':eid,'items':[],'totals':{'calories':0,'protein_g':0},'user_message_he':'','error_reason':'agent_json_not_found'}
                append_result(res)
                # log samples for debugging
                print('Agent JSON not found for', eid, 'stdout_sample=', sample_out.replace('\n',' '), 'stderr_sample=', sample_err.replace('\n',' '))
                continue

            # If agent did not return success, check for recoverable inconsistency and normalize.
            # NOTE(review): since the DRY_RUN wrapper demands dashboard_updated=false, a
            # compliant success reply also enters this branch and gets normalized — confirm intended.
            normalized_note = None
            if agent_json.get('status') != 'success' or not agent_json.get('dashboard_updated'):
                # check recoverable conditions: no error_reason, non-empty items,
                # every item flagged for-input-quantity with numeric macros
                items = agent_json.get('items')
                totals = agent_json.get('totals')
                err = agent_json.get('error_reason')
                recoverable = False
                if err is None and isinstance(items, list) and items and isinstance(totals, dict):
                    ok = True
                    for it in items:
                        if not it.get('nutrition_values_for_input_quantity'):
                            ok = False; break
                        # numeric checks
                        try:
                            _ = float(it.get('calories') or 0)
                            _ = float(it.get('protein_g') or 0)
                        except Exception:
                            ok = False; break
                    if ok:
                        recoverable = True
                if recoverable:
                    # normalize
                    agent_json['status'] = 'success'
                    agent_json['dashboard_updated'] = True
                    # NOTE(review): normalized_note is recorded but never written to the outbox
                    normalized_note = 'normalized_agent_status_error_to_success'
                else:
                    # forward as controlled error (no CSV writes)
                    res = {'status':'error','dashboard_updated':False,'event_id':eid,'items':agent_json.get('items',[]),'totals':agent_json.get('totals',{'calories':0,'protein_g':0}),'user_message_he': agent_json.get('user_message_he',''),'error_reason': agent_json.get('error_reason')}
                    append_result(res)
                    print('Wrote result for', eid)
                    continue

            # validate required schema before any CSV write
            ok = True
            err_reason = None
            if not isinstance(agent_json, dict) or 'status' not in agent_json or 'event_id' not in agent_json:
                ok = False
                err_reason = 'agent_schema_top_level_missing'
            if ok and agent_json.get('status') == 'success':
                items = agent_json.get('items')
                if not isinstance(items, list) or not items:
                    ok = False
                    err_reason = 'agent_no_items'
                else:
                    for it in items:
                        if not all(k in it for k in ('food_name','quantity','unit','calories','protein_g','nutrition_values_for_input_quantity')):
                            ok = False
                            err_reason = 'agent_item_missing_fields'
                            break
                        if not it.get('nutrition_values_for_input_quantity'):
                            ok = False
                            err_reason = 'nutrition_values_for_input_quantity_required'
                            break
                        # sanity limits: reject implausible per-item values
                        try:
                            if int(it.get('calories') or 0) > 3000 or int(it.get('protein_g') or 0) > 250:
                                ok = False
                                err_reason = 'nutrition_values_unrealistic'
                                break
                        except Exception:
                            ok = False
                            err_reason = 'nutrition_values_type_error'
                            break

            if not ok:
                # do not write CSV; write controlled error
                res = {'status':'error','dashboard_updated':False,'event_id':eid,'items':[],'totals':{'calories':0,'protein_g':0},'user_message_he': agent_json.get('user_message_he') if isinstance(agent_json, dict) else '', 'error_reason': err_reason}
                append_result(res)
                print('Wrote result for', eid)
                continue

            # deterministic CSV write using append_log and daily summary updater;
            # one row per item, row_id = "<event_id>-<index>"
            items = agent_json.get('items')
            total_cal = 0
            total_prot = 0
            for idx,it in enumerate(items):
                row_id = f"{eid}-{idx}"
                date = ''
                time_s = ''
                # derive date/time from the event timestamp, falling back to
                # current UTC, then to explicit evt['date']/evt['time'] fields
                try:
                    ts = evt.get('timestamp')
                    if ts:
                        from datetime import datetime
                        dt = datetime.fromisoformat(ts)
                        date = dt.date().isoformat()
                        time_s = dt.time().isoformat(timespec='seconds')
                    else:
                        from datetime import datetime
                        date = datetime.utcnow().date().isoformat()
                        time_s = datetime.utcnow().time().isoformat(timespec='seconds')
                except Exception:
                    date = evt.get('date') or ''
                    time_s = evt.get('time') or ''
                qty = it.get('quantity')
                unit = it.get('unit')
                cal = int(it.get('calories') or 0)
                prot = float(it.get('protein_g') or 0)
                carbs = it.get('carbs_g') if 'carbs_g' in it else ''
                fat = it.get('fat_g') if 'fat_g' in it else ''
                source = evt.get('source') or evt.get('source_bot') or ''
                raw = evt.get('raw_text') or ''
                row = [eid,row_id,date,time_s,source,raw,'',it.get('food_name'),qty,unit,cal,prot,str(carbs),str(fat),it.get('confidence','high'),'processed','']
                append_log(row)
                total_cal += cal
                total_prot += prot
            # update daily summary
            # NOTE(review): uses the date from the last item row; assumes all rows share it
            update_daily_summary_for(date)
            # prepare outbox success
            res = {'status':'success','dashboard_updated':True,'event_id':eid,'items':items,'totals':{'calories':total_cal,'protein_g':total_prot},'user_message_he': agent_json.get('user_message_he'), 'error_reason': None}
            append_result(res)
            print('Wrote result for', eid)
            continue

        # fallback: use local processing function
        try:
            from process_food_event import process_event
        except Exception:
            from scripts.process_food_event import process_event

        try:
            res = process_event(evt)
        except Exception as e:
            res = {'status':'error','dashboard_updated':False,'event_id':evt.get('event_id'),'items':[],'totals':{'calories':0,'protein_g':0},'user_message_he':'','error_reason':str(e)}
        # ensure event_id in response
        res['event_id'] = evt.get('event_id')
        append_result(res)
        print('Wrote result for', evt.get('event_id'))

if __name__=='__main__':
    # Single-run guard: an O_CREAT|O_EXCL lock file makes creation atomic, so
    # a concurrent run fails fast instead of double-processing the inbox.
    # Reuse the module-level LOCK_DIR / LOCK_PATH constants instead of
    # re-deriving (and shadowing) them here.
    LOCK_DIR.mkdir(parents=True, exist_ok=True)
    try:
        # try to create lock atomically; store our pid for debugging
        fd = os.open(str(LOCK_PATH), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        os.write(fd, str(os.getpid()).encode())
        os.close(fd)
    except FileExistsError:
        # already locked; exit
        print('Another watcher run is active; exiting')
        raise SystemExit(0)
    try:
        process_all_once()
    finally:
        # best-effort lock release, even if processing raised
        try:
            if LOCK_PATH.exists():
                LOCK_PATH.unlink()
        except Exception:
            pass
