#!/usr/bin/env python3
"""Process a single food event JSON and update CSVs.
Usage: import and call process_event(event_dict)
"""
import csv
import json
import os
import re
import time
from datetime import datetime

# Project layout: assumes this file lives one directory below the project
# root (e.g. <root>/scripts/this.py) — TODO confirm against repo layout.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
DATA_DIR = os.path.join(BASE_DIR, 'data')
# Per-item food log: one CSV row per parsed food item.
LOG_CSV = os.path.join(DATA_DIR, 'dashboard_food_log.csv')
# Per-day totals consumed by the dashboard.
DAILY_CSV = os.path.join(DATA_DIR, 'dashboard_daily_summary.csv')

# Minimal built-in nutrition table used by the naive fallback parser.
# Values are per one `unit` of the food (cal = kcal, macros in grams).
FOOD_DB = {
    'ביצה': {'cal':90,'protein':7,'carbs':0.5,'fat':6,'unit':'unit'},
    'בננה': {'cal':105,'protein':1.3,'carbs':27,'fat':0.3,'unit':'unit'},
    'יוגורט': {'cal':130,'protein':10,'carbs':12,'fat':3,'unit':'cup'},
    'אורז': {'cal':200,'protein':4,'carbs':45,'fat':0.5,'unit':'cup'},
    'עוף': {'cal':250,'protein':30,'carbs':0,'fat':8,'unit':'serving'},
    'קפה עם חלב': {'cal':50,'protein':2,'carbs':5,'fat':2,'unit':'cup'},
}

def detect_items(raw_text):
    """Naively detect known FOOD_DB foods (and quantities) in free text.

    Matching is by exact substring first, then by a 3-character "root" of the
    food name to catch simple Hebrew plurals/inflections. A quantity is taken
    from digits found within 8 chars left of the match, else within 12 chars
    to its right; defaults to 1.

    Returns a list of dicts: {'food_name', 'quantity', 'unit', 'db'}.
    """
    items = []
    text = raw_text
    # Compiled once; the optional gram suffix is tolerated but not required.
    qty_re = re.compile(r"(\d+)\s*(?:גרם|g)?")
    for name, info in FOOD_DB.items():
        idx = text.find(name)
        if idx < 0:
            # Root match (first 3 non-space chars) to catch plurals, e.g.
            # 'ביצים' matched via root 'ביצ'.
            root = ''.join(name.split())[:3]
            m = re.search(re.escape(root) + r"\w*", text) if root else None
            if m is None:
                continue
            idx = m.start()
        # Look for a number near the match: left window first, then right.
        qty = 1
        m = qty_re.search(text[max(0, idx - 8):idx])
        if m is None:
            m = qty_re.search(text[idx:idx + 12])
        if m:
            qty = int(m.group(1))
        # BUG FIX: use the unit recorded in FOOD_DB instead of a hard-coded
        # 'unit' placeholder that ignored the db's 'unit' field.
        items.append({'food_name': name, 'quantity': qty,
                      'unit': info.get('unit', 'unit'), 'db': info})
    return items


def gpt_parse_stub(raw_text):
    """Stub that simulates GPT-5-mini structured output for known test phrases.

    Returns a dict matching the required GPT structured output format.
    Unknown phrases fall back to the naive FOOD_DB keyword detector.
    """
    rt = raw_text.strip()
    # Strip RTL formatting marks so substring checks ignore them.
    l = rt.replace('\u200f','')
    # Non-food: dashboard-refresh request or empty input
    if rt in ('תעדכן לי את הדשבורד',''):
        return {"is_food_log": False, "items": [], "needs_clarification": False, "clarification_question_he": None, "reason": "not_food_event"}
    # chicken 100g
    # FIX: removed redundant duplicate conditions — a match in `rt` implies a
    # match in `l` (mark-stripping cannot break a mark-free substring), and
    # the exact-equality case was already covered by the substring test.
    if 'חזה עוף 100' in l or '100 גרם עוף' in l:
        return {"is_food_log": True, "items":[{"food_name_he":"חזה עוף","canonical_food_name":"chicken_breast","quantity":100,"unit":"g","estimated_calories":165,"estimated_protein_g":31,"estimated_carbs_g":0,"estimated_fat_g":3.6,"nutrition_values_for_input_quantity": True,"confidence":"high","assumptions":"100g cooked chicken breast"}], "needs_clarification": False, "clarification_question_he": None}
    # 150g chicken
    if '150' in l and ('עוף' in l or 'חזה' in l):
        return {"is_food_log": True, "items":[{"food_name_he":"חזה עוף","canonical_food_name":"chicken_breast","quantity":150,"unit":"g","estimated_calories":248,"estimated_protein_g":46,"estimated_carbs_g":0,"estimated_fat_g":5.4,"nutrition_values_for_input_quantity": True,"confidence":"high","assumptions":"150g cooked chicken breast"}], "needs_clarification": False, "clarification_question_he": None}
    # 2 eggs (same redundancy fix as above)
    if '2 ביצים' in l or 'שתי ביצים' in l:
        return {"is_food_log": True, "items":[{"food_name_he":"ביצה","canonical_food_name":"egg","quantity":2,"unit":"unit","estimated_calories":90*2,"estimated_protein_g":7*2,"estimated_carbs_g":0.5*2,"estimated_fat_g":6*2,"nutrition_values_for_input_quantity": True,"confidence":"high","assumptions":"2 typical eggs"}], "needs_clarification": False, "clarification_question_he": None}
    # yogurt + banana
    if 'יוגורט' in l and 'בננה' in l:
        return {"is_food_log": True, "items":[
            {"food_name_he":"יוגורט","canonical_food_name":"yogurt","quantity":1,"unit":"cup","estimated_calories":130,"estimated_protein_g":10,"estimated_carbs_g":12,"estimated_fat_g":3,"nutrition_values_for_input_quantity": True,"confidence":"high","assumptions":"1 cup yogurt"},
            {"food_name_he":"בננה","canonical_food_name":"banana","quantity":1,"unit":"unit","estimated_calories":105,"estimated_protein_g":1.3,"estimated_carbs_g":27,"estimated_fat_g":0.3,"nutrition_values_for_input_quantity": True,"confidence":"high","assumptions":"1 medium banana"}
        ], "needs_clarification": False, "clarification_question_he": None}
    # fallback: try naive FOOD_DB detection
    items = detect_items(raw_text)
    if items:
        out_items = []
        for it in items:
            qty = it['quantity']
            out_items.append({
                'food_name_he': it['food_name'], 'canonical_food_name': it['food_name'], 'quantity': qty, 'unit': it['unit'],
                # BUG FIX: carbs/fat are now scaled by quantity and default to
                # 0, consistent with calories/protein (previously unscaled,
                # with a '' default that was not even numeric).
                'estimated_calories': it['db']['cal']*qty, 'estimated_protein_g': it['db']['protein']*qty,
                'estimated_carbs_g': it['db'].get('carbs', 0)*qty, 'estimated_fat_g': it['db'].get('fat', 0)*qty,
                # kept False deliberately: downstream treats fallback results
                # as low-trust and refuses to log them without clarification
                'nutrition_values_for_input_quantity': False, 'confidence': 'low', 'assumptions': 'fallback db'
            })
        return {'is_food_log': True, 'items': out_items, 'needs_clarification': False, 'clarification_question_he': None}
    return {"is_food_log": False, "items": [], "needs_clarification": False, "clarification_question_he": None, "reason": "not_food_event"}


def parse_food_with_gpt(raw_text, event_context=None, cfg=None):
    """Adapter entrypoint for a real GPT-5 mini call.

    Live calls are double-gated: `use_live_gpt`/`parser_mode` in *cfg*, plus
    an explicit `allow_live_call` flag (or the ALLOW_LIVE_GPT_CALL=1 env var)
    so nothing hits the network accidentally during development/testing.

    Returns the parsed structured dict on success, otherwise a controlled
    {'error': <reason>} dict; this function never raises to the caller.
    """
    parser_cfg = cfg or {}
    use_live = parser_cfg.get('use_live_gpt') if isinstance(parser_cfg, dict) else False
    mode = parser_cfg.get('parser_mode') if isinstance(parser_cfg, dict) else 'stub'
    # If live mode not requested, return controlled error
    if not use_live or mode != 'live':
        return {'error':'live_gpt_disabled'}

    # live mode requested: check API key in environment
    api_key = os.environ.get('OPENAI_API_KEY')
    if not api_key:
        # controlled error: missing API key
        return {'error': 'missing_api_key'}

    # Check that the OpenAI client package is available.
    try:
        # BUG FIX: `import importlib` alone does not guarantee the
        # `importlib.util` submodule is bound — import it explicitly.
        import importlib.util
        if importlib.util.find_spec('openai') is None:
            return {'error': 'openai_package_missing'}
        import importlib
        openai = importlib.import_module('openai')
    except Exception:
        return {'error': 'openai_package_missing'}

    # Safety gate: require explicit allow flag to actually perform network
    # calls. Prevents accidental live calls during development/testing.
    allow_call = False
    if isinstance(parser_cfg, dict):
        allow_call = parser_cfg.get('allow_live_call', False)
    if not allow_call and os.environ.get('ALLOW_LIVE_GPT_CALL') != '1':
        return {'error':'live_gpt_disabled'}

    # Live calls permitted: build prompts and call with retries.
    system_prompt = (
        "You are a parser that extracts structured JSON from Hebrew free-text food logs. "
        "Return ONLY valid JSON following the schema. Do not provide extra text. "
        "Do not give diet or medical advice."
    )
    user_prompt = f"Parse this food log into the required JSON schema. Raw text: {raw_text}\nEvent context: {json.dumps(event_context or {})}"

    max_retries = 2
    last_error = None
    for attempt in range(max_retries + 1):
        try:
            # Attempt the call via whichever OpenAI client interface exists.
            resp_text = None
            try:
                if hasattr(openai, 'responses'):
                    # OpenAI v1+ (responses API)
                    response = openai.responses.create(
                        model=parser_cfg.get('model', 'gpt-5-mini'),
                        input=system_prompt + "\n" + user_prompt,
                        max_output_tokens=parser_cfg.get('max_output_tokens', 1024)
                    )
                    # extract text defensively across SDK response shapes
                    if hasattr(response, 'output'):
                        if isinstance(response.output, (list, tuple)) and response.output:
                            resp_text = response.output[0].content[0].text if hasattr(response.output[0], 'content') else None
                        else:
                            resp_text = getattr(response, 'output_text', None)
                    else:
                        resp_text = getattr(response, 'output_text', None)
                elif hasattr(openai, 'ChatCompletion'):
                    # Fallback to ChatCompletion for older SDKs
                    response = openai.ChatCompletion.create(
                        model=parser_cfg.get('model', 'gpt-5-mini'),
                        messages=[{'role':'system','content':system_prompt},{'role':'user','content':user_prompt}],
                        max_tokens=parser_cfg.get('max_output_tokens', 1024)
                    )
                    choices = response.get('choices') if isinstance(response, dict) else getattr(response, 'choices', None)
                    if choices:
                        first = choices[0]
                        resp_text = first.get('message',{}).get('content') if isinstance(first, dict) else getattr(first,'message',None).get('content')
            except Exception as e:
                last_error = str(e)
                resp_text = None

            if not resp_text:
                last_error = last_error or 'no_response_text'
                raise ValueError('no_response_text')

            # parse JSON output and sanity-check the schema
            parsed = json.loads(resp_text)
            if not isinstance(parsed, dict) or 'is_food_log' not in parsed:
                last_error = 'schema_missing_is_food_log'
                raise ValueError(last_error)
            # deeper validation is handled by the caller
            return parsed

        except Exception as e:
            last_error = last_error or str(e)
            if attempt < max_retries:
                # BUG FIX: `time` was previously used here without any import,
                # which raised NameError on the retry path.
                time.sleep(0.5)
                continue
            # retries exhausted
            if parser_cfg and parser_cfg.get('fallback_to_stub_on_gpt_error'):
                return gpt_parse_stub(raw_text)
            return {'error':'gpt_invalid_schema'}


def parse_food_with_stub(raw_text, event_context=None, cfg=None):
    """Parse *raw_text* with the offline stub parser.

    *event_context* and *cfg* exist only for signature parity with
    parse_food_with_gpt and are intentionally ignored here.
    """
    del event_context, cfg  # unused; accepted for interface parity
    return gpt_parse_stub(raw_text)


def parse_food_event(raw_text, event_context=None, cfg=None):
    """Unified parser entrypoint. Chooses live GPT or stub based on cfg.

    When *cfg* is None, attempts to load <BASE_DIR>/config/pipeline_config.json
    and falls back to a stub-only parser config on any failure.
    Returns the GPT-structured dict (or the adapter's {'error': ...} dict).
    """
    _stub_cfg = {'parser': {'use_live_gpt': False, 'parser_mode': 'stub'}}
    if cfg is None:
        cfg_path = os.path.join(BASE_DIR, 'config', 'pipeline_config.json')
        try:
            if os.path.exists(cfg_path):
                # BUG FIX: use a context manager — the original leaked the
                # open file handle (json.load(open(...))).
                with open(cfg_path, encoding='utf-8') as fh:
                    cfg = json.load(fh)
            else:
                cfg = _stub_cfg
        except Exception:
            cfg = _stub_cfg
    parser_cfg = cfg.get('parser') if isinstance(cfg, dict) else None
    use_live = parser_cfg.get('use_live_gpt') if parser_cfg else False
    if use_live:
        # attempt live parse via the gated adapter
        return parse_food_with_gpt(raw_text, event_context=event_context, cfg=parser_cfg)
    # Everything else uses the stub. (The original had two more branches here,
    # but with use_live falsy they were unreachable duplicates of this call.)
    return parse_food_with_stub(raw_text, event_context=event_context, cfg=parser_cfg)

def append_log(row):
    """Idempotently append one food-log row (a list of cells) to LOG_CSV.

    The row's second cell is its row_id; if a row with that row_id already
    exists, the call is a no-op. A header line is written when the file is
    new or empty.
    """
    row_id = row[1]  # row layout: [event_id, row_id, date, ...]
    exists = os.path.exists(LOG_CSV)
    if exists:
        try:
            with open(LOG_CSV, encoding='utf-8') as f:
                for r in csv.DictReader(f):
                    if r.get('row_id') == row_id:
                        return  # already logged; skip duplicate
        except Exception:
            # on read error, fall back to appending to avoid data loss
            pass
    # BUG FIX: the existence check is done once (previously re-checked after
    # the dedup read), and the header is also written when the file exists
    # but is empty — the original would silently produce a header-less CSV.
    write_header = not exists or os.path.getsize(LOG_CSV) == 0
    with open(LOG_CSV, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(['event_id','row_id','date','time','source','raw_text','meal_type','food_name','quantity','unit','calories','protein_g','carbs_g','fat_g','confidence','processing_status','notes'])
        writer.writerow(row)

def update_daily_summary_for(date_str):
    """Recompute the daily-summary row for *date_str* from the food log CSV."""
    # Gather every log row belonging to the requested day.
    with open(LOG_CSV, encoding='utf-8') as f:
        day_rows = [r for r in csv.DictReader(f) if r['date'] == date_str]
    total_cal = sum(float(r['calories'] or 0) for r in day_rows)
    total_prot = sum(float(r['protein_g'] or 0) for r in day_rows)
    # Load the existing daily summary, if the file is present.
    daily = []
    if os.path.exists(DAILY_CSV):
        with open(DAILY_CSV, encoding='utf-8') as f:
            daily = list(csv.DictReader(f))
    # Update the matching date row(s) in place, or append a fresh one.
    stamp = datetime.utcnow().isoformat()
    matched = False
    for entry in daily:
        if entry['date'] == date_str:
            entry['cal'] = str(int(total_cal))
            entry['protein'] = str(round(total_prot, 1))
            entry['last_updated'] = stamp
            matched = True
    if not matched:
        daily.append({'date': date_str, 'cal': str(int(total_cal)),
                      'protein': str(round(total_prot, 1)), 'steps': '0',
                      'burned': '0', 'creatine': 'false', 'supplements': '[]',
                      'last_updated': stamp})
    # Rewrite the whole summary file.
    fields = ['date','cal','protein','steps','burned','creatine','supplements','last_updated']
    with open(DAILY_CSV, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
        for entry in daily:
            writer.writerow([entry.get(k, '') for k in fields])

def process_event(event):
    """Process one food-log event dict end-to-end.

    Validates the event, parses its text (stub or live GPT adapter), appends
    one CSV row per detected item, refreshes the daily summary, and returns a
    standardized result contract:
    {status, dashboard_updated, event_id, items, totals, user_message_he,
     error_reason}. Errors never raise; they come back in the same contract.
    """
    # Only food_log events are accepted.
    if event.get('event_type')!='food_log':
        return {
            'status':'error', 'dashboard_updated':False, 'event_id': event.get('event_id'),
            'items': [], 'totals': {'calories':0,'protein_g':0},
            'user_message_he': 'המידע התקבל, אבל לא נשמר בדשבורד. סיבה: not_food_event.',
            'error_reason':'not_food_event'
        }
    raw = event.get('raw_text') or event.get('user_text','')
    # Derive date/time: prefer the ISO timestamp, then explicit date/time
    # fields, then "now" (UTC) for the date.
    ts = event.get('timestamp')
    if ts:
        try:
            dt = datetime.fromisoformat(ts)
            date = dt.date().isoformat()
            time = dt.time().isoformat(timespec='seconds')
        except Exception:
            date = event.get('date') or datetime.utcnow().date().isoformat()
            time = event.get('time','')
    else:
        date = event.get('date') or datetime.utcnow().date().isoformat()
        time = event.get('time','')

    source = event.get('source') or event.get('source_bot') or ''
    # Prefer GPT parsing (stub or live adapter) for structured understanding.
    gpt_out = parse_food_event(raw, event_context=event)
    if not gpt_out.get('is_food_log'):
        return {
            'status':'error', 'dashboard_updated':False, 'event_id': event.get('event_id'),
            'items': [], 'totals': {'calories':0,'protein_g':0},
            'user_message_he': 'המידע התקבל, אבל לא נשמר בדשבורד. סיבה: not_food_event.',
            'error_reason':'not_food_event'
        }
    if gpt_out.get('needs_clarification'):
        return {
            'status':'error', 'dashboard_updated':False, 'event_id': event.get('event_id'),
            'items': [], 'totals': {'calories':0,'protein_g':0},
            'user_message_he': gpt_out.get('clarification_question_he') or 'נדרשת הבהרה',
            'error_reason':'needs_clarification'
        }
    items = gpt_out.get('items', [])
    if not items:
        return {
            'status':'error', 'dashboard_updated':False, 'event_id': event.get('event_id'),
            'items': [], 'totals': {'calories':0,'protein_g':0},
            'user_message_he': 'המידע התקבל, אבל לא נמצא פריט אוכל בזיהוי.',
            'error_reason':'no_items_detected'
        }
    responses=[]
    total_cal=0
    total_prot=0
    for idx,it in enumerate(items):
        eid=event.get('event_id')
        row_id=f"{eid}-{idx}"
        # Refuse items whose nutrition numbers are not stated for the input
        # quantity — logging them would corrupt the daily totals.
        if not it.get('nutrition_values_for_input_quantity'):
            return {
                'status':'error', 'dashboard_updated':False, 'event_id': eid,
                'items': [], 'totals': {'calories':0,'protein_g':0},
                'user_message_he': 'המידע התקבל, אבל לא נשמר בדשבורד. סיבה: nutrition_value_basis_unclear.',
                'error_reason':'nutrition_value_basis_unclear'
            }
        # Use the parser's estimates, defaulting missing values to 0.
        cal = int(it.get('estimated_calories') or 0)
        prot = float(it.get('estimated_protein_g') or 0)
        carbs = it.get('estimated_carbs_g')
        fat = it.get('estimated_fat_g')
        qty = it.get('quantity') or 1
        unit = it.get('unit') or 'unit'
        # Sanity check: reject clearly-unrealistic single-item values.
        if cal>3000 or prot>250:
            return {
                'status':'error', 'dashboard_updated':False, 'event_id': eid,
                'items': [], 'totals': {'calories':0,'protein_g':0},
                # FIX: dropped the stray f-string prefix (no placeholders).
                'user_message_he': 'המידע התקבל, אבל לא נשמר בדשבורד. סיבה: nutrition_values_unrealistic.',
                'error_reason':'nutrition_values_unrealistic'
            }
        row = [eid,row_id,date,time,source,raw,'',it.get('food_name_he') or it.get('canonical_food_name'),qty,unit,cal,prot,str(carbs),str(fat),it.get('confidence','high'),'processed','']
        append_log(row)
        responses.append({'food_name': it.get('food_name_he') or it.get('canonical_food_name'),'quantity':qty,'unit':unit,'calories':cal,'protein_g':prot,'carbs_g':carbs,'fat_g':fat,'confidence':it.get('confidence','high')})
        total_cal += cal
        total_prot += prot
    # Recompute the day's totals now that all rows are appended.
    update_daily_summary_for(date)
    # Compose the Hebrew confirmation message, one line per item.
    items_lines = []
    for it in responses:
        items_lines.append(f"{it['food_name']} - {it['calories']} קלוריות, {it['protein_g']} גרם חלבון")
    user_msg = 'המידע נשמר בדשבורד:\n' + '\n'.join(items_lines)
    return {
        'status':'success', 'dashboard_updated':True, 'event_id': event.get('event_id'),
        'items': responses,
        'totals': {'calories': total_cal, 'protein_g': total_prot},
        'user_message_he': user_msg,
        'error_reason': None
    }

if __name__ == '__main__':
    import sys
    # CLI entrypoint: accepts either a JSON string or a path to a JSON file.
    if len(sys.argv) < 2:
        print('pass a JSON event file or JSON string')
        sys.exit(1)
    arg = sys.argv[1]
    try:
        evt = json.loads(arg)
    except Exception:
        # Not inline JSON — treat the argument as a file path.
        with open(arg, encoding='utf-8') as fh:
            evt = json.load(fh)
    result = process_event(evt)
    print(json.dumps(result, ensure_ascii=False, indent=2))
