MP-Manager/scripts/audit_collision_submissions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""audit_collision_submissions.py

Profundiza la auditoria de los 4 casos de colision de telefono cruzando con
los FORM SUBMISSIONS (read-only). Como solo la cuenta de Marca tiene el
formulario web, mapeamos:
  - Si el contacto (sucursal o Marca) tiene un submission en Marca.
  - Que SUCURSAL eligio el lead en el formulario (campo sucursal_value).
  - El `source` de cada contacto (de donde vino: formulario, manual, n8n...).
  - Submissions que comparten el telefono del caso.

Todo en paralelo. No escribe nada.
"""

import json
import os
import sys
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor

# Make both the scripts folder and its parent importable. The parent is
# inserted first so the scripts folder ends up ahead of it on sys.path.
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(SCRIPTS_DIR)
for _extra_path in (ROOT_DIR, SCRIPTS_DIR):
    if _extra_path not in sys.path:
        sys.path.insert(0, _extra_path)
del _extra_path

import requests
import sync_engine
from common import normalize_phone

# Base URL prepended to every API path requested by this script.
BASE_URL = "https://services.leadconnectorhq.com"
# Location id used for the brand ("Marca") account when fetching submissions
# and the colliding brand contacts.
BRAND_LOC = "GbKkBpCmKu2QmloKFHy3"

# In-memory cache of the location -> token map; filled lazily by _load_token().
_TOKENS_MAP = None


def _load_token(loc):
    """Return the PIT token registered for location *loc*.

    Tokens are secrets: they are never hardcoded or persisted here. They are
    obtained the same way as in the rest of the repo, through
    `sync_engine.get_tokens_map()` (which, per the original author's note,
    reads the CSV `Bucéfalo - Mesa de control - API Tokens - MP.csv`). The
    resulting map is kept in the module-level `_TOKENS_MAP` so the lookup
    source is only consulted once.

    Raises:
        RuntimeError: if the map has no (non-empty) token for *loc*.
    """
    global _TOKENS_MAP
    if _TOKENS_MAP is None:
        # First call: populate the module-level cache.
        _TOKENS_MAP = sync_engine.get_tokens_map()
    pit = _TOKENS_MAP.get(loc)
    if pit:
        return pit
    raise RuntimeError(f"sin token {loc}")


def _h(token):
    """Build the HTTP headers for a JSON request authenticated with *token*."""
    return dict(
        Authorization=f"Bearer {token}",
        Version="2021-07-28",
        Accept="application/json",
    )


def fetch_all_submissions(loc, *, page_size=100, max_pages=50):
    """Download the form submissions of location *loc*, page by page.

    The function is best-effort: if a page answers with a non-200 status, or
    the page cap is reached while the API still reports more pages, it stops
    and returns what was collected so far. In both cases a warning is printed
    to stderr so a truncated result is not mistaken for a complete one.

    Args:
        loc: Location id; used both to resolve the token and as `locationId`.
        page_size: Value sent as the `limit` query parameter.
        max_pages: Maximum number of pages to request.

    Returns:
        A list with the submission dicts of every page fetched (possibly
        partial, see above).

    Raises:
        RuntimeError: propagated from `_load_token` when *loc* has no token.
    """
    headers = _h(_load_token(loc))
    collected = []
    page = 1
    while True:
        resp = requests.get(
            f"{BASE_URL}/forms/submissions",
            headers=headers,
            params={"locationId": loc, "limit": page_size, "page": page},
            timeout=30,
        )
        if resp.status_code != 200:
            print(
                f"[aviso] forms/submissions loc={loc} page={page}: HTTP {resp.status_code}; "
                f"resultado parcial ({len(collected)} submissions)",
                file=sys.stderr,
            )
            break
        payload = resp.json()
        batch = payload.get("submissions", []) or []
        collected.extend(batch)
        # `meta` may come back as null; treat that the same as "no more pages".
        meta = payload.get("meta") or {}
        if not batch or not meta.get("nextPage"):
            break
        page += 1
        if page > max_pages:
            print(
                f"[aviso] forms/submissions loc={loc}: limite de {max_pages} paginas alcanzado; "
                f"resultado parcial ({len(collected)} submissions)",
                file=sys.stderr,
            )
            break
    return collected


def get_contact(loc, cid):
    """Fetch contact *cid* using the token of location *loc*.

    Args:
        loc: Location id whose token authenticates the request.
        cid: Contact id. When empty/None there is nothing to look up, so an
            empty dict is returned without resolving a token or calling the API.

    Returns:
        The `contact` object of the response, or `{}` when *cid* is empty,
        the response status is not 200 (a warning is printed to stderr), or
        the response carries no contact.

    Raises:
        RuntimeError: propagated from `_load_token` when *loc* has no token.
    """
    if not cid:
        return {}
    token = _load_token(loc)
    resp = requests.get(f"{BASE_URL}/contacts/{cid}", headers=_h(token), timeout=30)
    if resp.status_code != 200:
        print(f"[aviso] contacts/{cid} loc={loc}: HTTP {resp.status_code}", file=sys.stderr)
        return {}
    # `or {}` also covers a response whose "contact" is null, so callers can
    # always use .get() on the result.
    return resp.json().get("contact") or {}


def sucursal_from_submission(sub):
    """Heuristically pick the branch chosen in a form submission.

    The branch field inside `others` has no fixed key, so every value is
    inspected in order and the first one that looks like 'City, State' wins:
    a string containing a comma and shorter than 60 characters. A non-empty
    list value is represented by its first element.

    Returns:
        The first matching string, or None when no value matches.
    """
    extra_fields = sub.get("others", {}) or {}
    for raw in extra_fields.values():
        # Non-empty lists are reduced to their first item; anything else is
        # checked as-is.
        text = raw[0] if isinstance(raw, list) and raw else raw
        if isinstance(text, str) and "," in text and len(text) < 60:
            return text
    return None


def _load_collision_cases(path):
    """Read the report at *path* and return one case dict per unresolved phone collision."""
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)
    cases = []
    for item in data["items"]:
        collision = next(
            (a for a in (item.get("actions") or []) if a.get("action") == "phone_collision_unresolved"),
            None,
        )
        if not collision:
            continue
        branch_contact = item.get("branch_contact") or {}
        cases.append({
            "label": (item.get("opp_name") or "")[:18],
            "branch_loc": item.get("branch_location_id"),
            "branch_cid": branch_contact.get("id"),
            "brand_cid": collision.get("colliding_brand_contact_id"),
            "phone": branch_contact.get("phone"),
        })
    return cases


def _fetch_remote_data(cases):
    """Fetch the brand submissions and both contacts of every case concurrently.

    Returns `(brand_subs, contacts)`, with `contacts` keyed by
    `(role, case_index)`. The index (not the label) is the key because labels
    are truncated names and two cases may share one.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        subs_future = pool.submit(fetch_all_submissions, BRAND_LOC)
        contact_futures = {}
        for idx, case in enumerate(cases):
            contact_futures[("branch", idx)] = pool.submit(get_contact, case["branch_loc"], case["branch_cid"])
            contact_futures[("brand", idx)] = pool.submit(get_contact, BRAND_LOC, case["brand_cid"])
        brand_subs = subs_future.result()
        contacts = {key: fut.result() for key, fut in contact_futures.items()}
    return brand_subs, contacts


def _index_submissions(subs):
    """Group submissions by `contactId` and by normalized phone; returns both indexes."""
    by_cid = defaultdict(list)
    by_phone = defaultdict(list)
    for sub in subs:
        if sub.get("contactId"):
            by_cid[sub["contactId"]].append(sub)
        phone_key = normalize_phone(sub.get("phone"))
        if phone_key:
            by_phone[phone_key].append(sub)
    return by_cid, by_phone


def _full_name(contact):
    """Join first and last name of a contact dict, tolerating missing parts."""
    return f"{contact.get('firstName') or ''} {contact.get('lastName') or ''}".strip()


def _build_entry(case, branch_contact, brand_contact, by_cid, by_phone):
    """Assemble the report entry of one case from its contacts and the submission indexes."""
    phone_key = normalize_phone(case["phone"])
    return {
        "label": case["label"], "phone": case["phone"],
        "branch": {
            "id": case["branch_cid"], "loc": case["branch_loc"],
            "name": _full_name(branch_contact),
            "source": branch_contact.get("source"), "tags": branch_contact.get("tags"),
            "has_submission_in_marca": bool(by_cid.get(case["branch_cid"])),
        },
        "brand_collider": {
            "id": case["brand_cid"],
            "name": _full_name(brand_contact),
            "source": brand_contact.get("source"), "tags": brand_contact.get("tags"),
            "submissions": [
                {
                    "name": s.get("name"), "formId": s.get("formId"),
                    "sucursal_elegida": sucursal_from_submission(s),
                    "createdAt": s.get("createdAt"), "email": s.get("email"), "phone": s.get("phone"),
                }
                for s in by_cid.get(case["brand_cid"], [])
            ],
        },
        "submissions_sharing_phone": [
            {
                "contactId": s.get("contactId"), "name": s.get("name"),
                "sucursal_elegida": sucursal_from_submission(s),
                "createdAt": s.get("createdAt"),
            }
            for s in by_phone.get(phone_key, [])
        ],
    }


def _print_report(report):
    """Print the human-readable summary of every report entry to stdout."""
    for e in report:
        print("=" * 80)
        print(f"CASO {e['label']!r}  tel {e['phone']}")
        b = e["branch"]
        m = e["brand_collider"]
        print(f"  SUCURSAL : {b['name']!r}  source={b['source']!r}  submission_en_marca={b['has_submission_in_marca']}")
        print(f"  MARCA(col): {m['name']!r}  source={m['source']!r}")
        if m["submissions"]:
            for s in m["submissions"]:
                print(f"    submission Marca: name={s['name']!r} sucursal_elegida={s['sucursal_elegida']!r} created={s['createdAt']}")
        else:
            print("    (sin submission en Marca para el colider)")
        print(f"  submissions que comparten el telefono ({len(e['submissions_sharing_phone'])}):")
        for s in e["submissions_sharing_phone"]:
            print(f"    cid={s['contactId']} name={s['name']!r} sucursal_elegida={s['sucursal_elegida']!r} created={s['createdAt']}")
        print()


def main():
    """Audit the unresolved phone-collision cases against the brand form submissions.

    Steps:
      1. Read `generated/reports/missing_opps_clean.json` and keep the items
         that carry a `phone_collision_unresolved` action.
      2. Fetch, in parallel, the brand submissions plus the branch contact and
         the colliding brand contact of every case.
      3. Build one report entry per case, print a summary to stdout and write
         the full report to `generated/reports/collision_submissions_audit.json`.
    """
    reports_dir = os.path.join(ROOT_DIR, "generated", "reports")
    cases = _load_collision_cases(os.path.join(reports_dir, "missing_opps_clean.json"))

    # In parallel: brand submissions + both contacts of each case.
    print("Trayendo submissions de Marca + contactos (en paralelo)...\n")
    brand_subs, contacts = _fetch_remote_data(cases)

    # Index submissions by contactId and by phone.
    by_cid, by_phone = _index_submissions(brand_subs)
    print(f"Submissions de Marca cargados: {len(brand_subs)}\n")

    report = [
        _build_entry(case, contacts[("branch", idx)], contacts[("brand", idx)], by_cid, by_phone)
        for idx, case in enumerate(cases)
    ]
    _print_report(report)

    out = os.path.join(reports_dir, "collision_submissions_audit.json")
    with open(out, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"Reporte: {out}")


# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()