#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# NOTE: the module docstring below doubles as the --help description (it is
# passed to argparse as ``description=__doc__`` in main()), so it is
# user-facing runtime text and is intentionally kept in Spanish.
"""
cleanup_puebla_qro_duplicates.py

Limpieza puntual de duplicados cruzados entre las sucursales de Puebla y
Queretaro detectados en la comparativa Marca vs Sucursales.

Contexto (mayo 2026):
  El audit detecto 29 contactos en Marca que existen fisicamente tanto en
  Puebla como en Queretaro. Tras inspeccion, las opps de Puebla traen valores
  monetarios reales (~$1.7M en pipeline) mientras que las de Queretaro estan
  todas en $0 — son shells huecos. La conclusion del operador fue: las copias
  en Queretaro son residuales y deben eliminarse.

Acciones (TODAS reversibles via audit log salvo los DELETE, que son
destructivos pero quedan registrados con todos los datos previos):

  1. En la sucursal de QUERETARO:
     - Elimina cada opp residual (las 29 en $0)
     - Elimina cada contacto residual (29)
     Las opps se borran ANTES que el contacto para que el audit log conserve
     el contact_id de la opp aunque GHL cascadee el delete del contacto.

  2. En la cuenta de MARCA:
     - Para cada opp Marca cuya monetary_value o status no coincide con su
       contraparte en Puebla, hace PUT con los valores correctos.
     - No se elimina ni se crea ningun contacto en Marca: los 29 contactos
       Marca involucrados son unicos (no hay duplicado intra-Marca) y sus
       datos basicos (phone/email/nombre) ya son correctos.

Reglas de seguridad:
  - Dry-run por default. Requiere --apply para escribir.
  - Recalcula el plan desde la DB en cada ejecucion (no consume el JSON
    externo) — asi nunca opera con datos stale.
  - Re-verifica cada operacion contra los datos vivos antes de aplicar.
  - Cada cambio se registra en script_audit con planned -> applied.
  - Soporta --only-contact para procesar uno a uno (debug/granular).

Uso:
  python scripts/cleanup_puebla_qro_duplicates.py                    # dry-run
  python scripts/cleanup_puebla_qro_duplicates.py --apply --yes      # apply real
  python scripts/cleanup_puebla_qro_duplicates.py --json             # output JSON
  python scripts/cleanup_puebla_qro_duplicates.py --only-contact ID  # un solo contacto
"""

import argparse
import json
import os
import sqlite3
import sys
import unicodedata
import uuid
from collections import defaultdict
from datetime import datetime

# Make the repository root importable so the project modules below resolve
# when this file is run directly as a script.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)

from scripts.audit_brand_vs_branches_totals import (  # noqa: E402
    load_accounts_filtered,
    load_contacts,
    load_opps,
    build_contact_index,
    find_match,
    BRAND_LOCATION_ID,
    DB_PATH,
)
import sync_engine  # noqa: E402  (get_tokens_map + ghl_client singleton)
import script_audit  # noqa: E402

SCRIPT_NAME = "cleanup_puebla_qro_duplicates.py"


def safe_print(*args, **kwargs):
    """Write ``args`` to stdout, tolerating consoles with a narrow encoding.

    Accepts the ``sep`` and ``end`` keyword arguments with the same defaults
    as ``print``. If stdout cannot encode the text, the text is re-encoded
    with ``errors="replace"`` so the message is still emitted instead of
    raising ``UnicodeEncodeError``.
    """
    sep = kwargs.get("sep", " ")
    end = kwargs.get("end", "\n")
    text = sep.join(str(a) for a in args)
    encoding = sys.stdout.encoding or "utf-8"
    try:
        sys.stdout.write(text + end)
        sys.stdout.flush()
    except UnicodeEncodeError:
        sys.stdout.write(text.encode(encoding, errors="replace").decode(encoding) + end)
        sys.stdout.flush()


def _norm_name(s: str) -> str:
    """Return ``s`` lowercased with diacritics removed (``None`` -> ``""``).

    The text is decomposed (NFKD) and combining marks are dropped, so both
    precomposed ("é") and decomposed ("e" + U+0301) spellings normalize to the
    same plain letter.
    """
    decomposed = unicodedata.normalize("NFKD", (s or "").lower())
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def build_plan(log=safe_print):
    """Recompute the full cleanup plan from the local SQLite DB.

    Args:
        log: Logging callable. Currently unused; kept so callers can pass it.

    Returns:
        dict with:
          - puebla_location_id, puebla_name, qro_location_id, qro_name
          - qro_deletions: one entry per Marca contact matched in BOTH
            branches: {qro_contact_id, qro_opp_ids, qro_opps_snapshot,
            brand_contact_id, name, qro_contact_snapshot}
          - brand_updates: one entry per Marca opp whose value/status differs
            from the Puebla opp: {brand_opp_id, brand_contact_id, name,
            old:{monetary_value,status,pipeline_id,pipeline_stage_id},
            new:{monetary_value,status}}

    Raises:
        FileNotFoundError: if ``DB_PATH`` does not exist.
        RuntimeError: if the Puebla or Queretaro branch cannot be located, or
            if the name match is ambiguous (more than one branch matches, or
            both names resolve to the same location).
    """
    if not os.path.exists(DB_PATH):
        raise FileNotFoundError(f"No existe {DB_PATH}. Sincroniza primero.")

    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        _brand, branches, _demos = load_accounts_filtered(conn)

        # Locate the two branches by (accent-insensitive) name substring.
        branch_name_by_loc = {}
        puebla_locs = []
        qro_locs = []
        for b in branches:
            loc = b["location_id"]
            name = b["nombre"]
            branch_name_by_loc[loc] = name
            nlow = _norm_name(name)
            if "puebla" in nlow and loc not in puebla_locs:
                puebla_locs.append(loc)
            if "queretaro" in nlow and loc not in qro_locs:
                qro_locs.append(loc)

        puebla_loc = puebla_locs[-1] if puebla_locs else None
        qro_loc = qro_locs[-1] if qro_locs else None
        if not puebla_loc or not qro_loc:
            raise RuntimeError(f"No se ubicaron sucursales: puebla={puebla_loc} qro={qro_loc}")
        # Fail closed: this plan drives DELETEs, so never guess which location
        # is meant when the name match is not unique.
        if len(puebla_locs) > 1 or len(qro_locs) > 1 or puebla_loc == qro_loc:
            raise RuntimeError(f"Sucursales ambiguas: puebla={puebla_locs} qro={qro_locs}")

        brand_contacts = load_contacts(conn, BRAND_LOCATION_ID)
        brand_opps = load_opps(conn, BRAND_LOCATION_ID)

        # Only Puebla and Queretaro are consulted below, so only those two
        # branches are loaded and indexed.
        branch_full_contacts = {}
        branch_idx = {}
        branch_opps_by_cid = {}
        for loc in (puebla_loc, qro_loc):
            bc = load_contacts(conn, loc)
            bo = load_opps(conn, loc)
            branch_full_contacts[loc] = {c["id"]: c for c in bc}
            branch_idx[loc] = build_contact_index(bc)
            grp = defaultdict(list)
            for o in bo:
                grp[o["contact_id"]].append(o)
            branch_opps_by_cid[loc] = grp

        # Map each Marca contact to its matches in the two branches.
        brand_by_id = {c["id"]: c for c in brand_contacts}
        contact_branches = defaultdict(list)
        for c in brand_contacts:
            for loc, (idxp, idxe, idxn) in branch_idx.items():
                for m in find_match(c, idxp, idxe, idxn):
                    contact_branches[c["id"]].append((loc, m["id"]))

        brand_opps_by_cid_marca = defaultdict(list)
        for o in brand_opps:
            brand_opps_by_cid_marca[o["contact_id"]].append(o)

        qro_deletions = []
        brand_updates = []
        for bid, locs in contact_branches.items():
            locset = {entry[0] for entry in locs}
            if not (puebla_loc in locset and qro_loc in locset):
                continue

            c = brand_by_id[bid]
            name = f"{c.get('first_name') or ''} {c.get('last_name') or ''}".strip()
            # When a branch has several matches, the first one found is used.
            p_bcid = next(bc for loc, bc in locs if loc == puebla_loc)
            q_bcid = next(bc for loc, bc in locs if loc == qro_loc)

            qro_opps = branch_opps_by_cid[qro_loc].get(q_bcid, [])
            qro_contact = branch_full_contacts[qro_loc][q_bcid]
            qro_deletions.append({
                "qro_contact_id": q_bcid,
                "qro_opp_ids": [o["id"] for o in qro_opps],
                "qro_opps_snapshot": [
                    {
                        "id": o["id"],
                        "name": o.get("name"),
                        "status": o.get("status"),
                        "monetary_value": o.get("monetary_value"),
                        "pipeline_id": o.get("pipeline_id"),
                        "pipeline_stage_id": o.get("pipeline_stage_id"),
                    }
                    for o in qro_opps
                ],
                "brand_contact_id": bid,
                "name": name,
                "qro_contact_snapshot": {
                    "id": q_bcid,
                    "first_name": qro_contact.get("first_name"),
                    "last_name": qro_contact.get("last_name"),
                    "phone": qro_contact.get("phone"),
                    "email": qro_contact.get("email"),
                },
            })

            # Compare the Marca opp with the Puebla opp; plan an update only
            # when value or status differ.
            m_opps = brand_opps_by_cid_marca.get(bid, [])
            p_opps = branch_opps_by_cid[puebla_loc].get(p_bcid, [])
            if not m_opps or not p_opps:
                continue  # anomaly; do not update without certainty
            # Only the first opp on each side is compared.
            m_o = m_opps[0]
            p_o = p_opps[0]
            m_val = float(m_o.get("monetary_value") or 0)
            p_val = float(p_o.get("monetary_value") or 0)
            m_st = (m_o.get("status") or "").lower()
            p_st = (p_o.get("status") or "").lower()
            if m_val == p_val and m_st == p_st:
                continue  # already in sync

            brand_updates.append({
                "brand_opp_id": m_o["id"],
                "brand_contact_id": bid,
                "name": name,
                "old": {
                    "monetary_value": m_val,
                    "status": m_st,
                    "pipeline_id": m_o.get("pipeline_id"),
                    "pipeline_stage_id": m_o.get("pipeline_stage_id"),
                },
                "new": {
                    "monetary_value": p_val,
                    "status": p_st,
                    # NOTE: pipeline_id and pipeline_stage_id are NOT copied
                    # across accounts (each location has its own pipeline).
                    # Only monetary_value and status are updated.
                },
            })

        return {
            "puebla_location_id": puebla_loc,
            "puebla_name": branch_name_by_loc[puebla_loc],
            "qro_location_id": qro_loc,
            "qro_name": branch_name_by_loc[qro_loc],
            "qro_deletions": qro_deletions,
            "brand_updates": brand_updates,
        }
    finally:
        conn.close()


def _empty_summary():
    """Return a fresh summary dict with every counter set to zero."""
    return {
        "candidates_pairs": 0,
        "qro_contacts_to_delete": 0,
        "qro_opps_to_delete": 0,
        "brand_opps_to_update": 0,
        "qro_contacts_deleted": 0,
        "qro_opps_deleted": 0,
        "brand_opps_updated": 0,
        "errors": 0,
    }


def run_cleanup(dry_run=True, log=None, run_id=None, only_contact_ids=None):
    """Run the cleanup and return a serializable ``{dry_run, summary, items, plan_meta}``.

    Args:
        dry_run: When True (default) nothing is written; items are reported
            as ``would_delete`` / ``would_update``.
        log: Logging callable; defaults to ``safe_print``.
        run_id: ``script_audit`` run id. When falsy, changes are applied
            without per-change audit records.
        only_contact_ids: Optional iterable of Marca contact ids; when given,
            only plan entries for those contacts are processed.

    Raises:
        RuntimeError: in apply mode, if no token is available for Queretaro
            or for Marca. Errors from ``build_plan`` propagate as well.

    Per-item failures do not abort the run: the item is marked ``"error"`` and
    ``summary["errors"]`` is incremented exactly once for that item.

    NOTE(review): operations are applied straight from the DB-derived plan;
    nothing in this function re-checks an opp/contact against GHL before
    writing.
    """
    if log is None:
        log = safe_print

    plan = build_plan(log=log)
    only = set(only_contact_ids or [])
    if only:
        plan["qro_deletions"] = [d for d in plan["qro_deletions"] if d["brand_contact_id"] in only]
        plan["brand_updates"] = [u for u in plan["brand_updates"] if u["brand_contact_id"] in only]

    summary = _empty_summary()
    summary["candidates_pairs"] = len(plan["qro_deletions"])
    summary["qro_contacts_to_delete"] = len(plan["qro_deletions"])
    summary["qro_opps_to_delete"] = sum(len(d["qro_opp_ids"]) for d in plan["qro_deletions"])
    summary["brand_opps_to_update"] = len(plan["brand_updates"])

    log(f"[{datetime.now().strftime('%H:%M:%S')}] === cleanup_puebla_qro_duplicates ===")
    log(f"Modo: {'DRY-RUN (no escribe)' if dry_run else 'APPLY (escribe en GHL)'}")
    log(f"Puebla: {plan['puebla_name']} ({plan['puebla_location_id']})")
    log(f"Qro: {plan['qro_name']} ({plan['qro_location_id']})")
    log(f"Resumen plan: {summary['candidates_pairs']} contactos Qro a borrar, "
        f"{summary['qro_opps_to_delete']} opps Qro a borrar, "
        f"{summary['brand_opps_to_update']} opps Marca a actualizar.")

    items = []
    qro_loc = plan["qro_location_id"]

    if not dry_run:
        tokens_map = sync_engine.get_tokens_map()
        qro_token = tokens_map.get(qro_loc)
        brand_token = tokens_map.get(BRAND_LOCATION_ID)
        if not qro_token:
            raise RuntimeError(f"No hay token para Queretaro {plan['qro_location_id']}")
        if not brand_token:
            raise RuntimeError(f"No hay token para Marca {BRAND_LOCATION_ID}")
        client = sync_engine.ghl_client  # singleton (see import note)
    else:
        # Not used in dry-run.
        client = None
        qro_token = None
        brand_token = None

    # --- Phase 1: delete Qro opps, then the Qro contact ---
    for d in plan["qro_deletions"]:
        item = {
            "phase": "delete_qro",
            "brand_contact_id": d["brand_contact_id"],
            "name": d["name"],
            "qro_contact_id": d["qro_contact_id"],
            "qro_opp_ids": d["qro_opp_ids"],
            "qro_opp_results": [],
            "contact_result": None,
            "status": "pending",
            "error": None,
        }
        try:
            # 1a. Qro opps. Any failure aborts this item, so the contact is
            # never deleted while one of its opps could not be removed.
            for opp_snap in d["qro_opps_snapshot"]:
                opp_id = opp_snap["id"]
                if dry_run:
                    item["qro_opp_results"].append({
                        "id": opp_id,
                        "status": "would_delete",
                        "value": opp_snap["monetary_value"],
                    })
                    continue
                change_id = script_audit.record_change(
                    run_id, qro_loc, "opportunity", opp_id,
                    "", "deleted", opp_snap, None,
                ) if run_id else None
                # The try covers only the GHL call, so an audit-write failure
                # after a successful delete is not reported as a failed delete.
                try:
                    client.delete_opportunity(qro_token, opp_id, qro_loc)
                except Exception as e:
                    item["qro_opp_results"].append({"id": opp_id, "status": "error", "error": str(e)})
                    if change_id:
                        script_audit.mark_change(change_id, "failed", error_message=str(e))
                    raise
                summary["qro_opps_deleted"] += 1
                item["qro_opp_results"].append({"id": opp_id, "status": "deleted"})
                if change_id:
                    script_audit.mark_change(change_id, "applied")

            # 1b. Qro contact
            if dry_run:
                item["contact_result"] = "would_delete"
            else:
                change_id = script_audit.record_change(
                    run_id, qro_loc, "contact", d["qro_contact_id"],
                    "", "deleted", d["qro_contact_snapshot"], None,
                ) if run_id else None
                try:
                    client.delete_contact(qro_token, d["qro_contact_id"], qro_loc)
                except Exception as e:
                    item["contact_result"] = f"error: {e}"
                    if change_id:
                        script_audit.mark_change(change_id, "failed", error_message=str(e))
                    raise
                summary["qro_contacts_deleted"] += 1
                item["contact_result"] = "deleted"
                if change_id:
                    script_audit.mark_change(change_id, "applied")

            item["status"] = "ok"
            log(f" [{'DRY' if dry_run else 'OK'}] borrar Qro: {d['name']} (opps: {len(d['qro_opp_ids'])})")
        except Exception as e:
            # Single place where a failed item is counted, so failures outside
            # the GHL calls (e.g. audit writes) are not silently missed.
            summary["errors"] += 1
            item["status"] = "error"
            item["error"] = str(e)
            log(f" [ERROR] {d['name']}: {e}")
        items.append(item)

    # --- Phase 2: update Marca opps with the Puebla values ---
    for u in plan["brand_updates"]:
        item = {
            "phase": "update_brand_opp",
            "brand_contact_id": u["brand_contact_id"],
            "name": u["name"],
            "brand_opp_id": u["brand_opp_id"],
            "old": u["old"],
            "new": u["new"],
            "status": "pending",
            "error": None,
        }
        try:
            if dry_run:
                item["status"] = "would_update"
            else:
                change_id = script_audit.record_change(
                    run_id, BRAND_LOCATION_ID, "opportunity", u["brand_opp_id"],
                    "monetary_value+status", "updated_from_puebla",
                    u["old"], u["new"],
                ) if run_id else None
                try:
                    # The monetary value goes through the general opportunity
                    # update; the status goes through the dedicated status call
                    # (the original author notes the general update does not
                    # always apply status changes).
                    payload = {"monetaryValue": u["new"]["monetary_value"]}
                    client.update_opportunity(brand_token, u["brand_opp_id"], payload)
                    if u["old"]["status"] != u["new"]["status"]:
                        client.update_opportunity_status(
                            brand_token, u["brand_opp_id"], u["new"]["status"]
                        )
                except Exception as e:
                    if change_id:
                        script_audit.mark_change(change_id, "failed", error_message=str(e))
                    raise
                summary["brand_opps_updated"] += 1
                item["status"] = "updated"
                if change_id:
                    script_audit.mark_change(change_id, "applied")
            log(f" [{'DRY' if dry_run else 'OK'}] update Marca: {u['name']} "
                f"value ${u['old']['monetary_value']:,.0f}->${u['new']['monetary_value']:,.0f} "
                f"status {u['old']['status']}->{u['new']['status']}")
        except Exception as e:
            summary["errors"] += 1
            item["status"] = "error"
            item["error"] = str(e)
            log(f" [ERROR update] {u['name']}: {e}")
        items.append(item)

    log("\n=== RESUMEN ===")
    for k, v in summary.items():
        log(f" {k:<28}: {v}")

    return {
        "dry_run": dry_run,
        "summary": summary,
        "items": items,
        "plan_meta": {
            "puebla_location_id": plan["puebla_location_id"],
            "qro_location_id": plan["qro_location_id"],
        },
    }


def main():
    """CLI entry point.

    Returns:
        0 on success, 1 if the user cancelled or any item failed, 2 if the
        cleanup aborted with an exception.
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--apply", action="store_true",
                        help="Ejecuta las escrituras en GHL. Default dry-run.")
    parser.add_argument("--yes", action="store_true",
                        help="Skip confirmacion interactiva.")
    parser.add_argument("--only-contact", action="append", default=[],
                        help="Procesa solo el brand_contact_id dado (repetible).")
    parser.add_argument("--json", action="store_true",
                        help="Imprime el resultado como JSON.")
    parser.add_argument("--run-id", type=str, default=None,
                        help="Id de script_audit existente. "
                             "Si se omite y es apply, se genera uno nuevo.")
    args = parser.parse_args()

    dry_run = not args.apply

    if not dry_run and not args.yes:
        safe_print("\nEsto eliminara contactos+opps en QUERETARO y actualizara opps en MARCA.")
        safe_print("Es destructivo (los DELETE no son automaticamente reversibles).")
        try:
            confirm = input("Continuar? (y/N): ").strip().lower()
        except EOFError:
            # Closed / non-interactive stdin: treat as "no" rather than
            # crashing with a traceback.
            confirm = ""
        if confirm not in ("y", "yes", "s", "si", "sí"):
            safe_print("Cancelado.")
            return 1

    run_id = args.run_id
    if not dry_run:
        if not run_id:
            run_id = f"cpqd-{uuid.uuid4().hex[:12]}"
        try:
            script_audit.init_audit_db()
            script_audit.create_run(
                run_id, SCRIPT_NAME,
                arguments=f"--apply only_contact={args.only_contact or 'all'}",
                locations=[BRAND_LOCATION_ID],  # qro_loc is only known after build_plan
                execution_mode="sequential",
            )
        except Exception as e:
            # Best-effort: the cleanup proceeds without audit records.
            safe_print(f"[warn] no se pudo iniciar audit run: {e}")
            run_id = None

    try:
        result = run_cleanup(
            dry_run=dry_run,
            log=safe_print,
            run_id=run_id,
            only_contact_ids=args.only_contact or None,
        )
    except Exception as e:
        if run_id:
            try:
                script_audit.update_run_status(run_id, "failed", str(e))
            except Exception as audit_err:
                safe_print(f"[warn] no se pudo actualizar el estado del audit run: {audit_err}")
        safe_print(f"[FATAL] {e}")
        return 2

    if run_id:
        try:
            errors = result["summary"]["errors"]
            applied = (
                result["summary"]["qro_contacts_deleted"]
                + result["summary"]["brand_opps_updated"]
            )
            # "failed" only when there were errors and nothing was applied.
            status = "failed" if (errors and applied == 0) else "success"
            script_audit.update_run_status(run_id, status)
        except Exception as audit_err:
            safe_print(f"[warn] no se pudo actualizar el estado del audit run: {audit_err}")
        result["run_id"] = run_id

    if args.json:
        safe_print(json.dumps(result, default=str, ensure_ascii=False, indent=2))
    return 0 if result["summary"]["errors"] == 0 else 1


if __name__ == "__main__":
    sys.exit(main() or 0)