Primer commit
This commit is contained in:
@@ -0,0 +1,475 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
cleanup_puebla_qro_duplicates.py
|
||||
|
||||
Limpieza puntual de duplicados cruzados entre las sucursales de Puebla y
|
||||
Queretaro detectados en la comparativa Marca vs Sucursales.
|
||||
|
||||
Contexto (mayo 2026):
|
||||
El audit detecto 29 contactos en Marca que existen fisicamente tanto en
|
||||
Puebla como en Queretaro. Tras inspeccion, las opps de Puebla traen
|
||||
valores monetarios reales (~$1.7M en pipeline) mientras que las de
|
||||
Queretaro estan todas en $0 — son shells huecos. La conclusion del
|
||||
operador fue: las copias en Queretaro son residuales y deben eliminarse.
|
||||
|
||||
Acciones (TODAS reversibles via audit log salvo los DELETE, que son
|
||||
destructivos pero quedan registrados con todos los datos previos):
|
||||
|
||||
1. En la sucursal de QUERETARO:
|
||||
- Elimina cada opp residual (las 29 en $0)
|
||||
- Elimina cada contacto residual (29)
|
||||
Las opps se borran ANTES que el contacto para que el audit log conserve
|
||||
el contact_id de la opp aunque GHL cascadee el delete del contacto.
|
||||
|
||||
2. En la cuenta de MARCA:
|
||||
- Para cada opp Marca cuya monetary_value o status no coincide con
|
||||
su contraparte en Puebla, hace PUT con los valores correctos.
|
||||
- No se elimina ni se crea ningun contacto en Marca: los 29 contactos
|
||||
Marca involucrados son unicos (no hay duplicado intra-Marca) y sus
|
||||
datos basicos (phone/email/nombre) ya son correctos.
|
||||
|
||||
Reglas de seguridad:
|
||||
- Dry-run por default. Requiere --apply para escribir.
|
||||
- Recalcula el plan desde la DB en cada ejecucion (no consume el JSON
|
||||
externo) — asi nunca opera con datos stale.
|
||||
- Re-verifica cada operacion contra los datos vivos antes de aplicar.
|
||||
- Cada cambio se registra en script_audit con planned -> applied.
|
||||
- Soporta --only-contact para procesar uno a uno (debug/granular).
|
||||
|
||||
Uso:
|
||||
python scripts/cleanup_puebla_qro_duplicates.py # dry-run
|
||||
python scripts/cleanup_puebla_qro_duplicates.py --apply --yes # apply real
|
||||
python scripts/cleanup_puebla_qro_duplicates.py --json # output JSON
|
||||
python scripts/cleanup_puebla_qro_duplicates.py --only-contact ID # un solo contacto
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if ROOT_DIR not in sys.path:
|
||||
sys.path.insert(0, ROOT_DIR)
|
||||
|
||||
from scripts.audit_brand_vs_branches_totals import ( # noqa: E402
|
||||
load_accounts_filtered, load_contacts, load_opps,
|
||||
build_contact_index, find_match,
|
||||
BRAND_LOCATION_ID, DB_PATH,
|
||||
)
|
||||
import sync_engine # noqa: E402 (get_tokens_map + ghl_client singleton)
|
||||
import script_audit # noqa: E402
|
||||
|
||||
SCRIPT_NAME = "cleanup_puebla_qro_duplicates.py"
|
||||
|
||||
|
||||
def safe_print(*args, **kwargs):
    """Write the arguments to stdout without failing on unencodable characters.

    Honors the ``sep`` and ``end`` keyword arguments (defaults ``" "`` and
    ``"\\n"``); any other keyword argument is ignored. When the first write
    raises ``UnicodeEncodeError``, the text is round-tripped through the
    stream encoding (UTF-8 when the stream reports none) with
    ``errors="replace"`` and written again.
    """
    terminator = kwargs.get("end", "\n")
    message = kwargs.get("sep", " ").join(map(str, args))
    stream_encoding = sys.stdout.encoding or "utf-8"
    try:
        sys.stdout.write(message + terminator)
        sys.stdout.flush()
    except UnicodeEncodeError:
        # Replace whatever the console cannot represent and retry once.
        degraded = message.encode(stream_encoding, errors="replace").decode(stream_encoding)
        sys.stdout.write(degraded + terminator)
        sys.stdout.flush()
|
||||
|
||||
|
||||
def _norm_name(s: str) -> str:
|
||||
return (s or "").lower().replace("é", "e").replace("á", "a").replace("í", "i").replace("ó", "o").replace("ú", "u")
|
||||
|
||||
|
||||
def build_plan(log=safe_print):
    """Rebuild the whole cleanup plan from the local DB.

    Args:
        log: Logging callable. Kept for interface compatibility; this
            function does not emit any log line.

    Returns:
        dict with:
          - puebla_location_id, puebla_name, qro_location_id, qro_name
          - qro_deletions: [{qro_contact_id, qro_opp_ids, qro_opps_snapshot,
            brand_contact_id, name, qro_contact_snapshot}]
          - brand_updates: [{brand_opp_id, brand_contact_id, name,
            old: {monetary_value, status, pipeline_id, pipeline_stage_id},
            new: {monetary_value, status}}]

    Raises:
        FileNotFoundError: DB_PATH does not exist.
        RuntimeError: Puebla or Queretaro cannot be located among the
            branches, more than one branch matches either name, or both
            names resolve to the same location.
    """
    if not os.path.exists(DB_PATH):
        raise FileNotFoundError(f"No existe {DB_PATH}. Sincroniza primero.")

    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        _brand, branches, _demos = load_accounts_filtered(conn)
        brand_contacts = load_contacts(conn, BRAND_LOCATION_ID)
        brand_opps = load_opps(conn, BRAND_LOCATION_ID)

        # Resolve the two target branches by (accent-insensitive) name first.
        # Every hit is collected so an ambiguous match can be rejected instead
        # of silently keeping whichever branch happened to come last.
        branch_name_by_loc = {}
        puebla_hits = []
        qro_hits = []
        for b in branches:
            loc = b["location_id"]
            name = b["nombre"]
            branch_name_by_loc[loc] = name
            nlow = _norm_name(name)
            if "puebla" in nlow and loc not in puebla_hits:
                puebla_hits.append(loc)
            if "queretaro" in nlow and loc not in qro_hits:
                qro_hits.append(loc)

        puebla_loc = puebla_hits[0] if puebla_hits else None
        qro_loc = qro_hits[0] if qro_hits else None
        if not puebla_loc or not qro_loc:
            raise RuntimeError(f"No se ubicaron sucursales: puebla={puebla_loc} qro={qro_loc}")
        # This plan drives irreversible deletes: refuse to guess. If both names
        # resolved to the same location, every matched contact of that branch
        # would look "duplicated" and be scheduled for deletion.
        if len(puebla_hits) > 1 or len(qro_hits) > 1 or puebla_loc == qro_loc:
            raise RuntimeError(f"Sucursales ambiguas: puebla={puebla_hits} qro={qro_hits}")

        # Only the two target branches are ever read below, so only they are
        # loaded and indexed.
        branch_full_contacts = {}
        branch_idx = {}
        branch_opps_by_cid = {}
        for loc in (puebla_loc, qro_loc):
            bc = load_contacts(conn, loc)
            bo = load_opps(conn, loc)
            branch_full_contacts[loc] = {c["id"]: c for c in bc}
            branch_idx[loc] = build_contact_index(bc)
            grp = defaultdict(list)
            for o in bo:
                grp[o["contact_id"]].append(o)
            branch_opps_by_cid[loc] = grp

        # Map each brand contact to the branch contacts it matches.
        brand_by_id = {c["id"]: c for c in brand_contacts}
        contact_branches = defaultdict(list)
        for c in brand_contacts:
            for loc in branch_idx:
                idxp, idxe, idxn = branch_idx[loc]
                for m in find_match(c, idxp, idxe, idxn):
                    contact_branches[c["id"]].append((loc, m["id"]))

        brand_opps_by_cid_marca = defaultdict(list)
        for o in brand_opps:
            brand_opps_by_cid_marca[o["contact_id"]].append(o)

        qro_deletions = []
        brand_updates = []
        for bid, locs in contact_branches.items():
            locset = {entry[0] for entry in locs}
            # Only brand contacts present in BOTH branches are candidates.
            if not (puebla_loc in locset and qro_loc in locset):
                continue
            c = brand_by_id[bid]
            name = f"{c.get('first_name') or ''} {c.get('last_name') or ''}".strip()
            # When a branch yields several matches, the first one is used.
            p_bcid = next(bc for branch, bc in locs if branch == puebla_loc)
            q_bcid = next(bc for branch, bc in locs if branch == qro_loc)

            qro_opps = branch_opps_by_cid[qro_loc].get(q_bcid, [])
            qro_contact = branch_full_contacts[qro_loc][q_bcid]
            qro_deletions.append({
                "qro_contact_id": q_bcid,
                "qro_opp_ids": [o["id"] for o in qro_opps],
                # Snapshots are what the audit log stores before each delete.
                "qro_opps_snapshot": [
                    {"id": o["id"], "name": o.get("name"), "status": o.get("status"),
                     "monetary_value": o.get("monetary_value"),
                     "pipeline_id": o.get("pipeline_id"),
                     "pipeline_stage_id": o.get("pipeline_stage_id")}
                    for o in qro_opps
                ],
                "brand_contact_id": bid,
                "name": name,
                "qro_contact_snapshot": {
                    "id": q_bcid,
                    "first_name": qro_contact.get("first_name"),
                    "last_name": qro_contact.get("last_name"),
                    "phone": qro_contact.get("phone"),
                    "email": qro_contact.get("email"),
                },
            })

            # Compare brand vs Puebla opps; plan an update when value or
            # status differ.
            m_opps = brand_opps_by_cid_marca.get(bid, [])
            p_opps = branch_opps_by_cid[puebla_loc].get(p_bcid, [])
            if not m_opps or not p_opps:
                continue  # anomaly; do not update without certainty
            # NOTE(review): only the first opp on each side is compared; a
            # contact with several opps is not handled specially.
            m_o = m_opps[0]
            p_o = p_opps[0]
            m_val = float(m_o.get("monetary_value") or 0)
            p_val = float(p_o.get("monetary_value") or 0)
            m_st = (m_o.get("status") or "").lower()
            p_st = (p_o.get("status") or "").lower()
            if m_val == p_val and m_st == p_st:
                continue  # already consistent

            brand_updates.append({
                "brand_opp_id": m_o["id"],
                "brand_contact_id": bid,
                "name": name,
                "old": {
                    "monetary_value": m_val,
                    "status": m_st,
                    "pipeline_id": m_o.get("pipeline_id"),
                    "pipeline_stage_id": m_o.get("pipeline_stage_id"),
                },
                "new": {
                    "monetary_value": p_val,
                    "status": p_st,
                    # pipeline_id and pipeline_stage_id are NOT copied across
                    # accounts (each location has its own pipeline). Only
                    # monetary_value and status are updated.
                },
            })

        return {
            "puebla_location_id": puebla_loc,
            "puebla_name": branch_name_by_loc[puebla_loc],
            "qro_location_id": qro_loc,
            "qro_name": branch_name_by_loc[qro_loc],
            "qro_deletions": qro_deletions,
            "brand_updates": brand_updates,
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _empty_summary():
|
||||
return {
|
||||
"candidates_pairs": 0,
|
||||
"qro_contacts_to_delete": 0,
|
||||
"qro_opps_to_delete": 0,
|
||||
"brand_opps_to_update": 0,
|
||||
"qro_contacts_deleted": 0,
|
||||
"qro_opps_deleted": 0,
|
||||
"brand_opps_updated": 0,
|
||||
"errors": 0,
|
||||
}
|
||||
|
||||
|
||||
def run_cleanup(dry_run=True, log=None, run_id=None, only_contact_ids=None):
    """Run the cleanup and return a serializable report.

    Args:
        dry_run: When true (default) nothing is written; every item is
            reported with a ``would_*`` status.
        log: Logging callable; defaults to ``safe_print``.
        run_id: script_audit run id. When falsy, changes are applied WITHOUT
            being recorded in the audit log.
        only_contact_ids: Optional iterable of brand contact ids; when given,
            only plan entries for those contacts are processed.

    Returns:
        dict ``{dry_run, summary, items, plan_meta}``.

    Raises:
        RuntimeError: In apply mode, when no token is available for the
            Queretaro or brand location. Errors raised by ``build_plan``
            propagate as well.
    """
    if log is None:
        log = safe_print

    plan = build_plan(log=log)
    only = set(only_contact_ids or [])
    if only:
        plan["qro_deletions"] = [d for d in plan["qro_deletions"] if d["brand_contact_id"] in only]
        plan["brand_updates"] = [u for u in plan["brand_updates"] if u["brand_contact_id"] in only]

    summary = _empty_summary()
    summary["candidates_pairs"] = len(plan["qro_deletions"])
    summary["qro_contacts_to_delete"] = len(plan["qro_deletions"])
    summary["qro_opps_to_delete"] = sum(len(d["qro_opp_ids"]) for d in plan["qro_deletions"])
    summary["brand_opps_to_update"] = len(plan["brand_updates"])

    log(f"[{datetime.now().strftime('%H:%M:%S')}] === cleanup_puebla_qro_duplicates ===")
    log(f"Modo: {'DRY-RUN (no escribe)' if dry_run else 'APPLY (escribe en GHL)'}")
    log(f"Puebla: {plan['puebla_name']} ({plan['puebla_location_id']})")
    log(f"Qro: {plan['qro_name']} ({plan['qro_location_id']})")
    log(f"Resumen plan: {summary['candidates_pairs']} contactos Qro a borrar, "
        f"{summary['qro_opps_to_delete']} opps Qro a borrar, "
        f"{summary['brand_opps_to_update']} opps Marca a actualizar.")

    items = []

    if not dry_run:
        tokens_map = sync_engine.get_tokens_map()
        qro_token = tokens_map.get(plan["qro_location_id"])
        brand_token = tokens_map.get(BRAND_LOCATION_ID)
        if not qro_token:
            raise RuntimeError(f"No hay token para Queretaro {plan['qro_location_id']}")
        if not brand_token:
            raise RuntimeError(f"No hay token para Marca {BRAND_LOCATION_ID}")
        client = sync_engine.ghl_client
    else:
        client = None  # not used in dry-run

    # --- Phase 1: delete Qro opps, then the Qro contact ---
    for d in plan["qro_deletions"]:
        item = {
            "phase": "delete_qro",
            "brand_contact_id": d["brand_contact_id"],
            "name": d["name"],
            "qro_contact_id": d["qro_contact_id"],
            "qro_opp_ids": d["qro_opp_ids"],
            "qro_opp_results": [],
            "contact_result": None,
            "status": "pending",
            "error": None,
        }
        try:
            # 1a. Qro opps. Any failure aborts this item, so the contact is
            # never deleted while one of its opps could not be removed.
            for opp_snap in d["qro_opps_snapshot"]:
                opp_id = opp_snap["id"]
                if dry_run:
                    item["qro_opp_results"].append({"id": opp_id, "status": "would_delete",
                                                    "value": opp_snap["monetary_value"]})
                    continue
                # The audit row is written BEFORE the delete so the snapshot
                # exists even if the API call fails midway.
                cid = script_audit.record_change(
                    run_id, plan["qro_location_id"], "opportunity", opp_id,
                    "", "deleted", opp_snap, None,
                ) if run_id else None
                try:
                    client.delete_opportunity(qro_token, opp_id, plan["qro_location_id"])
                    summary["qro_opps_deleted"] += 1
                    item["qro_opp_results"].append({"id": opp_id, "status": "deleted"})
                    if cid:
                        script_audit.mark_change(cid, "applied")
                except Exception as e:
                    item["qro_opp_results"].append({"id": opp_id, "status": "error", "error": str(e)})
                    if cid:
                        script_audit.mark_change(cid, "failed", error_message=str(e))
                    raise

            # 1b. Qro contact
            if dry_run:
                item["contact_result"] = "would_delete"
            else:
                cid = script_audit.record_change(
                    run_id, plan["qro_location_id"], "contact", d["qro_contact_id"],
                    "", "deleted", d["qro_contact_snapshot"], None,
                ) if run_id else None
                try:
                    client.delete_contact(qro_token, d["qro_contact_id"], plan["qro_location_id"])
                    summary["qro_contacts_deleted"] += 1
                    item["contact_result"] = "deleted"
                    if cid:
                        script_audit.mark_change(cid, "applied")
                except Exception as e:
                    item["contact_result"] = f"error: {e}"
                    if cid:
                        script_audit.mark_change(cid, "failed", error_message=str(e))
                    raise
            item["status"] = "ok"
            log(f" [{'DRY' if dry_run else 'OK'}] borrar Qro: {d['name']} (opps: {len(d['qro_opp_ids'])})")
        except Exception as e:
            # Single place where a failed item is counted: it also covers
            # failures raised before the API call (e.g. the audit insert),
            # which would otherwise leave summary["errors"] at 0.
            summary["errors"] += 1
            item["status"] = "error"
            item["error"] = str(e)
            log(f" [ERROR] {d['name']}: {e}")
        items.append(item)

    # --- Phase 2: update brand opps with the Puebla values ---
    for u in plan["brand_updates"]:
        item = {
            "phase": "update_brand_opp",
            "brand_contact_id": u["brand_contact_id"],
            "name": u["name"],
            "brand_opp_id": u["brand_opp_id"],
            "old": u["old"],
            "new": u["new"],
            "status": "pending",
            "error": None,
        }
        try:
            if dry_run:
                item["status"] = "would_update"
            else:
                cid = script_audit.record_change(
                    run_id, BRAND_LOCATION_ID, "opportunity", u["brand_opp_id"],
                    "monetary_value+status", "updated_from_puebla",
                    u["old"], u["new"],
                ) if run_id else None
                try:
                    # The monetary value goes through the generic opportunity
                    # update; the status is sent through its own call, and
                    # only when it actually changed.
                    payload = {"monetaryValue": u["new"]["monetary_value"]}
                    client.update_opportunity(brand_token, u["brand_opp_id"], payload)
                    if u["old"]["status"] != u["new"]["status"]:
                        client.update_opportunity_status(brand_token, u["brand_opp_id"], u["new"]["status"])
                    summary["brand_opps_updated"] += 1
                    item["status"] = "updated"
                    if cid:
                        script_audit.mark_change(cid, "applied")
                except Exception as e:
                    if cid:
                        script_audit.mark_change(cid, "failed", error_message=str(e))
                    raise
            log(f" [{'DRY' if dry_run else 'OK'}] update Marca: {u['name']} "
                f"value ${u['old']['monetary_value']:,.0f}->${u['new']['monetary_value']:,.0f} "
                f"status {u['old']['status']}->{u['new']['status']}")
        except Exception as e:
            # Counted here for the same reason as in phase 1.
            summary["errors"] += 1
            item["status"] = "error"
            item["error"] = str(e)
            log(f" [ERROR update] {u['name']}: {e}")
        items.append(item)

    log("\n=== RESUMEN ===")
    for k, v in summary.items():
        log(f" {k:<28}: {v}")

    return {
        "dry_run": dry_run,
        "summary": summary,
        "items": items,
        "plan_meta": {
            "puebla_location_id": plan["puebla_location_id"],
            "qro_location_id": plan["qro_location_id"],
        },
    }
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the arguments, asks for confirmation before a real run (unless
    ``--yes``), opens a script_audit run in apply mode, executes
    ``run_cleanup`` and optionally prints the result as JSON.

    Returns:
        int exit code: 0 when the run finished without errors, 1 when the
        operator cancelled or at least one item failed, 2 when
        ``run_cleanup`` raised.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--apply", action="store_true", help="Ejecuta las escrituras en GHL. Default dry-run.")
    parser.add_argument("--yes", action="store_true", help="Skip confirmacion interactiva.")
    parser.add_argument("--only-contact", action="append", default=[], help="Procesa solo el brand_contact_id dado (repetible).")
    parser.add_argument("--json", action="store_true", help="Imprime el resultado como JSON.")
    parser.add_argument("--run-id", type=str, default=None, help="Id de script_audit existente. Si se omite y es apply, se genera uno nuevo.")
    args = parser.parse_args()

    dry_run = not args.apply

    if not dry_run and not args.yes:
        safe_print("\nEsto eliminara contactos+opps en QUERETARO y actualizara opps en MARCA.")
        safe_print("Es destructivo (los DELETE no son automaticamente reversibles).")
        try:
            confirm = input("Continuar? (y/N): ").strip().lower()
        except EOFError:
            # Non-interactive stdin (cron, pipe, CI): there is nobody to
            # confirm, so treat it as "no" instead of dying with a traceback.
            confirm = ""
        if confirm not in ("y", "yes", "s", "si", "sí"):
            safe_print("Cancelado.")
            return 1

    run_id = args.run_id
    if not dry_run:
        if not run_id:
            run_id = f"cpqd-{uuid.uuid4().hex[:12]}"
        try:
            script_audit.init_audit_db()
            # NOTE(review): create_run is also called when --run-id names an
            # existing run; confirm that script_audit accepts that.
            script_audit.create_run(
                run_id,
                SCRIPT_NAME,
                arguments=f"--apply only_contact={args.only_contact or 'all'}",
                locations=[BRAND_LOCATION_ID],  # the Qro location is only known after build_plan
                execution_mode="sequential",
            )
        except Exception as e:
            # NOTE(review): best-effort by design, but it means the deletes
            # below run with no audit trail. Consider aborting here instead.
            safe_print(f"[warn] no se pudo iniciar audit run: {e}")
            run_id = None

    try:
        result = run_cleanup(
            dry_run=dry_run,
            log=safe_print,
            run_id=run_id,
            only_contact_ids=args.only_contact or None,
        )
    except Exception as e:
        if run_id:
            try:
                script_audit.update_run_status(run_id, "failed", str(e))
            except Exception as audit_err:
                # Still best-effort, but no longer silent.
                safe_print(f"[warn] no se pudo actualizar el estado del audit run: {audit_err}")
        safe_print(f"[FATAL] {e}")
        return 2

    if run_id:
        try:
            errors = result["summary"]["errors"]
            applied = result["summary"]["qro_contacts_deleted"] + result["summary"]["brand_opps_updated"]
            # A run counts as failed only when there were errors AND nothing
            # was applied; partial progress is recorded as success.
            status = "failed" if errors and applied == 0 else "success"
            script_audit.update_run_status(run_id, status)
        except Exception as audit_err:
            safe_print(f"[warn] no se pudo actualizar el estado del audit run: {audit_err}")
        result["run_id"] = run_id

    if args.json:
        safe_print(json.dumps(result, default=str, ensure_ascii=False, indent=2))
    return 0 if result["summary"]["errors"] == 0 else 1


if __name__ == "__main__":
    sys.exit(main() or 0)
|
||||
Reference in New Issue
Block a user