405 lines
15 KiB
Python
405 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""Backfill del campo 'ID Contacto Sucursal' (contact.id_contacto_sucursal)
|
|
en los contactos de Marca que aun lo tienen vacio.
|
|
|
|
Se ejecuta despues de haber poblado el mismo campo en todas las sucursales con
|
|
`fill_contact_id_sucursal.py`. Para cada contacto Marca sin el campo, busca su
|
|
contraparte en sucursal usando la heuristica clasica (phone + email + name) y,
|
|
si encuentra match unico, pone el id del contacto sucursal en el CF de Marca.
|
|
|
|
Solo escribe en Marca. Las sucursales no se tocan.
|
|
|
|
Estrategia de match (`scripts.common.match_contacts`):
|
|
- 'strong' : phone, email y name (>=0.80 similitud) coinciden.
|
|
- 'medium' : phone y name coinciden (email puede faltar).
|
|
- cualquier otro se descarta.
|
|
Cuando varios contactos de sucursal matchean al mismo contacto Marca (caso
|
|
cross-branch duplicates) el desempate elige el de `dateAdded` mas antiguo.
|
|
|
|
Modos:
|
|
- dry-run (default): no escribe nada en GHL.
|
|
- --apply --run-id <uuid>: aplica, registra cada cambio en script_audit.
|
|
|
|
Uso:
|
|
python scripts/backfill_brand_contact_id_sucursal.py
|
|
python scripts/backfill_brand_contact_id_sucursal.py --apply --run-id <uuid>
|
|
python scripts/backfill_brand_contact_id_sucursal.py --only-contact <brand_id>
|
|
python scripts/backfill_brand_contact_id_sucursal.py --export-unmatched
|
|
"""
|
|
|
|
import argparse
|
|
import csv
|
|
import datetime
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
import uuid
|
|
import warnings
|
|
from collections import defaultdict
|
|
|
|
# Ignore warnings whose message matches the urllib3 "doesn't match a supported
# version!" pattern so they do not clutter the script output.
warnings.filterwarnings("ignore", message=r"urllib3 .* doesn't match a supported version!")

# ROOT_DIR is the parent of the directory that contains this file; SCRIPTS_DIR
# is the directory that contains this file. Both are put at the front of
# sys.path (SCRIPTS_DIR ends up first because it is inserted last) so the
# bare-module imports that follow can be resolved from either location.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
if SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, SCRIPTS_DIR)
|
|
|
|
import sync_engine # noqa: E402
|
|
import script_audit # noqa: E402
|
|
from paths import DB_PATH, MIGRATIONS_DIR, EXPORTS_DIR # noqa: E402
|
|
from common import match_contacts, normalize_phone, normalize_email # noqa: E402
|
|
from audit_brand_vs_branches_totals import ( # noqa: E402
|
|
resolve_contact_link_field_id,
|
|
extract_contact_link_value,
|
|
)
|
|
|
|
# Location id of the brand ("Marca") account. Brand contacts are read from this
# location and it is the only location this script writes to.
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"
# Location ids that are excluded when branch contacts are loaded (demo accounts).
DEMO_LOCATION_IDS = {"Vf7qQl3L9vakJ8hDtQ8e", "Z64WQKORPVwXb5mn68Ef"}
# Key of the custom field sent in the PUT payload that stores the branch contact id.
CF_KEY = "contact.id_contacto_sucursal"

# Short alias for the client object exposed by sync_engine; used to issue the PUT
# requests against brand contacts.
gc = sync_engine.ghl_client
|
|
|
|
|
|
def safe_print(*args, **kwargs):
    """Write the arguments to stdout as one line, tolerating encoding errors.

    Positional arguments are converted with ``str`` and joined with single
    spaces, then written followed by a newline and flushed. If stdout raises
    ``UnicodeEncodeError``, the text is round-tripped through the stream's
    encoding (falling back to UTF-8 when the stream reports none) with
    unencodable characters replaced, and written again.

    Keyword arguments are accepted but ignored.
    """
    text = " ".join(map(str, args))
    try:
        sys.stdout.write(f"{text}\n")
        sys.stdout.flush()
    except UnicodeEncodeError:
        # Degrade gracefully: substitute characters the console cannot show.
        encoding = sys.stdout.encoding or "utf-8"
        printable = text.encode(encoding, errors="replace").decode(encoding)
        sys.stdout.write(printable + "\n")
        sys.stdout.flush()
|
|
|
|
|
|
def row_to_contact(row):
    """Convert a contacts row into the dict shape passed to ``match_contacts``.

    ``row`` must support lookup by column name (for example ``sqlite3.Row`` or
    a plain dict). The snake_case name/date columns are exposed under
    camelCase keys; the remaining keys keep their column names.
    """
    # (output key, source column) pairs, in the order the result is built.
    key_to_column = (
        ("id", "id"),
        ("location_id", "location_id"),
        ("phone", "phone"),
        ("email", "email"),
        ("firstName", "first_name"),
        ("lastName", "last_name"),
        ("dateAdded", "date_added"),
    )
    return {key: row[column] for key, column in key_to_column}
|
|
|
|
|
|
def load_brand_unfilled(conn, brand_field_id):
    """Return the brand contact rows whose branch-contact-id field is empty.

    All contacts stored for ``BRAND_LOCATION_ID`` are read from ``conn``; a row
    is kept when ``extract_contact_link_value`` yields a falsy value for
    ``brand_field_id`` in that row's ``custom_fields_json``.
    """
    query = (
        "SELECT id, location_id, first_name, last_name, email, phone, date_added, custom_fields_json "
        "FROM contacts WHERE location_id=?"
    )
    brand_rows = conn.execute(query, (BRAND_LOCATION_ID,)).fetchall()
    return [
        row
        for row in brand_rows
        if not extract_contact_link_value(row["custom_fields_json"], brand_field_id)
    ]
|
|
|
|
|
|
def load_branch_contacts(conn):
    """Return every contact row that belongs to a branch location.

    Rows for the brand location and for the locations listed in
    ``DEMO_LOCATION_IDS`` are excluded.
    """
    demo_ids = tuple(DEMO_LOCATION_IDS)
    # One "?" per demo id; values are always bound, never interpolated.
    marks = ",".join("?" * len(demo_ids))
    query = (
        "SELECT id, location_id, first_name, last_name, email, phone, date_added "
        f"FROM contacts WHERE location_id != ? AND location_id NOT IN ({marks})"
    )
    params = (BRAND_LOCATION_ID,) + demo_ids
    return conn.execute(query, params).fetchall()
|
|
|
|
|
|
def index_branches(rows):
    """Build lookup tables of branch contacts keyed by phone and by email.

    Each row is converted with ``row_to_contact``; the contact is then filed
    under its normalized phone and under its normalized email, skipping any
    key that normalizes to a falsy value. This lets matching look candidates
    up by key instead of scanning every branch contact.

    Returns a ``(by_phone, by_email)`` tuple of ``defaultdict(list)``.
    """
    phone_index = defaultdict(list)
    email_index = defaultdict(list)
    for row in rows:
        contact = row_to_contact(row)
        phone_key = normalize_phone(contact.get("phone") or "")
        email_key = normalize_email(contact.get("email") or "")
        for key, index in ((phone_key, phone_index), (email_key, email_index)):
            if key:
                index[key].append(contact)
    return phone_index, email_index
|
|
|
|
|
|
def pick_oldest(candidates):
    """Return the candidate with the earliest ``dateAdded``.

    ``dateAdded`` is parsed as an ISO-8601 string (a ``Z`` suffix is read as
    ``+00:00``). Candidates whose date is missing or cannot be parsed sort
    last. On ties the first candidate in the input order wins.
    """
    def added_at(contact):
        raw = contact.get("dateAdded") or ""
        try:
            parsed = datetime.datetime.fromisoformat(raw.replace("Z", "+00:00"))
            return parsed.timestamp()
        except Exception:
            # Unknown or malformed dates must never win the tie-break.
            return float("inf")

    return min(candidates, key=added_at)
|
|
|
|
|
|
def plan_match(brand_row, by_phone, by_email):
    """Decide which branch contact, if any, a brand contact should link to.

    Candidates are collected from ``by_phone`` and ``by_email`` using the brand
    contact's normalized phone and email, then each one is scored with
    ``match_contacts``. Only 'strong' and 'medium' results count as matches.

    Returns a dict whose ``status`` is one of:
    - 'match_unique'    : all accepted matches share one id; includes
                          ``chosen``, ``level`` and ``all_matches``.
    - 'match_multi'     : accepted matches have different ids; ``chosen`` is
                          the one selected by ``pick_oldest``; includes
                          ``all_matches``.
    - 'no_data'         : the brand contact has neither phone nor email.
    - 'phone_collision' : nothing was accepted, but at least one candidate
                          reported 'phone_collision_unresolved';
                          ``candidates`` lists those contacts.
    - 'no_match'        : no candidates, or none accepted.
    """
    brand_contact = row_to_contact(brand_row)
    phone_key = normalize_phone(brand_contact.get("phone") or "")
    email_key = normalize_email(brand_contact.get("email") or "")

    if not (phone_key or email_key):
        return {"status": "no_data", "candidates": []}

    # Phone hits first, then email hits that are not already in the pool.
    pool = list(by_phone.get(phone_key, [])) if phone_key else []
    if email_key:
        for contact in by_email.get(email_key, []):
            if contact not in pool:
                pool.append(contact)

    if not pool:
        return {"status": "no_match", "candidates": []}

    accepted = []
    collisions = []
    for candidate in pool:
        verdict = match_contacts(brand_contact, candidate)
        if verdict["level"] in ("strong", "medium"):
            accepted.append((candidate, verdict))
        elif "phone_collision_unresolved" in verdict["reasons"]:
            collisions.append((candidate, verdict))

    if not accepted:
        if collisions:
            return {"status": "phone_collision",
                    "candidates": [contact for contact, _ in collisions]}
        return {"status": "no_match", "candidates": []}

    distinct_ids = {contact["id"] for contact, _ in accepted}
    if len(distinct_ids) == 1:
        first_contact, first_verdict = accepted[0]
        return {"status": "match_unique",
                "chosen": first_contact,
                "level": first_verdict["level"],
                "all_matches": accepted}

    # Several distinct branch contacts matched: keep the oldest one.
    oldest = pick_oldest([contact for contact, _ in accepted])
    return {"status": "match_multi",
            "chosen": oldest,
            "all_matches": accepted}
|
|
|
|
|
|
def render_summary(plans, log):
    """Log how many plans ended in each status.

    ``plans`` is an iterable of dicts carrying ``plan["status"]``; ``log`` is
    called once for the heading and once per known status, in a fixed order.
    Statuses with no plans are reported as 0.
    """
    tally = {}
    for entry in plans:
        status = entry["plan"]["status"]
        tally[status] = tally.get(status, 0) + 1

    log("\n=== Resumen del plan ===")
    rows = (
        (" match_unique : ", "match_unique"),
        (" match_multi : ", "match_multi"),
        (" phone_collision : ", "phone_collision"),
        (" no_match : ", "no_match"),
        (" no_data : ", "no_data"),
    )
    for label, status in rows:
        log(f"{label}{tally.get(status, 0)}")
|
|
|
|
|
|
def render_examples(plans, log, n=6):
    """Log up to ``n`` sample brand contacts for every status that occurred.

    Statuses are visited in a fixed order and empty ones are skipped. For the
    two match statuses the chosen branch contact id and location are appended
    to the line. When a status has more than ``n`` plans, a trailing line
    reports how many were omitted.
    """
    grouped = defaultdict(list)
    for entry in plans:
        grouped[entry["plan"]["status"]].append(entry)

    matched_statuses = ("match_unique", "match_multi")
    for status in matched_statuses + ("phone_collision", "no_match", "no_data"):
        bucket = grouped.get(status, [])
        if not bucket:
            continue
        log(f"\n [{status}] ejemplos:")
        for entry in bucket[:n]:
            brand = entry["brand"]
            full_name = f"{brand['first_name'] or ''} {brand['last_name'] or ''}".strip()
            if not full_name:
                full_name = "(sin nombre)"
            pieces = [
                f" - {full_name!r:40} brand={brand['id']} phone={brand['phone']!r} email={brand['email']!r}"
            ]
            if status in matched_statuses:
                chosen = entry["plan"]["chosen"]
                pieces.append(f" -> sucursal={chosen['id']} ({chosen['location_id']})")
            log("".join(pieces))
        omitted = len(bucket) - n
        if omitted > 0:
            log(f" ... y {omitted} más")
|
|
|
|
|
|
def export_csv(plans, status_filter, filename, log):
    """Write the plans whose status is in ``status_filter`` to a CSV file.

    The file is created as ``filename`` inside ``EXPORTS_DIR`` (the directory
    is created when missing). Each row holds the brand contact's data followed
    by the chosen branch contact's data; the branch columns are empty when the
    plan has no ``chosen`` entry. The path is logged and returned.
    """
    def display_name(first, last):
        return f"{first or ''} {last or ''}".strip()

    header = ["status", "brand_id", "brand_name", "brand_phone", "brand_email",
              "branch_id", "branch_location_id", "branch_name", "branch_date_added"]

    path = os.path.join(EXPORTS_DIR, filename)
    os.makedirs(EXPORTS_DIR, exist_ok=True)
    with open(path, "w", encoding="utf-8", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        for entry in plans:
            plan = entry["plan"]
            if plan["status"] not in status_filter:
                continue
            brand = entry["brand"]
            branch = plan.get("chosen") or {}
            writer.writerow([
                plan["status"],
                brand["id"],
                display_name(brand["first_name"], brand["last_name"]),
                brand["phone"] or "",
                brand["email"] or "",
                branch.get("id", ""),
                branch.get("location_id", ""),
                display_name(branch.get("firstName"), branch.get("lastName")),
                branch.get("dateAdded", ""),
            ])
    log(f" Export CSV: {path}")
    return path
|
|
|
|
|
|
def apply_match(plan, brand_field_id, brand_token, run_id, log):
    """Write the chosen branch contact id into the brand contact's custom field.

    ``plan`` is an item with ``"brand"`` (the brand contact) and ``"plan"``
    (the result of ``plan_match``). Plans whose status is not a match are left
    alone and reported as ``{"applied": False, "reason": <status>}``.

    When ``run_id`` is given, the change is recorded through ``script_audit``
    before the request and marked 'applied' or 'failed' afterwards. Any
    exception raised while sending the request is logged and reported as
    ``{"applied": False, "error": <message>}`` instead of propagating.
    """
    decision = plan["plan"]
    brand = plan["brand"]
    status = decision["status"]
    if status not in ("match_unique", "match_multi"):
        return {"applied": False, "reason": status}

    branch_contact_id = decision["chosen"]["id"]
    change_id = (
        script_audit.record_change(
            run_id, BRAND_LOCATION_ID, "contact",
            brand["id"], brand_field_id, "id_contacto_sucursal",
            {"value": None}, {"value": branch_contact_id})
        if run_id
        else None
    )

    try:
        payload = {
            "customFields": [
                {"id": brand_field_id, "key": CF_KEY, "field_value": branch_contact_id},
            ],
        }
        gc._request("PUT", f"/contacts/{brand['id']}", brand_token, json=payload)
        if change_id:
            script_audit.mark_change(change_id, "applied")
        return {"applied": True}
    except Exception as exc:
        message = str(exc)
        if change_id:
            script_audit.mark_change(change_id, "failed", message)
        log(f" ✗ Error en {brand['id']}: {exc}")
        return {"applied": False, "error": message}
|
|
|
|
|
|
def snapshot_run(plans, run_id, dry_run):
    """Dump the computed plans to a timestamped JSON file and return its path.

    The file is written inside ``MIGRATIONS_DIR`` (created when missing). For
    each plan the brand row is stored without ``custom_fields_json``, match and
    candidate lists are reduced to ``id``/``location_id`` pairs, and ``chosen``
    is reduced to its identifying contact fields.
    """
    os.makedirs(MIGRATIONS_DIR, exist_ok=True)
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    path = os.path.join(MIGRATIONS_DIR, f"backfill_brand_contact_id_sucursal_{stamp}.json")

    def brief(contact):
        return {"id": contact["id"], "location_id": contact["location_id"]}

    chosen_fields = ("id", "location_id", "phone", "email", "firstName", "lastName", "dateAdded")

    serial = []
    for item in plans:
        brand = dict(item["brand"])
        brand.pop("custom_fields_json", None)  # noise, not needed in the snapshot
        plan = dict(item["plan"])
        if "all_matches" in plan:
            plan["all_matches"] = [brief(contact) for contact, _ in plan["all_matches"]]
        if "candidates" in plan:
            plan["candidates"] = [brief(contact) for contact in plan["candidates"]]
        if "chosen" in plan:
            plan["chosen"] = {
                key: value for key, value in plan["chosen"].items() if key in chosen_fields
            }
        serial.append({"brand": brand, "plan": plan})

    with open(path, "w", encoding="utf-8") as fh:
        document = {
            "run_id": run_id,
            "dry_run": dry_run,
            "timestamp_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
            "count": len(plans),
            "plans": serial,
        }
        json.dump(document, fh, ensure_ascii=False, indent=2, default=str)
    return path
|
|
|
|
|
|
def run(apply=False, run_id=None, only_contact=None, export_unmatched=False, log=None):
    """Plan, and optionally apply, the backfill of the branch contact id on brand contacts.

    Steps: load the brand contacts whose custom field is empty, index all
    branch contacts by phone/email, compute a plan per brand contact, log a
    summary with examples, and write a JSON snapshot. With ``apply`` the
    matched plans are then written to the brand location one by one.

    Args:
        apply: When False (default) nothing is written remotely (dry-run).
        run_id: Identifier used for ``script_audit``. When falsy, changes are
            applied without audit records.
        only_contact: If given, restrict the run to this brand contact id.
        export_unmatched: Also export unresolved/ambiguous plans to CSV.
        log: Callable receiving one message string; defaults to ``safe_print``.

    Returns:
        ``{"plans": ..., "snapshot": ...}`` for a dry-run, plus ``"stats"``
        (applied / errors / skipped counters) when ``apply`` is set.

    Raises:
        SystemExit: If the brand account is not among the parsed accounts, or
            the custom field id cannot be resolved for the brand location.
    """
    if log is None:
        log = safe_print

    accounts = sync_engine.parse_accounts_csv()
    # Use a default so a missing account gives a clear message instead of a
    # bare StopIteration escaping from next().
    brand = next((a for a in accounts if a["location_id"] == BRAND_LOCATION_ID), None)
    if brand is None:
        raise SystemExit(
            f"No se encontro la cuenta de Marca (location_id={BRAND_LOCATION_ID}) "
            "en las cuentas configuradas.")
    brand_token = brand["token"]

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        brand_field_id = resolve_contact_link_field_id(conn, BRAND_LOCATION_ID)
        if not brand_field_id:
            raise SystemExit("No se encontro el field_id de ID Contacto Sucursal en Marca. Resync schemas o crea el campo.")

        log(f"CF id_contacto_sucursal en Marca: field_id={brand_field_id}")

        unfilled = load_brand_unfilled(conn, brand_field_id)
        if only_contact:
            unfilled = [r for r in unfilled if r["id"] == only_contact]
        log(f"Contactos Marca con CF vacio: {len(unfilled)}")

        branch_rows = load_branch_contacts(conn)
        log(f"Contactos sucursales indexados: {len(branch_rows)}")
    finally:
        # Every row was fetched eagerly, so the database is not needed past
        # this point; close it even when one of the steps above fails.
        conn.close()

    by_phone, by_email = index_branches(branch_rows)

    plans = []
    for r in unfilled:
        plan = plan_match(r, by_phone, by_email)
        plans.append({"brand": dict(r), "plan": plan})

    render_summary(plans, log)
    render_examples(plans, log)

    snap = snapshot_run(plans, run_id, dry_run=not apply)
    log(f"\nSnapshot: {snap}")

    if export_unmatched:
        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        export_csv(plans, {"no_match", "no_data", "phone_collision", "match_multi"},
                   f"brand_unmatched_{ts}.csv", log)

    if not apply:
        log("\nDRY-RUN. Para aplicar: --apply --run-id <uuid>")
        return {"plans": plans, "snapshot": snap}

    matchables = [p for p in plans if p["plan"]["status"] in ("match_unique", "match_multi")]
    log(f"\nAplicando {len(matchables)} matches en Marca...")
    if run_id:
        script_audit.create_run(
            run_id, "backfill_brand_contact_id_sucursal.py",
            arguments=f"matches:{len(matchables)} apply",
            locations=[BRAND_LOCATION_ID])

    stats = {"applied": 0, "errors": 0, "skipped": 0}
    for item in plans:
        if item["plan"]["status"] not in ("match_unique", "match_multi"):
            stats["skipped"] += 1
            continue
        # A falsy result from the audit helper is treated as a stop request.
        if run_id and not script_audit.wait_if_paused_or_stopped(run_id):
            log("Detencion solicitada. Saliendo.")
            break
        r = apply_match(item, brand_field_id, brand_token, run_id, log)
        if r["applied"]:
            stats["applied"] += 1
        else:
            stats["errors"] += 1

    if run_id:
        # NOTE(review): a run interrupted by a stop request with no errors is
        # still reported as "completed" here -- confirm this is intended.
        script_audit.update_run_status(
            run_id,
            "completed" if stats["errors"] == 0 else "failed",
            f"errors={stats['errors']}" if stats["errors"] else None)

    log(f"\nResumen: applied={stats['applied']} errors={stats['errors']} skipped={stats['skipped']}")
    return {"plans": plans, "snapshot": snap, "stats": stats}
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the options and delegate to ``run``.

    When ``--apply`` is given without ``--run-id``, a random UUID is generated
    and printed so the run is still audited.
    """
    cli = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    cli.add_argument("--apply", action="store_true", help="Aplica. Sin esto: dry-run.")
    cli.add_argument("--run-id", help="ID para script_audit.")
    cli.add_argument("--only-contact", help="Filtrar a un solo contact_id Marca.")
    cli.add_argument(
        "--export-unmatched",
        action="store_true",
        help="Exportar CSV con los que no se pudieron resolver.",
    )
    options = cli.parse_args()

    # Switch stdout to UTF-8 when the stream supports reconfiguration.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    run_id = options.run_id
    if options.apply and not run_id:
        run_id = str(uuid.uuid4())
        safe_print(f"[info] run_id autogenerado: {run_id}")

    run(
        apply=options.apply,
        run_id=run_id,
        only_contact=options.only_contact,
        export_unmatched=options.export_unmatched,
        log=safe_print,
    )
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|