Primer commit
This commit is contained in:
@@ -0,0 +1,404 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Backfill del campo 'ID Contacto Sucursal' (contact.id_contacto_sucursal)
|
||||
en los contactos de Marca que aun lo tienen vacio.
|
||||
|
||||
Se ejecuta despues de haber poblado el mismo campo en todas las sucursales con
|
||||
`fill_contact_id_sucursal.py`. Para cada contacto Marca sin el campo, busca su
|
||||
contraparte en sucursal usando la heuristica clasica (phone + email + name) y,
|
||||
si encuentra match unico, pone el id del contacto sucursal en el CF de Marca.
|
||||
|
||||
Solo escribe en Marca. Las sucursales no se tocan.
|
||||
|
||||
Estrategia de match (`scripts.common.match_contacts`):
|
||||
- 'strong' : phone, email y name (>=0.80 similitud) coinciden.
|
||||
- 'medium' : phone y name coinciden (email puede faltar).
|
||||
- cualquier otro se descarta.
|
||||
Cuando varios contactos de sucursal matchean al mismo contacto Marca (caso
|
||||
cross-branch duplicates) el desempate elige el de `dateAdded` mas antiguo.
|
||||
|
||||
Modos:
|
||||
- dry-run (default): no escribe nada en GHL.
|
||||
- --apply --run-id <uuid>: aplica, registra cada cambio en script_audit.
|
||||
|
||||
Uso:
|
||||
python scripts/backfill_brand_contact_id_sucursal.py
|
||||
python scripts/backfill_brand_contact_id_sucursal.py --apply --run-id <uuid>
|
||||
python scripts/backfill_brand_contact_id_sucursal.py --only-contact <brand_id>
|
||||
python scripts/backfill_brand_contact_id_sucursal.py --export-unmatched
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
import uuid
|
||||
import warnings
|
||||
from collections import defaultdict
|
||||
|
||||
# Suppress the urllib3 "doesn't match a supported version!" warning.
warnings.filterwarnings("ignore", message=r"urllib3 .* doesn't match a supported version!")

# Put the parent of this file's directory, and the directory itself, at the
# front of sys.path (presumably so the project modules imported right after
# this resolve when the file is executed directly as a script).
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
if SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, SCRIPTS_DIR)
|
||||
|
||||
import sync_engine # noqa: E402
|
||||
import script_audit # noqa: E402
|
||||
from paths import DB_PATH, MIGRATIONS_DIR, EXPORTS_DIR # noqa: E402
|
||||
from common import match_contacts, normalize_phone, normalize_email # noqa: E402
|
||||
from audit_brand_vs_branches_totals import ( # noqa: E402
|
||||
resolve_contact_link_field_id,
|
||||
extract_contact_link_value,
|
||||
)
|
||||
|
||||
# location_id of the Brand ("Marca") account: the only location this script writes to.
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"
# Demo locations; their contacts are excluded when loading branch contacts.
DEMO_LOCATION_IDS = {"Vf7qQl3L9vakJ8hDtQ8e", "Z64WQKORPVwXb5mn68Ef"}
# Key of the custom field sent alongside the field id when updating a Brand contact.
CF_KEY = "contact.id_contacto_sucursal"

# Short alias for the GHL client exposed by sync_engine (used for the PUT request).
gc = sync_engine.ghl_client
|
||||
|
||||
|
||||
def safe_print(*args, **kwargs):
    """Write the space-joined arguments plus a newline to stdout.

    Keyword arguments are accepted but ignored. If stdout cannot encode the
    text, it is written again with unencodable characters replaced, using
    stdout's own encoding (or UTF-8 when stdout reports none).
    """
    message = " ".join(map(str, args))
    try:
        sys.stdout.write(message + "\n")
        sys.stdout.flush()
    except UnicodeEncodeError:
        encoding = sys.stdout.encoding or "utf-8"
        # Round-trip through the target encoding so every character is writable.
        degraded = message.encode(encoding, errors="replace").decode(encoding)
        sys.stdout.write(degraded + "\n")
        sys.stdout.flush()
|
||||
|
||||
|
||||
def row_to_contact(row):
    """Map a contacts row to the dict shape handed to ``match_contacts``.

    ``row`` must support lookup by column name. Snake_case name/date columns
    are exposed under camelCase keys (``firstName``, ``lastName``,
    ``dateAdded``); the remaining keys keep their column name.
    """
    key_to_column = (
        ("id", "id"),
        ("location_id", "location_id"),
        ("phone", "phone"),
        ("email", "email"),
        ("firstName", "first_name"),
        ("lastName", "last_name"),
        ("dateAdded", "date_added"),
    )
    return {key: row[column] for key, column in key_to_column}
|
||||
|
||||
|
||||
def load_brand_unfilled(conn, brand_field_id):
    """Return the Brand contact rows whose link custom field is empty.

    Every contact of ``BRAND_LOCATION_ID`` is read from ``conn``; a row is
    kept when ``extract_contact_link_value`` yields a falsy value for
    ``brand_field_id`` in its ``custom_fields_json``.
    """
    query = (
        "SELECT id, location_id, first_name, last_name, email, phone, date_added, custom_fields_json "
        "FROM contacts WHERE location_id=?"
    )
    brand_rows = conn.execute(query, (BRAND_LOCATION_ID,)).fetchall()
    return [
        row
        for row in brand_rows
        if not extract_contact_link_value(row["custom_fields_json"], brand_field_id)
    ]
|
||||
|
||||
|
||||
def load_branch_contacts(conn):
    """Return every contact row that is neither in the Brand nor in a demo location."""
    # One "?" per demo location so the NOT IN list stays fully parameterized.
    demo_marks = ",".join("?" * len(DEMO_LOCATION_IDS))
    query = (
        f"SELECT id, location_id, first_name, last_name, email, phone, date_added "
        f"FROM contacts WHERE location_id != ? AND location_id NOT IN ({demo_marks})"
    )
    params = (BRAND_LOCATION_ID, *DEMO_LOCATION_IDS)
    return conn.execute(query, params).fetchall()
|
||||
|
||||
|
||||
def index_branches(rows):
    """Index branch contacts by normalized phone and by normalized email.

    Returns ``(by_phone, by_email)``: two mappings from normalized key to the
    list of contact dicts sharing it. Contacts whose normalized phone/email is
    empty are left out of the corresponding index. The indexes let the
    matcher look candidates up directly instead of scanning every branch
    contact for every Brand contact.
    """
    phone_index = defaultdict(list)
    email_index = defaultdict(list)
    for row in rows:
        contact = row_to_contact(row)
        keyed_targets = (
            (phone_index, normalize_phone(contact.get("phone") or "")),
            (email_index, normalize_email(contact.get("email") or "")),
        )
        for index, key in keyed_targets:
            if key:
                index[key].append(contact)
    return phone_index, email_index
|
||||
|
||||
|
||||
def pick_oldest(candidates):
    """Return the candidate with the earliest ``dateAdded`` (tie-breaker).

    A missing or unparseable ``dateAdded`` sorts last. When several candidates
    share the earliest value, the first one in ``candidates`` wins.
    """
    def added_at(contact):
        raw = contact.get("dateAdded") or ""
        try:
            # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
            parsed = datetime.datetime.fromisoformat(raw.replace("Z", "+00:00"))
            return parsed.timestamp()
        except Exception:
            return float("inf")

    return min(candidates, key=added_at)
|
||||
|
||||
|
||||
def plan_match(brand_row, by_phone, by_email):
    """Decide which branch contact, if any, a Brand contact should link to.

    Candidates are the branch contacts sharing the Brand contact's normalized
    phone, followed by those sharing its normalized email (without repeats).
    Each candidate is scored with ``match_contacts``; only 'strong' and
    'medium' levels count as matches.

    Returns a dict whose ``status`` is one of:
      - 'match_unique'    : all matches share one id. Includes ``chosen``,
                            ``level`` and ``all_matches``.
      - 'match_multi'     : matches with different ids; ``chosen`` is the one
                            with the oldest ``dateAdded``. Includes
                            ``all_matches``.
      - 'no_data'         : the Brand contact has neither phone nor email.
      - 'phone_collision' : no match, but at least one candidate was reported
                            with 'phone_collision_unresolved' (per the original
                            note: phone matches but the name does not, e.g. a
                            couple sharing a number). Includes ``candidates``.
      - 'no_match'        : no usable candidate (likely a legitimate
                            Brand-only contact).
    """
    brand_contact = row_to_contact(brand_row)
    phone_key = normalize_phone(brand_contact.get("phone") or "")
    email_key = normalize_email(brand_contact.get("email") or "")

    if not (phone_key or email_key):
        return {"status": "no_data", "candidates": []}

    pool = list(by_phone.get(phone_key, [])) if phone_key else []
    if email_key:
        for contact in by_email.get(email_key, []):
            if contact not in pool:
                pool.append(contact)

    if not pool:
        return {"status": "no_match", "candidates": []}

    accepted = []
    collisions = []
    for candidate in pool:
        verdict = match_contacts(brand_contact, candidate)
        if verdict["level"] in ("strong", "medium"):
            accepted.append((candidate, verdict))
        elif "phone_collision_unresolved" in verdict["reasons"]:
            collisions.append((candidate, verdict))

    if not accepted:
        if collisions:
            return {
                "status": "phone_collision",
                "candidates": [candidate for candidate, _ in collisions],
            }
        return {"status": "no_match", "candidates": []}

    distinct_ids = {candidate["id"] for candidate, _ in accepted}
    if len(distinct_ids) > 1:
        # Several different branch contacts match: keep the oldest one.
        oldest = pick_oldest([candidate for candidate, _ in accepted])
        return {
            "status": "match_multi",
            "chosen": oldest,
            "all_matches": accepted,
        }

    first_candidate, first_verdict = accepted[0]
    return {
        "status": "match_unique",
        "chosen": first_candidate,
        "level": first_verdict["level"],
        "all_matches": accepted,
    }
|
||||
|
||||
|
||||
def render_summary(plans, log):
    """Log how many plans fall into each known status.

    ``plans`` is a sequence of ``{"plan": {"status": ...}}`` entries and
    ``log`` a callable taking one string. Statuses outside the five known
    ones are counted but not reported.
    """
    tally = defaultdict(int)
    for entry in plans:
        tally[entry["plan"]["status"]] += 1

    report_rows = (
        (" match_unique : ", "match_unique"),
        (" match_multi : ", "match_multi"),
        (" phone_collision : ", "phone_collision"),
        (" no_match : ", "no_match"),
        (" no_data : ", "no_data"),
    )
    log("\n=== Resumen del plan ===")
    for label, status in report_rows:
        log(f"{label}{tally[status]}")
|
||||
|
||||
|
||||
def render_examples(plans, log, n=6):
    """Log up to ``n`` sample Brand contacts for each status present.

    Statuses are reported in a fixed order. For matched statuses the chosen
    branch contact id and its location are appended to the line. When a
    status has more than ``n`` entries, a trailing line states how many were
    omitted.
    """
    grouped = defaultdict(list)
    for entry in plans:
        grouped[entry["plan"]["status"]].append(entry)

    matched_statuses = ("match_unique", "match_multi")
    for status in matched_statuses + ("phone_collision", "no_match", "no_data"):
        entries = grouped.get(status, [])
        if not entries:
            continue
        log(f"\n [{status}] ejemplos:")
        for entry in entries[:n]:
            brand = entry["brand"]
            full_name = f"{brand['first_name'] or ''} {brand['last_name'] or ''}".strip() or "(sin nombre)"
            pieces = [
                f" - {full_name!r:40} brand={brand['id']} phone={brand['phone']!r} email={brand['email']!r}"
            ]
            if status in matched_statuses:
                chosen = entry["plan"]["chosen"]
                pieces.append(f" -> sucursal={chosen['id']} ({chosen['location_id']})")
            log("".join(pieces))
        omitted = len(entries) - n
        if omitted > 0:
            log(f" ... y {omitted} más")
|
||||
|
||||
|
||||
def export_csv(plans, status_filter, filename, log):
    """Write the plans whose status is in ``status_filter`` to a CSV file.

    The file is created as ``filename`` inside ``EXPORTS_DIR`` (created if
    missing). Branch columns are left empty for plans without a chosen
    contact. Logs the resulting path and returns it.
    """
    path = os.path.join(EXPORTS_DIR, filename)
    os.makedirs(EXPORTS_DIR, exist_ok=True)
    header = [
        "status", "brand_id", "brand_name", "brand_phone", "brand_email",
        "branch_id", "branch_location_id", "branch_name", "branch_date_added",
    ]
    with open(path, "w", encoding="utf-8", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        for entry in plans:
            status = entry["plan"]["status"]
            if status not in status_filter:
                continue
            brand = entry["brand"]
            brand_name = f"{brand['first_name'] or ''} {brand['last_name'] or ''}".strip()
            chosen = entry["plan"].get("chosen") or {}
            branch_name = f"{chosen.get('firstName') or ''} {chosen.get('lastName') or ''}".strip()
            writer.writerow([
                status,
                brand["id"],
                brand_name,
                brand["phone"] or "",
                brand["email"] or "",
                chosen.get("id", ""),
                chosen.get("location_id", ""),
                branch_name,
                chosen.get("dateAdded", ""),
            ])
    log(f" Export CSV: {path}")
    return path
|
||||
|
||||
|
||||
def apply_match(plan, brand_field_id, brand_token, run_id, log):
    """Write the chosen branch contact id into the Brand contact's custom field.

    ``plan`` is a ``{"brand": ..., "plan": ...}`` entry. Plans that are not
    'match_unique' / 'match_multi' are skipped. When ``run_id`` is given, the
    change is recorded in ``script_audit`` before the request and then marked
    'applied' or 'failed'.

    Returns ``{"applied": True}`` on success,
    ``{"applied": False, "reason": <status>}`` for skipped plans, or
    ``{"applied": False, "error": <message>}`` when the request fails.
    """
    decision = plan["plan"]
    brand = plan["brand"]
    status = decision["status"]
    if status not in ("match_unique", "match_multi"):
        return {"applied": False, "reason": status}

    branch_contact_id = decision["chosen"]["id"]
    change_id = (
        script_audit.record_change(
            run_id, BRAND_LOCATION_ID, "contact",
            brand["id"], brand_field_id, "id_contacto_sucursal",
            {"value": None}, {"value": branch_contact_id})
        if run_id
        else None
    )
    payload = {
        "customFields": [
            {"id": brand_field_id, "key": CF_KEY, "field_value": branch_contact_id}
        ]
    }
    try:
        gc._request("PUT", f"/contacts/{brand['id']}", brand_token, json=payload)
        if change_id:
            script_audit.mark_change(change_id, "applied")
        return {"applied": True}
    except Exception as exc:
        # Best-effort per contact: record and report the failure, keep the run going.
        if change_id:
            script_audit.mark_change(change_id, "failed", str(exc))
        log(f" ✗ Error en {brand['id']}: {exc}")
        return {"applied": False, "error": str(exc)}
|
||||
|
||||
|
||||
def snapshot_run(plans, run_id, dry_run):
    """Dump the plans to a timestamped JSON file in ``MIGRATIONS_DIR``.

    Each entry is trimmed first: the brand's ``custom_fields_json`` is
    dropped, ``all_matches`` / ``candidates`` are reduced to id + location,
    and ``chosen`` keeps only the contact fields. Returns the file path.
    """
    os.makedirs(MIGRATIONS_DIR, exist_ok=True)
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    path = os.path.join(MIGRATIONS_DIR, f"backfill_brand_contact_id_sucursal_{stamp}.json")

    def reference(contact):
        return {"id": contact["id"], "location_id": contact["location_id"]}

    chosen_keys = ("id", "location_id", "phone", "email", "firstName", "lastName", "dateAdded")

    serial = []
    for entry in plans:
        brand = dict(entry["brand"])
        brand.pop("custom_fields_json", None)  # noise
        plan = dict(entry["plan"])
        if "all_matches" in plan:
            plan["all_matches"] = [reference(contact) for contact, _ in plan["all_matches"]]
        if "candidates" in plan:
            plan["candidates"] = [reference(contact) for contact in plan["candidates"]]
        if "chosen" in plan:
            plan["chosen"] = {
                key: value for key, value in plan["chosen"].items() if key in chosen_keys
            }
        serial.append({"brand": brand, "plan": plan})

    with open(path, "w", encoding="utf-8") as fh:
        document = {
            "run_id": run_id,
            "dry_run": dry_run,
            "timestamp_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
            "count": len(plans),
            "plans": serial,
        }
        json.dump(document, fh, ensure_ascii=False, indent=2, default=str)
    return path
|
||||
|
||||
|
||||
def run(apply=False, run_id=None, only_contact=None, export_unmatched=False, log=None):
    """Plan and, when ``apply`` is true, write the Brand -> branch contact links.

    Steps: load the Brand contacts whose link field is empty, index the branch
    contacts, compute a plan per Brand contact, log a summary with examples,
    write a JSON snapshot and optionally a CSV of unresolved contacts. In
    apply mode every matched plan is then written to the Brand location.

    Args:
        apply: write to GHL when true; otherwise dry-run.
        run_id: ``script_audit`` run id; auditing is skipped when falsy.
        only_contact: restrict the run to this Brand contact id.
        export_unmatched: also export a CSV of unresolved / multi-match plans.
        log: callable taking one string; defaults to ``safe_print``.

    Returns:
        ``{"plans", "snapshot"}`` in dry-run, plus ``"stats"`` in apply mode.

    Raises:
        SystemExit: the Brand account or the link field id cannot be found.
    """
    if log is None:
        log = safe_print

    accounts = sync_engine.parse_accounts_csv()
    # Fail with an explicit message instead of a bare StopIteration.
    brand = next((a for a in accounts if a["location_id"] == BRAND_LOCATION_ID), None)
    if brand is None:
        raise SystemExit(f"No se encontro la cuenta de Marca ({BRAND_LOCATION_ID}) en las cuentas configuradas.")
    brand_token = brand["token"]

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        brand_field_id = resolve_contact_link_field_id(conn, BRAND_LOCATION_ID)
        if not brand_field_id:
            raise SystemExit("No se encontro el field_id de ID Contacto Sucursal en Marca. Resync schemas o crea el campo.")

        log(f"CF id_contacto_sucursal en Marca: field_id={brand_field_id}")

        unfilled = load_brand_unfilled(conn, brand_field_id)
        if only_contact:
            unfilled = [r for r in unfilled if r["id"] == only_contact]
        log(f"Contactos Marca con CF vacio: {len(unfilled)}")

        branch_rows = load_branch_contacts(conn)
        log(f"Contactos sucursales indexados: {len(branch_rows)}")
    finally:
        # All rows are already fetched; the database is not needed past this
        # point, so release the connection even when an error aborts the run.
        conn.close()

    by_phone, by_email = index_branches(branch_rows)

    plans = [
        {"brand": dict(r), "plan": plan_match(r, by_phone, by_email)}
        for r in unfilled
    ]

    render_summary(plans, log)
    render_examples(plans, log)

    snap = snapshot_run(plans, run_id, dry_run=not apply)
    log(f"\nSnapshot: {snap}")

    if export_unmatched:
        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        export_csv(plans, {"no_match", "no_data", "phone_collision", "match_multi"},
                   f"brand_unmatched_{ts}.csv", log)

    if not apply:
        log("\nDRY-RUN. Para aplicar: --apply --run-id <uuid>")
        return {"plans": plans, "snapshot": snap}

    matchables = [p for p in plans if p["plan"]["status"] in ("match_unique", "match_multi")]
    log(f"\nAplicando {len(matchables)} matches en Marca...")
    if run_id:
        script_audit.create_run(
            run_id, "backfill_brand_contact_id_sucursal.py",
            arguments=f"matches:{len(matchables)} apply",
            locations=[BRAND_LOCATION_ID])

    stats = {"applied": 0, "errors": 0, "skipped": 0}
    for item in plans:
        if item["plan"]["status"] not in ("match_unique", "match_multi"):
            stats["skipped"] += 1
            continue
        # NOTE(review): after a stop request the run below is still marked
        # "completed" when no errors occurred - confirm that is intended.
        if run_id and not script_audit.wait_if_paused_or_stopped(run_id):
            log("Detencion solicitada. Saliendo.")
            break
        r = apply_match(item, brand_field_id, brand_token, run_id, log)
        if r["applied"]:
            stats["applied"] += 1
        else:
            stats["errors"] += 1

    if run_id:
        script_audit.update_run_status(
            run_id,
            "completed" if stats["errors"] == 0 else "failed",
            f"errors={stats['errors']}" if stats["errors"] else None)

    log(f"\nResumen: applied={stats['applied']} errors={stats['errors']} skipped={stats['skipped']}")
    return {"plans": plans, "snapshot": snap, "stats": stats}
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse the flags and delegate to ``run``.

    With ``--apply`` and no ``--run-id``, a random UUID is generated and
    printed so the changes are still tracked under a run id.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument("--apply", action="store_true", help="Aplica. Sin esto: dry-run.")
    parser.add_argument("--run-id", help="ID para script_audit.")
    parser.add_argument("--only-contact", help="Filtrar a un solo contact_id Marca.")
    parser.add_argument(
        "--export-unmatched",
        action="store_true",
        help="Exportar CSV con los que no se pudieron resolver.",
    )
    options = parser.parse_args()

    # Switch stdout to UTF-8 where the stream supports reconfiguration.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    audit_run_id = options.run_id
    if options.apply and not audit_run_id:
        audit_run_id = str(uuid.uuid4())
        safe_print(f"[info] run_id autogenerado: {audit_run_id}")

    run(
        apply=options.apply,
        run_id=audit_run_id,
        only_contact=options.only_contact,
        export_unmatched=options.export_unmatched,
        log=safe_print,
    )
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user