Files
MP-Manager/scripts/audit_contact_sync_coverage.py
T
2026-05-30 14:31:19 -06:00

343 lines
13 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reporte read-only: cuantifica que campos pierde hoy sync_contacts_branch_to_brand.
Compara, por sucursal, el esquema de contactos de la sucursal contra el esquema
de la Marca Principal, y samplea contactos vivos para contar cuantos registros
tienen valor poblado en cada categoria:
- campos estandar que el sync ya copia (firstName/lastName/email/phone)
- campos estandar que el sync NO copia (tags, source, address, dateOfBirth, ...)
- custom fields con match literal entre sucursal y Marca (sync los copia hoy)
- custom fields con match solo por alias/normalizacion (sync los PIERDE hoy)
- custom fields sin contraparte en Marca (sync no puede copiarlos)
No escribe en GHL. No depende de mp_manager.sqlite (consulta live por API).
"""
import argparse
import json
import os
import sys
import unicodedata
from collections import defaultdict
# The repository root is the parent of the directory holding this script.
# It is pushed to the front of sys.path (once) before the project-local
# imports that follow; presumably that is what lets them resolve when the
# script is executed directly.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path[:0] = [ROOT_DIR]
import sync_engine # noqa: E402
from common import BRAND_LOCATION_ID, FIELD_ALIASES # noqa: E402
# Standard contact fields that, per the module docstring, the sync already
# copies; scan_branch counts how many sampled contacts have each populated.
STANDARD_FIELDS_SYNCED_TODAY = ["firstName", "lastName", "email", "phone"]
# Standard contact fields that, per the module docstring, the sync does NOT
# copy; scan_branch counts them to quantify what is being lost.
STANDARD_FIELDS_DROPPED_TODAY = [
"name", "address1", "city", "state", "country", "postalCode",
"dateOfBirth", "companyName", "website", "timezone",
"source", "type", "assignedTo", "dnd",
]
# Dropped fields that scan_branch only counts when the value is a non-empty list.
LIST_FIELDS_DROPPED_TODAY = ["tags", "additionalEmails", "additionalPhones"]
# Keys inspected by custom_field_has_value when deciding whether a custom-field
# entry carries data; any one of them holding a non-empty value is enough.
CUSTOM_VALUE_KEYS = (
"value", "fieldValueString", "fieldValueDate",
"fieldValueNumber", "fieldValueArray",
"fieldValueOptions", "fieldValueFile",
)
def norm_field_name(name):
    """Return a comparison key for a field name.

    The name is stringified (any falsy input becomes ""), trimmed,
    lower-cased, NFKD-decomposed with combining marks removed (so accented
    letters compare equal to their base letters), and every run of
    whitespace is collapsed to a single space.
    """
    raw = str(name or "").strip().lower()
    decomposed = unicodedata.normalize("NFKD", raw)
    # combining() is 0 for base characters and non-zero for accents/marks.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    words = "".join(base_chars).split()
    return " ".join(words)
def build_alias_lookup():
    """Build a ``normalized variant -> canonical name`` dict from FIELD_ALIASES.

    Every variant listed under a canonical name is normalized with
    norm_field_name and mapped to that canonical name. Only variants become
    keys: a canonical name is itself a key only if it also appears among the
    variants. When two variants normalize to the same key, the one iterated
    last wins.
    """
    return {
        norm_field_name(variant): canonical
        for canonical, variants in FIELD_ALIASES.items()
        for variant in variants
    }
def classify_custom_fields(branch_schema, brand_schema, alias_lookup):
    """Assign every branch custom field to a sync-coverage bucket.

    Args:
        branch_schema: Container of branch field names (iterated for names;
            scan_branch passes a mapping keyed by field name).
        brand_schema: Container of brand field names; must support iteration
            and ``in`` membership tests.
        alias_lookup: Dict of normalized alias -> canonical name, as built by
            build_alias_lookup.

    Returns:
        Dict mapping each branch field name to ``(bucket, brand_field)``:
        - ``("exact_match", name)`` when the literal name exists in the brand;
        - ``("alias_match_only", brand_name)`` when the names only agree after
          normalization, or when both resolve to the same canonical alias;
        - ``("no_match", None)`` otherwise.
    """
    # Index the brand schema once instead of rescanning (and re-normalizing)
    # it for every branch field.
    brand_by_norm = {}
    brand_by_canonical = {}
    for brand_name in brand_schema:
        brand_norm = norm_field_name(brand_name)
        # Last brand name wins on normalized collisions.
        brand_by_norm[brand_norm] = brand_name
        canonical = alias_lookup.get(brand_norm)
        if canonical is not None:
            # First brand name wins per canonical alias.
            brand_by_canonical.setdefault(canonical, brand_name)

    classification = {}
    for field_name in branch_schema:
        if field_name in brand_schema:
            classification[field_name] = ("exact_match", field_name)
            continue

        norm = norm_field_name(field_name)
        if norm in brand_by_norm:
            classification[field_name] = ("alias_match_only", brand_by_norm[norm])
            continue

        canonical = alias_lookup.get(norm)
        # Falsy canonical names are never treated as a match.
        if canonical and canonical in brand_by_canonical:
            classification[field_name] = (
                "alias_match_only",
                brand_by_canonical[canonical],
            )
            continue

        classification[field_name] = ("no_match", None)
    return classification
def custom_field_has_value(field):
    """Tell whether a custom-field entry holds data under any known value key.

    ``field`` is a dict-like entry; each key in CUSTOM_VALUE_KEYS present in
    it is inspected. ``None``, blank/whitespace-only strings and empty
    lists/dicts count as "no value"; anything else (including ``0`` and
    ``False``) counts as a value.
    """
    def _is_blank(candidate):
        if candidate is None:
            return True
        if isinstance(candidate, str):
            return not candidate.strip()
        if isinstance(candidate, (list, dict)):
            return not candidate
        return False

    return any(
        key in field and not _is_blank(field[key])
        for key in CUSTOM_VALUE_KEYS
    )
def standard_field_has_value(contact, field_name):
    """Tell whether ``contact[field_name]`` is populated.

    Missing keys, ``None``, blank/whitespace-only strings and empty
    lists/dicts are unpopulated. Any other value (numbers and booleans
    included, even ``0`` / ``False``) is considered populated.
    """
    value = contact.get(field_name)
    if value is None:
        return False
    if isinstance(value, str):
        return value.strip() != ""
    if not isinstance(value, (list, dict)):
        return True
    return bool(value)
def _count_populated_fields(contacts, branch_id_to_name):
    """Tally, per field, how many contacts carry a non-empty value."""
    synced_hits = defaultdict(int)
    dropped_hits = defaultdict(int)
    list_hits = defaultdict(int)
    custom_hits = defaultdict(int)
    for contact in contacts:
        for fname in STANDARD_FIELDS_SYNCED_TODAY:
            if standard_field_has_value(contact, fname):
                synced_hits[fname] += 1
        for fname in STANDARD_FIELDS_DROPPED_TODAY:
            if standard_field_has_value(contact, fname):
                dropped_hits[fname] += 1
        for fname in LIST_FIELDS_DROPPED_TODAY:
            value = contact.get(fname)
            if isinstance(value, list) and value:
                list_hits[fname] += 1

        # Collect names in a set so a contact that repeats the same custom
        # field is still counted once ("contacts with value", not entries).
        populated = set()
        for field in contact.get("customFields", []) or []:
            if not isinstance(field, dict):
                continue
            name = branch_id_to_name.get(field.get("id") or field.get("fieldId"))
            if name and custom_field_has_value(field):
                populated.add(name)
        for name in populated:
            custom_hits[name] += 1
    return synced_hits, dropped_hits, list_hits, custom_hits


def _bucket_custom_fields(classification, custom_hits):
    """Group classified fields by bucket, most-populated first."""
    buckets = {"exact_match": [], "alias_match_only": [], "no_match": []}
    for field_name, (bucket, brand_name) in classification.items():
        buckets[bucket].append({
            "branch_field": field_name,
            "brand_field": brand_name,
            "contacts_with_value": custom_hits.get(field_name, 0),
        })
    for rows in buckets.values():
        rows.sort(key=lambda row: row["contacts_with_value"], reverse=True)
    return buckets


def _print_branch_findings(buckets, dropped_hits, list_hits, total_contacts):
    """Print the per-branch console summary of what the sync loses."""
    print(f" Custom fields: {len(buckets['exact_match'])} match exacto, "
          f"{len(buckets['alias_match_only'])} solo por alias (perdidos hoy), "
          f"{len(buckets['no_match'])} sin contraparte en Marca.")

    # Both "top 5 con datos" lists only show fields that actually have data
    # in the sample, matching what their headers announce.
    top_alias = [r for r in buckets["alias_match_only"] if r["contacts_with_value"]][:5]
    if top_alias:
        print(" Campos perdidos por mismatch de nombre (top 5 con datos):")
        for row in top_alias:
            print(f" - {row['branch_field']!r} -> Marca {row['brand_field']!r} "
                  f"({row['contacts_with_value']}/{total_contacts} contactos con valor)")

    top_no_match = [r for r in buckets["no_match"] if r["contacts_with_value"]][:5]
    if top_no_match:
        print(" Campos sin contraparte en Marca (top 5 con datos):")
        for row in top_no_match:
            print(f" - {row['branch_field']!r} "
                  f"({row['contacts_with_value']}/{total_contacts} contactos con valor)")

    standard_with_data = {k: v for k, v in dropped_hits.items() if v}
    list_with_data = {k: v for k, v in list_hits.items() if v}
    if standard_with_data or list_with_data:
        print(" Estandar/lista no sincronizados que SI tienen datos:")
        for k, v in sorted(standard_with_data.items(), key=lambda x: x[1], reverse=True):
            print(f" - {k}: {v}/{total_contacts}")
        for k, v in sorted(list_with_data.items(), key=lambda x: x[1], reverse=True):
            print(f" - {k} (lista): {v}/{total_contacts}")


def scan_branch(account, brand_schema, alias_lookup, max_contacts):
    """Audit one branch: classify its custom fields and count populated data.

    Args:
        account: Dict with ``location_id`` and ``token`` (required) and an
            optional ``nombre`` used as the display name.
        brand_schema: Brand contact schema, passed to classify_custom_fields.
        alias_lookup: Normalized alias -> canonical name dict.
        max_contacts: Sample size forwarded to the client's
            ``get_all_contacts``.

    Returns:
        ``None`` when the branch schema cannot be read; otherwise a dict with
        ``location_id``, ``branch_name``, ``total_contacts``, the three
        per-field hit dicts (``standard_synced_hits``,
        ``standard_dropped_hits``, ``list_dropped_hits``) and
        ``custom_field_buckets`` (rows sorted by ``contacts_with_value``).
    """
    location_id = account["location_id"]
    token = account["token"]
    branch_name = account.get("nombre") or location_id
    print(f"\n--- {branch_name} ({location_id}) ---")

    branch_schema = sync_engine.ghl_client.get_object_schema(token, location_id, "contact")
    if not branch_schema:
        print(" WARN: no se pudo leer el schema de contacto de la sucursal.")
        return None

    classification = classify_custom_fields(branch_schema, brand_schema, alias_lookup)
    # The schema maps name -> id; contacts reference custom fields by id.
    branch_id_to_name = {fid: name for name, fid in branch_schema.items()}

    # Normalize an empty/None response to an empty sample instead of crashing
    # on len(); list() also tolerates any iterable result.
    contacts = list(
        sync_engine.ghl_client.get_all_contacts(token, location_id, max_contacts=max_contacts)
        or []
    )
    total_contacts = len(contacts)
    print(f" Contactos sampleados: {total_contacts}")

    synced_hits, dropped_hits, list_hits, custom_hits = _count_populated_fields(
        contacts, branch_id_to_name
    )
    buckets = _bucket_custom_fields(classification, custom_hits)
    _print_branch_findings(buckets, dropped_hits, list_hits, total_contacts)

    return {
        "location_id": location_id,
        "branch_name": branch_name,
        "total_contacts": total_contacts,
        "standard_synced_hits": dict(synced_hits),
        "standard_dropped_hits": dict(dropped_hits),
        "list_dropped_hits": dict(list_hits),
        "custom_field_buckets": buckets,
    }
def aggregate(report_rows):
    """Fold the per-branch reports into one cross-branch summary.

    Falsy rows (branches whose scan returned nothing) are skipped.

    Returns a dict with:
        ``total_contacts``: sum of sampled contacts;
        ``standard_dropped`` / ``list_dropped``: field -> summed hit count;
        ``alias_only``: branch field -> ``{"contacts", "branches",
            "brand_field"}``; every alias-only row adds one branch, and the
            brand field of the last row seen is kept;
        ``no_match``: branch field -> ``{"contacts", "branches"}``, counting
            only rows that have at least one contact with a value.
    """
    contacts_seen = 0
    dropped_standard = defaultdict(int)
    dropped_lists = defaultdict(int)
    alias_totals = defaultdict(lambda: {"contacts": 0, "branches": 0, "brand_field": None})
    unmatched_totals = defaultdict(lambda: {"contacts": 0, "branches": 0})

    for branch_report in filter(None, report_rows):
        contacts_seen += branch_report["total_contacts"]

        for field_name, hits in branch_report["standard_dropped_hits"].items():
            dropped_standard[field_name] += hits
        for field_name, hits in branch_report["list_dropped_hits"].items():
            dropped_lists[field_name] += hits

        for item in branch_report["custom_field_buckets"]["alias_match_only"]:
            slot = alias_totals[item["branch_field"]]
            slot["contacts"] += item["contacts_with_value"]
            slot["branches"] += 1
            slot["brand_field"] = item["brand_field"]

        for item in branch_report["custom_field_buckets"]["no_match"]:
            if not item["contacts_with_value"]:
                continue
            slot = unmatched_totals[item["branch_field"]]
            slot["contacts"] += item["contacts_with_value"]
            slot["branches"] += 1

    return {
        "total_contacts": contacts_seen,
        "standard_dropped": dict(dropped_standard),
        "list_dropped": dict(dropped_lists),
        "alias_only": dict(alias_totals),
        "no_match": dict(unmatched_totals),
    }
def print_global(summary):
    """Print the cross-branch summary dict produced by aggregate().

    Emits a banner, the total of sampled contacts, then one section per
    non-empty category: dropped standard fields, dropped list fields,
    alias-only custom fields (top 20) and unmatched custom fields (top 20),
    each ordered by descending contact count.
    """
    rule = "=" * 78
    print("\n" + rule)
    print("RESUMEN GLOBAL")
    print(rule)
    print(f"Contactos sampleados (suma): {summary['total_contacts']}")

    # The two plain "field -> count" sections share the same layout.
    count_sections = (
        ("standard_dropped",
         "\nCampos estandar perdidos hoy por el sync (contactos con valor, total bulk):"),
        ("list_dropped",
         "\nCampos de lista perdidos hoy (tags/emails/phones secundarios):"),
    )
    for section_key, heading in count_sections:
        ranked = sorted(summary[section_key].items(), key=lambda pair: pair[1], reverse=True)
        if not ranked:
            continue
        print(heading)
        for name, count in ranked:
            if count:
                print(f" - {name}: {count}")

    def _ranked_by_contacts(section_key):
        return sorted(
            summary[section_key].items(),
            key=lambda pair: pair[1]["contacts"],
            reverse=True,
        )

    alias_ranked = _ranked_by_contacts("alias_only")
    if alias_ranked:
        print("\nCustom fields perdidos por mismatch de nombre (arreglables con aliases):")
        for branch_field, info in alias_ranked[:20]:
            print(f" - {branch_field!r} -> Marca {info['brand_field']!r}: "
                  f"{info['contacts']} contactos en {info['branches']} sucursal(es)")

    unmatched_ranked = _ranked_by_contacts("no_match")
    if unmatched_ranked:
        print("\nCustom fields sin contraparte en Marca (requieren crearlos en Marca primero):")
        for branch_field, info in unmatched_ranked[:20]:
            print(f" - {branch_field!r}: {info['contacts']} contactos en {info['branches']} sucursal(es)")
def select_accounts(args, accounts):
    """Choose which accounts to audit.

    With ``args.location`` set, return the accounts whose ``location_id``
    equals it, exiting with an error message when none does. Otherwise return
    every account except the brand location and those whose ``nombre``
    contains "demo" (case-insensitive).
    """
    wanted = args.location
    if not wanted:
        def _is_auditable(account):
            if account["location_id"] == BRAND_LOCATION_ID:
                return False
            display_name = (account.get("nombre") or "").lower()
            return "demo" not in display_name

        return [account for account in accounts if _is_auditable(account)]

    chosen = [account for account in accounts if account["location_id"] == wanted]
    if chosen:
        return chosen
    raise SystemExit(f"Location {args.location} no existe en el CSV de tokens")
def parse_args():
    """Parse the command-line options of the audit script.

    Returns:
        argparse.Namespace with ``location`` (str or None), ``max_contacts``
        (int, default 500) and ``json_path`` (str or None).
    """
    # The description is the first line of the module docstring. __doc__ is
    # None when docstrings are stripped (python -OO) and may be empty, so it
    # is read defensively instead of indexing blindly.
    doc_lines = (__doc__ or "").strip().splitlines()
    description = doc_lines[0] if doc_lines else None

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--location", help="Auditar una sola sucursal por location_id")
    parser.add_argument("--max-contacts", type=int, default=500,
                        help="Contactos a samplear por sucursal. Default 500. Subir para mayor precision.")
    parser.add_argument("--json", dest="json_path",
                        help="Ruta opcional para volcar el reporte completo en JSON.")
    return parser.parse_args()
def main():
    """Run the audit: load accounts, scan each branch, print and optionally dump.

    Exits via SystemExit when the brand account is missing from the accounts
    list or when the brand contact schema cannot be read. When ``--json`` is
    given, the per-branch rows and the global summary are written to that
    path as UTF-8 JSON.
    """
    # Force UTF-8 output where the stream supports reconfiguration.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    args = parse_args()
    accounts = sync_engine.parse_accounts_csv()

    brand_account = None
    for candidate in accounts:
        if candidate["location_id"] == BRAND_LOCATION_ID:
            brand_account = candidate
            break
    if not brand_account:
        raise SystemExit("No se encontro la cuenta de Marca Principal en el CSV de tokens")

    branches = select_accounts(args, accounts)

    banner = "=" * 78
    print(banner)
    print("AUDIT: COBERTURA DE SINCRONIZACION DE CONTACTOS SUCURSAL -> MARCA")
    print(banner)
    print(f"Sucursales a auditar: {len(branches)}")
    print(f"Sample por sucursal: {args.max_contacts} contactos")

    print("\nCargando schema de contactos de Marca...")
    brand_schema = sync_engine.ghl_client.get_object_schema(
        brand_account["token"], BRAND_LOCATION_ID, "contact"
    )
    if not brand_schema:
        raise SystemExit("No se pudo leer el schema de Marca; abortando.")
    print(f"Schema de Marca: {len(brand_schema)} campos custom mapeados por nombre.")

    alias_lookup = build_alias_lookup()
    # A branch whose schema could not be read contributes a None row.
    rows = [
        scan_branch(account, brand_schema, alias_lookup, args.max_contacts)
        for account in branches
    ]
    summary = aggregate(rows)
    print_global(summary)

    if not args.json_path:
        return
    payload = {"per_branch": rows, "summary": summary}
    with open(args.json_path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, ensure_ascii=False, indent=2)
    print(f"\nReporte JSON volcado en: {args.json_path}")


if __name__ == "__main__":
    main()