1606 lines
71 KiB
Python
1606 lines
71 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""audit_brand_vs_branches_totals.py
|
|
|
|
Comparativa de conteos totales entre la cuenta de Marca Principal (Monte
|
|
Providencia) y la suma de TODAS las sucursales. Las cuentas demo se excluyen
|
|
automaticamente (nombre contiene 'demo', case-insensitive).
|
|
|
|
Lo que produce:
|
|
|
|
- Totales agregados (contactos y oportunidades) Marca vs Sucursales.
|
|
- Desglose por sucursal con el conteo local.
|
|
- Listado de contactos en sucursal que no estan en Marca.
|
|
- Listado de contactos en Marca que no estan en la sucursal que les
|
|
corresponde segun el verificador (campo TIENDA del contacto Marca cruzado
|
|
con la columna TIENDA del verificador y la columna ID LOCATION BUCEFALO).
|
|
- Listado de oportunidades en sucursal sin contraparte en Marca.
|
|
|
|
Es read-only sobre `mp_manager.sqlite`. No toca GHL. La logica vive en
|
|
`run_audit()` para que el endpoint del dashboard la reutilice.
|
|
|
|
Uso CLI:
|
|
python scripts/audit_brand_vs_branches_totals.py
|
|
python scripts/audit_brand_vs_branches_totals.py --show-missing
|
|
python scripts/audit_brand_vs_branches_totals.py --json
|
|
python scripts/audit_brand_vs_branches_totals.py --limit-missing 100
|
|
"""
|
|
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import os
|
|
import re
|
|
import sqlite3
|
|
import sys
|
|
import unicodedata
|
|
from collections import defaultdict
|
|
|
|
|
|
# Make the repository root and this scripts directory importable. The scripts
# directory is inserted last so it ends up ahead of the root on sys.path.
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(SCRIPTS_DIR)
for _import_root in (ROOT_DIR, SCRIPTS_DIR):
    if _import_root not in sys.path:
        sys.path.insert(0, _import_root)
del _import_root
|
|
|
|
from paths import DB_PATH
|
|
from common import match_contacts as _match_contacts
|
|
# CSV export of the branch verifier sheet, expected at the repository root.
_VERIFIER_CSV_NAME = "Monte Providencia - Verificador de sucursales y correos - Sucursales.csv"
VERIFIER_CSV = os.path.join(ROOT_DIR, _VERIFIER_CSV_NAME)

# Location id of the main Brand account.
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"

# Default similarity threshold handed to common.match_contacts by find_match.
MATCH_THRESHOLD = 0.80
|
|
|
|
# Digital hub consolidation. Some "shell" branches (physical / non-digital) do
# not receive digital leads: those leads live in the "hub" location that
# absorbs them. The verifier CSV does not always carry the digital -> hub row
# for every store (e.g. TIENDA=METEPEC only maps to 85937, which is empty,
# while the leads live in Pilares 85935). Without this map those contacts show
# up as "present in another branch, not the assigned one" -> massive
# false-positive noise.
# Documented in the memory `verificador_tipo_de_tienda_colapso` and in Baserow
# 750 (cluster Toluca / Metepec / Lerma -> Pilares 85935).
PILARES_HUB_LOC = "uZnMH5bO6MXTHcgHeyq9"  # 85935 - MP - Pilares (digital hub)

# Shell locations whose digital leads are absorbed by the Pilares hub.
_PILARES_SHELL_LOCS = (
    "NSDniGzjxotVDNa5YxqW",  # 85937 - MP - METEPEC
    "Xqpdy12avIk4NFsOhPBX",  # 85941 - MP - Grand Plaza
    "pMPs9M4RaGJvWwfIFVIo",  # 85941 - MP - Grand Plaza Toluca
    "RLAs9sQwbW2DOwzrTMYI",  # 85939 - MP - Independencia
    "UsHXqoj2l6ND7Uc7sEo2",  # 85938 - MP - SENDERO
    "lWp7F6rsgTjy3voFBZ1m",  # 85935 - MP - Lerma
    "clhDZ0hIllKfV0AcgW53",  # 85940 - MP - Isidro Fabela (0 contacts, NOT DIGITAL)
)

# shell location id -> hub location id
DIGITAL_HUB_BY_SHELL = dict.fromkeys(_PILARES_SHELL_LOCS, PILARES_HUB_LOC)
|
|
|
|
DEMO_PATTERN = re.compile(r"\bdemo\b", re.IGNORECASE)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Utilidades
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def safe_print(*args, **kwargs):
    """Write the arguments to stdout without failing on unencodable characters.

    Only the ``sep`` (default ``" "``) and ``end`` (default ``"\\n"``) keyword
    arguments are honoured; any other keyword argument is ignored. When the
    stream raises ``UnicodeEncodeError`` the text is re-encoded with
    ``errors="replace"`` using the stream encoding (``utf-8`` when the stream
    reports none) and written again.
    """
    separator = kwargs.get("sep", " ")
    terminator = kwargs.get("end", "\n")
    message = separator.join(map(str, args))
    stream_encoding = sys.stdout.encoding or "utf-8"
    try:
        sys.stdout.write(message + terminator)
        sys.stdout.flush()
    except UnicodeEncodeError:
        # Round-trip through the stream encoding so offending characters
        # become replacement markers the stream can emit.
        degraded = message.encode(stream_encoding, errors="replace").decode(stream_encoding)
        sys.stdout.write(degraded + terminator)
        sys.stdout.flush()
|
|
|
|
|
|
def strip_accents(value):
    """Return ``str(value)`` with combining marks removed; ``""`` for falsy input.

    The text is decomposed with NFD and every code point whose Unicode
    category is ``Mn`` (non-spacing mark) is dropped.
    """
    if not value:
        return ""
    decomposed = unicodedata.normalize("NFD", str(value))
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return "".join(base_chars)
|
|
|
|
|
|
def normalize_phone(phone, last_n=10):
    """Reduce a phone value to its digits, keeping only the trailing ``last_n``.

    Values with fewer than ``last_n`` digits are returned whole (digits only);
    falsy input yields ``""``.
    """
    only_digits = re.sub(r"\D+", "", str(phone or ""))
    if len(only_digits) < last_n:
        return only_digits
    return only_digits[-last_n:]
|
|
|
|
|
|
def normalize_email(email):
    """Return the email trimmed and lowercased; falsy input yields ``""``."""
    raw = str(email) if email else ""
    return raw.strip().lower()
|
|
|
|
|
|
def normalize_tienda(value):
    """Normalize a store name for matching.

    Accents are stripped, the text is uppercased and runs of whitespace are
    collapsed to a single space. Falsy input yields ``""``.
    """
    if not value:
        return ""
    unaccented = strip_accents(str(value))
    tokens = unaccented.upper().split()
    return " ".join(tokens)
|
|
|
|
|
|
def is_demo_account(name):
    """Return True when the account name matches ``DEMO_PATTERN``.

    The pattern looks for "demo" as a standalone word, case-insensitively.
    Empty or ``None`` names are never demo accounts.
    """
    if not name:
        return False
    return DEMO_PATTERN.search(name) is not None
|
|
|
|
|
|
# Detects contacts that look like test data: standalone keywords (surrounded
# by non-alphanumeric separators or the string edges) in any of the basic
# fields. It should match 'Juan Prueba', 'test@test.com' and '+52 prueba', but
# NOT false positives such as 'Pruebal' or 'Contestino'.
TEST_KEYWORDS_PATTERN = re.compile(
    r"(?:^|[^a-z0-9])(test|testing|prueba|pruebas)(?:$|[^a-z0-9])",
    re.IGNORECASE,
)


def looks_like_test_contact(c):
    """Return True if the contact's name, email or phone holds a test keyword.

    The non-empty values of ``first_name``, ``last_name``, ``email`` and
    ``phone`` are accent-stripped, lowercased and joined with spaces before
    being searched with ``TEST_KEYWORDS_PATTERN``.
    """
    raw_values = (c.get(field) for field in ("first_name", "last_name", "email", "phone"))
    haystack = " ".join(strip_accents(value).lower() for value in raw_values if value)
    if not haystack:
        return False
    return TEST_KEYWORDS_PATTERN.search(haystack) is not None
|
|
|
|
|
|
def fmt_contact(c):
    """Build the compact contact dict used in the audit listings.

    Returns ``{"id", "name", "phone", "email"}``. ``name`` is the first and
    last name joined by a space, or ``"Sin nombre"`` when both are empty;
    missing phone/email become ``""``.
    """
    given = c.get("first_name") or ""
    family = c.get("last_name") or ""
    display_name = f"{given} {family}".strip()
    if not display_name:
        display_name = "Sin nombre"
    return {
        "id": c.get("id"),
        "name": display_name,
        "phone": c.get("phone") or "",
        "email": c.get("email") or "",
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Carga de datos
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def load_verifier():
    """Load the verifier CSV into two lookup tables.

    Returns:
        ``(by_location, by_tienda)`` where

        * ``by_location`` maps each ``ID LOCATION BUCEFALO`` value to
          ``{"tienda_raw", "tienda_norm", "sucursal"}``. ``tienda_norm`` and
          ``sucursal`` are ``None`` when the cell is empty or ``"-"``. A
          repeated location id keeps the last row read.
        * ``by_tienda`` maps a normalized TIENDA to a location id, skipping
          the Brand location. When several rows share a TIENDA the first one
          wins.

        Both dicts are empty when the CSV file does not exist.
    """
    by_location = {}
    by_tienda = {}
    if not os.path.exists(VERIFIER_CSV):
        return by_location, by_tienda

    def _cell(row, column):
        # Missing columns and None cells are treated as empty text.
        return (row.get(column) or "").strip()

    with open(VERIFIER_CSV, encoding="utf-8-sig", newline="") as fh:
        for row in csv.DictReader(fh):
            loc = _cell(row, "ID LOCATION BUCEFALO")
            if not loc:
                continue
            tienda_raw = _cell(row, "TIENDA")
            sucursal_raw = _cell(row, "SUCURSAL")
            # "-" is the sheet's placeholder for "no value".
            tienda_norm = None
            if tienda_raw not in ("", "-"):
                tienda_norm = normalize_tienda(tienda_raw)
            by_location[loc] = {
                "tienda_raw": tienda_raw,
                "tienda_norm": tienda_norm,
                "sucursal": None if sucursal_raw in ("", "-") else sucursal_raw,
            }
            if tienda_norm and loc != BRAND_LOCATION_ID:
                # A store may appear on several verifier rows; the location id
                # is the discriminator, so keep the first mapping seen.
                by_tienda.setdefault(tienda_norm, loc)
    return by_location, by_tienda
|
|
|
|
|
|
def resolve_tienda_field_id(conn, location_id):
    """Return the field id of the contact custom field named 'TIENDA'.

    Reads ``object_schemas`` rows for the location (``object_key='contact'``)
    and compares each field name accent-insensitively, case-insensitively and
    trimmed. Rows must be indexable by column name. Returns ``None`` when no
    field matches.
    """
    schema_rows = conn.execute(
        "SELECT field_id, field_name FROM object_schemas WHERE location_id=? AND object_key='contact'",
        (location_id,),
    ).fetchall()
    return next(
        (
            row["field_id"]
            for row in schema_rows
            if strip_accents(row["field_name"]).lower().strip() == "tienda"
        ),
        None,
    )
|
|
|
|
|
|
def extract_tienda_from_custom_fields(custom_fields_json, field_id):
    """Return the value stored for ``field_id`` in a custom-fields JSON list.

    Args:
        custom_fields_json: JSON text expected to decode to a list of objects,
            each identified by ``"id"`` or ``"fieldId"``.
        field_id: id of the custom field to look up.

    Returns:
        The first value that is neither ``None`` nor ``""`` among the entry's
        ``"value"``, ``"fieldValue"`` and ``"fieldValueString"`` keys, or
        ``None`` when the arguments are empty, the JSON is invalid or not a
        list, or no matching entry holds a value.
    """
    if not field_id or not custom_fields_json:
        return None
    try:
        entries = json.loads(custom_fields_json)
    except (TypeError, ValueError, RecursionError):
        # Undecodable payloads are treated as "no custom fields".
        return None
    if not isinstance(entries, list):
        return None
    for entry in entries:
        # Skip malformed items (null, strings, numbers) instead of crashing.
        if not isinstance(entry, dict):
            continue
        if entry.get("id") != field_id and entry.get("fieldId") != field_id:
            continue
        for key in ("value", "fieldValue", "fieldValueString"):
            candidate = entry.get(key)
            if candidate is not None and candidate != "":
                return candidate
    return None
|
|
|
|
|
|
def resolve_sucursal_field_id(conn, location_id):
    """Return the field id of the contact custom field named 'Sucursal'.

    Reads ``object_schemas`` rows for the location (``object_key='contact'``)
    and compares each field name accent-insensitively, case-insensitively and
    trimmed. Rows must be indexable by column name. Returns ``None`` when no
    field matches.
    """
    schema_rows = conn.execute(
        "SELECT field_id, field_name FROM object_schemas WHERE location_id=? AND object_key='contact'",
        (location_id,),
    ).fetchall()
    return next(
        (
            row["field_id"]
            for row in schema_rows
            if strip_accents(row["field_name"]).lower().strip() == "sucursal"
        ),
        None,
    )
|
|
|
|
|
|
# Campo de oportunidad que vincula Sucursal<->Marca (llave determinística).
|
|
OPP_LINK_FIELD_KEY = "opportunity.id_oportunidad_sucursal"
|
|
OPP_LINK_FIELD_NAME = "id oportunidad sucursal"
|
|
# Los ids nativos de GHL son exactamente 20 chars alfanuméricos.
|
|
OPP_ID_PATTERN = re.compile(r"^[A-Za-z0-9]{20}$")
|
|
|
|
|
|
def resolve_opp_link_field_id(conn, location_id):
    """Return the field id of the opportunity field 'ID Oportunidad Sucursal'.

    Looks through ``object_schemas`` rows for the location
    (``object_key='opportunity'``). An exact ``field_key`` match against
    ``OPP_LINK_FIELD_KEY`` on any row takes precedence; only if none exists is
    the normalized field name compared with ``OPP_LINK_FIELD_NAME``. Returns
    ``None`` when neither lookup succeeds.
    """
    schema_rows = conn.execute(
        "SELECT field_id, field_name, field_key FROM object_schemas "
        "WHERE location_id=? AND object_key='opportunity'",
        (location_id,),
    ).fetchall()

    def _by_key(row):
        return row["field_key"] == OPP_LINK_FIELD_KEY

    def _by_name(row):
        return strip_accents(row["field_name"]).lower().strip() == OPP_LINK_FIELD_NAME

    # Full pass per criterion so a key match anywhere beats any name match.
    for criterion in (_by_key, _by_name):
        for row in schema_rows:
            if criterion(row):
                return row["field_id"]
    return None
|
|
|
|
|
|
def extract_opp_link_value(custom_fields_json, field_id):
    """Return the 'ID Oportunidad Sucursal' value stored on an opportunity.

    Args:
        custom_fields_json: JSON text expected to decode to a list of objects,
            each identified by ``"id"`` or ``"fieldId"``.
        field_id: id of the link custom field.

    Returns:
        The first value that is neither ``None`` nor ``""`` among the entry's
        ``"value"``, ``"fieldValue"`` and ``"fieldValueString"`` keys, or
        ``None`` when the arguments are empty, the JSON is invalid or not a
        list, or the field is absent/empty.
    """
    if not field_id or not custom_fields_json:
        return None
    try:
        entries = json.loads(custom_fields_json)
    except (TypeError, ValueError, RecursionError):
        # Undecodable payloads are treated as "no custom fields".
        return None
    if not isinstance(entries, list):
        return None
    for entry in entries:
        # Skip malformed items (null, strings, numbers) instead of crashing.
        if not isinstance(entry, dict):
            continue
        if entry.get("id") != field_id and entry.get("fieldId") != field_id:
            continue
        for key in ("value", "fieldValue", "fieldValueString"):
            candidate = entry.get(key)
            if candidate is not None and candidate != "":
                return candidate
    return None
|
|
|
|
|
|
# Campo de contacto que vincula Sucursal<->Marca (llave determinística, paralelo a opp).
|
|
CONTACT_LINK_FIELD_KEY = "contact.id_contacto_sucursal"
|
|
CONTACT_LINK_FIELD_NAME = "id contacto sucursal"
|
|
|
|
|
|
def resolve_contact_link_field_id(conn, location_id):
    """Return the field id of the contact field 'ID Contacto Sucursal'.

    Looks through ``object_schemas`` rows for the location
    (``object_key='contact'``). An exact ``field_key`` match against
    ``CONTACT_LINK_FIELD_KEY`` on any row takes precedence; only if none exists
    is the normalized field name compared with ``CONTACT_LINK_FIELD_NAME``.
    Returns ``None`` when neither lookup succeeds.
    """
    schema_rows = conn.execute(
        "SELECT field_id, field_name, field_key FROM object_schemas "
        "WHERE location_id=? AND object_key='contact'",
        (location_id,),
    ).fetchall()

    def _by_key(row):
        return row["field_key"] == CONTACT_LINK_FIELD_KEY

    def _by_name(row):
        return strip_accents(row["field_name"]).lower().strip() == CONTACT_LINK_FIELD_NAME

    # Full pass per criterion so a key match anywhere beats any name match.
    for criterion in (_by_key, _by_name):
        for row in schema_rows:
            if criterion(row):
                return row["field_id"]
    return None
|
|
|
|
|
|
def extract_contact_link_value(custom_fields_json, field_id):
    """Return the 'ID Contacto Sucursal' value stored on a contact.

    Args:
        custom_fields_json: JSON text expected to decode to a list of objects,
            each identified by ``"id"`` or ``"fieldId"``.
        field_id: id of the link custom field.

    Returns:
        The first value that is neither ``None`` nor ``""`` among the entry's
        ``"value"``, ``"fieldValue"`` and ``"fieldValueString"`` keys, or
        ``None`` when the arguments are empty, the JSON is invalid or not a
        list, or the field is absent/empty.
    """
    if not field_id or not custom_fields_json:
        return None
    try:
        entries = json.loads(custom_fields_json)
    except (TypeError, ValueError, RecursionError):
        # Undecodable payloads are treated as "no custom fields".
        return None
    if not isinstance(entries, list):
        return None
    for entry in entries:
        # Skip malformed items (null, strings, numbers) instead of crashing.
        if not isinstance(entry, dict):
            continue
        if entry.get("id") != field_id and entry.get("fieldId") != field_id:
            continue
        for key in ("value", "fieldValue", "fieldValueString"):
            candidate = entry.get(key)
            if candidate is not None and candidate != "":
                return candidate
    return None
|
|
|
|
|
|
# Normalization patterns for tolerant matching of "Sucursal" values.
# They rewrite common abbreviations to their canonical form before comparing.
# Patterns run in list order against text that is already lowercase and
# accent-free, so order matters (specific forms before generic ones).
SUCURSAL_ABBREV_PATTERNS = [
    # "edo mex", "edo. de mex.", "edomex" and also the half-abbreviated
    # "edo de mexico": the optional "ico" keeps the bare "edo" rule below from
    # producing "estado de mexico de mexico".
    (re.compile(r"\bedo\.?\s*(?:de\s*)?mex(?:ico)?\.?\b"), "estado de mexico"),
    (re.compile(r"\bedo\b"), "estado de mexico"),
    (re.compile(r"\bcdmx\b"), "ciudad de mexico"),
    (re.compile(r"\bd\.?\s*f\.?\b"), "ciudad de mexico"),
    # "Cd." / "Cd " as a proper-name prefix (Cd. Satelite, Cd Juarez).
    # Placed after \bcdmx\b so "cdmx" is not broken apart.
    (re.compile(r"\bcd\.?\s+"), "ciudad "),
    (re.compile(r"\bn\.?\s*l\.?\b"), "nuevo leon"),
    (re.compile(r"\bqro\.?\b"), "queretaro"),
    (re.compile(r"\bpue\.?\b"), "puebla"),
    (re.compile(r"\bgto\.?\b"), "guanajuato"),
    (re.compile(r"\bmich\.?\b"), "michoacan"),
    (re.compile(r"\bmor\.?\b"), "morelos"),
    (re.compile(r"\boax\.?\b"), "oaxaca"),
    (re.compile(r"\bgro\.?\b"), "guerrero"),
    (re.compile(r"\bhgo\.?\b"), "hidalgo"),
    (re.compile(r"\btams\.?\b"), "tamaulipas"),
    (re.compile(r"\btamps\.?\b"), "tamaulipas"),
    (re.compile(r"\bchis\.?\b"), "chiapas"),
    (re.compile(r"\bcamp\.?\b"), "campeche"),
    (re.compile(r"\bq\.?\s*roo\b"), "quintana roo"),
    (re.compile(r"\bcoah\.?\b"), "coahuila"),
]
|
|
|
|
|
|
def normalize_sucursal_value(value):
    """Normalize a 'Sucursal' value for tolerant comparison.

    Steps, in order: strip accents, lowercase, turn periods and commas into
    spaces, collapse whitespace, apply every ``SUCURSAL_ABBREV_PATTERNS``
    replacement, collapse whitespace again. Falsy input yields ``""``.
    """
    if not value:
        return ""
    text = strip_accents(str(value)).lower()
    without_punct = re.sub(r"[.,]", " ", text)
    text = " ".join(without_punct.split())
    for pattern, expansion in SUCURSAL_ABBREV_PATTERNS:
        text = pattern.sub(expansion, text)
    # Expansions may introduce double spaces (e.g. "ciudad " + rest).
    return " ".join(text.split())
|
|
|
|
|
|
def resolve_location_from_sucursal(sucursal_value, verifier_by_loc):
    """Map a 'Sucursal' custom-field value to a location id, tolerantly.

    The value is compared, after ``normalize_sucursal_value``, against the
    ``"sucursal"`` entry of every verifier row except the Brand location.

    Args:
        sucursal_value: raw value of the contact's Sucursal field.
        verifier_by_loc: mapping ``location_id -> info dict`` with a
            ``"sucursal"`` key, as built by ``load_verifier``.

    Returns:
        ``(location_id, match_kind)`` where ``match_kind`` is ``"exact"`` or
        ``"substring"`` so callers can express confidence, or ``(None, None)``
        when nothing matches or the substring candidates are ambiguous.
    """
    if not sucursal_value:
        return None, None
    target = normalize_sucursal_value(sucursal_value)
    if not target:
        return None, None

    exact_loc = None
    partial_hits = []  # (location_id, absolute length difference)
    for loc_id, info in verifier_by_loc.items():
        if loc_id == BRAND_LOCATION_ID:
            continue
        # A missing/empty sucursal normalizes to "" and is skipped.
        candidate = normalize_sucursal_value(info.get("sucursal"))
        if not candidate:
            continue
        if candidate == target:
            exact_loc = loc_id
            break
        # Two-way containment: either text is a substring of the other.
        if target in candidate or candidate in target:
            partial_hits.append((loc_id, abs(len(candidate) - len(target))))

    if exact_loc:
        return exact_loc, "exact"
    if not partial_hits:
        return None, None

    # With several candidates keep the closest in length, but only when it
    # beats the runner-up by more than one character; otherwise it is too
    # ambiguous to resolve.
    partial_hits.sort(key=lambda hit: hit[1])
    if len(partial_hits) > 1 and partial_hits[1][1] - partial_hits[0][1] <= 1:
        return None, None
    return partial_hits[0][0], "substring"
|
|
|
|
|
|
def load_accounts_filtered(conn):
    """Split the ``accounts`` table into brand, branches and demo accounts.

    Returns:
        A tuple ``(brand, branches, demos)``: ``brand`` is the row dict whose
        ``location_id`` equals ``BRAND_LOCATION_ID`` (``None`` if absent),
        ``branches`` the remaining non-demo rows, and ``demos`` every row whose
        name is flagged by ``is_demo_account``. The demo check runs first, so
        a demo-named account is never reported as brand or branch.
    """
    brand = None
    branches = []
    demos = []
    for row in conn.execute("SELECT location_id, nombre, type FROM accounts").fetchall():
        account = dict(row)
        if is_demo_account(account["nombre"]):
            demos.append(account)
        elif account["location_id"] == BRAND_LOCATION_ID:
            brand = account
        else:
            branches.append(account)
    return brand, branches, demos
|
|
|
|
|
|
def load_contacts(conn, location_id):
    """Return every contact row of a location as a list of plain dicts.

    The connection must produce mapping-like rows (e.g.
    ``row_factory = sqlite3.Row``) so each row can be converted with ``dict``.
    """
    cursor = conn.execute(
        "SELECT id, first_name, last_name, phone, email, custom_fields_json, date_added "
        "FROM contacts WHERE location_id = ?",
        (location_id,),
    )
    return list(map(dict, cursor.fetchall()))
|
|
|
|
|
|
def load_opps(conn, location_id):
    """Return every opportunity row of a location as a list of plain dicts.

    The connection must produce mapping-like rows (e.g.
    ``row_factory = sqlite3.Row``) so each row can be converted with ``dict``.
    """
    cursor = conn.execute(
        "SELECT id, contact_id, status, name, pipeline_id, monetary_value, custom_fields_json "
        "FROM opportunities WHERE location_id = ?",
        (location_id,),
    )
    return list(map(dict, cursor.fetchall()))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Matching
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _contact_full_name_norm(c):
    """Return 'first last' accent-free, lowercase and whitespace-collapsed.

    Missing name parts count as empty; the result is ``""`` when both are.
    """
    given = c.get("first_name") or ""
    family = c.get("last_name") or ""
    tokens = strip_accents(f"{given} {family}").lower().split()
    return " ".join(tokens)
|
|
|
|
|
|
def build_contact_index(contacts):
    """Build the lookup indexes used for strict contact matching.

    Returns ``(by_phone, by_email, by_name)``, each a ``defaultdict(list)``
    keyed by the normalized phone, normalized email and normalized full name.

    ``by_name`` holds ALL target contacts that have a name (with or without
    phone/email): the business rule is that a Brand contact lacking phone and
    email always comes from a branch, and in the branch that contact may well
    have phone/email because it was originally captured there (form, etc.).
    The safeguard against namesake false positives lives on the source side,
    in ``find_match``: a name match is only attempted when the source has
    NEITHER phone NOR email.
    """
    by_phone = defaultdict(list)
    by_email = defaultdict(list)
    by_name = defaultdict(list)
    for contact in contacts:
        keyed_indexes = (
            (by_phone, normalize_phone(contact.get("phone"))),
            (by_email, normalize_email(contact.get("email"))),
            (by_name, _contact_full_name_norm(contact)),
        )
        for index, key in keyed_indexes:
            # Empty keys are never indexed.
            if key:
                index[key].append(contact)
    return by_phone, by_email, by_name
|
|
|
|
|
|
def find_match(contact, by_phone, by_email, by_name=None,
               return_collisions=False, threshold=MATCH_THRESHOLD):
    """Find matches for ``contact`` in cascade: phone+name -> email -> name.

    Rules:
      1. If the source has a phone, candidates sharing it count as matches
         only when ``common.match_contacts`` (called with ``threshold``) rates
         the pair ``"strong"`` or ``"medium"``. A shared phone with a
         diverging name is a collision (e.g. a couple sharing one number): it
         is NOT a match and is collected in ``collisions``.
      2. If the source has an email, candidates sharing it are matches without
         a name check, except candidates already rejected as phone collisions.
      3. Only if the source has NEITHER phone NOR email (and ``by_name`` was
         given) are candidates with the same normalized full name accepted.

    Args:
        contact: source contact dict.
        by_phone, by_email, by_name: indexes from ``build_contact_index``.
        return_collisions: if True return ``(matches, collisions)``; if False
            (default, backward compatible) return only ``matches``.
        threshold: name-similarity threshold forwarded to ``match_contacts``.

    Returns:
        A list of matched target contacts, without duplicates by ``id``, in
        discovery order; optionally paired with the phone collisions.
    """
    phone_key = normalize_phone(contact.get("phone"))
    email_key = normalize_email(contact.get("email"))
    matches = []
    collisions = []
    matched_ids = set()
    # Ids rejected on the phone pass must NOT be "rescued" later by email:
    # the name divergence invalidates the pair even if the email agrees (it
    # usually signals data mixed up by the integration, not the same person).
    phone_collision_ids = set()

    def _accept(candidate):
        matches.append(candidate)
        matched_ids.add(candidate["id"])

    if phone_key and phone_key in by_phone:
        for candidate in by_phone[phone_key]:
            if candidate["id"] in matched_ids:
                continue
            verdict = _match_contacts(contact, candidate, threshold=threshold)
            if verdict["level"] in ("strong", "medium"):
                _accept(candidate)
            else:
                collisions.append(candidate)
                phone_collision_ids.add(candidate["id"])

    if email_key and email_key in by_email:
        for candidate in by_email[email_key]:
            candidate_id = candidate["id"]
            if candidate_id in matched_ids or candidate_id in phone_collision_ids:
                continue
            _accept(candidate)

    source_has_no_identifier = not phone_key and not email_key
    if by_name is not None and source_has_no_identifier:
        name_key = _contact_full_name_norm(contact)
        if name_key and name_key in by_name:
            for candidate in by_name[name_key]:
                if candidate["id"] not in matched_ids:
                    _accept(candidate)

    if return_collisions:
        return matches, collisions
    return matches
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fuzzy matching (modalidades permisivas) — usado para detectar "posibles
|
|
# coincidencias" en otras sucursales cuando el match estricto no las encuentra.
|
|
# Output puramente informativo: NO mueve contactos a otro bucket, solo se
|
|
# adjunta como advertencia a cada item para que el operador decida.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_GMAIL_DOMAINS = {"gmail.com", "googlemail.com"}
|
|
|
|
|
|
def normalize_phone_partial(phone, last_n=7):
    """Return the last ``last_n`` digits of a phone (default 7).

    Unlike ``normalize_phone``, a value with fewer than ``last_n`` digits
    yields ``""`` rather than the short digit string.
    """
    only_digits = re.sub(r"\D+", "", str(phone or ""))
    if len(only_digits) >= last_n:
        return only_digits[-last_n:]
    return ""
|
|
|
|
|
|
def email_local_part(email):
    """Return the lowercase local part (before the first ``@``) of an email.

    Yields ``""`` when the normalized value contains no ``@``.
    """
    local, at_sign, _domain = normalize_email(email).partition("@")
    return local if at_sign else ""
|
|
|
|
|
|
def email_canonical(email):
    """Return a canonical form of the email for variation-tolerant matching.

    The address is trimmed and lowercased, any ``+alias`` suffix of the local
    part is dropped and, for the domains in ``_GMAIL_DOMAINS``, dots are
    removed from the local part and the domain is unified to ``gmail.com``
    (so ``juan.perez+spam@gmail.com`` equals ``juanperez@gmail.com``).
    Returns ``""`` when there is no ``@`` or the local part ends up empty.
    """
    local, at_sign, domain = normalize_email(email).partition("@")
    if not at_sign:
        return ""
    # Everything from the first "+" onwards is an alias tag.
    local = local.partition("+")[0]
    if domain in _GMAIL_DOMAINS:
        local = local.replace(".", "")
        domain = "gmail.com"  # unify googlemail with gmail
    if not local:
        return ""
    return f"{local}@{domain}"
|
|
|
|
|
|
def first_last_norm(c):
    """Return '<first token of first_name> <last token of last_name>', normalized.

    Both parts are accent-stripped and lowercased. This tolerates the
    inconsistent name splits found across CRMs:
      - 'Juan Pablo' as first_name + 'Franco' as last_name -> 'juan franco'
      - 'Juan' as first_name + 'Pablo Franco' as last_name -> 'juan franco'
      - 'Juan' as first_name + 'Franco Gutierrez' (paternal + maternal)
        -> 'juan gutierrez'

    Using the LAST token of last_name avoids the classic false positive of
    taking the first one, which collides when that "first surname" is really a
    middle name (e.g. 'juan pablo franco' vs 'juan pablo jimenez' would share
    the key 'juan pablo').

    Returns ``""`` when either part has no tokens.
    """
    given_tokens = strip_accents((c.get("first_name") or "").strip()).lower().split()
    family_tokens = strip_accents((c.get("last_name") or "").strip()).lower().split()
    if not given_tokens or not family_tokens:
        return ""
    return f"{given_tokens[0]} {family_tokens[-1]}"
|
|
|
|
|
|
def build_fuzzy_indexes(contacts):
    """Build the additional indexes used for permissive (fuzzy) matching.

    Returns a dict with the keys ``"phone_partial"``, ``"email_local"``,
    ``"email_canonical"`` and ``"first_last"``; each value is a
    ``defaultdict(list)`` mapping the corresponding normalized key to the
    contact dicts sharing it. Contacts are stored as given, so any location
    metadata (``_loc`` / ``_loc_name``, read later by ``find_fuzzy_matches``)
    must already be attached by the caller.
    """
    indexes = {
        "phone_partial": defaultdict(list),
        "email_local": defaultdict(list),
        "email_canonical": defaultdict(list),
        "first_last": defaultdict(list),
    }
    for contact in contacts:
        email = contact.get("email")
        keys = (
            ("phone_partial", normalize_phone_partial(contact.get("phone"))),
            ("email_local", email_local_part(email)),
            ("email_canonical", email_canonical(email)),
            ("first_last", first_last_norm(contact)),
        )
        for index_name, key in keys:
            # Empty keys are never indexed.
            if key:
                indexes[index_name][key].append(contact)
    return indexes
|
|
|
|
|
|
def find_fuzzy_matches(contact, fuzzy_indexes, exclude_ids=None, strict_match_phone=None, strict_match_email=None):
    """Return informational fuzzy matches for ``contact`` among indexed contacts.

    Args:
        contact: source contact dict.
        fuzzy_indexes: dict produced by ``build_fuzzy_indexes``.
        exclude_ids: optional iterable of contact ids to skip (strict matches
            already counted), so the warning is not duplicated.
        strict_match_phone: normalized phone of the source; candidates whose
            normalized phone equals it are dropped from the partial-phone
            strategy because they are really strict phone matches.
        strict_match_email: normalized email of the source; same idea for the
            canonical-email strategy.

    Returns:
        A list of dicts with ``id``, ``location_id``, ``location_name``,
        ``first_name``, ``last_name``, ``phone``, ``email``, ``strategy``,
        ``strategy_label`` and ``score``, sorted by descending score and then
        by location name. A candidate hit by several strategies appears once,
        with the highest-scoring strategy.
    """
    exclude_ids = set(exclude_ids or [])
    # strategy -> (human-readable label, confidence score)
    STRATEGY_LABELS = {
        "phone_partial": ("Teléfono parcial (últ. 7 dígitos)", 90),
        "email_canonical": ("Email canónico (gmail sin puntos/alias)", 95),
        "email_local": ("Misma parte local del email (otro dominio)", 70),
        # The key is first token of first_name + LAST token of last_name
        # (see first_last_norm), so the label must not say "first surname".
        "first_last": ("Mismo primer nombre + último apellido", 65),
    }
    candidates = {}  # id -> {"m", "score", "strategy", "strategy_label"}

    def _add(strategy, ms):
        """Register candidates, keeping the strongest strategy per id."""
        label, score = STRATEGY_LABELS[strategy]
        for m in ms:
            mid = m.get("id")
            if not mid or mid in exclude_ids:
                continue
            prev = candidates.get(mid)
            if prev is None or score > prev["score"]:
                candidates[mid] = {"m": m, "score": score, "strategy": strategy, "strategy_label": label}

    # Partial phone: only when the source phone has enough digits.
    src_pp = normalize_phone_partial(contact.get("phone"))
    if src_pp:
        ms = fuzzy_indexes["phone_partial"].get(src_pp, [])
        # Skip candidates that are already strict phone matches.
        if strict_match_phone:
            ms = [m for m in ms if normalize_phone(m.get("phone")) != strict_match_phone]
        _add("phone_partial", ms)

    # Canonical email: only when the source email has a domain.
    src_ec = email_canonical(contact.get("email"))
    src_email_norm = normalize_email(contact.get("email"))
    if src_ec:
        ms = fuzzy_indexes["email_canonical"].get(src_ec, [])
        if strict_match_email:
            ms = [m for m in ms if normalize_email(m.get("email")) != strict_match_email]
        _add("email_canonical", ms)

    # Same local part on a different address.
    src_el = email_local_part(contact.get("email"))
    if src_el:
        ms = fuzzy_indexes["email_local"].get(src_el, [])
        # Identical emails are covered by the canonical/strict strategies.
        ms = [m for m in ms if normalize_email(m.get("email")) != src_email_norm]
        _add("email_local", ms)

    # Same first given name + last surname token.
    src_fl = first_last_norm(contact)
    if src_fl:
        _add("first_last", fuzzy_indexes["first_last"].get(src_fl, []))

    out = [
        {
            "id": cid,
            "location_id": info["m"].get("_loc"),
            "location_name": info["m"].get("_loc_name"),
            "first_name": info["m"].get("first_name"),
            "last_name": info["m"].get("last_name"),
            "phone": info["m"].get("phone"),
            "email": info["m"].get("email"),
            "strategy": info["strategy"],
            "strategy_label": info["strategy_label"],
            "score": info["score"],
        }
        for cid, info in candidates.items()
    ]
    out.sort(key=lambda x: (-x["score"], x.get("location_name") or ""))
    return out
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Auditoria principal
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def run_audit(limit_missing=None):
|
|
"""Ejecuta la comparativa y devuelve un dict JSON-serializable.
|
|
|
|
Args:
|
|
limit_missing: si es int, recorta cada listado de ausentes a N items
|
|
(para respuestas API mas ligeras). None = sin recorte.
|
|
"""
|
|
if not os.path.exists(DB_PATH):
|
|
raise FileNotFoundError(
|
|
f"No existe {DB_PATH}. Corre una sincronizacion global primero."
|
|
)
|
|
|
|
conn = sqlite3.connect(DB_PATH)
|
|
conn.row_factory = sqlite3.Row
|
|
|
|
try:
|
|
brand, branches, demos = load_accounts_filtered(conn)
|
|
if not brand:
|
|
raise RuntimeError(
|
|
"No se encontro la cuenta de Marca en la tabla accounts. "
|
|
"Corre la sincronizacion para poblar el catalogo."
|
|
)
|
|
|
|
verifier_by_loc, verifier_by_tienda = load_verifier()
|
|
brand_tienda_field_id = resolve_tienda_field_id(conn, BRAND_LOCATION_ID)
|
|
brand_sucursal_field_id = resolve_sucursal_field_id(conn, BRAND_LOCATION_ID)
|
|
brand_opp_link_field_id = resolve_opp_link_field_id(conn, BRAND_LOCATION_ID)
|
|
brand_contact_link_field_id = resolve_contact_link_field_id(conn, BRAND_LOCATION_ID)
|
|
|
|
brand_contacts = load_contacts(conn, BRAND_LOCATION_ID)
|
|
brand_opps = load_opps(conn, BRAND_LOCATION_ID)
|
|
|
|
branch_data = {}
|
|
total_branch_contacts = 0
|
|
total_branch_opps = 0
|
|
per_branch_summary = []
|
|
|
|
for b in branches:
|
|
loc = b["location_id"]
|
|
bc = load_contacts(conn, loc)
|
|
bo = load_opps(conn, loc)
|
|
branch_data[loc] = {"contacts": bc, "opps": bo, "name": b["nombre"]}
|
|
total_branch_contacts += len(bc)
|
|
total_branch_opps += len(bo)
|
|
|
|
# ---- Indices globales de sucursal (para buscar contraparte de Marca) ----
|
|
all_branch_contacts = []
|
|
for loc, data in branch_data.items():
|
|
for c in data["contacts"]:
|
|
aug = dict(c)
|
|
aug["_loc"] = loc
|
|
aug["_loc_name"] = data["name"]
|
|
all_branch_contacts.append(aug)
|
|
|
|
branch_idx_phone, branch_idx_email, branch_idx_name = build_contact_index(all_branch_contacts)
|
|
|
|
# Indices fuzzy globales (para detectar "posibles coincidencias" en
|
|
# otras sucursales cuando el match estricto no las encuentra). Solo
|
|
# se consultan para los items del bucket missing_in_assigned_branch.
|
|
branch_fuzzy_indexes = build_fuzzy_indexes(all_branch_contacts)
|
|
|
|
# Indice por sucursal individual (para verificar si el contacto Marca esta
|
|
# en la sucursal especifica que le toca segun el verificador).
|
|
per_branch_idx = {}
|
|
for loc, data in branch_data.items():
|
|
per_branch_idx[loc] = build_contact_index(data["contacts"])
|
|
|
|
# Opps por contact_id en cada sucursal y en Marca.
|
|
brand_opps_by_cid = defaultdict(list)
|
|
for o in brand_opps:
|
|
brand_opps_by_cid[o["contact_id"]].append(o)
|
|
|
|
# Índice de opps de Marca por el valor del campo "ID Oportunidad Sucursal"
|
|
# (= id nativo de la opp de sucursal de origen). Es la llave determinística
|
|
# para el match por campo (criterio principal del bucket de opps faltantes).
|
|
brand_opps_by_link = {}
|
|
for o in brand_opps:
|
|
lv = extract_opp_link_value(o.get("custom_fields_json"), brand_opp_link_field_id)
|
|
if lv:
|
|
brand_opps_by_link.setdefault(lv, o)
|
|
|
|
# Índice de contactos de Marca por el valor del campo "ID Contacto Sucursal"
|
|
# (= id nativo del contacto de sucursal de origen). Llave determinística
|
|
# para match por campo en el bucket contacts_in_branch_not_in_brand.
|
|
brand_contacts_by_link = {}
|
|
for c in brand_contacts:
|
|
lv = extract_contact_link_value(c.get("custom_fields_json"), brand_contact_link_field_id)
|
|
if lv:
|
|
brand_contacts_by_link.setdefault(lv, c)
|
|
|
|
per_branch_opps_by_cid = {}
|
|
for loc, data in branch_data.items():
|
|
grouped = defaultdict(list)
|
|
for o in data["opps"]:
|
|
grouped[o.get("contact_id")].append(o)
|
|
per_branch_opps_by_cid[loc] = grouped
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 1) Contactos en sucursal sin contraparte en Marca
|
|
# ----------------------------------------------------------------------
|
|
brand_idx_phone, brand_idx_email, brand_idx_name = build_contact_index(brand_contacts)
|
|
|
|
missing_in_brand = [] # contactos sucursal que no estan en Marca
|
|
for loc, data in branch_data.items():
|
|
for c in data["contacts"]:
|
|
# Criterio PRINCIPAL: match por el campo "ID Contacto Sucursal".
|
|
# Si existe un contacto Marca cuyo valor de ese campo == id nativo
|
|
# de este contacto de sucursal, está replicado. Determinístico.
|
|
if c.get("id") in brand_contacts_by_link:
|
|
continue
|
|
# Respaldo: lógica histórica por phone/email/name.
|
|
if find_match(c, brand_idx_phone, brand_idx_email, brand_idx_name):
|
|
continue
|
|
opps_here = per_branch_opps_by_cid[loc].get(c["id"], [])
|
|
missing_in_brand.append({
|
|
**fmt_contact(c),
|
|
"branch_location_id": loc,
|
|
"branch_name": data["name"],
|
|
"opps_in_branch": len(opps_here),
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 2) Contactos en Marca que no estan en la sucursal que les corresponde
|
|
# por el verificador (TIENDA del contacto -> location_id).
|
|
# ----------------------------------------------------------------------
|
|
missing_in_assigned_branch = [] # NO esta en ninguna sucursal (incluye la asignada). Tiene TIENDA.
|
|
present_in_other_branch_not_assigned = [] # Esta en OTRA sucursal, no la asignada.
|
|
probable_duplicate_in_brand = [] # Marca tiene phone/email pero hay homónimo con phone/email en la sucursal asignada -> probable duplicado en Marca.
|
|
brand_without_tienda = [] # contactos Marca sin TIENDA poblada
|
|
brand_with_unknown_tienda = [] # contactos Marca con TIENDA que no matchea ninguna fila del verificador
|
|
brand_present_in_any_branch = 0
|
|
brand_not_in_any_branch = [] # contacto Marca que no aparece en ninguna sucursal (no asignable)
|
|
|
|
# Pre-indexar nombres por sucursal para encontrar el candidato exacto en la
|
|
# sucursal asignada cuando el contacto Marca no matchea por phone/email.
|
|
per_branch_name_idx = {}
|
|
per_branch_by_id = {}
|
|
for loc, data in branch_data.items():
|
|
name_idx = {}
|
|
by_id = {}
|
|
for bc in data["contacts"]:
|
|
by_id[bc["id"]] = bc
|
|
full = f"{bc.get('first_name') or ''} {bc.get('last_name') or ''}"
|
|
nm = " ".join(strip_accents(full).lower().split())
|
|
if nm:
|
|
name_idx.setdefault(nm, []).append(bc)
|
|
per_branch_name_idx[loc] = name_idx
|
|
per_branch_by_id[loc] = by_id
|
|
|
|
def _enrich_other_branches(global_matches_list):
    """Build the ``other_branches`` payload for a list of branch matches.

    Only the first five matches are reported. Each entry carries the
    match's location id (``_loc``), a display name (the match's own
    ``_loc_name`` when truthy, otherwise the name stored in ``branch_data``
    for that location), the matched contact id, and the raw TIENDA value
    the verifier has for that location (``None`` when the verifier has no
    row for it).
    """
    enriched = []
    for match in global_matches_list[:5]:
        branch_loc = match["_loc"]
        # A location missing from the verifier yields an empty row, so the
        # TIENDA lookup below degrades to None instead of failing.
        verifier_row = verifier_by_loc.get(branch_loc) or {}
        branch_label = match.get("_loc_name")
        if not branch_label:
            branch_label = branch_data.get(branch_loc, {}).get("name")
        enriched.append(dict(
            location_id=branch_loc,
            name=branch_label,
            id=match["id"],
            tienda_value=verifier_row.get("tienda_raw"),
        ))
    return enriched
|
|
|
|
for c in brand_contacts:
|
|
tienda_value = extract_tienda_from_custom_fields(
|
|
c.get("custom_fields_json"), brand_tienda_field_id
|
|
)
|
|
sucursal_value = extract_tienda_from_custom_fields(
|
|
c.get("custom_fields_json"), brand_sucursal_field_id
|
|
)
|
|
tienda_norm = normalize_tienda(tienda_value) if tienda_value else None
|
|
target_loc = verifier_by_tienda.get(tienda_norm) if tienda_norm else None
|
|
# Resolution source: "tienda" | "sucursal_exact" | "sucursal_substring" | None
|
|
target_loc_source = "tienda" if target_loc else None
|
|
target_loc_match_kind = None
|
|
# Segundo check: si TIENDA no resolvio, intentar mapear via Sucursal
|
|
# con matching tolerante (abreviaturas + substring).
|
|
if not target_loc and sucursal_value:
|
|
fb_loc, fb_kind = resolve_location_from_sucursal(sucursal_value, verifier_by_loc)
|
|
if fb_loc:
|
|
target_loc = fb_loc
|
|
target_loc_source = f"sucursal_{fb_kind}"
|
|
target_loc_match_kind = fb_kind
|
|
|
|
# Match global contra todas las sucursales (informativo)
|
|
global_matches = find_match(c, branch_idx_phone, branch_idx_email, branch_idx_name)
|
|
if global_matches:
|
|
brand_present_in_any_branch += 1
|
|
else:
|
|
brand_not_in_any_branch.append({
|
|
**fmt_contact(c),
|
|
"tienda": tienda_value,
|
|
"sucursal": sucursal_value,
|
|
"expected_location_id": target_loc,
|
|
"expected_branch_name": branch_data.get(target_loc, {}).get("name") if target_loc else None,
|
|
"resolution_source": target_loc_source,
|
|
"opps_in_brand": len(brand_opps_by_cid.get(c["id"], [])),
|
|
})
|
|
|
|
if not tienda_value:
|
|
# Enriquecer con datos del CF Sucursal para que el dashboard pueda
|
|
# ofrecer "Llenar TIENDA desde Sucursal". Solo es resoluble si
|
|
# Sucursal mapea a una sucursal del verificador con TIENDA poblada.
|
|
sucursal_resolved_loc = None
|
|
sucursal_resolved_kind = None
|
|
expected_tienda = None
|
|
expected_branch_name = None
|
|
if sucursal_value:
|
|
fb_loc, fb_kind = resolve_location_from_sucursal(
|
|
sucursal_value, verifier_by_loc
|
|
)
|
|
if fb_loc:
|
|
sucursal_resolved_loc = fb_loc
|
|
sucursal_resolved_kind = fb_kind
|
|
vinfo = verifier_by_loc.get(fb_loc) or {}
|
|
expected_tienda = vinfo.get("tienda_raw")
|
|
expected_branch_name = branch_data.get(fb_loc, {}).get("name")
|
|
brand_without_tienda.append({
|
|
**fmt_contact(c),
|
|
"sucursal": sucursal_value or "",
|
|
"sucursal_resolved_location_id": sucursal_resolved_loc,
|
|
"sucursal_resolution_kind": sucursal_resolved_kind,
|
|
"expected_tienda": expected_tienda,
|
|
"expected_branch_name": expected_branch_name,
|
|
"looks_like_test": looks_like_test_contact(c),
|
|
})
|
|
continue
|
|
|
|
if not target_loc:
|
|
brand_with_unknown_tienda.append({
|
|
**fmt_contact(c),
|
|
"tienda": tienda_value,
|
|
})
|
|
continue
|
|
|
|
# Buscar en la sucursal asignada
|
|
idx = per_branch_idx.get(target_loc)
|
|
if not idx:
|
|
# La sucursal asignada esta filtrada por demo o no esta cacheada.
|
|
continue
|
|
branch_phone_idx, branch_email_idx, branch_name_idx_loc = idx
|
|
in_assigned = find_match(c, branch_phone_idx, branch_email_idx, branch_name_idx_loc)
|
|
if not in_assigned:
|
|
# Si la sucursal asignada es una "shell" absorbida por un hub
|
|
# digital (Toluca/Metepec/Lerma -> Pilares), aceptar la presencia
|
|
# en el hub como correcta. Evita ~82 falsos positivos.
|
|
hub_loc = DIGITAL_HUB_BY_SHELL.get(target_loc)
|
|
hub_idx = per_branch_idx.get(hub_loc) if hub_loc else None
|
|
if hub_idx and find_match(c, hub_idx[0], hub_idx[1], hub_idx[2]):
|
|
in_assigned = True
|
|
if in_assigned:
|
|
continue # Esta donde corresponde, sin discrepancia.
|
|
|
|
opps_in_brand = len(brand_opps_by_cid.get(c["id"], []))
|
|
other_branches_enriched = _enrich_other_branches(global_matches)
|
|
|
|
if global_matches:
|
|
# Caso B: esta en OTRA sucursal (no la asignada).
|
|
present_in_other_branch_not_assigned.append({
|
|
**fmt_contact(c),
|
|
"tienda": tienda_value,
|
|
"expected_location_id": target_loc,
|
|
"expected_branch_name": branch_data[target_loc]["name"],
|
|
"opps_in_brand": opps_in_brand,
|
|
"other_branches": other_branches_enriched,
|
|
})
|
|
else:
|
|
# Caso A o Caso D: el contacto Marca no matcheó con NINGUNA
|
|
# sucursal por phone/email. Buscar homónimos exactos por nombre
|
|
# en la sucursal asignada para decidir el sub-caso.
|
|
brand_full = f"{c.get('first_name') or ''} {c.get('last_name') or ''}"
|
|
brand_name_norm = " ".join(strip_accents(brand_full).lower().split())
|
|
candidates_by_name = []
|
|
if brand_name_norm:
|
|
candidates_by_name = per_branch_name_idx.get(target_loc, {}).get(brand_name_norm, []) or []
|
|
|
|
# Caso D: probable duplicado en Marca. El contacto Marca SÍ tiene
|
|
# phone/email, no matcheo en ninguna sucursal, pero hay un homónimo
|
|
# con identificadores fuertes en la sucursal asignada. Lo más
|
|
# probable: en sucursal está el contacto bueno y en Marca quedó
|
|
# un registro extra con otro número/email.
|
|
brand_phone_norm = normalize_phone(c.get("phone"))
|
|
brand_email_norm = normalize_email(c.get("email"))
|
|
brand_has_strong_id = bool(brand_phone_norm or brand_email_norm)
|
|
candidate_with_strong_id = None
|
|
for cand in candidates_by_name:
|
|
if normalize_phone(cand.get("phone")) or normalize_email(cand.get("email")):
|
|
candidate_with_strong_id = cand
|
|
break
|
|
|
|
if brand_has_strong_id and candidate_with_strong_id:
|
|
probable_duplicate_in_brand.append({
|
|
**fmt_contact(c),
|
|
"tienda": tienda_value,
|
|
"expected_location_id": target_loc,
|
|
"expected_branch_name": branch_data[target_loc]["name"],
|
|
"opps_in_brand": opps_in_brand,
|
|
"branch_existing_contact": {
|
|
"id": candidate_with_strong_id["id"],
|
|
"phone": candidate_with_strong_id.get("phone"),
|
|
"email": candidate_with_strong_id.get("email"),
|
|
"first_name": candidate_with_strong_id.get("first_name"),
|
|
"last_name": candidate_with_strong_id.get("last_name"),
|
|
},
|
|
"homonyms_in_branch_count": len(candidates_by_name),
|
|
})
|
|
else:
|
|
# Caso A: contacto genuinamente ausente. Mantener candidato
|
|
# por nombre para el botón de update-branch-from-brand
|
|
# cuando aplique (caso típico: ambos sin phone/email).
|
|
target_candidate = None
|
|
if len(candidates_by_name) == 1:
|
|
cand = candidates_by_name[0]
|
|
target_candidate = {
|
|
"id": cand["id"],
|
|
"phone": cand.get("phone"),
|
|
"email": cand.get("email"),
|
|
"first_name": cand.get("first_name"),
|
|
"last_name": cand.get("last_name"),
|
|
}
|
|
# Fuzzy: explora otras sucursales con modalidades permisivas
|
|
# (teléfono parcial, email canónico/local, primer-apellido).
|
|
# Es puramente informativo — el contacto sigue en este bucket.
|
|
fuzzy = find_fuzzy_matches(
|
|
c,
|
|
branch_fuzzy_indexes,
|
|
exclude_ids=None, # aquà global_matches está vacÃo por definición
|
|
strict_match_phone=normalize_phone(c.get("phone")),
|
|
strict_match_email=normalize_email(c.get("email")),
|
|
)
|
|
# Enriquece cada match con la TIENDA del verificador para
|
|
# dar contexto al operador (a qué sucursal apunta cada uno).
|
|
for fm in fuzzy:
|
|
loc_id_fm = fm.get("location_id")
|
|
if loc_id_fm:
|
|
vinfo = verifier_by_loc.get(loc_id_fm) or {}
|
|
fm["location_tienda"] = vinfo.get("tienda_raw")
|
|
missing_in_assigned_branch.append({
|
|
**fmt_contact(c),
|
|
"tienda": tienda_value,
|
|
"expected_location_id": target_loc,
|
|
"expected_branch_name": branch_data[target_loc]["name"],
|
|
"opps_in_brand": opps_in_brand,
|
|
"branch_target_candidate": target_candidate,
|
|
"branch_target_candidates_count": len(candidates_by_name),
|
|
"fuzzy_matches": fuzzy,
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 3) Oportunidades sin contraparte
|
|
# ----------------------------------------------------------------------
|
|
# Para cada opp de sucursal, ver si su contacto tiene match en Marca y si
|
|
# ese contact Marca tiene al menos 1 opp. Si no, es opp ausente en Marca.
|
|
missing_opps_in_brand = []
|
|
for loc, data in branch_data.items():
|
|
bc_by_id = {c["id"]: c for c in data["contacts"]}
|
|
for o in data["opps"]:
|
|
# Criterio PRINCIPAL: match por el campo "ID Oportunidad Sucursal".
|
|
# Si existe una opp de Marca cuyo valor de ese campo == el id nativo
|
|
# de esta opp de sucursal, está replicada. Compara cada opp de forma
|
|
# individual -> detecta el gap multi-empeño. Si no hay match por
|
|
# campo, cae al respaldo por contacto (lógica histórica de abajo).
|
|
if o.get("id") in brand_opps_by_link:
|
|
continue
|
|
contact = bc_by_id.get(o.get("contact_id"))
|
|
if not contact:
|
|
# opp huerfana sin contacto cacheado en sucursal -> reportar pero como anomaly
|
|
missing_opps_in_brand.append({
|
|
"id": o["id"],
|
|
"name": o.get("name") or "",
|
|
"status": o.get("status") or "",
|
|
"monetary_value": o.get("monetary_value") or 0,
|
|
"branch_location_id": loc,
|
|
"branch_name": data["name"],
|
|
"contact_id": o.get("contact_id") or "",
|
|
"contact_name": "(contacto no cacheado)",
|
|
"contact_phone": "",
|
|
"contact_email": "",
|
|
"reason": "contacto_huerfano",
|
|
})
|
|
continue
|
|
marca_matches = find_match(contact, brand_idx_phone, brand_idx_email, brand_idx_name)
|
|
if not marca_matches:
|
|
missing_opps_in_brand.append({
|
|
"id": o["id"],
|
|
"name": o.get("name") or "",
|
|
"status": o.get("status") or "",
|
|
"monetary_value": o.get("monetary_value") or 0,
|
|
"branch_location_id": loc,
|
|
"branch_name": data["name"],
|
|
"contact_id": contact["id"],
|
|
"contact_name": fmt_contact(contact)["name"],
|
|
"contact_phone": contact.get("phone") or "",
|
|
"contact_email": contact.get("email") or "",
|
|
"reason": "contacto_no_en_marca",
|
|
})
|
|
continue
|
|
# contacto SI esta en Marca, ver si tiene opps replicadas
|
|
has_brand_opp = any(brand_opps_by_cid.get(m["id"]) for m in marca_matches)
|
|
if not has_brand_opp:
|
|
missing_opps_in_brand.append({
|
|
"id": o["id"],
|
|
"name": o.get("name") or "",
|
|
"status": o.get("status") or "",
|
|
"monetary_value": o.get("monetary_value") or 0,
|
|
"branch_location_id": loc,
|
|
"branch_name": data["name"],
|
|
"contact_id": contact["id"],
|
|
"contact_name": fmt_contact(contact)["name"],
|
|
"contact_phone": contact.get("phone") or "",
|
|
"contact_email": contact.get("email") or "",
|
|
"reason": "opp_no_replicada",
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 3b) Oportunidades con el campo "ID Oportunidad Sucursal" vacío o inválido
|
|
# ----------------------------------------------------------------------
|
|
# El valor debe ser el id nativo de la opp (20 chars alfanuméricos). Vacío
|
|
# o len != 20 => inválido. Sucursales son accionables (botón de llenado =
|
|
# su propio id); Marca es informativo (su campo se resuelve por matcheo/sync).
|
|
opps_missing_id_field = []
|
|
|
|
def _classify_link(value):
    """Classify a link-field value as valid, empty, or malformed.

    Returns ``None`` when the value is acceptable, ``"vacio"`` when it is
    falsy (missing or empty), and ``"longitud_invalida"`` when its string
    form does not match ``OPP_ID_PATTERN``.
    """
    if value:
        if OPP_ID_PATTERN.match(str(value)):
            return None
        return "longitud_invalida"
    return "vacio"
|
|
|
|
for o in brand_opps:
|
|
v = extract_opp_link_value(o.get("custom_fields_json"), brand_opp_link_field_id)
|
|
reason = _classify_link(v)
|
|
if reason is None:
|
|
continue
|
|
opps_missing_id_field.append({
|
|
"id": o["id"],
|
|
"name": o.get("name") or "",
|
|
"status": o.get("status") or "",
|
|
"location_id": BRAND_LOCATION_ID,
|
|
"location_name": brand["nombre"],
|
|
"is_brand": True,
|
|
"field_value": v or "",
|
|
"field_len": len(str(v)) if v else 0,
|
|
"reason": reason,
|
|
})
|
|
|
|
for loc, data in branch_data.items():
|
|
branch_link_fid = resolve_opp_link_field_id(conn, loc)
|
|
for o in data["opps"]:
|
|
v = extract_opp_link_value(o.get("custom_fields_json"), branch_link_fid)
|
|
reason = _classify_link(v)
|
|
if reason is None:
|
|
continue
|
|
opps_missing_id_field.append({
|
|
"id": o["id"],
|
|
"name": o.get("name") or "",
|
|
"status": o.get("status") or "",
|
|
"location_id": loc,
|
|
"location_name": data["name"],
|
|
"is_brand": False,
|
|
"field_value": v or "",
|
|
"field_len": len(str(v)) if v else 0,
|
|
"reason": reason,
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 3b-bis) Réplicas DUPLICADAS en Marca (mismo "ID Oportunidad Sucursal")
|
|
# ----------------------------------------------------------------------
|
|
# Descuadre POSITIVO (Marca > sucursales): si dos o más opps de Marca
|
|
# comparten el MISMO valor de "ID Oportunidad Sucursal" (= apuntan a la
|
|
# misma opp de sucursal de origen) son réplicas duplicadas. Causa típica:
|
|
# el workflow n8n de sync de opps hace CREATE en vez de UPDATE (no encontró
|
|
# la opp existente al replicar). Es INVISIBLE para el bucket de huérfanas
|
|
# (que trata el link como salvaguarda y nunca verifica unicidad).
|
|
#
|
|
# Por cada cluster se recomienda conservar la canónica y borrar las
|
|
# sobrantes según la jerarquía de resolución de duplicados:
|
|
# (1) monetary_value mayor, (2) status activo (won/open) > lost/abandoned,
|
|
# (3) más antiguo > reciente [requiere createdAt en vivo -> el limpiador
|
|
# lo resuelve], (4) TIENDA. Cuando valor y status empatan se marca
|
|
# tie_break_needs_live_createdat=True para que el limpiador desempate.
|
|
opps_in_brand_duplicate_link = []
|
|
|
|
# Índice de opps de sucursal por id nativo -> (location_id, branch_name)
|
|
# para nombrar el origen del link de cada cluster.
|
|
branch_opp_owner_by_id = {}
|
|
for loc, data in branch_data.items():
|
|
for o in data["opps"]:
|
|
branch_opp_owner_by_id[o["id"]] = (loc, data["name"])
|
|
|
|
brand_contact_name_by_id = {c["id"]: fmt_contact(c)["name"] for c in brand_contacts}
|
|
|
|
# Agrupa TODAS las opps de Marca por su valor de link válido (20 chars).
|
|
brand_opps_link_groups = defaultdict(list)
|
|
for o in brand_opps:
|
|
v = extract_opp_link_value(o.get("custom_fields_json"), brand_opp_link_field_id)
|
|
if v and OPP_ID_PATTERN.match(str(v)):
|
|
brand_opps_link_groups[v].append(o)
|
|
|
|
_STATUS_RANK = {"won": 3, "open": 2, "lost": 1, "abandoned": 0}
|
|
duplicate_link_group_count = 0
|
|
duplicate_link_extra = 0 # opps sobrantes = sum(group_size - 1)
|
|
for link_value, group in brand_opps_link_groups.items():
|
|
if len(group) < 2:
|
|
continue
|
|
duplicate_link_group_count += 1
|
|
duplicate_link_extra += len(group) - 1
|
|
owner_loc, owner_name = branch_opp_owner_by_id.get(link_value, (None, None))
|
|
|
|
def _rank(o):
    """Sort key for duplicate replicas: ``(monetary value, status rank)``.

    A missing or falsy ``monetary_value`` counts as 0. The status is
    lower-cased and looked up in ``_STATUS_RANK``; a missing or unknown
    status ranks 0.
    """
    amount = float(o.get("monetary_value") or 0)
    status_key = (o.get("status") or "").lower()
    return amount, _STATUS_RANK.get(status_key, 0)
|
|
ordered = sorted(group, key=_rank, reverse=True)
|
|
top, second = ordered[0], ordered[1]
|
|
tie = _rank(top) == _rank(second)
|
|
for idx, o in enumerate(ordered):
|
|
opps_in_brand_duplicate_link.append({
|
|
"id": o["id"],
|
|
"name": o.get("name") or "",
|
|
"status": o.get("status") or "",
|
|
"monetary_value": o.get("monetary_value") or 0,
|
|
"contact_id": o.get("contact_id") or "",
|
|
"contact_name": brand_contact_name_by_id.get(o.get("contact_id"), ""),
|
|
"link_value": link_value,
|
|
"branch_opp_id": link_value,
|
|
"branch_location_id": owner_loc or "",
|
|
"branch_name": owner_name or "(sucursal no cacheada)",
|
|
"group_size": len(group),
|
|
"recommended_action": "keep" if idx == 0 else "delete",
|
|
"tie_break_needs_live_createdat": tie,
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 3c) Contactos con el campo "ID Contacto Sucursal" vacío o inválido
|
|
# ----------------------------------------------------------------------
|
|
# Paralelo al bucket de opps. Sucursales son accionables (botón llenado =
|
|
# contact.id propio); Marca es informativo (su campo se resuelve por
|
|
# matcheo/sync workflow, no se llena manualmente).
|
|
contacts_missing_id_field = []
|
|
|
|
for c in brand_contacts:
|
|
v = extract_contact_link_value(c.get("custom_fields_json"), brand_contact_link_field_id)
|
|
reason = _classify_link(v)
|
|
if reason is None:
|
|
continue
|
|
contacts_missing_id_field.append({
|
|
"id": c["id"],
|
|
"first_name": c.get("first_name") or "",
|
|
"last_name": c.get("last_name") or "",
|
|
"phone": c.get("phone") or "",
|
|
"email": c.get("email") or "",
|
|
"location_id": BRAND_LOCATION_ID,
|
|
"location_name": brand["nombre"],
|
|
"is_brand": True,
|
|
"field_value": v or "",
|
|
"field_len": len(str(v)) if v else 0,
|
|
"reason": reason,
|
|
})
|
|
|
|
for loc, data in branch_data.items():
|
|
branch_contact_link_fid = resolve_contact_link_field_id(conn, loc)
|
|
for c in data["contacts"]:
|
|
v = extract_contact_link_value(c.get("custom_fields_json"), branch_contact_link_fid)
|
|
reason = _classify_link(v)
|
|
if reason is None:
|
|
continue
|
|
contacts_missing_id_field.append({
|
|
"id": c["id"],
|
|
"first_name": c.get("first_name") or "",
|
|
"last_name": c.get("last_name") or "",
|
|
"phone": c.get("phone") or "",
|
|
"email": c.get("email") or "",
|
|
"location_id": loc,
|
|
"location_name": data["name"],
|
|
"is_brand": False,
|
|
"field_value": v or "",
|
|
"field_len": len(str(v)) if v else 0,
|
|
"reason": reason,
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 4) Desglose por sucursal
|
|
# ----------------------------------------------------------------------
|
|
for loc, data in branch_data.items():
|
|
per_branch_summary.append({
|
|
"location_id": loc,
|
|
"name": data["name"],
|
|
"contacts": len(data["contacts"]),
|
|
"opportunities": len(data["opps"]),
|
|
})
|
|
per_branch_summary.sort(key=lambda x: x["name"])
|
|
|
|
# ----------------------------------------------------------------------
|
|
# 5) Duplicados intra-Marca: mismo nombre normalizado, sin phone NI email
|
|
# ----------------------------------------------------------------------
|
|
intra_brand_duplicates = []
|
|
name_groups = defaultdict(list)
|
|
for c in brand_contacts:
|
|
if normalize_phone(c.get("phone")) or normalize_email(c.get("email")):
|
|
continue
|
|
full_name = f"{c.get('first_name') or ''} {c.get('last_name') or ''}"
|
|
n = " ".join(strip_accents(full_name).lower().split())
|
|
if not n:
|
|
continue
|
|
name_groups[n].append(c)
|
|
|
|
# Pre-indexar contactos sucursal por name_norm (solo los que no tienen
|
|
# phone ni email), para encontrar candidatos de sync para "unico restante".
|
|
branch_no_pe_by_name = defaultdict(list)
|
|
for loc, data in branch_data.items():
|
|
from collections import Counter as _Counter
|
|
opps_by_cid = _Counter(o.get("contact_id") for o in data["opps"] if o.get("contact_id"))
|
|
for c in data["contacts"]:
|
|
if normalize_phone(c.get("phone")) or normalize_email(c.get("email")):
|
|
continue
|
|
full = f"{c.get('first_name') or ''} {c.get('last_name') or ''}"
|
|
nm = " ".join(strip_accents(full).lower().split())
|
|
if not nm:
|
|
continue
|
|
branch_no_pe_by_name[nm].append({
|
|
"id": c["id"],
|
|
"location_id": loc,
|
|
"branch_name": data["name"],
|
|
"opps_count": opps_by_cid.get(c["id"], 0),
|
|
})
|
|
|
|
group_count = 0
|
|
for name_norm, ccs in name_groups.items():
|
|
if len(ccs) < 2:
|
|
continue
|
|
group_count += 1
|
|
candidates = branch_no_pe_by_name.get(name_norm, [])
|
|
# Ordenar por fecha_added desc, ponemos primero los recientes
|
|
sorted_ccs = sorted(ccs, key=lambda x: (x.get("date_added") or ""), reverse=True)
|
|
for c in sorted_ccs:
|
|
intra_brand_duplicates.append({
|
|
**fmt_contact(c),
|
|
"name_norm": name_norm,
|
|
"group_size": len(ccs),
|
|
"opps_in_brand": len(brand_opps_by_cid.get(c["id"], [])),
|
|
"date_added": c.get("date_added") or "",
|
|
"branch_candidates": candidates,
|
|
})
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Resumen
|
|
# ----------------------------------------------------------------------
|
|
contact_diff = len(brand_contacts) - total_branch_contacts
|
|
opp_diff = len(brand_opps) - total_branch_opps
|
|
|
|
def maybe_limit(lst):
    """Cap a listing at ``limit_missing`` items.

    Returns a ``(items, total, truncated)`` tuple: ``total`` is always the
    full length of ``lst``; ``items`` is ``lst`` itself when no limit is
    set or the list already fits, otherwise its first ``limit_missing``
    elements with ``truncated`` set to ``True``.
    """
    total = len(lst)
    if limit_missing is not None and total > limit_missing:
        return lst[:limit_missing], total, True
    return lst, total, False
|
|
|
|
missing_in_brand_lim, mib_total, mib_trunc = maybe_limit(missing_in_brand)
|
|
missing_in_assigned_lim, mia_total, mia_trunc = maybe_limit(missing_in_assigned_branch)
|
|
present_other_lim, pother_total, pother_trunc = maybe_limit(present_in_other_branch_not_assigned)
|
|
probable_dup_lim, probable_dup_total, probable_dup_trunc = maybe_limit(probable_duplicate_in_brand)
|
|
brand_without_tienda_lim, bwt_total, bwt_trunc = maybe_limit(brand_without_tienda)
|
|
brand_unknown_tienda_lim, but_total, but_trunc = maybe_limit(brand_with_unknown_tienda)
|
|
brand_not_any_lim, bna_total, bna_trunc = maybe_limit(brand_not_in_any_branch)
|
|
missing_opps_lim, mo_total, mo_trunc = maybe_limit(missing_opps_in_brand)
|
|
opps_missing_id_lim, omif_total, omif_trunc = maybe_limit(opps_missing_id_field)
|
|
dup_link_lim, dup_link_total, dup_link_trunc = maybe_limit(opps_in_brand_duplicate_link)
|
|
contacts_missing_id_lim, cmif_total, cmif_trunc = maybe_limit(contacts_missing_id_field)
|
|
dup_lim, dup_total, dup_trunc = maybe_limit(intra_brand_duplicates)
|
|
|
|
return {
|
|
"totals": {
|
|
"brand": {
|
|
"name": brand["nombre"],
|
|
"location_id": BRAND_LOCATION_ID,
|
|
"contacts": len(brand_contacts),
|
|
"opportunities": len(brand_opps),
|
|
},
|
|
"branches_aggregate": {
|
|
"branch_count": len(branches),
|
|
"contacts": total_branch_contacts,
|
|
"opportunities": total_branch_opps,
|
|
},
|
|
"diff": {
|
|
"contacts": contact_diff,
|
|
"opportunities": opp_diff,
|
|
"contacts_match": contact_diff == 0,
|
|
"opportunities_match": opp_diff == 0,
|
|
},
|
|
},
|
|
"demos_excluded": [
|
|
{"location_id": d["location_id"], "name": d["nombre"]} for d in demos
|
|
],
|
|
"per_branch": per_branch_summary,
|
|
"missing": {
|
|
"contacts_in_branch_not_in_brand": {
|
|
"total": mib_total,
|
|
"items": missing_in_brand_lim,
|
|
"truncated": mib_trunc,
|
|
},
|
|
"contacts_in_brand_not_in_assigned_branch": {
|
|
"total": mia_total,
|
|
"items": missing_in_assigned_lim,
|
|
"truncated": mia_trunc,
|
|
},
|
|
"contacts_in_brand_present_in_other_branch_not_assigned": {
|
|
"total": pother_total,
|
|
"items": present_other_lim,
|
|
"truncated": pother_trunc,
|
|
},
|
|
"contacts_in_brand_probable_duplicate": {
|
|
"total": probable_dup_total,
|
|
"items": probable_dup_lim,
|
|
"truncated": probable_dup_trunc,
|
|
},
|
|
"contacts_in_brand_without_tienda": {
|
|
"total": bwt_total,
|
|
"items": brand_without_tienda_lim,
|
|
"truncated": bwt_trunc,
|
|
},
|
|
"contacts_in_brand_with_unknown_tienda": {
|
|
"total": but_total,
|
|
"items": brand_unknown_tienda_lim,
|
|
"truncated": but_trunc,
|
|
},
|
|
"contacts_in_brand_not_in_any_branch": {
|
|
"total": bna_total,
|
|
"items": brand_not_any_lim,
|
|
"truncated": bna_trunc,
|
|
},
|
|
"opportunities_in_branch_not_in_brand": {
|
|
"total": mo_total,
|
|
"items": missing_opps_lim,
|
|
"truncated": mo_trunc,
|
|
},
|
|
"opportunities_missing_id_field": {
|
|
"total": omif_total,
|
|
"items": opps_missing_id_lim,
|
|
"truncated": omif_trunc,
|
|
},
|
|
"opportunities_in_brand_duplicate_link": {
|
|
"total": dup_link_total,
|
|
"items": dup_link_lim,
|
|
"truncated": dup_link_trunc,
|
|
"group_count": duplicate_link_group_count,
|
|
"extra_opps": duplicate_link_extra,
|
|
},
|
|
"contacts_missing_id_field": {
|
|
"total": cmif_total,
|
|
"items": contacts_missing_id_lim,
|
|
"truncated": cmif_trunc,
|
|
},
|
|
"intra_brand_duplicates": {
|
|
"total": dup_total,
|
|
"items": dup_lim,
|
|
"truncated": dup_trunc,
|
|
"group_count": group_count,
|
|
},
|
|
},
|
|
"meta": {
|
|
"brand_tienda_field_id": brand_tienda_field_id,
|
|
"verifier_loaded": bool(verifier_by_loc),
|
|
"verifier_entries": len(verifier_by_loc),
|
|
"brand_present_in_any_branch": brand_present_in_any_branch,
|
|
},
|
|
}
|
|
finally:
|
|
conn.close()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI / impresion
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def print_report(data, show_missing=False, missing_cap=20):
    """Print the human-readable audit report through ``safe_print``.

    Args:
        data: Audit result dict. This function reads ``data["totals"]``
            (``brand``, ``branches_aggregate`` and ``diff``),
            ``data["demos_excluded"]``, ``data["per_branch"]`` and
            ``data["missing"]``.
        show_missing: When true, also print the itemised listing of each
            non-empty bucket handled below.
        missing_cap: Maximum number of items printed per listing.
    """
    t = data["totals"]
    b = t["brand"]
    a = t["branches_aggregate"]
    d = t["diff"]

    safe_print("=" * 72)
    safe_print("COMPARATIVA MARCA vs SUCURSALES (excluye cuentas demo)")
    safe_print("=" * 72)
    safe_print(f" Marca: {b['name']} ({b['location_id']})")
    safe_print(f" Contactos : {b['contacts']:>8}")
    safe_print(f" Oportunidades : {b['opportunities']:>8}")
    safe_print(f" Sucursales activas: {a['branch_count']}")
    safe_print(f" Contactos suma : {a['contacts']:>8}")
    safe_print(f" Oportunidades : {a['opportunities']:>8}")
    safe_print("-" * 72)
    # The "+" format flag forces an explicit sign on the difference.
    status_c = "OK (iguales)" if d["contacts_match"] else f"DESCUADRE: {d['contacts']:+}"
    status_o = "OK (iguales)" if d["opportunities_match"] else f"DESCUADRE: {d['opportunities']:+}"
    safe_print(f" Diff contactos : {status_c}")
    safe_print(f" Diff oportunidades : {status_o}")

    demos = data["demos_excluded"]
    if demos:
        safe_print("-" * 72)
        safe_print(f" Cuentas demo excluidas ({len(demos)}):")
        for d_acc in demos:
            safe_print(f" - {d_acc['name']} ({d_acc['location_id']})")

    safe_print("=" * 72)
    safe_print("Desglose por sucursal:")
    safe_print(f" {'Sucursal':<45} {'Cont.':>8} {'Opps':>8}")
    safe_print(" " + "-" * 64)
    for row in data["per_branch"]:
        # Clip long names so they fit the 45-character name column.
        name = row["name"][:44]
        safe_print(f" {name:<45} {row['contacts']:>8} {row['opportunities']:>8}")

    m = data["missing"]
    safe_print("=" * 72)
    safe_print("Resumen de huecos detectados:")
    safe_print(f" Contactos en sucursal sin contraparte en Marca : {m['contacts_in_branch_not_in_brand']['total']}")
    safe_print(f" Contactos en Marca sin presencia en la sucursal asignada : {m['contacts_in_brand_not_in_assigned_branch']['total']}")
    safe_print(f" Probables duplicados en Marca (homonimo en sucursal) : {m['contacts_in_brand_probable_duplicate']['total']}")
    safe_print(f" Contactos en Marca sin TIENDA poblada : {m['contacts_in_brand_without_tienda']['total']}")
    safe_print(f" Contactos en Marca con TIENDA desconocida en verificador : {m['contacts_in_brand_with_unknown_tienda']['total']}")
    safe_print(f" Contactos en Marca sin contraparte en NINGUNA sucursal : {m['contacts_in_brand_not_in_any_branch']['total']}")
    safe_print(f" Oportunidades en sucursal sin replica en Marca : {m['opportunities_in_branch_not_in_brand']['total']}")
    # This bucket is read defensively so a payload without it still prints.
    _dup = m.get("opportunities_in_brand_duplicate_link", {})
    safe_print(f" Replicas DUPLICADAS en Marca (mismo ID Opp Sucursal) : {_dup.get('total', 0)} en {_dup.get('group_count', 0)} grupos ({_dup.get('extra_opps', 0)} sobrantes)")

    if not show_missing:
        return

    def dump(title, key, formatter):
        """Print up to ``missing_cap`` formatted items of one bucket."""
        # A missing bucket is treated like an empty one instead of raising.
        block = m.get(key) or {}
        if not block.get("total"):
            return
        safe_print("-" * 72)
        safe_print(f" {title} ({block['total']} total, mostrando hasta {missing_cap}):")
        for item in (block.get("items") or [])[:missing_cap]:
            safe_print(" - " + formatter(item))

    def fmt_branch_only(i):
        return f"{i['name']} | {i['phone'] or i['email'] or '(sin contacto)'} | sucursal: {i['branch_name']} | opps locales: {i['opps_in_branch']}"

    def fmt_missing_in_assigned(i):
        # Items in this bucket are built without a "present_in_other_branch"
        # key, so indexing it raised KeyError; read it defensively instead.
        in_other = i.get('present_in_other_branch', False)
        return f"{i['name']} | tienda='{i.get('tienda')}' | esperado: {i['expected_branch_name']} | opps en marca: {i['opps_in_brand']} | esta en otra sucursal: {in_other}"

    def fmt_probable_duplicate(i):
        return f"{i['name']} | tienda='{i.get('tienda')}' | esperado: {i['expected_branch_name']} | brand_phone={i.get('phone')!r} | branch_phone={(i.get('branch_existing_contact') or {}).get('phone')!r}"

    def fmt_missing_opp(i):
        # float() keeps ":.0f" working when the stored amount is a numeric
        # string or None rather than a number.
        return f"{i['name']} [{i['status']}] | ${float(i['monetary_value'] or 0):.0f} | contacto: {i['contact_name']} | sucursal: {i['branch_name']} | motivo: {i['reason']}"

    def fmt_duplicate_link(i):
        return f"{i['recommended_action'].upper():>6} | {i['name']} [{i['status']}] | ${float(i['monetary_value'] or 0):.0f} | opp={i['id']} | link={i['link_value']} | origen: {i['branch_name']} | grupo de {i['group_size']}"

    dump(
        "Contactos en sucursal no presentes en Marca",
        "contacts_in_branch_not_in_brand",
        fmt_branch_only,
    )
    dump(
        "Contactos en Marca ausentes de su sucursal asignada",
        "contacts_in_brand_not_in_assigned_branch",
        fmt_missing_in_assigned,
    )
    dump(
        "Probables duplicados en Marca (existe homonimo con tel/email en sucursal asignada)",
        "contacts_in_brand_probable_duplicate",
        fmt_probable_duplicate,
    )
    dump(
        "Oportunidades en sucursal sin replica en Marca",
        "opportunities_in_branch_not_in_brand",
        fmt_missing_opp,
    )
    dump(
        "Replicas DUPLICADAS en Marca (mismo ID Oportunidad Sucursal)",
        "opportunities_in_brand_duplicate_link",
        fmt_duplicate_link,
    )
|
|
|
|
|
|
def main():
    """Command-line entry point: run the audit and print its result.

    Parses the CLI flags, calls ``run_audit`` and prints either the JSON
    dump (``--json``) or the human-readable report. Exits with status 2
    when ``run_audit`` raises ``FileNotFoundError`` and with status 3 when
    it raises ``RuntimeError``; the error text is printed first.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "--show-missing",
        action="store_true",
        help="Imprime los listados de ausentes.",
    )
    cli.add_argument(
        "--limit-missing",
        type=int,
        default=None,
        help="Limita el listado interno antes de imprimir (default sin limite).",
    )
    cli.add_argument(
        "--missing-cap",
        type=int,
        default=20,
        help="Cuantos items imprimir por listado cuando --show-missing.",
    )
    cli.add_argument(
        "--json",
        action="store_true",
        help="Imprime el resultado como JSON en vez del reporte humano.",
    )
    opts = cli.parse_args()

    try:
        report = run_audit(limit_missing=opts.limit_missing)
    except (FileNotFoundError, RuntimeError) as exc:
        safe_print(f"ERROR: {exc}")
        # Distinct exit codes let callers tell the two failures apart.
        sys.exit(2 if isinstance(exc, FileNotFoundError) else 3)

    if not opts.json:
        print_report(report, show_missing=opts.show_missing, missing_cap=opts.missing_cap)
        return

    safe_print(json.dumps(report, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Run the CLI only when executed as a script, so importing this module
# (e.g. to reuse run_audit) has no side effects.
if __name__ == "__main__":
    main()
|