560 lines
23 KiB
Python
560 lines
23 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""audit_brand_sucursal_vs_form.py
|
|
|
|
Read-only. Compara el campo 'Sucursal' que el contacto envio originalmente en
|
|
el formulario (cuenta de Marca) contra el valor actual del campo 'Sucursal' en
|
|
el contacto de Marca. Usa similitud difusa para tolerar abreviaciones de
|
|
estado de versiones viejas del form (ej. 'QRO' vs 'Queretaro, Queretaro').
|
|
|
|
Requisitos:
|
|
1. Haber corrido el sync global (Sincronizar Todo) para tener contactos.
|
|
2. Haber corrido `python scripts/sync_forms_brand.py` para tener
|
|
form_submissions en SQLite.
|
|
|
|
Buckets (por defecto):
|
|
- OK (similitud >= 0.60): el form y Marca apuntan a la misma sucursal o
|
|
a una abreviacion razonable.
|
|
- VERIFICAR (0.30 <= similitud < 0.60): nombres parecidos pero no
|
|
identicos; conviene revisar manualmente.
|
|
- DISCREPANCIA (similitud < 0.30): el contacto deberia estar en otra
|
|
sucursal segun lo que el mismo cliente puso en el formulario.
|
|
|
|
Casos especiales:
|
|
- Contacto sin campo Sucursal en Marca: bucket CONTACTO_SIN_SUCURSAL.
|
|
- Submission con sucursal_value vacio: se ignora (no aporta evidencia).
|
|
- Multiples submissions por contacto: usamos el ULTIMO (createdAt mas
|
|
reciente) porque refleja la intencion mas actual del cliente.
|
|
|
|
Uso:
|
|
python scripts/audit_brand_sucursal_vs_form.py
|
|
python scripts/audit_brand_sucursal_vs_form.py --filter-marca queretaro
|
|
python scripts/audit_brand_sucursal_vs_form.py --show all
|
|
python scripts/audit_brand_sucursal_vs_form.py --xlsx
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import unicodedata
|
|
from collections import Counter, defaultdict
|
|
from datetime import datetime
|
|
from difflib import SequenceMatcher
|
|
|
|
# Parent of the directory that contains this file. It is put at the front of
# sys.path (once) so the project-local imports further down can be resolved
# when the script is launched directly.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)
|
|
|
|
import db # noqa: E402
|
|
|
|
from common import REPORT_DRIFT # noqa: E402
|
|
|
|
# Location id audited by default (the "Marca" account, per the --location help text).
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"
EXPORTS_DIR = REPORT_DRIFT  # local alias kept for compatibility with the rest of the script
|
|
|
|
# --- Helpers de normalizacion -------------------------------------------------
|
|
|
|
# Mexican states keyed by the common abbreviations that show up in old forms.
# Keys are lowercase and accent-free; values are the lowercase, accent-free
# full state name (possibly several words).
STATE_ABBR = {
    "ags": "aguascalientes",
    "bc": "baja california",
    "bcs": "baja california sur",
    "camp": "campeche",
    "chis": "chiapas",
    "chih": "chihuahua",
    "coah": "coahuila",
    "col": "colima",
    "cdmx": "ciudad de mexico",
    "df": "ciudad de mexico",
    "dgo": "durango",
    "edomex": "estado de mexico",
    "mex": "estado de mexico",
    "gto": "guanajuato",
    "gro": "guerrero",
    "hgo": "hidalgo",
    "jal": "jalisco",
    "mich": "michoacan",
    "mor": "morelos",
    "nay": "nayarit",
    "nl": "nuevo leon",
    "oax": "oaxaca",
    "pue": "puebla",
    "qro": "queretaro",
    "qroo": "quintana roo",
    "slp": "san luis potosi",
    "sin": "sinaloa",
    "son": "sonora",
    "tab": "tabasco",
    "tamps": "tamaulipas",
    "tlax": "tlaxcala",
    "ver": "veracruz",
    "yuc": "yucatan",
    "zac": "zacatecas",
}
|
|
|
|
|
|
def strip_accents(text):
    """Return ``text`` as a string with combining marks (accents) removed.

    The value is converted with ``str()``, decomposed with NFKD and every
    combining character is dropped. Any falsy input (``None``, ``""``, ``0``)
    yields an empty string.
    """
    if text:
        decomposed = unicodedata.normalize("NFKD", str(text))
        base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
        return "".join(base_chars)
    return ""
|
|
|
|
|
|
def normalize_sucursal(value):
    """Normalize a branch ("sucursal") string for comparison.

    Steps:
      - lowercase, accents stripped, punctuation (including underscores)
        replaced by spaces;
      - known state abbreviations expanded through ``STATE_ABBR``
        (an expansion may contribute several tokens);
      - stopwords removed;
      - duplicate tokens removed, keeping the first occurrence.

    Returns the surviving tokens joined by single spaces (a ``str``, not a
    list). ``None`` yields ``""``; any other value is converted with
    ``str()`` first.
    """
    if value is None:
        return ""
    text = strip_accents(str(value)).lower()
    # \W alone would keep "_" (it counts as a word character), so it is listed
    # explicitly: underscores are punctuation for our purposes.
    text = re.sub(r"[\W_]+", " ", text)

    # Articles/prepositions plus tokens that carry no branch information.
    stop = {"de", "la", "el", "los", "las", "mp", "monte", "providencia"}

    seen = set()
    cleaned = []
    for tok in text.split():
        # An abbreviation expands to one or more words; anything else is kept
        # as the single token it already is.
        for part in STATE_ABBR.get(tok, tok).split():
            if part in stop or part in seen:
                continue
            # De-duplicate so "Queretaro, QRO" and "Queretaro" normalize equal.
            seen.add(part)
            cleaned.append(part)
    return " ".join(cleaned)
|
|
|
|
|
|
def similarity(a, b):
    """Score how alike two branch strings are, as a float in [0, 1].

    Both inputs are normalized first. Token overlap (Jaccard) takes priority
    over character-level matching, because two unrelated branch names can
    share many letters without sharing a single token.

    Rules:
      - either normalized string empty          -> 0.0
      - identical normalized strings            -> 1.0
      - no token in common                      -> ``min(0.25, 0.3 * seq)``
      - otherwise                               -> ``0.5 * jaccard + 0.5 * seq``

    where ``seq`` is ``SequenceMatcher.ratio()`` over the normalized strings.
    """
    norm_a = normalize_sucursal(a)
    norm_b = normalize_sucursal(b)
    if not (norm_a and norm_b):
        return 0.0
    if norm_a == norm_b:
        return 1.0

    tokens_a = set(norm_a.split())
    tokens_b = set(norm_b.split())
    if not (tokens_a and tokens_b):
        return 0.0

    shared = tokens_a & tokens_b
    char_ratio = SequenceMatcher(None, norm_a, norm_b).ratio()
    if not shared:
        # Disjoint token sets: keep a little of the character ratio so near
        # typos rank above gross mismatches, but cap the score at 0.25.
        return min(0.25, char_ratio * 0.3)

    token_ratio = len(shared) / len(tokens_a | tokens_b)
    return 0.5 * token_ratio + 0.5 * char_ratio
|
|
|
|
|
|
# --- Contact origin classification --------------------------------------------
# Rules (checked in this order, most to least reliable):
# 1. Tag 'formulario' + a row in form_submissions -> FORMULARIO (hard evidence)
# 2. Tag 'formulario' without a recorded submission -> FORMULARIO_SIN_RASTRO
# 3. Tag 'sucursal' (without tag 'formulario') -> SUCURSAL
# 4. Native source contains 'web user' / 'webuser' / 'sucursal' -> SUCURSAL
# 5. Native source contains 'form' -> FORMULARIO if a submission exists,
#    otherwise FORMULARIO_SIN_RASTRO
# 6. Native source contains any DIGITAL_KEYWORDS entry -> DIGITAL
# 7. Native source contains 'integration' -> INTEGRATION (probably a re-adjustment)
# 8. No clear signal -> DESCONOCIDO
|
|
|
|
# Substrings of the contact source that mark a digital acquisition channel.
DIGITAL_KEYWORDS = (
    "facebook", "instagram", "google", "tiktok", "youtube",
    "ads", "lead", "landing", "messenger", "linkedin",
)


def classify_origin(contact_tags, contact_source, has_form_submission):
    """Guess where a contact came from, using its tags and source string.

    Tags and source are compared stripped and lowercased; falsy tags are
    ignored. Checks run in order and the first match wins:

      1. tag 'formulario'            -> FORMULARIO if ``has_form_submission``
                                        else FORMULARIO_SIN_RASTRO
      2. tag 'sucursal'              -> SUCURSAL
      3. source has 'web user', 'webuser' or 'sucursal' -> SUCURSAL
      4. source has 'form'           -> same split as rule 1
      5. source has a DIGITAL_KEYWORDS entry -> DIGITAL
      6. source has 'integration'    -> INTEGRATION
      7. otherwise                   -> DESCONOCIDO
    """
    normalized_tags = set()
    for tag in contact_tags or []:
        if tag:
            normalized_tags.add(str(tag).strip().lower())
    source = str(contact_source or "").strip().lower()

    form_label = "FORMULARIO" if has_form_submission else "FORMULARIO_SIN_RASTRO"

    if "formulario" in normalized_tags:
        return form_label
    if "sucursal" in normalized_tags:
        return "SUCURSAL"
    for marker in ("web user", "webuser", "sucursal"):
        if marker in source:
            return "SUCURSAL"
    if "form" in source:
        # e.g. source "Formulario" / "Formulario - Sitio Web" without the tag
        return form_label
    for keyword in DIGITAL_KEYWORDS:
        if keyword in source:
            return "DIGITAL"
    if "integration" in source:
        return "INTEGRATION"
    return "DESCONOCIDO"
|
|
|
|
|
|
def origin_confidence(origin):
    """Return how much weight an origin label carries when judging Sucursal.

    The result is one of 'alta', 'media' or 'baja'; labels that are not
    listed below fall back to 'baja'.
    """
    # High: the customer picked the branch (form) or the contact was created
    # by hand at that branch.
    levels = dict.fromkeys(("FORMULARIO", "SUCURSAL"), "alta")
    levels.update(dict.fromkeys(("FORMULARIO_SIN_RASTRO", "DIGITAL"), "media"))
    # Low: probably a historical re-adjustment, or no signal at all.
    levels.update(dict.fromkeys(("INTEGRATION", "DESCONOCIDO"), "baja"))

    if origin in levels:
        return levels[origin]
    return "baja"
|
|
|
|
|
|
def parse_tags(tags_value):
    """Return the contact's tags as a list.

    Tags are stored as a JSON string in SQLite. A value that is already a
    list is returned unchanged. Falsy values, values that are not valid JSON,
    values of a type ``json.loads`` cannot read, and JSON that does not decode
    to a list all yield ``[]``.
    """
    if not tags_value:
        return []
    if isinstance(tags_value, list):
        return tags_value
    try:
        parsed = json.loads(tags_value)
    except (TypeError, ValueError):
        # TypeError: not str/bytes/bytearray. ValueError: malformed JSON
        # (json.JSONDecodeError is a ValueError subclass). Anything else is a
        # real bug and should surface instead of being swallowed.
        return []
    return parsed if isinstance(parsed, list) else []
|
|
|
|
|
|
# --- Extraccion de Sucursal del contacto en Marca ----------------------------
|
|
|
|
# Field name to look for, already lowercase and accent-free so it can be
# compared against names normalized the same way.
SUCURSAL_CANONICAL = "sucursal"
|
|
|
|
|
|
def extract_custom_value(field):
    """Return the first usable value stored in a custom-field dict.

    The candidate keys are probed in a fixed order. A candidate is skipped
    when it is ``None``, a blank/whitespace-only string, or an empty list or
    dict. Other falsy values such as ``0`` or ``False`` are returned as-is.
    Returns ``None`` when no key holds a usable value.
    """
    candidate_keys = (
        "value",
        "fieldValueString",
        "fieldValueDate",
        "fieldValueNumber",
        "fieldValueArray",
        "fieldValueOptions",
        "fieldValueFile",
    )
    for key in candidate_keys:
        candidate = field.get(key)
        blank_text = isinstance(candidate, str) and not candidate.strip()
        empty_container = isinstance(candidate, (list, dict)) and not candidate
        if candidate is None or blank_text or empty_container:
            continue
        return candidate
    return None
|
|
|
|
|
|
def resolve_sucursal_field_id_from_db(location_id):
    """Look up the id of the 'Sucursal' contact field for ``location_id``.

    Reads the local ``object_schemas`` table (per the original note, it is
    filled by the sync) so the name -> id mapping does not need another call
    to GHL. Field names are compared accent-free, lowercased and stripped.

    Returns the ``field_id`` of the first matching row, or ``None`` if no
    contact field is named 'sucursal'. The connection is always closed.
    """
    connection = db.get_db_connection()
    try:
        schema_rows = connection.execute(
            "SELECT field_id, field_name FROM object_schemas WHERE location_id=? AND object_key='contact'",
            (location_id,)
        ).fetchall()
        matching_ids = (
            row["field_id"]
            for row in schema_rows
            if strip_accents(row["field_name"]).lower().strip() == SUCURSAL_CANONICAL
        )
        return next(matching_ids, None)
    finally:
        connection.close()
|
|
|
|
|
|
def get_contact_sucursal(contact_row, sucursal_field_id):
    """Return the contact's Sucursal custom-field value as text, or ``None``.

    ``contact_row`` must offer ``.get``; its ``custom_fields_json`` entry is
    parsed as JSON and expected to be a list of field dicts identified by
    ``id`` or ``fieldId``. List values are joined with ``", "`` (skipping
    ``None`` items); any other value is converted with ``str()`` and stripped.

    ``None`` is returned when no field id is given, the JSON cannot be read
    or is not a list, the field is absent, or its value is empty.
    """
    if not sucursal_field_id:
        return None
    try:
        custom_fields = json.loads(contact_row.get("custom_fields_json") or "[]")
    except Exception:
        return None
    if not isinstance(custom_fields, list):
        return None

    for entry in custom_fields:
        if not isinstance(entry, dict):
            continue
        entry_id = entry.get("id") or entry.get("fieldId")
        if entry_id != sucursal_field_id:
            continue
        raw = extract_custom_value(entry)
        if raw is None:
            # Matching id without a usable value: keep scanning later entries.
            continue
        if isinstance(raw, list):
            joined = ", ".join(str(item) for item in raw if item is not None)
            return joined or None
        return str(raw).strip() or None
    return None
|
|
|
|
|
|
# --- Buckets y reporte -------------------------------------------------------
|
|
|
|
def categorize(sim, ok_threshold, verify_threshold):
    """Map a similarity score to its bucket name.

    Returns 'OK' when ``sim >= ok_threshold``, otherwise 'VERIFICAR' when
    ``sim >= verify_threshold``, otherwise 'DISCREPANCIA'.
    """
    ladder = (("OK", ok_threshold), ("VERIFICAR", verify_threshold))
    for label, floor in ladder:
        if sim >= floor:
            return label
    return "DISCREPANCIA"
|
|
|
|
|
|
def latest_submission_per_contact(submissions):
    """Pick, for each contact, its most recent submission that has a sucursal.

    Submissions without ``contact_id`` or without ``sucursal_value`` are
    ignored. "Most recent" is decided by comparing ``created_at`` values
    (missing ones count as ``""``); on a tie the submission seen first wins.

    Returns a dict mapping contact_id -> submission.
    """
    newest = {}
    for submission in submissions:
        contact_id = submission.get("contact_id")
        if not contact_id:
            continue
        if not submission.get("sucursal_value"):
            continue
        current = newest.get(contact_id)
        if current is not None:
            candidate_stamp = submission.get("created_at") or ""
            current_stamp = current.get("created_at") or ""
            if not candidate_stamp > current_stamp:
                continue
        newest[contact_id] = submission
    return newest
|
|
|
|
|
|
def main():
    """Run the read-only audit: form 'Sucursal' vs. the contact's current one.

    Flow:
      1. Parse CLI arguments and validate the two thresholds.
      2. Load form submissions and contacts for the location from the local DB.
      3. Resolve the id of the 'Sucursal' custom field (exits if not found).
      4. For every contact whose latest submission carries a sucursal, score
         form value vs. contact value and bucket it; every other contact is
         only classified by origin (bucket SIN_SUBMISSION).
      5. Print the summary, the per-origin and per-branch breakdowns and the
         detail sections selected with ``--show`` / ``--origin``.
      6. Optionally write one Excel sheet per bucket (``--xlsx``; skipped with
         a warning when openpyxl is not installed).

    Raises:
        SystemExit: on invalid thresholds or when the 'Sucursal' field cannot
            be resolved from ``object_schemas``.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument("--location", default=BRAND_LOCATION_ID,
                        help=f"Location a auditar. Default: Marca ({BRAND_LOCATION_ID})")
    parser.add_argument("--ok-threshold", type=float, default=0.60,
                        help="Similitud minima para considerar OK. Default 0.60")
    parser.add_argument("--verify-threshold", type=float, default=0.30,
                        help="Similitud minima para 'verificar'. <esto = DISCREPANCIA. Default 0.30")
    parser.add_argument("--show", choices=["discrepancia", "verificar", "all", "none"],
                        default="discrepancia",
                        help="Que detalle imprimir. Default: solo DISCREPANCIA.")
    parser.add_argument("--filter-marca",
                        help="Filtra solo contactos cuya Sucursal en Marca contenga este texto (case-insensitive, sin acentos). Ej: 'queretaro'")
    parser.add_argument("--origin",
                        help="Filtra el detalle a uno o varios origenes (coma-separados): "
                             "FORMULARIO, FORMULARIO_SIN_RASTRO, SUCURSAL, DIGITAL, INTEGRATION, DESCONOCIDO")
    parser.add_argument("--xlsx", dest="xlsx_path", nargs="?", const="",
                        help="Exporta Excel. Sin argumento guarda en exports/ con timestamp.")
    args = parser.parse_args()

    # Force UTF-8 output where the stream supports it (accented names/branches).
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")
    if not (0 <= args.verify_threshold < args.ok_threshold <= 1):
        raise SystemExit("Thresholds invalidos: 0 <= verify < ok <= 1")

    location_id = args.location

    # 1. Submissions that carry a sucursal. An empty result is not fatal: the
    #    audit then relies only on tags + source to classify origin.
    submissions = db.get_form_submissions(location_id=location_id, with_sucursal_only=True)
    latest_by_contact = latest_submission_per_contact(submissions)
    print(f"Submissions con sucursal: {len(submissions)} "
          f"(unicos por contacto, ultimo: {len(latest_by_contact)})")

    # Contact ids with any submission at all (used to classify origin).
    all_subs_any = db.get_form_submissions(location_id=location_id)
    contact_ids_with_submission = {s["contact_id"] for s in all_subs_any if s.get("contact_id")}

    # 2. Contacts of the location. try/finally so the connection is released
    #    even when the query fails.
    conn = db.get_db_connection()
    try:
        contact_rows = conn.execute(
            "SELECT * FROM contacts WHERE location_id=?", (location_id,)
        ).fetchall()
    finally:
        conn.close()
    contacts_by_id = {r["id"]: dict(r) for r in contact_rows}
    print(f"Contactos en {location_id}: {len(contacts_by_id)}")

    # 3. Field id of Sucursal.
    sucursal_fid = resolve_sucursal_field_id_from_db(location_id)
    if not sucursal_fid:
        raise SystemExit(f"No encontre el field 'Sucursal' en object_schemas para {location_id}. "
                         "Corre el sync global primero.")

    # 4. Compare.
    filter_norm = strip_accents(args.filter_marca).lower() if args.filter_marca else None

    buckets = {"OK": [], "VERIFICAR": [], "DISCREPANCIA": [],
               "CONTACTO_SIN_SUCURSAL": [], "CONTACTO_NO_EN_DB": [],
               "SIN_SUBMISSION": []}  # contacts without a usable submission, classified by origin only
    by_marca_sucursal = defaultdict(Counter)
    origin_counter = Counter()
    origin_x_bucket = defaultdict(Counter)  # origin -> bucket -> count

    def build_row(contact, cid, sub):
        """Assemble the report row for one contact; ``sub`` may be None."""
        marca_val = get_contact_sucursal(contact, sucursal_fid)
        tags = parse_tags(contact.get("tags"))
        source = contact.get("source")
        has_sub = cid in contact_ids_with_submission
        origin = classify_origin(tags, source, has_sub)
        # Strip so a missing first/last name does not leave stray spaces in
        # the exported name.
        full_name = ((contact.get("first_name") or "") + " " + (contact.get("last_name") or "")).strip()
        return {
            "contact_id": cid,
            "name": full_name,
            "email": contact.get("email"),
            "phone": contact.get("phone"),
            "tags": tags,
            "source": source,
            "origin": origin,
            "origin_confidence": origin_confidence(origin),
            "marca_sucursal": marca_val,
            "form_sucursal": (sub.get("sucursal_value") if sub else None) or None,
            "similarity": None,
            "bucket": None,
            "submission_at": (sub.get("created_at") if sub else None),
            "submission_id": (sub.get("id") if sub else None),
        }

    def excluded_by_filter(row):
        """True when --filter-marca is set and the row's Sucursal does not match it."""
        if not filter_norm:
            return False
        marca_val = row["marca_sucursal"]
        return not marca_val or filter_norm not in strip_accents(marca_val).lower()

    # 4a. Contacts WITH a submission: compare form vs. Marca.
    contacts_evaluated_via_form = set()
    for cid, sub in latest_by_contact.items():
        contact = contacts_by_id.get(cid)
        form_val = sub.get("sucursal_value") or ""
        if not contact:
            buckets["CONTACTO_NO_EN_DB"].append({
                "contact_id": cid,
                "name": sub.get("name"),
                "email": sub.get("email"),
                "phone": sub.get("phone"),
                "form_sucursal": form_val,
                "submission_at": sub.get("created_at"),
                "origin": "FORMULARIO",
                "origin_confidence": "alta",
                # Same key the other rows carry, so the xlsx "bucket" column
                # is filled on this sheet too.
                "bucket": "CONTACTO_NO_EN_DB",
            })
            continue
        row_data = build_row(contact, cid, sub)
        if excluded_by_filter(row_data):
            continue
        if not row_data["marca_sucursal"]:
            row_data["bucket"] = "CONTACTO_SIN_SUCURSAL"
            buckets["CONTACTO_SIN_SUCURSAL"].append(row_data)
            origin_x_bucket[row_data["origin"]]["CONTACTO_SIN_SUCURSAL"] += 1
            origin_counter[row_data["origin"]] += 1
            contacts_evaluated_via_form.add(cid)
            continue
        sim = similarity(form_val, row_data["marca_sucursal"])
        bucket = categorize(sim, args.ok_threshold, args.verify_threshold)
        row_data["similarity"] = round(sim, 3)
        row_data["bucket"] = bucket
        buckets[bucket].append(row_data)
        by_marca_sucursal[strip_accents(row_data["marca_sucursal"]).lower()][bucket] += 1
        origin_x_bucket[row_data["origin"]][bucket] += 1
        origin_counter[row_data["origin"]] += 1
        contacts_evaluated_via_form.add(cid)

    # 4b. Contacts WITHOUT a usable submission: only classified by origin
    #     (there is no form value to compare). Useful to see the full universe.
    for cid, contact in contacts_by_id.items():
        if cid in contacts_evaluated_via_form:
            continue
        row_data = build_row(contact, cid, None)
        if excluded_by_filter(row_data):
            continue
        row_data["bucket"] = "SIN_SUBMISSION"
        buckets["SIN_SUBMISSION"].append(row_data)
        origin_x_bucket[row_data["origin"]]["SIN_SUBMISSION"] += 1
        origin_counter[row_data["origin"]] += 1

    # 5. Summary.
    total_with_form = sum(len(buckets[k]) for k in ("OK", "VERIFICAR", "DISCREPANCIA",
                                                    "CONTACTO_SIN_SUCURSAL"))
    total_overall = total_with_form + len(buckets["SIN_SUBMISSION"]) + len(buckets["CONTACTO_NO_EN_DB"])
    print("\n" + "=" * 72)
    print("RESUMEN (con submission del formulario = evidencia dura)")
    print("=" * 72)
    print(f"Contactos con submission y evaluados: {total_with_form}")
    print(f" OK (>= {args.ok_threshold:.2f}): {len(buckets['OK'])}")
    print(f" VERIFICAR ({args.verify_threshold:.2f}-{args.ok_threshold:.2f}): {len(buckets['VERIFICAR'])}")
    print(f" DISCREPANCIA (< {args.verify_threshold:.2f}): {len(buckets['DISCREPANCIA'])}")
    print(f" Contacto sin Sucursal en Marca: {len(buckets['CONTACTO_SIN_SUCURSAL'])}")
    print(f"\nContactos SIN submission (clasificados solo por origen): {len(buckets['SIN_SUBMISSION'])}")
    print(f"Submission sin contacto en DB: {len(buckets['CONTACTO_NO_EN_DB'])}")
    print(f"\nTotal universo: {total_overall}")

    # Breakdown by probable origin.
    if origin_counter:
        print("\n" + "-" * 72)
        print("ORIGEN PROBABLE (tags + source nativo + submission)")
        print("-" * 72)
        for origin, n in origin_counter.most_common():
            conf = origin_confidence(origin)
            breakdown = origin_x_bucket[origin]
            parts = [f"{b}={c}" for b, c in breakdown.most_common()]
            print(f" {origin:24s} ({conf:5s} confianza): {n:4d} [{', '.join(parts)}]")

    if by_marca_sucursal:
        print("\n" + "-" * 72)
        print("DESGLOSE POR SUCURSAL EN MARCA (solo contactos con submission)")
        print("-" * 72)
        # Largest branches first.
        for marca_suc, cnt in sorted(by_marca_sucursal.items(), key=lambda x: -sum(x[1].values())):
            total_suc = sum(cnt.values())
            print(f" {marca_suc!r}: {total_suc} "
                  f"(OK={cnt.get('OK',0)}, VERIF={cnt.get('VERIFICAR',0)}, DISCREP={cnt.get('DISCREPANCIA',0)})")

    # 6. Detail.
    sections = []
    if args.show == "all":
        sections = ["DISCREPANCIA", "VERIFICAR", "OK", "CONTACTO_SIN_SUCURSAL",
                    "SIN_SUBMISSION", "CONTACTO_NO_EN_DB"]
    elif args.show == "discrepancia":
        sections = ["DISCREPANCIA"]
    elif args.show == "verificar":
        sections = ["DISCREPANCIA", "VERIFICAR"]

    origin_filter = {o.strip().upper() for o in (args.origin or "").split(",") if o.strip()}

    for sec in sections:
        rows = buckets[sec]
        if origin_filter:
            rows = [r for r in rows if (r.get("origin") or "").upper() in origin_filter]
        if not rows:
            continue
        print("\n" + "-" * 72)
        print(f"{sec} ({len(rows)})")
        print("-" * 72)
        for r in rows:
            name = (r.get("name") or "").strip() or "(sin nombre)"
            sim_str = f"sim={r['similarity']:.2f}" if r.get("similarity") is not None else "sim=N/A"
            origin = r.get("origin", "?")
            conf = r.get("origin_confidence", "?")
            print(f" {name} [{sim_str}] origen={origin} ({conf})")
            print(f" contact_id: {r['contact_id']} phone={r.get('phone')!r} email={r.get('email')!r}")
            print(f" tags={r.get('tags')!r} source={r.get('source')!r}")
            print(f" Marca dice: {r.get('marca_sucursal')!r}")
            if r.get("form_sucursal"):
                print(f" Form dijo: {r['form_sucursal']!r} (submission_at={r.get('submission_at')})")
            else:
                print(" Form: (sin submission registrado)")

    # 7. Optional xlsx export (args.xlsx_path is None when --xlsx was not given).
    if args.xlsx_path is not None:
        try:
            from openpyxl import Workbook
        except ImportError:
            print("\nWARN: openpyxl no esta instalado; salteo --xlsx")
        else:
            os.makedirs(EXPORTS_DIR, exist_ok=True)
            if args.xlsx_path:
                path = args.xlsx_path
                # A bare file name goes into EXPORTS_DIR (with .xlsx added when
                # it has no extension); anything with a directory is used as given.
                if not os.path.isabs(path) and os.sep not in path:
                    base, ext = os.path.splitext(path)
                    if not ext:
                        ext = ".xlsx"
                    path = os.path.join(EXPORTS_DIR, base + ext)
            else:
                ts = datetime.now().strftime("%Y%m%d_%H%M%S")
                path = os.path.join(EXPORTS_DIR, f"sucursal_vs_form_{ts}.xlsx")

            wb = Workbook()
            wb.remove(wb.active)  # drop the default sheet; one sheet per bucket below
            header = ["bucket", "similarity", "origin", "origin_confidence",
                      "contact_id", "name", "email", "phone",
                      "tags", "source", "marca_sucursal", "form_sucursal",
                      "submission_at", "submission_id"]
            for sec in ["DISCREPANCIA", "VERIFICAR", "OK", "CONTACTO_SIN_SUCURSAL",
                        "SIN_SUBMISSION", "CONTACTO_NO_EN_DB"]:
                ws = wb.create_sheet(sec[:31])  # sheet title truncated to 31 characters
                ws.append(header)
                for r in buckets[sec]:
                    tags = r.get("tags")
                    # str() each tag: the JSON list may hold non-string items,
                    # which would make a plain join raise TypeError.
                    tags_cell = ",".join(str(t) for t in tags) if isinstance(tags, list) else tags
                    ws.append([
                        r.get("bucket"), r.get("similarity"),
                        r.get("origin"), r.get("origin_confidence"),
                        r.get("contact_id"), r.get("name"),
                        r.get("email"), r.get("phone"),
                        tags_cell,
                        r.get("source"),
                        r.get("marca_sucursal"), r.get("form_sucursal"),
                        r.get("submission_at"), r.get("submission_id"),
                    ])
            wb.save(path)
            print(f"\nExcel exportado: {path}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|