Primer commit

This commit is contained in:
2026-05-30 14:31:19 -06:00
commit a35d26fac0
277 changed files with 265240 additions and 0 deletions
+360
View File
@@ -0,0 +1,360 @@
"""Shared helpers for MP scripts.
Keep GHL custom-field access centralized so scripts resolve dynamic field IDs
per location before reading or updating contact/opportunity data.
"""
import difflib
import json
import os
import re
import sys
import unicodedata
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
sys.path.insert(0, ROOT_DIR)
import sync_engine # noqa: E402
# Re-export paths so scripts can do `from common import REPORTS_DIR, MIGRATIONS_DIR, ...`
# and keep a single source of truth. See `paths.py` at the repository root.
from paths import ( # noqa: E402,F401
GENERATED_DIR,
DATA_DIR,
DB_PATH,
REPORTS_DIR,
REPORT_AUDIT_CUSTOM_FIELDS,
REPORT_DUPLICADOS,
REPORT_DRIFT,
REPORT_COVERAGE,
EXPORTS_DIR,
LOGS_DIR,
SCRIPT_RUNS_DIR,
MIGRATIONS_DIR,
BROWSER_DIR,
SESSION_FILE,
BROWSER_PROFILE_DEFAULT,
SCREENSHOTS_DIR,
RUNTIME_DIR,
SERVER_INFO,
BATCH_DIR,
ARCHIVE_DIR,
)
# NOTE(review): presumably the GHL location ID of the brand-level (main)
# account — confirm against the accounts CSV.
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"
# Logical field key -> list of display names that the same custom field may
# carry in different locations. SchemaResolver.get_field_ids looks a key up
# here and matches every listed name after normalize_name (case/whitespace
# only), which is why accented and unaccented spellings are both listed.
FIELD_ALIASES = {
    "sucursal": ["Sucursal", "SUCURSAL"],
    "tienda": ["TIENDA", "Tienda"],
    "canal_origen": ["Canal de Origen", "CANAL DE ORIGEN", "Canal de Origen de la Oportunidad"],
    "fuente_prospecto": ["Fuente de Prospecto", "Fuente del Prospecto", "FUENTE DE PROSPECTO"],
    "tipo_lead": ["Tipo de Lead", "TIPO DE LEAD", "Lead Type"],
    "vehiculo": ["Vehículo", "Vehiculo"],
    "marca_vehiculo": ["Marca del Vehículo", "MARCA DE VEHÍCULO", "Marca del Vehiculo"],
    "version_vehiculo": ["Versión del Vehículo", "VERSION DEL VEHICULO", "Versión del Vehiculo"],
    "ano_vehiculo": ["Año del Vehículo", "AÑO DE VEHÍCULO", "Año del Vehiculo"],
}
def normalize_name(value):
    """Return *value* lowercased with whitespace trimmed and collapsed.

    Accents are deliberately left untouched: SchemaResolver relies on
    FIELD_ALIASES to enumerate accent variants. Use normalize_text when
    comparing contact names. Falsy inputs yield an empty string.
    """
    raw = str(value) if value else ""
    words = raw.lower().split()
    return " ".join(words)
# Anything that is neither a word character nor whitespace. Note that `\w`
# matches the underscore, so underscores survive normalization.
_PUNCT_RE = re.compile(r"[^\w\s]", flags=re.UNICODE)


def normalize_text(value):
    """Normalize text for tolerant comparison (names, branches, etc.).

    Steps, in order:
      1. NFD-decompose and drop combining characters (removes accents).
      2. Lowercase.
      3. Replace every character matched by ``_PUNCT_RE`` with a space.
      4. Collapse runs of whitespace and trim.

    Falsy inputs yield an empty string. Uses only the standard library.
    """
    decomposed = unicodedata.normalize("NFD", str(value or ""))
    without_marks = "".join(
        filter(lambda ch: not unicodedata.combining(ch), decomposed)
    )
    spaced = _PUNCT_RE.sub(" ", without_marks.lower())
    return " ".join(spaced.split())
# Number of trailing digits kept when comparing phone numbers.
PHONE_TAIL_LENGTH = 10


def normalize_phone(value):
    """Return the last ``PHONE_TAIL_LENGTH`` digits of a phone number.

    Every non-digit character is removed first. If fewer than
    ``PHONE_TAIL_LENGTH`` digits remain, an empty string is returned.
    """
    only_digits = re.sub(r"\D+", "", str(value or ""))
    if len(only_digits) >= PHONE_TAIL_LENGTH:
        return only_digits[-PHONE_TAIL_LENGTH:]
    return ""
def normalize_email(value):
    """Return the email NFKC-normalized, trimmed and lowercased.

    Returns an empty string when the result contains no '@', i.e. when the
    input does not look like an email address at all.
    """
    candidate = unicodedata.normalize("NFKC", str(value or ""))
    candidate = candidate.strip().lower()
    if "@" not in candidate:
        return ""
    return candidate
def contact_full_name(contact):
    """Resolve and normalize a contact's full name.

    Accepts dicts keyed GHL-style (``contactName`` / ``name`` /
    ``firstName`` + ``lastName``) or SQLite-style (``first_name`` /
    ``last_name``). A non-dict argument is treated as the name itself.
    The result is passed through normalize_text (no accents, no
    punctuation, lowercase).
    """
    if not isinstance(contact, dict):
        return normalize_text(contact)

    display = contact.get("contactName") or contact.get("name")
    if display:
        return normalize_text(display)

    # No pre-built display name: assemble it from the first/last name parts.
    given = contact.get("firstName") or contact.get("first_name") or ""
    family = contact.get("lastName") or contact.get("last_name") or ""
    return normalize_text(f"{given} {family}")
def _name_tokens(name):
    """Split a name into its normalize_text tokens, dropping empty ones."""
    return list(filter(None, normalize_text(name).split()))
def name_similarity(a, b):
    """Return the similarity between two names in the range [0.0, 1.0].

    The score is the maximum of three metrics computed on the unique
    normalized tokens of each name:
      - Jaccard index of the token sets (tolerates swapped word order).
      - Overlap coefficient of the token sets (tolerates omitted words);
        only counted when at least two tokens are shared, to avoid false
        positives caused by a single common token.
      - ``difflib.SequenceMatcher.ratio()`` between the two strings built
        by joining each token set in sorted order (tolerates typos).

    Returns 0.0 when either name has no tokens after normalization.
    """
    left = set(_name_tokens(a))
    right = set(_name_tokens(b))
    if not (left and right):
        return 0.0

    shared = len(left & right)
    scores = [shared / len(left | right)]
    scores.append(shared / min(len(left), len(right)) if shared >= 2 else 0.0)

    # Sorting makes the character-level comparison independent of word order.
    matcher = difflib.SequenceMatcher(
        None, " ".join(sorted(left)), " ".join(sorted(right))
    )
    scores.append(matcher.ratio())
    return max(scores)
def match_contacts(a, b, threshold=0.80):
    """Decide whether two contacts represent the same person.

    Returned levels:
      - 'strong': same phone AND same email AND name_similarity >= threshold.
      - 'medium': same phone AND name_similarity >= threshold (email may be
        missing or different).
      - 'none': anything else. When the phones match but the name score is
        below the threshold, ``reasons`` contains
        'phone_collision_unresolved' so mutating callers can report the
        pair instead of merging it.

    When the two names are not both present and contact ``a`` has neither a
    usable phone nor a usable email, the result is 'none' with reason
    'incomplete_data' and zeroed scores. Note that this guard only inspects
    ``a``'s phone and email.

    ``a`` and ``b`` are contact dicts; GHL and SQLite key styles are both
    accepted (phone, email, firstName/lastName or first_name/last_name,
    contactName/name).

    Returns a dict with keys: level, score, reasons, phone_match,
    email_match, name_score.
    """

    def _first_value(contact, *keys):
        # First truthy value among `keys`, or "" (also for non-dict input).
        if isinstance(contact, dict):
            for key in keys:
                if contact.get(key):
                    return contact[key]
        return ""

    def _verdict(level, score, reasons, phone_hit, email_hit, name_hit):
        # Single place that fixes the shape of the returned dict.
        return {
            "level": level,
            "score": score,
            "reasons": reasons,
            "phone_match": phone_hit,
            "email_match": email_hit,
            "name_score": name_hit,
        }

    phone_a = normalize_phone(_first_value(a, "phone"))
    phone_b = normalize_phone(_first_value(b, "phone"))
    email_a = normalize_email(_first_value(a, "email"))
    email_b = normalize_email(_first_value(b, "email"))
    name_a = contact_full_name(a)
    name_b = contact_full_name(b)

    both_named = bool(name_a and name_b)
    phone_match = bool(phone_a) and phone_a == phone_b
    email_match = bool(email_a) and email_a == email_b
    name_score = name_similarity(name_a, name_b) if both_named else 0.0

    if not both_named and not phone_a and not email_a:
        return _verdict("none", 0.0, ["incomplete_data"], False, False, 0.0)

    if phone_match and email_match and name_score >= threshold:
        return _verdict(
            "strong", name_score, ["phone", "email", "name"], True, True, name_score
        )

    if phone_match and name_score >= threshold:
        return _verdict(
            "medium", name_score, ["phone", "name"], True, email_match, name_score
        )

    reasons = []
    if phone_match and name_score < threshold:
        reasons.append("phone_collision_unresolved")
    return _verdict("none", name_score, reasons, phone_match, email_match, name_score)
def load_accounts(include_main=True):
    """Return the accounts parsed by ``sync_engine.parse_accounts_csv()``.

    With ``include_main`` false, only accounts whose ``type`` is
    ``"branch"`` are returned.
    """
    every_account = sync_engine.parse_accounts_csv()
    if not include_main:
        return [entry for entry in every_account if entry.get("type") == "branch"]
    return every_account
def select_accounts(location_id=None, all_locations=False, include_main=False):
    """Select the accounts a script should operate on.

    With ``location_id`` set, returns the accounts (main included) whose
    ``location_id`` equals it, and exits via ``SystemExit`` when none match.
    Otherwise returns ``load_accounts(include_main=include_main)``.

    ``all_locations`` is accepted for interface compatibility but does not
    change the result: without a ``location_id`` every account is returned
    either way.
    """
    if not location_id:
        return load_accounts(include_main=include_main)

    found = [
        entry
        for entry in load_accounts(include_main=True)
        if entry["location_id"] == location_id
    ]
    if found:
        return found
    raise SystemExit(f"Location {location_id} no existe en el CSV de mesa de control")
class SchemaResolver:
    """Resolve custom-field names/aliases to field IDs, per location.

    Schema lookups go through ``sync_engine.ghl_client`` and are cached on
    the instance, keyed by ``(location_id, object_key)``.
    """

    def __init__(self):
        # Both caches are keyed by (location_id, object_key).
        self._schemas = {}
        self._fields = {}

    def get_schema(self, token, location_id, object_key):
        """Return the object schema, fetching it on first use."""
        key = (location_id, object_key)
        cache = self._schemas
        if key in cache:
            return cache[key]
        fetched = sync_engine.ghl_client.get_object_schema(token, location_id, object_key)
        cache[key] = fetched
        return fetched

    def get_fields(self, token, location_id, object_key):
        """Return the raw field list, fetching it on first use.

        The raw list preserves duplicates (same name, different IDs).
        """
        key = (location_id, object_key)
        cache = self._fields
        if key in cache:
            return cache[key]
        fetched = sync_engine.ghl_client.get_object_schema_fields(token, location_id, object_key)
        cache[key] = fetched
        return fetched

    def get_field_id(self, token, location_id, object_key, field_name_or_alias):
        """Compatibility wrapper: return the first matching field ID, or None."""
        found = self.get_field_ids(token, location_id, object_key, field_name_or_alias)
        if found:
            return found[0]
        return None

    def get_field_ids(self, token, location_id, object_key, field_name_or_alias):
        """Return ALL field IDs in the schema matching the alias or name.

        ``field_name_or_alias`` is looked up in FIELD_ALIASES; when it is
        not a known alias key it is used as a literal field name. Names are
        compared after normalize_name, i.e. ignoring case and surrounding or
        repeated whitespace. Accent variants are matched only when
        FIELD_ALIASES lists them, since normalize_name keeps accents.
        Intra-location duplicates (same name, different IDs) are all
        returned, in schema order. Fields lacking a name or an ID are
        skipped.
        """
        aliases = FIELD_ALIASES.get(field_name_or_alias, [field_name_or_alias])
        wanted = {normalize_name(alias) for alias in aliases}
        return [
            entry.get("id")
            for entry in self.get_fields(token, location_id, object_key)
            if entry.get("name")
            and entry.get("id")
            and normalize_name(entry.get("name")) in wanted
        ]

    def resolve_required(self, token, location_id, object_key, field_names_or_aliases):
        """Resolve several fields at once.

        Returns ``(resolved, missing)``: a dict mapping each requested name
        to its first matching field ID, and a list of the names for which no
        field ID was found.
        """
        resolved, missing = {}, []
        for requested in field_names_or_aliases:
            found_id = self.get_field_id(token, location_id, object_key, requested)
            if not found_id:
                missing.append(requested)
                continue
            resolved[requested] = found_id
        return resolved, missing
def parse_custom_fields(value):
    """Coerce a custom-fields value into a list.

    - Falsy input -> new empty list.
    - A list is returned as-is.
    - Anything else is handed to ``json.loads``; the decoded value is
      returned only when it is a list. Decoding errors of any kind, and
      non-list JSON, yield an empty list (best-effort parsing).
    """
    if not value:
        return []
    if isinstance(value, list):
        return value
    try:
        decoded = json.loads(value)
    except Exception:
        decoded = None
    if isinstance(decoded, list):
        return decoded
    return []
def get_custom_field_value(record_or_json, field_id):
    """Return the value stored for ``field_id`` in a record's custom fields.

    ``record_or_json`` is either a record dict (custom fields read from its
    ``customFields`` or ``custom_fields`` key) or a raw value that is passed
    to parse_custom_fields.

    GHL returns custom fields under different value keys depending on the
    object: contacts use "value", opportunities use "fieldValue", and some
    historical endpoints use "fieldValueString". The first of those keys
    holding a non-None value wins.

    Only the first entry whose ``id`` or ``fieldId`` equals ``field_id`` is
    considered. Returns None when ``field_id`` is falsy, when no entry
    matches, or when the matching entry carries no value.
    """
    if not field_id:
        return None

    if isinstance(record_or_json, dict):
        entries = (
            record_or_json.get("customFields")
            or record_or_json.get("custom_fields")
            or []
        )
    else:
        entries = parse_custom_fields(record_or_json)

    hit = next(
        (
            entry
            for entry in entries or []
            if entry.get("id") == field_id or entry.get("fieldId") == field_id
        ),
        None,
    )
    if hit is None:
        return None
    return next(
        (
            hit[key]
            for key in ("value", "fieldValue", "fieldValueString")
            if hit.get(key) is not None
        ),
        None,
    )
def custom_fields_by_name(record_or_json, id_to_name):
    """Map a record's custom-field values to human-readable field names.

    ``record_or_json`` is either a record dict (custom fields read from its
    ``customFields`` or ``custom_fields`` key) or a raw value that is passed
    to parse_custom_fields. ``id_to_name`` maps field IDs to names; entries
    whose ID (``id`` or ``fieldId``) is not in that mapping are ignored.

    GHL stores the value under a different key depending on the object
    (contacts: "value", opportunities: "fieldValue", some historical
    endpoints: "fieldValueString"), so the first of those keys holding a
    non-None value is used. A known field with no value maps to None.

    Returns a dict ``{field_name: value}``; when several entries resolve to
    the same name the last one wins.
    """
    if isinstance(record_or_json, dict):
        custom_fields = (
            record_or_json.get("customFields")
            or record_or_json.get("custom_fields")
            or []
        )
    else:
        custom_fields = parse_custom_fields(record_or_json)

    result = {}
    for field in custom_fields or []:
        field_name = id_to_name.get(field.get("id") or field.get("fieldId"))
        if not field_name:
            continue
        # Same key fallback order as get_custom_field_value; `is not None`
        # keeps legitimate falsy values such as 0 or "".
        result[field_name] = next(
            (
                field[key]
                for key in ("value", "fieldValue", "fieldValueString")
                if field.get(key) is not None
            ),
            None,
        )
    return result
def build_custom_fields_payload(field_values_by_name, schema):
    """Build a ``[{"id": ..., "value": ...}]`` custom-fields payload.

    ``schema`` maps field names to field IDs. Names with no (or a falsy) ID
    in ``schema`` are silently left out. Output order follows the iteration
    order of ``field_values_by_name``.
    """
    resolved = (
        (schema.get(name), value) for name, value in field_values_by_name.items()
    )
    return [{"id": field_id, "value": value} for field_id, value in resolved if field_id]