Primer commit
This commit is contained in:
@@ -0,0 +1,360 @@
|
||||
"""Shared helpers for MP scripts.
|
||||
|
||||
Keep GHL custom-field access centralized so scripts resolve dynamic field IDs
|
||||
per location before reading or updating contact/opportunity data.
|
||||
"""
|
||||
|
||||
import difflib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
# ROOT_DIR is the parent of the directory that contains this file. It is put
# at the front of sys.path (once) so the top-level modules imported right
# after this statement can be resolved.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))  # directory of this file
ROOT_DIR = os.path.dirname(ROOT_DIR)  # ...and its parent
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)
|
||||
|
||||
import sync_engine # noqa: E402
|
||||
# Re-export paths so scripts can do `from common import REPORTS_DIR, MIGRATIONS_DIR, ...`
# and keep a single source of truth. See `paths.py` at the repository root.
|
||||
from paths import ( # noqa: E402,F401
|
||||
GENERATED_DIR,
|
||||
DATA_DIR,
|
||||
DB_PATH,
|
||||
REPORTS_DIR,
|
||||
REPORT_AUDIT_CUSTOM_FIELDS,
|
||||
REPORT_DUPLICADOS,
|
||||
REPORT_DRIFT,
|
||||
REPORT_COVERAGE,
|
||||
EXPORTS_DIR,
|
||||
LOGS_DIR,
|
||||
SCRIPT_RUNS_DIR,
|
||||
MIGRATIONS_DIR,
|
||||
BROWSER_DIR,
|
||||
SESSION_FILE,
|
||||
BROWSER_PROFILE_DEFAULT,
|
||||
SCREENSHOTS_DIR,
|
||||
RUNTIME_DIR,
|
||||
SERVER_INFO,
|
||||
BATCH_DIR,
|
||||
ARCHIVE_DIR,
|
||||
)
|
||||
|
||||
# NOTE(review): presumably the GHL location ID of the brand-level account;
# nothing in this module reads it, so confirm against the callers.
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"

# Alias key -> display-name variants of the same custom field.
# SchemaResolver.get_field_ids compares these through normalize_name, which
# ignores case and spacing but NOT accents, so accented and unaccented
# spellings have to be listed explicitly.
FIELD_ALIASES = {
    "sucursal": [
        "Sucursal",
        "SUCURSAL",
    ],
    "tienda": [
        "TIENDA",
        "Tienda",
    ],
    "canal_origen": [
        "Canal de Origen",
        "CANAL DE ORIGEN",
        "Canal de Origen de la Oportunidad",
    ],
    "fuente_prospecto": [
        "Fuente de Prospecto",
        "Fuente del Prospecto",
        "FUENTE DE PROSPECTO",
    ],
    "tipo_lead": [
        "Tipo de Lead",
        "TIPO DE LEAD",
        "Lead Type",
    ],
    "vehiculo": [
        "Vehículo",
        "Vehiculo",
    ],
    "marca_vehiculo": [
        "Marca del Vehículo",
        "MARCA DE VEHÍCULO",
        "Marca del Vehiculo",
    ],
    "version_vehiculo": [
        "Versión del Vehículo",
        "VERSION DEL VEHICULO",
        "Versión del Vehiculo",
    ],
    "ano_vehiculo": [
        "Año del Vehículo",
        "AÑO DE VEHÍCULO",
        "Año del Vehiculo",
    ],
}
|
||||
|
||||
|
||||
def normalize_name(value):
    """Return *value* lower-cased, trimmed and with inner whitespace collapsed.

    Accents are intentionally left in place: SchemaResolver relies on
    FIELD_ALIASES to enumerate accented/unaccented variants, so stripping
    them here would change which fields it matches. Use normalize_text for
    accent-insensitive comparison of contact names.

    Falsy input (None, "", 0) yields an empty string.
    """
    lowered = str(value or "").strip().lower()
    words = lowered.split()
    return " ".join(words)
|
||||
|
||||
|
||||
# Matches any single character that is neither a word character nor
# whitespace. Note that "_" is a word character and therefore survives.
_PUNCT_RE = re.compile(r"[^\w\s]", flags=re.UNICODE)


def normalize_text(value):
    """Normalize text for tolerant comparison (names, branches, etc.).

    Steps, in order:
      1. NFD-decompose and drop combining marks (removes accents: á→a, ñ→n).
      2. Lower-case.
      3. Replace every punctuation character with a space.
      4. Collapse runs of whitespace and trim.

    Falsy input (None, "", 0) yields an empty string. Uses only the
    standard library.
    """
    decomposed = unicodedata.normalize("NFD", str(value or ""))
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    lowered = "".join(base_chars).lower()
    without_punct = _PUNCT_RE.sub(" ", lowered)
    return " ".join(without_punct.split())
|
||||
|
||||
|
||||
# Number of trailing digits kept when comparing phone numbers.
PHONE_TAIL_LENGTH = 10


def normalize_phone(value):
    """Return the last PHONE_TAIL_LENGTH digits of *value*.

    Every non-digit character is discarded first. When fewer than
    PHONE_TAIL_LENGTH digits remain, an empty string is returned.
    """
    digits_only = re.sub(r"\D+", "", str(value or ""))
    has_enough = len(digits_only) >= PHONE_TAIL_LENGTH
    return digits_only[-PHONE_TAIL_LENGTH:] if has_enough else ""
|
||||
|
||||
|
||||
def normalize_email(value):
    """Return *value* NFKC-normalized, trimmed and lower-cased.

    An empty string is returned when the result contains no '@', i.e. when
    it does not look like an email address at all.
    """
    cleaned = unicodedata.normalize("NFKC", str(value or ""))
    cleaned = cleaned.strip().lower()
    if "@" not in cleaned:
        return ""
    return cleaned
|
||||
|
||||
|
||||
def contact_full_name(contact):
    """Resolve and normalize the full name of a contact.

    Accepts dictionaries with GHL keys (contactName / name /
    firstName + lastName) or SQLite keys (first_name / last_name). Any
    non-dict value is treated as the name itself.

    Candidates are tried in order — contactName, name, "first last" — and
    the first one that is non-empty AFTER normalization wins. A contactName
    made only of whitespace or punctuation therefore no longer hides usable
    first/last name parts.

    Returns the name processed by normalize_text (no accents, no
    punctuation, lower-case), or '' when no candidate yields any text.
    """
    if not isinstance(contact, dict):
        return normalize_text(contact)

    first = contact.get("firstName") or contact.get("first_name") or ""
    last = contact.get("lastName") or contact.get("last_name") or ""
    candidates = (
        contact.get("contactName"),
        contact.get("name"),
        f"{first} {last}",
    )
    for candidate in candidates:
        normalized = normalize_text(candidate)
        if normalized:
            return normalized
    return ""
|
||||
|
||||
|
||||
def _name_tokens(name):
    """Return the whitespace-separated tokens of normalize_text(name) as a list."""
    # str.split() with no argument never yields empty strings, so no extra
    # filtering is required.
    return normalize_text(name).split()
|
||||
|
||||
|
||||
def name_similarity(a, b):
    """Return the similarity of two names in the range [0.0, 1.0].

    Both names are tokenized with _name_tokens and reduced to sets of
    unique tokens. The result is the maximum of three metrics:

    - Jaccard index of the token sets (tolerates swapped word order).
    - Overlap coefficient (shared / smaller set), which tolerates omitted
      words. It only counts when at least 2 tokens are shared, to avoid
      false positives caused by a single common token.
    - difflib.SequenceMatcher ratio over the unique tokens sorted
      alphabetically and joined with spaces (tolerates small typos).

    Returns 0.0 when either name has no tokens after normalization.
    """
    left = set(_name_tokens(a))
    right = set(_name_tokens(b))
    if not (left and right):
        return 0.0

    shared = len(left & right)

    # The union cannot be empty here because both sets are non-empty.
    jaccard = shared / len(left | right)
    overlap = shared / min(len(left), len(right)) if shared >= 2 else 0.0

    ratio = difflib.SequenceMatcher(
        None,
        " ".join(sorted(left)),
        " ".join(sorted(right)),
    ).ratio()

    return max(jaccard, overlap, ratio)
|
||||
|
||||
|
||||
def match_contacts(a, b, threshold=0.80):
    """Decide whether two contacts represent the same person.

    Args:
        a, b: contact dictionaries. GHL and SQLite keys are accepted
            (phone, email, firstName/lastName or first_name/last_name,
            contactName/name). A non-dict contributes no phone/email and is
            handed to contact_full_name as-is.
        threshold: minimum name_similarity score for the names to count as
            matching.

    Returns:
        dict with keys level, score, reasons, phone_match, email_match and
        name_score, where level is one of:

        - 'strong': same phone AND same email AND name score >= threshold.
        - 'medium': same phone AND name score >= threshold (email may be
          missing or different).
        - 'none': anything else. When the phones match but the name score
          is below the threshold, reasons contains
          'phone_collision_unresolved' so mutating callers can report the
          pair instead of merging it. When a name is missing and `a` has
          neither phone nor email, reasons is ['incomplete_data'].

    NOTE(review): the incomplete-data check only looks at a's phone/email;
    b's are not consulted, so the reasons can differ if the arguments are
    swapped.
    """

    def _first_value(contact, *keys):
        # First truthy value among `keys`; '' for non-dicts or no hit.
        if isinstance(contact, dict):
            for key in keys:
                if contact.get(key):
                    return contact[key]
        return ""

    phone_a, phone_b = (normalize_phone(_first_value(c, "phone")) for c in (a, b))
    email_a, email_b = (normalize_email(_first_value(c, "email")) for c in (a, b))
    name_a, name_b = contact_full_name(a), contact_full_name(b)

    both_named = bool(name_a and name_b)
    phone_match = bool(phone_a) and phone_a == phone_b
    email_match = bool(email_a) and email_a == email_b
    name_score = name_similarity(name_a, name_b) if both_named else 0.0

    def _verdict(level, reasons):
        # Every outcome shares this shape; "score" mirrors "name_score".
        return {
            "level": level,
            "score": name_score,
            "reasons": reasons,
            "phone_match": phone_match,
            "email_match": email_match,
            "name_score": name_score,
        }

    # In this branch name_score is 0.0 and both match flags are False.
    if not both_named and not phone_a and not email_a:
        return _verdict("none", ["incomplete_data"])

    # The threshold is only compared once a phone match is established.
    if phone_match and name_score >= threshold:
        if email_match:
            return _verdict("strong", ["phone", "email", "name"])
        return _verdict("medium", ["phone", "name"])

    reasons = []
    if phone_match and name_score < threshold:
        reasons.append("phone_collision_unresolved")
    return _verdict("none", reasons)
|
||||
|
||||
|
||||
def load_accounts(include_main=True):
    """Return the accounts parsed by sync_engine.parse_accounts_csv().

    With include_main=True every parsed account is returned. Otherwise only
    the accounts whose "type" equals "branch" are kept.
    """
    every_account = sync_engine.parse_accounts_csv()
    if not include_main:
        return [acct for acct in every_account if acct.get("type") == "branch"]
    return every_account
|
||||
|
||||
|
||||
def select_accounts(location_id=None, all_locations=False, include_main=False):
    """Pick the accounts a script should operate on.

    Args:
        location_id: when given, only accounts with this exact
            "location_id" are returned. The lookup always includes the main
            account, regardless of include_main.
        all_locations: accepted for call-site compatibility. It does not
            change the result: without location_id the full list is
            returned either way.
        include_main: forwarded to load_accounts when no location_id is
            given.

    Raises:
        SystemExit: when location_id is given but matches no account.
    """
    if not location_id:
        return load_accounts(include_main=include_main)

    found = [
        acct
        for acct in load_accounts(include_main=True)
        if acct["location_id"] == location_id
    ]
    if found:
        return found
    raise SystemExit(f"Location {location_id} no existe en el CSV de mesa de control")
|
||||
|
||||
|
||||
class SchemaResolver:
    """Resolve custom-field IDs per location, caching schema lookups.

    Results fetched through sync_engine.ghl_client are cached per
    (location_id, object_key) for the lifetime of the instance. The token
    is not part of the cache key.
    """

    def __init__(self):
        self._schemas = {}  # (location_id, object_key) -> get_object_schema result
        self._fields = {}  # (location_id, object_key) -> get_object_schema_fields result

    def get_schema(self, token, location_id, object_key):
        """Return the object schema, fetching it only on the first request."""
        key = (location_id, object_key)
        if key in self._schemas:
            return self._schemas[key]
        schema = sync_engine.ghl_client.get_object_schema(token, location_id, object_key)
        self._schemas[key] = schema
        return schema

    def get_fields(self, token, location_id, object_key):
        """Return the raw field list, fetching it only on the first request.

        The raw list preserves duplicates (same name, different IDs).
        """
        key = (location_id, object_key)
        if key in self._fields:
            return self._fields[key]
        field_list = sync_engine.ghl_client.get_object_schema_fields(token, location_id, object_key)
        self._fields[key] = field_list
        return field_list

    def get_field_id(self, token, location_id, object_key, field_name_or_alias):
        """Compatibility wrapper: return the first matching field ID, or None."""
        matches = self.get_field_ids(token, location_id, object_key, field_name_or_alias)
        return next(iter(matches), None)

    def get_field_ids(self, token, location_id, object_key, field_name_or_alias):
        """Return ALL field IDs whose name matches the alias or the given name.

        If field_name_or_alias is a key of FIELD_ALIASES, every variant
        listed there is accepted; otherwise the value itself is the only
        candidate. Names are compared through normalize_name (case and
        spacing are ignored; accents are not, so accent variants must be
        listed in FIELD_ALIASES). Fields lacking a name or an id are
        skipped. Duplicated fields within a location (same name, different
        IDs) all appear in the result, in schema order.
        """
        names = FIELD_ALIASES.get(field_name_or_alias, [field_name_or_alias])
        wanted = {normalize_name(candidate) for candidate in names}
        return [
            field.get("id")
            for field in self.get_fields(token, location_id, object_key)
            if field.get("name")
            and field.get("id")
            and normalize_name(field.get("name")) in wanted
        ]

    def resolve_required(self, token, location_id, object_key, field_names_or_aliases):
        """Resolve several names/aliases at once.

        Returns a tuple (resolved, missing): `resolved` maps each requested
        name to its first matching field ID, `missing` lists the names for
        which no field was found.
        """
        resolved, missing = {}, []
        for requested in field_names_or_aliases:
            found_id = self.get_field_id(token, location_id, object_key, requested)
            if not found_id:
                missing.append(requested)
                continue
            resolved[requested] = found_id
        return resolved, missing
|
||||
|
||||
|
||||
def parse_custom_fields(value):
    """Coerce a custom-fields value into a list.

    - Falsy input (None, "", []) -> new empty list.
    - A list -> returned unchanged.
    - Anything else is passed to json.loads; the decoded value is returned
      only when it is a list. Any decoding failure, or a decoded non-list,
      yields an empty list.
    """
    if not value:
        return []
    if isinstance(value, list):
        return value
    try:
        decoded = json.loads(value)
    except Exception:  # best-effort: undecodable input means "no fields"
        return []
    if isinstance(decoded, list):
        return decoded
    return []
|
||||
|
||||
|
||||
def get_custom_field_value(record_or_json, field_id):
    """Return the value stored for *field_id* in a record's custom fields.

    GHL returns customFields entries with a different value key depending
    on the object:
        contacts      -> "value"
        opportunities -> "fieldValue"
    Some historical endpoints also return "fieldValueString". The keys are
    tried in that order and the first non-None value wins.

    Args:
        record_or_json: either a record dict (custom fields are read from
            "customFields", falling back to "custom_fields") or the custom
            fields themselves as a list or JSON string.
        field_id: ID to look up; compared against each entry's "id" and
            "fieldId".

    Returns:
        The value of the first entry matching *field_id*, or None when
        field_id is falsy, no entry matches, or the matching entry has no
        non-None value.
    """
    if not field_id:
        return None

    if isinstance(record_or_json, dict):
        raw = record_or_json.get("customFields") or record_or_json.get("custom_fields") or []
    else:
        raw = record_or_json

    # A record dict may carry its custom fields as a JSON string (e.g. a
    # stored row); iterating that string directly would fail, so anything
    # that is not already a list is decoded by parse_custom_fields.
    custom_fields = raw if isinstance(raw, list) else parse_custom_fields(raw)

    for field in custom_fields:
        if not isinstance(field, dict):
            continue  # tolerate malformed entries instead of crashing
        if field.get("id") != field_id and field.get("fieldId") != field_id:
            continue
        for key in ("value", "fieldValue", "fieldValueString"):
            val = field.get(key)
            if val is not None:
                return val
        # Only the first matching entry is considered.
        return None
    return None
|
||||
|
||||
|
||||
def custom_fields_by_name(record_or_json, id_to_name):
    """Map a record's custom-field values to readable names.

    Args:
        record_or_json: either a record dict (custom fields are read from
            "customFields", falling back to "custom_fields") or the custom
            fields themselves as a list or JSON string.
        id_to_name: mapping of field ID -> name. Entries whose ID (taken
            from "id", falling back to "fieldId") is not in the mapping,
            or maps to a falsy name, are ignored.

    Returns:
        dict of name -> value. The value is the first non-None among
        "value", "fieldValue" and "fieldValueString" — the same keys
        get_custom_field_value reads, so opportunity fields (which use
        "fieldValue") are not reported as None. It is None when none of
        those keys holds a value.
    """
    if isinstance(record_or_json, dict):
        raw = record_or_json.get("customFields") or record_or_json.get("custom_fields") or []
    else:
        raw = record_or_json

    # Anything that is not already a list (e.g. a JSON string) is decoded
    # by parse_custom_fields.
    custom_fields = raw if isinstance(raw, list) else parse_custom_fields(raw)

    result = {}
    for field in custom_fields:
        if not isinstance(field, dict):
            continue  # tolerate malformed entries instead of crashing
        field_name = id_to_name.get(field.get("id") or field.get("fieldId"))
        if not field_name:
            continue
        result[field_name] = next(
            (
                field[key]
                for key in ("value", "fieldValue", "fieldValueString")
                if field.get(key) is not None
            ),
            None,
        )
    return result
|
||||
|
||||
|
||||
def build_custom_fields_payload(field_values_by_name, schema):
    """Build a list of {"id", "value"} entries from name-keyed values.

    Args:
        field_values_by_name: mapping of field name -> value to send.
        schema: mapping of field name -> field ID.

    Returns:
        One {"id": <field id>, "value": <value>} dict per name that has a
        truthy ID in *schema*, in the iteration order of
        field_values_by_name. Names without an ID are silently dropped.
    """
    id_value_pairs = (
        (schema.get(name), value) for name, value in field_values_by_name.items()
    )
    return [
        {"id": field_id, "value": value}
        for field_id, value in id_value_pairs
        if field_id
    ]
|
||||
Reference in New Issue
Block a user