"""Shared helpers for MP scripts. Keep GHL custom-field access centralized so scripts resolve dynamic field IDs per location before reading or updating contact/opportunity data. """ import difflib import json import os import re import sys import unicodedata ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if ROOT_DIR not in sys.path: sys.path.insert(0, ROOT_DIR) import sync_engine # noqa: E402 # Re-export paths so scripts can do `from common import REPORTS_DIR, MIGRATIONS_DIR, ...` # y mantengan una sola fuente de verdad. Ver `paths.py` en la raíz. from paths import ( # noqa: E402,F401 GENERATED_DIR, DATA_DIR, DB_PATH, REPORTS_DIR, REPORT_AUDIT_CUSTOM_FIELDS, REPORT_DUPLICADOS, REPORT_DRIFT, REPORT_COVERAGE, EXPORTS_DIR, LOGS_DIR, SCRIPT_RUNS_DIR, MIGRATIONS_DIR, BROWSER_DIR, SESSION_FILE, BROWSER_PROFILE_DEFAULT, SCREENSHOTS_DIR, RUNTIME_DIR, SERVER_INFO, BATCH_DIR, ARCHIVE_DIR, ) BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3" FIELD_ALIASES = { "sucursal": ["Sucursal", "SUCURSAL"], "tienda": ["TIENDA", "Tienda"], "canal_origen": ["Canal de Origen", "CANAL DE ORIGEN", "Canal de Origen de la Oportunidad"], "fuente_prospecto": ["Fuente de Prospecto", "Fuente del Prospecto", "FUENTE DE PROSPECTO"], "tipo_lead": ["Tipo de Lead", "TIPO DE LEAD", "Lead Type"], "vehiculo": ["Vehículo", "Vehiculo"], "marca_vehiculo": ["Marca del Vehículo", "MARCA DE VEHÍCULO", "Marca del Vehiculo"], "version_vehiculo": ["Versión del Vehículo", "VERSION DEL VEHICULO", "Versión del Vehiculo"], "ano_vehiculo": ["Año del Vehículo", "AÑO DE VEHÍCULO", "Año del Vehiculo"], } def normalize_name(value): # Mantiene comportamiento original (sin remover acentos) para no afectar a # SchemaResolver, que confía en FIELD_ALIASES para variantes de acentos. # Para comparación de nombres de contactos usar normalize_text. return " ".join(str(value or "").strip().lower().split()) _PUNCT_RE = re.compile(r"[^\w\s]", flags=re.UNICODE) def normalize_text(value): """Normaliza texto para comparación tolerante (nombres, sucursales, etc.). - NFD + remoción de combining chars (quita acentos: á→a, ñ→n) - lower() - quita puntuación, deja palabras y espacios - colapsa espacios Idempotente y sin dependencias externas. """ text = unicodedata.normalize("NFD", str(value or "")) text = "".join(ch for ch in text if not unicodedata.combining(ch)) text = text.lower() text = _PUNCT_RE.sub(" ", text) return " ".join(text.split()) PHONE_TAIL_LENGTH = 10 def normalize_phone(value): """Devuelve los últimos 10 dígitos del teléfono, o '' si no llega a 10.""" digits = re.sub(r"\D+", "", str(value or "")) if len(digits) < PHONE_TAIL_LENGTH: return "" return digits[-PHONE_TAIL_LENGTH:] def normalize_email(value): """NFKC + strip + lower. Devuelve '' si no parece email (sin '@').""" text = unicodedata.normalize("NFKC", str(value or "")).strip().lower() return text if "@" in text else "" def contact_full_name(contact): """Resuelve y normaliza el nombre completo de un contacto. Acepta diccionarios con keys de GHL (contactName / name / firstName+lastName) o de SQLite (first_name / last_name). Devuelve string normalizado con normalize_text (sin acentos, sin puntuación, lowercase). """ if not isinstance(contact, dict): return normalize_text(contact) full = contact.get("contactName") or contact.get("name") if not full: first = contact.get("firstName") or contact.get("first_name") or "" last = contact.get("lastName") or contact.get("last_name") or "" full = f"{first} {last}" return normalize_text(full) def _name_tokens(name): return [t for t in normalize_text(name).split() if t] def name_similarity(a, b): """Similitud entre dos nombres en rango [0.0, 1.0]. Combina tres métricas y devuelve el máximo: - Jaccard sobre tokens (tolera orden invertido) - Overlap sobre tokens (tolera palabras omitidas; requiere ≥2 tokens compartidos para evitar falsos positivos por un único token común) - SequenceMatcher.ratio() sobre la cadena completa normalizada (tolera tipos y reordenamientos menores) Devuelve 0.0 si alguno de los nombres está vacío post-normalización. """ tokens_a = set(_name_tokens(a)) tokens_b = set(_name_tokens(b)) if not tokens_a or not tokens_b: return 0.0 intersection = tokens_a & tokens_b union = tokens_a | tokens_b jaccard = len(intersection) / len(union) if union else 0.0 if len(intersection) >= 2: overlap = len(intersection) / min(len(tokens_a), len(tokens_b)) else: overlap = 0.0 norm_a = " ".join(sorted(tokens_a)) norm_b = " ".join(sorted(tokens_b)) ratio = difflib.SequenceMatcher(None, norm_a, norm_b).ratio() return max(jaccard, overlap, ratio) def match_contacts(a, b, threshold=0.80): """Decide si dos contactos representan a la misma persona. Niveles devueltos: - 'strong' : phone idéntico AND email idéntico AND name_similarity ≥ threshold - 'medium' : phone idéntico AND name_similarity ≥ threshold (email puede faltar/diferir) - 'none' : cualquier otro caso. Si comparten phone pero el nombre no alcanza el threshold, reasons incluye 'phone_collision_unresolved' para que los mutadores los reporten en vez de fusionarlos. Devuelve dict con keys: level, score, reasons, phone_match, email_match, name_score. `a` y `b` son diccionarios de contacto. Se aceptan keys de GHL y de SQLite (phone, email, firstName/lastName o first_name/last_name, contactName/name). """ def _get(contact, *keys): if not isinstance(contact, dict): return "" for key in keys: if contact.get(key): return contact[key] return "" phone_a = normalize_phone(_get(a, "phone")) phone_b = normalize_phone(_get(b, "phone")) email_a = normalize_email(_get(a, "email")) email_b = normalize_email(_get(b, "email")) name_a = contact_full_name(a) name_b = contact_full_name(b) phone_match = bool(phone_a) and phone_a == phone_b email_match = bool(email_a) and email_a == email_b name_score = name_similarity(name_a, name_b) if name_a and name_b else 0.0 reasons = [] if not (name_a and name_b) and not phone_a and not email_a: reasons.append("incomplete_data") return { "level": "none", "score": 0.0, "reasons": reasons, "phone_match": False, "email_match": False, "name_score": 0.0, } if phone_match and email_match and name_score >= threshold: reasons.extend(["phone", "email", "name"]) return { "level": "strong", "score": name_score, "reasons": reasons, "phone_match": True, "email_match": True, "name_score": name_score, } if phone_match and name_score >= threshold: reasons.extend(["phone", "name"]) return { "level": "medium", "score": name_score, "reasons": reasons, "phone_match": True, "email_match": email_match, "name_score": name_score, } if phone_match and name_score < threshold: reasons.append("phone_collision_unresolved") return { "level": "none", "score": name_score, "reasons": reasons, "phone_match": phone_match, "email_match": email_match, "name_score": name_score, } def load_accounts(include_main=True): accounts = sync_engine.parse_accounts_csv() if include_main: return accounts return [account for account in accounts if account.get("type") == "branch"] def select_accounts(location_id=None, all_locations=False, include_main=False): if location_id: matches = [account for account in load_accounts(include_main=True) if account["location_id"] == location_id] if not matches: raise SystemExit(f"Location {location_id} no existe en el CSV de mesa de control") return matches if all_locations: return load_accounts(include_main=include_main) return load_accounts(include_main=include_main) class SchemaResolver: def __init__(self): self._schemas = {} self._fields = {} def get_schema(self, token, location_id, object_key): cache_key = (location_id, object_key) if cache_key not in self._schemas: self._schemas[cache_key] = sync_engine.ghl_client.get_object_schema(token, location_id, object_key) return self._schemas[cache_key] def get_fields(self, token, location_id, object_key): # Raw fields list — preserva duplicados (mismo name, IDs distintos). cache_key = (location_id, object_key) if cache_key not in self._fields: self._fields[cache_key] = sync_engine.ghl_client.get_object_schema_fields(token, location_id, object_key) return self._fields[cache_key] def get_field_id(self, token, location_id, object_key, field_name_or_alias): # Compat: devuelve un solo ID (el primero de los matches). ids = self.get_field_ids(token, location_id, object_key, field_name_or_alias) return ids[0] if ids else None def get_field_ids(self, token, location_id, object_key, field_name_or_alias): """Devuelve TODOS los field IDs del schema que matcheen con el alias o con el nombre dado. Resuelve duplicados intra-location (mismo name, IDs distintos) y normaliza mayúsculas/acentos/espacios. """ candidate_names = FIELD_ALIASES.get(field_name_or_alias, [field_name_or_alias]) target_norm = {normalize_name(name) for name in candidate_names} fields = self.get_fields(token, location_id, object_key) matches = [] for field in fields: fname = field.get("name") fid = field.get("id") if not fname or not fid: continue if normalize_name(fname) in target_norm: matches.append(fid) return matches def resolve_required(self, token, location_id, object_key, field_names_or_aliases): resolved = {} missing = [] for field_name in field_names_or_aliases: field_id = self.get_field_id(token, location_id, object_key, field_name) if field_id: resolved[field_name] = field_id else: missing.append(field_name) return resolved, missing def parse_custom_fields(value): if not value: return [] if isinstance(value, list): return value try: parsed = json.loads(value) except Exception: return [] return parsed if isinstance(parsed, list) else [] def get_custom_field_value(record_or_json, field_id): # GHL devuelve customFields con keys distintas según el objeto: # contacts → "value" # opportunities → "fieldValue" # Algunos endpoints históricos también devuelven "fieldValueString". if not field_id: return None if isinstance(record_or_json, dict): custom_fields = record_or_json.get("customFields") or record_or_json.get("custom_fields") or [] else: custom_fields = parse_custom_fields(record_or_json) for field in custom_fields or []: if field.get("id") != field_id and field.get("fieldId") != field_id: continue for key in ("value", "fieldValue", "fieldValueString"): val = field.get(key) if val is not None: return val return None return None def custom_fields_by_name(record_or_json, id_to_name): result = {} custom_fields = record_or_json.get("customFields", []) if isinstance(record_or_json, dict) else parse_custom_fields(record_or_json) for field in custom_fields or []: field_id = field.get("id") or field.get("fieldId") field_name = id_to_name.get(field_id) if field_name: result[field_name] = field.get("value") return result def build_custom_fields_payload(field_values_by_name, schema): payload = [] for field_name, value in field_values_by_name.items(): field_id = schema.get(field_name) if field_id: payload.append({"id": field_id, "value": value}) return payload