Files
MP-Manager/scripts/audit_custom_fields_cross_object.py
T
2026-05-30 14:31:19 -06:00

524 lines
21 KiB
Python

#!/usr/bin/env python3
"""Cross-object audit: custom fields of contact vs. opportunity.

Many fields are "duplicated" between the `contact` and `opportunity` objects
within the same account (brand or branch). For example:

    contact."Canal de Origen"            ↔ opportunity."CANAL DE ORIGEN"
    contact."Fuente de Prospecto"        ↔ opportunity."Fuente de Prospecto"
    contact."Sucursal" (SINGLE_OPTIONS)  ↔ opportunity."Sucursal" (TEXT)
    contact."TIENDA"                     ↔ opportunity."TIENDA"

Because the fieldKey differs per object (`contact.*` vs `opportunity.*`), the
match must be made on the normalized name. For each homologous pair we emit
findings:

    NAME_CASE_MISMATCH    — same normalized name but different casing
                            ("Canal de Origen" vs "CANAL DE ORIGEN").
    DATATYPE_MISMATCH     — the same concept is modeled as a picklist in one
                            object and as free text in the other (high risk
                            of non-comparable data).
    OPTIONS_MISSING_*     — options present in one object and absent from the
                            other. The comparison is case-insensitive (after
                            trimming); options that differ only in casing are
                            reported as OPTIONS_CASE_MISMATCH instead.
    OPTIONS_CASE_MISMATCH — the option exists on both sides but with
                            different casing ("Sucursal" vs "SUCURSAL").
    OPTIONS_NOT_UPPERCASE — the option contains lowercase characters and the
                            convention for this field is UPPERCASE.
                            (Only emitted for fields where most options are
                            already uppercase — a heuristic.)
    ONLY_IN_CONTACT       — the field exists only in contact (not compared).
    ONLY_IN_OPPORTUNITY   — the field exists only in opportunity.

Output: detailed JSON + XLSX for human review in
generated/reports/audit_custom_fields/.
"""
import argparse
import datetime
import json
import os
import sys
import unicodedata
import warnings
from collections import defaultdict
# Ignore warnings whose message matches the urllib3 version-mismatch pattern.
warnings.filterwarnings("ignore", message=r"urllib3 .* doesn't match a supported version!")
# Parent of the directory containing this script. It is put at the front of
# sys.path before the imports below (presumably so that `sync_engine` and
# `script_audit` resolve from there — confirm against the repo layout).
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)
import sync_engine # noqa: E402
import script_audit # noqa: E402
# Location ID selected by the --brand-only flag (see select_targets).
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"
# dataType values treated as picklists; options are only compared when both
# fields of a pair have one of these types (see audit_location).
PICKLIST_TYPES = ("SINGLE_OPTIONS", "MULTIPLE_OPTIONS", "RADIO", "CHECKBOX")
def normalize_name(value):
    """Return a canonical key used to match field names across objects.

    The value is stringified (falsy values become ""), trimmed, lower-cased,
    stripped of diacritics (NFKD decomposition minus combining marks) and of
    bracket / question / exclamation characters, and finally has every run of
    whitespace collapsed to a single space.
    """
    decomposed = unicodedata.normalize("NFKD", str(value or "").strip().lower())
    unaccented = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    # Drop brackets and question/exclamation marks so that variants such as
    # "[Correo_Empresa]" and "¿Cuándo necesitas el dinero?" compare equal to
    # their undecorated forms.
    cleaned = unaccented.translate(str.maketrans("", "", "[](){}¿?¡!"))
    return " ".join(cleaned.split())
def extract_option_labels(field):
    """Return the option labels declared on a custom-field definition.

    A non-empty ``options`` list takes precedence: dict entries contribute
    their ``label`` (falling back to ``key`` when the label is None) as-is,
    any other non-None entry is stringified. Otherwise the ``picklistOptions``
    list is used, stringifying its non-None items. Returns [] when neither
    source is a usable list.
    """
    options = field.get("options")
    if isinstance(options, list) and options:
        collected = []
        for entry in options:
            if entry is None:
                continue
            if not isinstance(entry, dict):
                collected.append(str(entry))
                continue
            text = entry.get("label")
            if text is None:
                text = entry.get("key")
            if text is not None:
                collected.append(text)
        return collected

    fallback = field.get("picklistOptions")
    if not isinstance(fallback, list):
        return []
    return [str(item) for item in fallback if item is not None]
def fetch_custom_fields(account, object_key):
    """Fetch the schema fields of ``object_key`` and keep only custom ones.

    Calls ``sync_engine.ghl_client.get_object_schema_fields`` with the
    account's ``token`` and ``location_id``. A field is kept when it has a
    truthy ``id`` and ``name``, its ``standard`` flag is falsy, and its
    ``dataType`` is not ``"STANDARD_FIELD"``. A falsy response yields [].
    """
    schema_fields = sync_engine.ghl_client.get_object_schema_fields(
        account["token"], account["location_id"], object_key
    )
    custom = []
    for entry in schema_fields or []:
        if not (entry.get("id") and entry.get("name")):
            continue
        if entry.get("standard"):
            continue
        if entry.get("dataType") == "STANDARD_FIELD":
            continue
        custom.append(entry)
    return custom
def is_mostly_uppercase(labels):
    """Tell whether most options are written in UPPERCASE.

    Only labels containing at least one letter are considered (digits,
    punctuation and spaces are ignored). If at least 60% of those labels are
    entirely uppercase, the field is assumed to follow an UPPERCASE
    convention. Returns False when no label contains a letter.
    """
    with_letters = 0
    fully_upper = 0
    for label in labels:
        text = str(label)
        if not any(ch.isalpha() for ch in text):
            continue
        with_letters += 1
        if text == text.upper():
            fully_upper += 1
    if with_letters == 0:
        return False
    return fully_upper / with_letters >= 0.60
def label_is_uppercase(label):
    """Return True when the label respects an UPPERCASE convention.

    Labels without any letter are treated as compliant; otherwise the
    stringified label must be equal to its own upper-cased form.
    """
    text = str(label)
    has_letters = any(ch.isalpha() for ch in text)
    return not has_letters or text == text.upper()
def _group_labels_by_norm(labels):
    """Group labels under their trimmed, lower-cased text."""
    groups = defaultdict(list)
    for label in labels:
        groups[str(label).strip().lower()].append(label)
    return groups


def compare_options(contact_labels, opp_labels, field_name, uppercase_convention):
    """Compare the option labels of a contact field and an opportunity field.

    Labels are matched on their trimmed, lower-cased text.

    Args:
        contact_labels: option labels of the contact field.
        opp_labels: option labels of the opportunity field.
        field_name: name stored in each finding's ``field_name``.
        uppercase_convention: when truthy, also report labels that are not
            fully uppercase.

    Returns:
        A list of finding dicts, in this order when present:

        - OPTIONS_MISSING_IN_OPPORTUNITY / OPTIONS_MISSING_IN_CONTACT: labels
          present on one side whose normalized text does not exist on the
          other side at all (not even with another casing).
        - OPTIONS_CASE_MISMATCH: the normalized text exists on both sides but
          the exact label variants differ. ``case_only`` is True when every
          variant differs from the normalized text only by letter case (no
          surrounding-whitespace difference).
        - OPTIONS_NOT_UPPERCASE: only with ``uppercase_convention``; lists
          the offending labels of both sides.
    """
    findings = []
    contact_by_norm = _group_labels_by_norm(contact_labels)
    opp_by_norm = _group_labels_by_norm(opp_labels)

    # Labels whose text has no counterpart at all on the other side.
    truly_missing_in_opp = [
        l for l in contact_labels
        if str(l).strip().lower() not in opp_by_norm
    ]
    truly_missing_in_contact = [
        l for l in opp_labels
        if str(l).strip().lower() not in contact_by_norm
    ]

    # Labels whose "same" text exists on both sides but is spelled differently.
    case_mismatches = []
    for norm, contact_variants in contact_by_norm.items():
        if norm not in opp_by_norm:
            continue
        contact_exact = set(contact_variants)
        opp_exact = set(opp_by_norm[norm])
        if contact_exact == opp_exact:
            continue
        case_mismatches.append({
            "normalized": norm,
            # key=str keeps the usual ordering for strings and avoids a
            # TypeError when a label is not a string (e.g. a numeric label).
            "contact_variants": sorted(contact_exact, key=str),
            "opportunity_variants": sorted(opp_exact, key=str),
            "case_only": all(
                str(v).lower() == norm for v in contact_exact | opp_exact
            ),
        })

    if truly_missing_in_opp:
        findings.append({
            "category": "OPTIONS_MISSING_IN_OPPORTUNITY",
            "field_name": field_name,
            "labels": truly_missing_in_opp,
            "details": (
                f"{len(truly_missing_in_opp)} opción(es) existen en contact "
                "y NO existen (ni en otro casing) en opportunity."
            ),
        })
    if truly_missing_in_contact:
        findings.append({
            "category": "OPTIONS_MISSING_IN_CONTACT",
            "field_name": field_name,
            "labels": truly_missing_in_contact,
            "details": (
                f"{len(truly_missing_in_contact)} opción(es) existen en opportunity "
                "y NO existen (ni en otro casing) en contact."
            ),
        })
    if case_mismatches:
        findings.append({
            "category": "OPTIONS_CASE_MISMATCH",
            "field_name": field_name,
            "mismatches": case_mismatches,
            "details": (
                f"{len(case_mismatches)} opción(es) presentes en ambos objetos "
                "con casing/texto distinto."
            ),
        })
    if uppercase_convention:
        offenders_contact = [l for l in contact_labels if not label_is_uppercase(l)]
        offenders_opp = [l for l in opp_labels if not label_is_uppercase(l)]
        if offenders_contact or offenders_opp:
            findings.append({
                "category": "OPTIONS_NOT_UPPERCASE",
                "field_name": field_name,
                "contact_offenders": offenders_contact,
                "opportunity_offenders": offenders_opp,
                "details": (
                    "El campo aparenta usar convenio MAYÚSCULAS pero contiene "
                    "opciones en minúsculas/mixed-case."
                ),
            })
    return findings
def _pair_findings(norm, contact_field, opp_field):
    """Build the findings for one matched contact/opportunity field pair."""
    c_name = contact_field.get("name")
    o_name = opp_field.get("name")
    c_type = contact_field.get("dataType")
    o_type = opp_field.get("dataType")
    shared = {
        "normalized_name": norm,
        "contact_field_name": c_name,
        "opportunity_field_name": o_name,
        "contact_field_id": contact_field.get("id"),
        "opportunity_field_id": opp_field.get("id"),
        "contact_fieldKey": contact_field.get("fieldKey"),
        "opportunity_fieldKey": opp_field.get("fieldKey"),
    }
    results = []

    # Field name spelled differently on each object.
    if c_name != o_name:
        results.append({
            **shared,
            "category": "NAME_CASE_MISMATCH",
            "details": (
                f"Nombre del campo difiere entre objetos: "
                f"contact={c_name!r}, opportunity={o_name!r}."
            ),
        })

    # Same concept modeled with different data types.
    if c_type != o_type:
        results.append({
            **shared,
            "category": "DATATYPE_MISMATCH",
            "contact_dataType": c_type,
            "opportunity_dataType": o_type,
            "details": (
                f"dataType distinto: contact={c_type}, "
                f"opportunity={o_type}. El mismo concepto "
                "se está modelando con tipos distintos."
            ),
        })

    # Options are only comparable when both sides are picklists.
    if c_type not in PICKLIST_TYPES or o_type not in PICKLIST_TYPES:
        return results
    c_labels = extract_option_labels(contact_field)
    o_labels = extract_option_labels(opp_field)
    if not (c_labels or o_labels):
        return results
    # UPPERCASE convention applies when most labels on either side are
    # already uppercase.
    uppercase = is_mostly_uppercase(c_labels) or is_mostly_uppercase(o_labels)
    for option_finding in compare_options(c_labels, o_labels, c_name, uppercase):
        results.append({**shared, **option_finding})
    return results


def audit_location(location_name, location_id, contact_fields, opportunity_fields):
    """Compare contact and opportunity custom fields within one location.

    Fields are grouped by normalized name and walked in sorted order. A name
    present on only one object yields an informational ONLY_IN_CONTACT /
    ONLY_IN_OPPORTUNITY finding (described by the first field of the group).
    A name present on both yields the pairwise findings of every
    contact × opportunity combination, since an object may hold duplicates
    (a single field per side is the expected case).

    ``location_name`` and ``location_id`` are accepted but not used in the
    body. Returns the list of finding dicts.
    """
    grouped_contact = defaultdict(list)
    grouped_opp = defaultdict(list)
    for field in contact_fields:
        grouped_contact[normalize_name(field.get("name"))].append(field)
    for field in opportunity_fields:
        grouped_opp[normalize_name(field.get("name"))].append(field)

    results = []
    for norm in sorted(grouped_contact.keys() | grouped_opp.keys()):
        contact_group = grouped_contact.get(norm, [])
        opp_group = grouped_opp.get(norm, [])

        if contact_group and not opp_group:
            # Not necessarily a problem: many fields apply to only one of the
            # two objects. Reported as informational, not critical.
            lone = contact_group[0]
            results.append({
                "category": "ONLY_IN_CONTACT",
                "normalized_name": norm,
                "contact_name": lone.get("name"),
                "contact_dataType": lone.get("dataType"),
                "details": "Campo presente solo en contact; sin contraparte en opportunity.",
            })
        elif opp_group and not contact_group:
            lone = opp_group[0]
            results.append({
                "category": "ONLY_IN_OPPORTUNITY",
                "normalized_name": norm,
                "opportunity_name": lone.get("name"),
                "opportunity_dataType": lone.get("dataType"),
                "details": "Campo presente solo en opportunity; sin contraparte en contact.",
            })
        else:
            for contact_field in contact_group:
                for opp_field in opp_group:
                    results.extend(_pair_findings(norm, contact_field, opp_field))
    return results
def write_json(report, path):
    """Serialize ``report`` to ``path`` as UTF-8 JSON.

    Non-ASCII characters are written as-is and the output is indented by two
    spaces.
    """
    with open(path, mode="w", encoding="utf-8") as handle:
        json.dump(report, handle, indent=2, ensure_ascii=False)
def write_xlsx(report, path):
    """Write the audit report to an XLSX workbook for human review.

    Three sheets are produced:

    - "Resumen": one row per location with the total and per-category counts.
    - "Hallazgos": one row per finding, excluding the informational
      ONLY_IN_* categories. Every other category is listed, including
      FETCH_ERROR, so failed locations are visible to the reviewer.
    - "Solo en un objeto": the ONLY_IN_CONTACT / ONLY_IN_OPPORTUNITY findings.

    Args:
        report: dict with a ``locations`` mapping of location id to
            ``{"name": ..., "findings": [...]}``.
        path: destination file path passed to ``Workbook.save``.
    """
    import openpyxl
    from openpyxl.styles import Font, PatternFill, Alignment

    wb = openpyxl.Workbook()
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="305496")
    severity_fills = {
        "DATATYPE_MISMATCH": PatternFill("solid", fgColor="F8CBAD"),
        "OPTIONS_MISSING_IN_OPPORTUNITY": PatternFill("solid", fgColor="FFE699"),
        "OPTIONS_MISSING_IN_CONTACT": PatternFill("solid", fgColor="FFE699"),
        "OPTIONS_CASE_MISMATCH": PatternFill("solid", fgColor="FFD966"),
        "OPTIONS_NOT_UPPERCASE": PatternFill("solid", fgColor="C6E0B4"),
        "NAME_CASE_MISMATCH": PatternFill("solid", fgColor="BDD7EE"),
        "FETCH_ERROR": PatternFill("solid", fgColor="FF9999"),
    }
    informational = ("ONLY_IN_CONTACT", "ONLY_IN_OPPORTUNITY")
    # Categories counted in the summary sheet, in column order.
    summary_categories = (
        "NAME_CASE_MISMATCH", "DATATYPE_MISMATCH",
        "OPTIONS_MISSING_IN_OPPORTUNITY", "OPTIONS_MISSING_IN_CONTACT",
        "OPTIONS_CASE_MISMATCH", "OPTIONS_NOT_UPPERCASE",
        "ONLY_IN_CONTACT", "ONLY_IN_OPPORTUNITY",
    )

    def write_header(ws, headers):
        """Append a styled header row and freeze it."""
        ws.append(headers)
        for cell in ws[1]:
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = Alignment(horizontal="center")
        ws.freeze_panes = "A2"

    # Sheet 1: summary
    ws_sum = wb.active
    ws_sum.title = "Resumen"
    write_header(ws_sum, [
        "Cuenta", "Location ID", "Total hallazgos",
        "Name case", "DataType", "Opts missing→opp", "Opts missing→contact",
        "Opts case", "Opts not UPPER",
        "Solo en contact", "Solo en opportunity",
    ])
    for loc_id, loc_data in report["locations"].items():
        findings = loc_data["findings"]
        # Single pass over the findings instead of one pass per category.
        counts = defaultdict(int)
        for f in findings:
            counts[f["category"]] += 1
        ws_sum.append(
            [loc_data["name"], loc_id, len(findings)]
            + [counts[cat] for cat in summary_categories]
        )
    for col_letter in ["A", "B"]:
        ws_sum.column_dimensions[col_letter].width = 32

    # Sheet 2: critical findings (everything except ONLY_IN_*)
    ws_f = wb.create_sheet("Hallazgos")
    write_header(ws_f, [
        "Cuenta", "Location ID", "Categoría", "Campo (normalizado)",
        "Nombre en contact", "Nombre en opportunity",
        "dataType contact", "dataType opp",
        "Labels / detalles",
    ])
    for loc_id, loc_data in report["locations"].items():
        for f in loc_data["findings"]:
            if f["category"] in informational:
                continue
            payload_bits = []
            if f.get("labels"):
                payload_bits.append("labels: " + "; ".join(map(str, f["labels"])))
            if f.get("mismatches"):
                for m in f["mismatches"]:
                    # Separate both variant lists; without a separator they
                    # run together and the cell is unreadable.
                    payload_bits.append(
                        f" {m['contact_variants']} ↔ {m['opportunity_variants']}"
                        + (" (case-only)" if m.get("case_only") else "")
                    )
            if f.get("contact_offenders") or f.get("opportunity_offenders"):
                payload_bits.append("contact: " + "; ".join(map(str, f.get("contact_offenders") or [])))
                payload_bits.append("opportunity: " + "; ".join(map(str, f.get("opportunity_offenders") or [])))
            payload_bits.append(f.get("details", ""))
            row = [
                loc_data["name"], loc_id, f["category"],
                f.get("normalized_name", ""),
                f.get("contact_field_name", ""),
                f.get("opportunity_field_name", ""),
                f.get("contact_dataType", ""),
                f.get("opportunity_dataType", ""),
                " | ".join(b for b in payload_bits if b),
            ]
            ws_f.append(row)
            fill = severity_fills.get(f["category"])
            if fill:
                for cell in ws_f[ws_f.max_row]:
                    cell.fill = fill
    for col_letter, width in [("A", 32), ("C", 28), ("D", 28), ("E", 30), ("F", 30), ("I", 80)]:
        ws_f.column_dimensions[col_letter].width = width

    # Sheet 3: fields present in only one object (informational)
    ws_o = wb.create_sheet("Solo en un objeto")
    write_header(ws_o, ["Cuenta", "Location ID", "Categoría", "Nombre", "dataType"])
    for loc_id, loc_data in report["locations"].items():
        for f in loc_data["findings"]:
            if f["category"] in informational:
                ws_o.append([
                    loc_data["name"], loc_id, f["category"],
                    f.get("contact_name") or f.get("opportunity_name"),
                    f.get("contact_dataType") or f.get("opportunity_dataType"),
                ])
    for col_letter, width in [("A", 32), ("D", 36)]:
        ws_o.column_dimensions[col_letter].width = width

    wb.save(path)
def select_targets(args, accounts):
    """Pick the accounts to audit according to the CLI flags.

    Precedence is ``--location`` (exact ``location_id`` match, exiting when
    none is found), then ``--all`` (the account list itself), then
    ``--brand-only`` (accounts whose id equals ``BRAND_LOCATION_ID``).
    Raises SystemExit when no flag was given.
    """
    wanted = args.location
    if wanted:
        selected = [acct for acct in accounts if acct["location_id"] == wanted]
        if selected:
            return selected
        raise SystemExit(f"Location {wanted} no encontrada en el CSV.")
    if args.all:
        return accounts
    if not args.brand_only:
        raise SystemExit("Especifica --location <id>, --brand-only o --all.")
    return [acct for acct in accounts if acct["location_id"] == BRAND_LOCATION_ID]
def main():
    """CLI entry point: audit the selected accounts and write the reports.

    Parses the command line, selects the target accounts, fetches the contact
    and opportunity custom fields of each one and audits them. A failure
    while fetching or auditing one account is recorded as a FETCH_ERROR
    finding for that account and does not stop the run. The loop stops early
    when ``script_audit.wait_if_paused_or_stopped`` returns a falsy value.
    Finally the JSON and XLSX reports are written (creating their directories
    when needed) and a summary is printed.
    """
    parser = argparse.ArgumentParser(
        description="Audit cross-object: contact↔opportunity custom fields dentro de cada cuenta.",
    )
    parser.add_argument("--location", help="Procesar solo esta location (debugging).")
    parser.add_argument("--brand-only", action="store_true",
                        help="Procesar solo la Marca Principal.")
    parser.add_argument("--all", action="store_true",
                        help="Procesar Marca + todas las sucursales.")
    parser.add_argument("--json", help="Ruta de salida JSON (opcional).")
    parser.add_argument("--xlsx", help="Ruta de salida XLSX (opcional).")
    parser.add_argument("--run-id", help="Audit run ID (dashboard).")
    args = parser.parse_args()

    # Field names and labels contain non-ASCII text; force UTF-8 on stdout
    # where the stream supports reconfiguration.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    accounts = sync_engine.parse_accounts_csv()
    targets = select_targets(args, accounts)
    report = {
        "run_id": args.run_id,
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "locations": {},
    }

    for acc in targets:
        if not script_audit.wait_if_paused_or_stopped(args.run_id):
            print("\nDetención solicitada. Saliendo.")
            break
        loc_id = acc["location_id"]
        loc_name = acc["nombre"]
        print(f"\n[LOCATION] {loc_name} ({loc_id})")
        try:
            c_fields = fetch_custom_fields(acc, "contact")
            o_fields = fetch_custom_fields(acc, "opportunity")
            print(f" contact={len(c_fields)} cf, opportunity={len(o_fields)} cf")
            findings = audit_location(loc_name, loc_id, c_fields, o_fields)
        except Exception as exc:
            # Per-account boundary: record the failure and keep auditing the
            # remaining accounts.
            print(f" ERROR: {exc}")
            findings = [{"category": "FETCH_ERROR", "details": str(exc)}]
        report["locations"][loc_id] = {"name": loc_name, "findings": findings}

        cats = defaultdict(int)
        for f in findings:
            cats[f["category"]] += 1
        if cats:
            for cat, count in sorted(cats.items()):
                print(f" {cat}: {count}")
        else:
            print(" sin discrepancias")

    from common import REPORT_AUDIT_CUSTOM_FIELDS
    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    json_path = args.json or os.path.join(REPORT_AUDIT_CUSTOM_FIELDS, f"audit_cross_object_{ts}.json")
    xlsx_path = args.xlsx or os.path.join(REPORT_AUDIT_CUSTOM_FIELDS, f"audit_cross_object_{ts}.xlsx")

    # Make sure the destination directories exist so the results of a long
    # run are not lost to a FileNotFoundError at write time.
    for out_path in (json_path, xlsx_path):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    write_json(report, json_path)
    write_xlsx(report, xlsx_path)

    total = sum(len(loc["findings"]) for loc in report["locations"].values())
    critical_total = sum(
        1 for loc in report["locations"].values() for f in loc["findings"]
        if f["category"] not in ("ONLY_IN_CONTACT", "ONLY_IN_OPPORTUNITY")
    )
    print(f"\n{'=' * 60}")
    print(f"Total hallazgos: {total} ({critical_total} críticos) en {len(report['locations'])} cuentas")
    print(f"JSON: {json_path}")
    print(f"XLSX: {xlsx_path}")


if __name__ == "__main__":
    main()