Files
MP-Manager/scripts/audit_custom_fields_full_from_db.py
T
2026-05-30 14:31:19 -06:00

535 lines
21 KiB
Python

#!/usr/bin/env python3
"""Auditoría completa de custom fields usando SQLite (rápida, sin hits a API).
Verifica que las 49 sucursales tengan EXACTAMENTE la misma configuración que
Marca en sus custom fields (contact + opportunity). Lee únicamente la tabla
`object_schemas` de SQLite — esa tabla se mantiene fresca vía
`sync_account_metadata` en sync_engine. Si dudas de la frescura, corre antes:
python -c "import sync_engine; \\
[sync_engine.sync_account_metadata(a['location_id'], a['token']) \\
for a in sync_engine.parse_accounts_csv()]"
Categorías de hallazgos por sucursal (vs Marca):
MISSING_IN_BRANCH — campo existe en Marca y no en sucursal.
EXTRA_IN_BRANCH — campo existe en sucursal y no en Marca.
NAME_MISMATCH — mismo fieldKey, distinto display name.
DATATYPE_MISMATCH — mismo fieldKey, distinto dataType.
OPTIONS_MISMATCH — mismo fieldKey, opciones distintas (orden o valores).
POSITION_MISMATCH — mismo fieldKey, posición distinta en UI (cosmético).
También emite hallazgos cross-object dentro de cada cuenta:
XOBJ_DATATYPE_MISMATCH — mismo nombre normalizado, dataTypes distintos
entre contact y opportunity.
XOBJ_NAME_CASE — mismo nombre normalizado, casing distinto.
XOBJ_OPTIONS_DIFFER — opciones distintas entre el field homólogo.
Output:
- JSON ejecutivo + XLSX en generated/reports/audit_custom_fields/.
- Resumen por stdout: cuentas afectadas por categoría, lista de hallazgos.
Filtros excluidos automáticamente (decisiones de negocio confirmadas):
- `contact.Sucursal` SINGLE_OPTIONS vs `opportunity.Sucursal` TEXT
es divergencia intencional. El audit lo registra como `IGNORED_BY_RULE`
en lugar de DATATYPE_MISMATCH.
"""
import argparse
import datetime
import json
import os
import sqlite3
import sys
import unicodedata
from collections import defaultdict
# Put the directory one level above this script's directory on sys.path before
# importing project modules (presumably where `paths` lives — confirm against
# the repository layout).
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)
# Imported after the sys.path tweak on purpose, hence the E402 suppression.
from paths import DB_PATH, REPORT_AUDIT_CUSTOM_FIELDS # noqa: E402
# Location id of the brand ("Marca") account; main() compares every other
# account against this one.
BRAND_LOCATION_ID = "GbKkBpCmKu2QmloKFHy3"
# Explicit business rules: divergences confirmed as intentional by the project
# owner. Each entry is (normalized_field_name, "object_a", "object_b").
INTENTIONAL_DIVERGENCES = {
    ("sucursal", "contact", "opportunity"),
}
def normalize_name(value):
    """Return a comparison key for a field display name.

    The value is stringified (falsy values become ""), trimmed, lower-cased,
    stripped of combining marks after NFKD decomposition, and every run of
    whitespace is collapsed to a single space.
    """
    text = str(value or "").strip().lower()
    decomposed = unicodedata.normalize("NFKD", text)
    # Dropping combining code points removes accents/diacritics.
    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
    words = "".join(base_chars).split()
    return " ".join(words)
def extract_options(raw_json):
    """Return the list of option labels stored in a field's raw schema JSON.

    Lookup order:
      1. ``options`` — a list whose items are either dicts (the ``label`` is
         used, falling back to ``key``; items with neither are skipped) or
         scalars (stringified; ``None`` items are skipped).
      2. ``picklistOptions`` — legacy list of scalars, stringified.

    Args:
        raw_json: JSON text of the field schema; falsy values are treated
            as an empty object.

    Returns:
        A list of labels, or ``[]`` when the payload cannot be parsed, is not
        a JSON object, or carries no option list.
    """
    try:
        data = json.loads(raw_json or "{}")
    except (TypeError, ValueError, RecursionError):
        # Best-effort: an unreadable payload simply has no options.
        return []
    # Valid JSON is not necessarily an object ("null", "[]", "3", '"x"');
    # only objects can carry option lists, and calling .get on anything else
    # would raise.
    if not isinstance(data, dict):
        return []

    opts = data.get("options")
    if isinstance(opts, list):
        labels = []
        for opt in opts:
            if isinstance(opt, dict):
                label = opt.get("label") or opt.get("key")
                if label is not None:
                    labels.append(label)
            elif opt is not None:
                labels.append(str(opt))
        return labels

    legacy = data.get("picklistOptions")
    if isinstance(legacy, list):
        return [str(opt) for opt in legacy if opt is not None]
    return []
def field_position(raw_json):
    """Return the ``position`` value from a field's raw schema JSON.

    Falsy input is treated as an empty object. Any failure while parsing or
    reading the payload (invalid JSON, non-object JSON, wrong input type)
    yields ``None``, as does a payload without a ``position`` key.
    """
    try:
        payload = json.loads(raw_json or "{}")
        position = payload.get("position")
    except Exception:
        position = None
    return position
def load_schemas(conn):
    """Load every custom field from ``object_schemas``, grouped per object.

    Rows whose ``field_type`` is ``STANDARD_FIELD`` are filtered out by the
    query. Rows are read by column name, so ``conn`` must use a
    name-addressable row factory (``main`` sets ``sqlite3.Row``).

    Returns:
        A ``defaultdict(list)`` mapping ``(location_id, object_key)`` to a
        list of field dicts with the keys ``id``, ``name``, ``fieldKey``,
        ``dataType``, ``options`` and ``position``, in query order
        (location, object, field name).
    """
    query = """
        SELECT location_id, object_key, field_id, field_name, field_key,
               field_type, raw_json
        FROM object_schemas
        WHERE field_type != 'STANDARD_FIELD'
        ORDER BY location_id, object_key, field_name
    """
    grouped = defaultdict(list)
    for row in conn.cursor().execute(query).fetchall():
        raw = row["raw_json"]
        field = {
            "id": row["field_id"],
            "name": row["field_name"],
            "fieldKey": row["field_key"],
            "dataType": row["field_type"],
            # Options and position only exist inside the raw schema payload.
            "options": extract_options(raw),
            "position": field_position(raw),
        }
        grouped[(row["location_id"], row["object_key"])].append(field)
    return grouped
def load_accounts(conn):
    """Return every row of ``accounts`` as a plain dict, ordered by ``nombre``.

    Each dict has the keys ``location_id``, ``nombre`` and ``type``. Rows are
    read by column name, so ``conn`` must use a name-addressable row factory.
    """
    columns = ("location_id", "nombre", "type")
    result = conn.cursor().execute("SELECT location_id, nombre, type FROM accounts ORDER BY nombre")
    return [{column: row[column] for column in columns} for row in result.fetchall()]
# ───────────────────────────────────────────────────────────────────────────
# Comparaciones
# ───────────────────────────────────────────────────────────────────────────
def compare_branch_to_brand(brand_fields, branch_fields, object_key):
    """Compare one branch's fields with the brand's for a single object.

    Fields are paired by ``fieldKey``. When a brand key is absent from the
    branch, the first branch field with the same normalized name is used as
    the counterpart and a FIELDKEY_DIVERGED finding is recorded; with no such
    field the brand field is reported as MISSING_IN_BRANCH. Each pair is then
    checked for name, dataType, options and position differences. Finally,
    branch fields matching no brand field by key or normalized name are
    reported as EXTRA_IN_BRANCH.

    Args:
        brand_fields: Field dicts of the brand account for ``object_key``.
        branch_fields: Field dicts of the branch account for ``object_key``.
        object_key: Object label stored in each finding's ``object`` entry.

    Returns:
        A list of finding dicts, each with at least ``category``, ``object``
        and ``field_name``.
    """
    # Fields without a fieldKey are left out of both indexes.
    brand_index = {fld["fieldKey"]: fld for fld in brand_fields if fld.get("fieldKey")}
    branch_index = {fld["fieldKey"]: fld for fld in branch_fields if fld.get("fieldKey")}
    findings = []

    def finding(category, name):
        # Leading entries shared by every finding, in serialization order.
        return {"category": category, "object": object_key, "field_name": name}

    for key, brand_field in brand_index.items():
        brand_name = brand_field.get("name")

        if key in branch_index:
            counterpart = branch_index[key]
        else:
            # The key may have diverged: fall back to the first branch field
            # whose normalized name matches.
            wanted = normalize_name(brand_name)
            counterpart = None
            for candidate in branch_fields:
                if normalize_name(candidate.get("name")) == wanted:
                    counterpart = candidate
                    break
            if not counterpart:
                findings.append({
                    **finding("MISSING_IN_BRANCH", brand_name),
                    "fieldKey": key,
                    "dataType": brand_field.get("dataType"),
                })
                continue
            findings.append({
                **finding("FIELDKEY_DIVERGED", brand_name),
                "brand_fieldKey": key,
                "branch_fieldKey": counterpart.get("fieldKey"),
            })
            # The name-matched field still goes through the attribute checks.

        branch_name = counterpart.get("name")
        if brand_name != branch_name:
            findings.append({
                **finding("NAME_MISMATCH", brand_name),
                "fieldKey": key,
                "brand_name": brand_name,
                "branch_name": branch_name,
            })

        brand_type = brand_field.get("dataType")
        branch_type = counterpart.get("dataType")
        if brand_type != branch_type:
            findings.append({
                **finding("DATATYPE_MISMATCH", brand_name),
                "fieldKey": key,
                "brand_dataType": brand_type,
                "branch_dataType": branch_type,
            })

        # List comparison is order-sensitive, so reordered options count.
        brand_options = brand_field.get("options") or []
        branch_options = counterpart.get("options") or []
        if list(brand_options) != list(branch_options):
            findings.append({
                **finding("OPTIONS_MISMATCH", brand_name),
                "fieldKey": key,
                "brand_options": brand_options,
                "branch_options": branch_options,
            })

        # Position differences are cosmetic and only reported when both
        # sides actually have a position.
        brand_position = brand_field.get("position")
        branch_position = counterpart.get("position")
        both_positioned = brand_position is not None and branch_position is not None
        if both_positioned and brand_position != branch_position:
            findings.append({
                **finding("POSITION_MISMATCH", brand_name),
                "fieldKey": key,
                "brand_position": brand_position,
                "branch_position": branch_position,
            })

    # Branch-only fields: unknown to the brand by key and by normalized name.
    brand_norms = {normalize_name(fld.get("name")) for fld in brand_fields}
    for branch_field in branch_fields:
        branch_key = branch_field.get("fieldKey")
        if branch_key in brand_index:
            continue
        if normalize_name(branch_field.get("name")) in brand_norms:
            continue
        findings.append({
            **finding("EXTRA_IN_BRANCH", branch_field.get("name")),
            "fieldKey": branch_key,
            "dataType": branch_field.get("dataType"),
        })
    return findings
def compare_cross_object(contact_fields, opp_fields, account_name):
    """Compare same-named contact and opportunity fields within one account.

    Fields are grouped by normalized name and every contact/opportunity pair
    sharing a name is checked. Names listed in ``INTENTIONAL_DIVERGENCES``
    are exempt: a dataType difference is logged as IGNORED_BY_RULE and no
    other check runs for that pair. All other pairs may yield XOBJ_NAME_CASE,
    XOBJ_DATATYPE_MISMATCH and XOBJ_OPTIONS_DIFFER (the last one only when
    both sides have options).

    Args:
        contact_fields: Field dicts of the account's contact object.
        opp_fields: Field dicts of the account's opportunity object.
        account_name: Not used by the comparison; kept in the signature.

    Returns:
        A list of finding dicts.
    """
    def group_by_norm(fields):
        groups = defaultdict(list)
        for fld in fields:
            groups[normalize_name(fld.get("name"))].append(fld)
        return groups

    contact_groups = group_by_norm(contact_fields)
    opp_groups = group_by_norm(opp_fields)

    findings = []
    for norm, contacts in contact_groups.items():
        if norm not in opp_groups:
            continue
        # The exemption depends only on the name, so decide once per group.
        exempt = (norm, "contact", "opportunity") in INTENTIONAL_DIVERGENCES
        pairs = ((contact, opp) for contact in contacts for opp in opp_groups[norm])
        for contact, opp in pairs:
            contact_name = contact.get("name")
            opp_name = opp.get("name")
            contact_type = contact.get("dataType")
            opp_type = opp.get("dataType")
            types_differ = contact_type != opp_type

            if exempt:
                if types_differ:
                    findings.append({
                        "category": "IGNORED_BY_RULE",
                        "field_norm": norm,
                        "rule": "sucursal_datatype_intentional",
                        "contact_name": contact_name,
                        "opportunity_name": opp_name,
                        "contact_dataType": contact_type,
                        "opportunity_dataType": opp_type,
                    })
                continue

            if contact_name != opp_name:
                findings.append({
                    "category": "XOBJ_NAME_CASE",
                    "field_norm": norm,
                    "contact_name": contact_name,
                    "opportunity_name": opp_name,
                })
            if types_differ:
                findings.append({
                    "category": "XOBJ_DATATYPE_MISMATCH",
                    "field_norm": norm,
                    "contact_name": contact_name,
                    "opportunity_name": opp_name,
                    "contact_dataType": contact_type,
                    "opportunity_dataType": opp_type,
                })

            contact_options = list(contact.get("options") or [])
            opp_options = list(opp.get("options") or [])
            # An empty side means "no options to compare", not a difference.
            if contact_options and opp_options and contact_options != opp_options:
                findings.append({
                    "category": "XOBJ_OPTIONS_DIFFER",
                    "field_norm": norm,
                    "contact_name": contact_name,
                    "opportunity_name": opp_name,
                    "contact_options": contact_options,
                    "opportunity_options": opp_options,
                })
    return findings
# ───────────────────────────────────────────────────────────────────────────
# Reporte
# ───────────────────────────────────────────────────────────────────────────
# Sort rank given to brand-inventory fields that have no usable position.
_UNPOSITIONED_SORT_RANK = 9999


def _position_sort_key(field):
    """Return the numeric position of a field, or a fallback rank without one."""
    position = field.get("position")
    # Test the type rather than truthiness: position 0 is a real position and
    # must sort first, while None / non-numeric values use the fallback rank.
    if isinstance(position, (int, float)):
        return position
    return _UNPOSITIONED_SORT_RANK


def _format_detail(finding, keys):
    """Render the given finding entries as ``key=value | key=value``."""
    bits = []
    for key in keys:
        if key not in finding:
            continue
        value = finding[key]
        if isinstance(value, list):
            value = "; ".join(map(str, value))
        bits.append(f"{key}={value}")
    return " | ".join(bits)


def write_xlsx(report, path):
    """Write the audit report to an XLSX workbook with four sheets.

    Sheets:
      * ``Resumen`` — one row per finding category with its severity, the
        number of affected accounts and the total number of findings.
      * ``Marca vs Sucursales`` — every finding that is not cross-object.
      * ``Cross-object`` — ``XOBJ_*`` and ``IGNORED_BY_RULE`` findings.
      * ``Inventario Marca`` — the brand's contact and opportunity fields,
        ordered by position (fields without a numeric position go last).

    Args:
        report: Report dict; reads ``report["branches"]`` (location id ->
            dict with ``name`` and ``findings``) and
            ``report["brand_inventory"]`` (``contact`` / ``opportunity``
            field lists).
        path: Destination file path handed to ``Workbook.save``.
    """
    # Imported here so the module can be loaded without openpyxl installed.
    import openpyxl
    from openpyxl.styles import Font, PatternFill, Alignment

    wb = openpyxl.Workbook()
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="305496")
    crit_fill = PatternFill("solid", fgColor="F8CBAD")
    warn_fill = PatternFill("solid", fgColor="FFE699")
    cosm_fill = PatternFill("solid", fgColor="DDEBF7")
    info_fill = PatternFill("solid", fgColor="E2EFDA")

    def hdr(ws, cols):
        # Styled header row, kept visible while scrolling.
        ws.append(cols)
        for cell in ws[1]:
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = Alignment(horizontal="center")
        ws.freeze_panes = "A2"

    def add_row(ws, values, fill):
        # Append a data row and colour it by severity when a fill is known.
        ws.append(values)
        if fill:
            for cell in ws[ws.max_row]:
                cell.fill = fill

    def is_cross_object(category):
        return category.startswith("XOBJ_") or category == "IGNORED_BY_RULE"

    severity_map = {
        "MISSING_IN_BRANCH": ("CRITICAL", crit_fill),
        "EXTRA_IN_BRANCH": ("CRITICAL", crit_fill),
        "DATATYPE_MISMATCH": ("CRITICAL", crit_fill),
        "FIELDKEY_DIVERGED": ("CRITICAL", crit_fill),
        "OPTIONS_MISMATCH": ("WARNING", warn_fill),
        "NAME_MISMATCH": ("WARNING", warn_fill),
        "XOBJ_DATATYPE_MISMATCH": ("CRITICAL", crit_fill),
        "XOBJ_NAME_CASE": ("WARNING", warn_fill),
        "XOBJ_OPTIONS_DIFFER": ("WARNING", warn_fill),
        "POSITION_MISMATCH": ("COSMETIC", cosm_fill),
        "IGNORED_BY_RULE": ("INFO", info_fill),
    }
    # Categories missing from the map are shown as "?" with no fill.
    unknown_severity = ("?", None)

    # Sheet 1: executive summary.
    ws = wb.active
    ws.title = "Resumen"
    hdr(ws, ["Categoría", "Severidad", "Sucursales afectadas", "Total hallazgos"])
    per_cat = defaultdict(lambda: {"branches": set(), "count": 0})
    for loc_id, loc in report["branches"].items():
        for f in loc["findings"]:
            per_cat[f["category"]]["branches"].add(loc_id)
            per_cat[f["category"]]["count"] += 1
    for cat, agg in sorted(per_cat.items()):
        sev, fill = severity_map.get(cat, unknown_severity)
        add_row(ws, [cat, sev, len(agg["branches"]), agg["count"]], fill)
    ws.column_dimensions["A"].width = 32
    ws.column_dimensions["C"].width = 22
    ws.column_dimensions["D"].width = 22

    # Sheet 2: brand vs branches, field by field.
    ws2 = wb.create_sheet("Marca vs Sucursales")
    hdr(ws2, [
        "Sucursal", "Location ID", "Objeto", "Categoría", "Severidad",
        "Campo", "fieldKey", "Detalle",
    ])
    branch_detail_keys = (
        "brand_name", "branch_name", "brand_dataType", "branch_dataType",
        "brand_options", "branch_options", "brand_position", "branch_position",
        "brand_fieldKey", "branch_fieldKey",
    )
    for loc_id, loc in report["branches"].items():
        for f in loc["findings"]:
            cat = f["category"]
            if is_cross_object(cat):
                continue
            sev, fill = severity_map.get(cat, unknown_severity)
            add_row(ws2, [
                loc["name"], loc_id, f.get("object", ""), cat, sev,
                f.get("field_name", ""),
                # FIELDKEY_DIVERGED findings only carry brand_fieldKey.
                f.get("fieldKey", "") or f.get("brand_fieldKey", ""),
                _format_detail(f, branch_detail_keys),
            ], fill)
    for col, w in [("A", 32), ("D", 26), ("F", 30), ("G", 36), ("H", 80)]:
        ws2.column_dimensions[col].width = w

    # Sheet 3: cross-object findings per account.
    ws3 = wb.create_sheet("Cross-object")
    hdr(ws3, [
        "Cuenta", "Location ID", "Categoría", "Severidad", "Campo (norm)",
        "Contact name", "Opportunity name", "Detalle",
    ])
    cross_detail_keys = (
        "contact_dataType", "opportunity_dataType",
        "contact_options", "opportunity_options", "rule",
    )
    for loc_id, loc in report["branches"].items():
        for f in loc["findings"]:
            cat = f["category"]
            if not is_cross_object(cat):
                continue
            sev, fill = severity_map.get(cat, unknown_severity)
            add_row(ws3, [
                loc["name"], loc_id, cat, sev,
                f.get("field_norm", ""),
                f.get("contact_name", ""), f.get("opportunity_name", ""),
                _format_detail(f, cross_detail_keys),
            ], fill)
    for col, w in [("A", 32), ("C", 26), ("E", 22), ("F", 30), ("G", 30), ("H", 80)]:
        ws3.column_dimensions[col].width = w

    # Sheet 4: full brand inventory.
    ws4 = wb.create_sheet("Inventario Marca")
    hdr(ws4, ["Objeto", "Posición", "Campo", "fieldKey", "dataType", "Opciones"])
    for obj in ("contact", "opportunity"):
        fields = report["brand_inventory"][obj]
        for f in sorted(fields, key=_position_sort_key):
            ws4.append([
                obj, f.get("position", ""), f.get("name", ""), f.get("fieldKey", ""),
                f.get("dataType", ""),
                "; ".join(map(str, f.get("options") or [])),
            ])
    for col, w in [("C", 30), ("D", 36), ("E", 18), ("F", 80)]:
        ws4.column_dimensions[col].width = w

    wb.save(path)
def main():
    """Run the audit: load from SQLite, compare, print a summary, export.

    Steps:
      1. Parse ``--json`` / ``--xlsx`` output paths (timestamped files under
         ``REPORT_AUDIT_CUSTOM_FIELDS`` are used when omitted).
      2. Load schemas and accounts from the database at ``DB_PATH``.
      3. Compare every non-brand account against the brand, and run the
         cross-object comparison on every account (brand included).
      4. Print per-category totals and the list of accounts with findings.
      5. Write the JSON and XLSX reports.

    Raises:
        SystemExit: If the brand account is not present in ``accounts``.
    """
    parser = argparse.ArgumentParser(description="Auditoría completa de custom fields desde SQLite.")
    parser.add_argument("--json", help="Ruta de salida JSON.")
    parser.add_argument("--xlsx", help="Ruta de salida XLSX.")
    args = parser.parse_args()

    # The summary prints non-ASCII text; force UTF-8 where the stream allows.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        schemas = load_schemas(conn)
        accounts = load_accounts(conn)
    finally:
        # Both loaders return fully materialized data, so the connection is
        # not needed past this point — release it even if loading fails.
        conn.close()

    accounts_by_id = {a["location_id"]: a for a in accounts}
    brand = accounts_by_id.get(BRAND_LOCATION_ID)
    if not brand:
        raise SystemExit(f"Marca {BRAND_LOCATION_ID} no encontrada en SQLite.")
    brand_contact = schemas.get((BRAND_LOCATION_ID, "contact"), [])
    brand_opp = schemas.get((BRAND_LOCATION_ID, "opportunity"), [])

    report = {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "source": f"SQLite ({DB_PATH})",
        "brand": {
            "location_id": BRAND_LOCATION_ID,
            "name": brand["nombre"],
            "field_counts": {
                "contact": len(brand_contact),
                "opportunity": len(brand_opp),
            },
        },
        "brand_inventory": {
            "contact": brand_contact,
            "opportunity": brand_opp,
        },
        "branches": {},
    }

    # Process every account; the brand is included so its own cross-object
    # consistency is validated too.
    for acc in accounts:
        loc_id = acc["location_id"]
        contact_fields = schemas.get((loc_id, "contact"), [])
        opp_fields = schemas.get((loc_id, "opportunity"), [])
        findings = []
        if loc_id != BRAND_LOCATION_ID:
            findings.extend(compare_branch_to_brand(brand_contact, contact_fields, "contact"))
            findings.extend(compare_branch_to_brand(brand_opp, opp_fields, "opportunity"))
        findings.extend(compare_cross_object(contact_fields, opp_fields, acc["nombre"]))
        report["branches"][loc_id] = {
            "name": acc["nombre"],
            "type": acc["type"],
            "field_counts": {"contact": len(contact_fields), "opportunity": len(opp_fields)},
            "findings": findings,
        }

    # Stdout summary.
    total_branches = sum(1 for k in report["branches"] if k != BRAND_LOCATION_ID)
    print(f"AUDITORÍA COMPLETA — Marca + {total_branches} sucursales\n")
    print(f"Inventario Marca: contact={len(brand_contact)} cf, opportunity={len(brand_opp)} cf\n")

    # Totals per category across all accounts.
    per_cat_branches = defaultdict(set)
    per_cat_count = defaultdict(int)
    for loc_id, loc in report["branches"].items():
        for f in loc["findings"]:
            per_cat_branches[f["category"]].add(loc_id)
            per_cat_count[f["category"]] += 1
    print("Hallazgos por categoría:")
    if not per_cat_count:
        print(" (sin hallazgos — schema 100% alineado)")
    else:
        for cat in sorted(per_cat_count):
            print(f" {cat:32} {per_cat_count[cat]:>5} hallazgos {len(per_cat_branches[cat]):>3} cuentas")

    # Categories that stop a branch from counting as fully aligned;
    # POSITION_MISMATCH and IGNORED_BY_RULE are deliberately left out.
    critical_cats = {
        "MISSING_IN_BRANCH", "EXTRA_IN_BRANCH", "DATATYPE_MISMATCH",
        "FIELDKEY_DIVERGED", "NAME_MISMATCH", "OPTIONS_MISMATCH",
        "XOBJ_DATATYPE_MISMATCH", "XOBJ_NAME_CASE", "XOBJ_OPTIONS_DIFFER",
    }
    fully_aligned = []
    with_issues = []
    for loc_id, loc in report["branches"].items():
        if loc_id == BRAND_LOCATION_ID:
            continue
        crits = [f for f in loc["findings"] if f["category"] in critical_cats]
        bucket = with_issues if crits else fully_aligned
        bucket.append((loc["name"], loc_id, len(crits)))
    print(f"\nSucursales 100% alineadas (sin hallazgos críticos): {len(fully_aligned)}/{total_branches}")
    print(f"Sucursales con hallazgos críticos: {len(with_issues)}")
    # Most affected branches first.
    for name, branch_id, crit_count in sorted(with_issues, key=lambda x: -x[2]):
        print(f" {crit_count:>4} hallazgos — {name} ({branch_id})")

    # Outputs.
    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    os.makedirs(REPORT_AUDIT_CUSTOM_FIELDS, exist_ok=True)
    json_path = args.json or os.path.join(REPORT_AUDIT_CUSTOM_FIELDS, f"audit_full_db_{ts}.json")
    xlsx_path = args.xlsx or os.path.join(REPORT_AUDIT_CUSTOM_FIELDS, f"audit_full_db_{ts}.xlsx")
    with open(json_path, "w", encoding="utf-8") as fh:
        json.dump(report, fh, ensure_ascii=False, indent=2, default=str)
    write_xlsx(report, xlsx_path)
    print(f"\nJSON: {json_path}")
    print(f"XLSX: {xlsx_path}")
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()