Files
MP-Manager/sync_engine.py
T
2026-05-30 14:31:19 -06:00

489 lines
19 KiB
Python

import os
import csv
import pandas as pd
import concurrent.futures
from datetime import datetime
from ghl_client import GHLClient
import db
import error_logging
CSV_FILENAME = "Bucéfalo - Mesa de control - API Tokens - MP.csv"
CSV_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), CSV_FILENAME)
# Default: sin tope artificial. El rate limit de GHL es por location, asi que
# paralelizar todas las cuentas no genera 429. El consumo de hilos/CPU es bajo
# porque casi todo el tiempo es I/O. La env var SYNC_ENGINE_MAX_WORKERS sigue
# disponible si en algun momento se quiere acotar a proposito.
DEFAULT_SYNC_MAX_WORKERS = 1000
ghl_client = GHLClient()
_location_name_cache = {}
# ============================================================================
# REFRESH HELPERS — single source of truth post-mutación
# ============================================================================
# Tras cualquier endpoint mutante (create/update/delete opp/contact) hay que
# garantizar que SQLite refleje la verdad de Bucéfalo. Estos helpers hacen
# fetch live del objeto recién tocado y lo upsertan localmente. Si falla, lo
# reportan en lugar de tragar el error.
def refresh_opportunity_in_db(token, opp_id, location_id):
"""GET /opportunities/{id} → save_single_opportunity. Devuelve dict con
{ok, opportunity, error}. NO levanta excepciones — el caller decide.
Si la opp ya no existe (404), borra la fila local."""
try:
res = ghl_client.get_opportunity(token, opp_id)
except Exception as exc:
# Si GHL devuelve 404, eliminar localmente para mantener consistencia.
status = None
response = getattr(exc, "response", None)
if response is not None:
try: status = response.status_code
except Exception: status = None
if status == 404:
try:
conn = db.get_db_connection()
try:
conn.execute("DELETE FROM opportunities WHERE id=? AND location_id=?",
(opp_id, location_id))
conn.commit()
finally:
conn.close()
return {"ok": True, "deleted_locally": True, "opportunity": None}
except Exception as db_exc:
return {"ok": False, "error": f"GHL 404 + cleanup local fallo: {db_exc}"}
return {"ok": False, "error": f"get_opportunity fallo: {exc}"}
opp = (res or {}).get("opportunity") or res or {}
if not opp.get("id"):
return {"ok": False, "error": f"Respuesta GHL sin id: {res}"}
try:
db.save_single_opportunity(location_id, opp)
except Exception as exc:
return {"ok": False, "error": f"save_single_opportunity fallo: {exc}", "opportunity": opp}
return {"ok": True, "opportunity": opp}
def refresh_contact_in_db(token, contact_id, location_id):
"""GET /contacts/{id} → save_single_contact. Similar a refresh_opportunity_in_db.
Si el contacto fue eliminado en Bucéfalo (404), también borra opps en cascade."""
try:
res = ghl_client._request("GET", f"/contacts/{contact_id}", token)
except Exception as exc:
status = None
response = getattr(exc, "response", None)
if response is not None:
try: status = response.status_code
except Exception: status = None
if status == 404:
try:
conn = db.get_db_connection()
try:
conn.execute("DELETE FROM opportunities WHERE contact_id=? AND location_id=?",
(contact_id, location_id))
conn.execute("DELETE FROM contacts WHERE id=? AND location_id=?",
(contact_id, location_id))
conn.commit()
finally:
conn.close()
return {"ok": True, "deleted_locally": True, "contact": None}
except Exception as db_exc:
return {"ok": False, "error": f"GHL 404 + cleanup local fallo: {db_exc}"}
return {"ok": False, "error": f"get_contact fallo: {exc}"}
contact = (res or {}).get("contact") or res or {}
if not contact.get("id"):
return {"ok": False, "error": f"Respuesta GHL sin id: {res}"}
try:
db.save_single_contact(location_id, contact)
except Exception as exc:
return {"ok": False, "error": f"save_single_contact fallo: {exc}", "contact": contact}
return {"ok": True, "contact": contact}
def refresh_opps_for_contact(token, contact_id, location_id):
"""Sincroniza TODAS las opps del contacto en una location.
IMPORTANTE: GHL ignora el filtro contactId en POST /opportunities/search
(bug confirmado: devuelve todas las opps de la location). Por eso paginamos
todas las opps y filtramos en Python por contactId real.
Devuelve {ok, refreshed_count, opps_in_ghl, error}."""
matched = []
try:
page = 1
while True:
body = {"locationId": location_id, "limit": 100, "page": page}
res = ghl_client._request("POST", "/opportunities/search", token, json=body)
batch = res.get("opportunities", []) if isinstance(res, dict) else []
if not batch:
break
for opp in batch:
if opp.get("contactId") == contact_id:
matched.append(opp)
if len(batch) < 100:
break
page += 1
if page > 100:
break
except Exception as exc:
return {"ok": False, "error": f"search_opportunities fallo: {exc}", "refreshed_count": 0}
refreshed = 0
for opp in matched:
try:
db.save_single_opportunity(location_id, opp)
refreshed += 1
except Exception as exc:
error_logging.log_error("refresh_opps_for_contact_save_failed", exc, {
"opp_id": opp.get("id"), "location_id": location_id, "contact_id": contact_id,
})
return {"ok": True, "refreshed_count": refreshed, "opps_in_ghl": len(matched)}
def get_sync_max_workers():
raw_value = os.getenv("SYNC_ENGINE_MAX_WORKERS", str(DEFAULT_SYNC_MAX_WORKERS))
try:
workers = int(raw_value)
except (TypeError, ValueError):
workers = DEFAULT_SYNC_MAX_WORKERS
return max(1, workers)
def extract_location_name(location_data):
"""
Extrae un nombre legible de las variantes comunes de respuesta de GHL.
"""
if not isinstance(location_data, dict):
return None
payload = location_data.get("location") if isinstance(location_data.get("location"), dict) else location_data
for key in ("name", "businessName", "companyName"):
value = payload.get(key)
if value and str(value).strip():
return str(value).strip()
business = payload.get("business")
if isinstance(business, dict):
value = business.get("name")
if value and str(value).strip():
return str(value).strip()
return None
def resolve_location_name(location_id, token, fallback=""):
if fallback and fallback.strip():
return fallback.strip()
if location_id in _location_name_cache:
return _location_name_cache[location_id]
try:
data = ghl_client.get_location(token, location_id)
name = extract_location_name(data) or location_id
except Exception as e:
error_id = error_logging.log_error("sync_resolve_location_name_failed", e, {"location_id": location_id})
print(f"Advertencia: no se pudo obtener nombre de location {location_id}: {e} | error_id={error_id}")
name = location_id
_location_name_cache[location_id] = name
return name
# --- CARGADOR DE CSV ---
def parse_accounts_csv():
"""
Parsea el archivo CSV de cuentas de Bucéfalo.
Aplica deduplicación, filtrado y clasificación en 'brand' y 'branch'.
"""
if not os.path.exists(CSV_PATH):
raise FileNotFoundError(f"No se encontró el archivo CSV en la ruta: {CSV_PATH}")
accounts = {}
with open(CSV_PATH, mode='r', encoding='utf-8-sig') as f:
reader = csv.DictReader(f)
for row in reader:
loc_id = row.get('Location_ID', '').strip()
nombre = row.get('Nombre', '').strip()
token = row.get('API_token', '').strip()
whatsapp = row.get('WhatsApp_idWidget', '').strip()
owner = row.get('Company Owner', '').strip()
# Filtrar filas vacías o sin Location_ID
if not loc_id or loc_id == "" or not token:
continue
# Normalizar Company Owner
if not owner or owner == "-" or owner == "":
owner = "Sin asignar"
# Determinar tipo
# Monte Providencia (Marca) es la cuenta principal
acc_type = "brand" if loc_id == "GbKkBpCmKu2QmloKFHy3" else "branch"
nombre = resolve_location_name(loc_id, token, nombre)
accounts[loc_id] = {
"location_id": loc_id,
"nombre": nombre,
"token": token,
"whatsapp_widget": whatsapp,
"company_owner": owner,
"type": acc_type
}
# Devolver como lista. La deduplicación se hace automáticamente al usar un diccionario por loc_id (último gana)
return list(accounts.values())
def get_tokens_map():
"""
Devuelve un mapeo simple de location_id -> token.
"""
accounts = parse_accounts_csv()
return {acc['location_id']: acc['token'] for acc in accounts}
# --- MOTOR DE SINCRONIZACIÓN INDIVIDUAL ---
def sync_account(location_id, token):
"""
Sincroniza contactos, oportunidades y pipelines de una sucursal en SQLite.
"""
log_id = db.create_sync_log(location_id)
print(f"[{datetime.now()}] Iniciando sync para cuenta: {location_id}")
try:
sync_step = "metadata"
sync_account_metadata(location_id, token)
sync_step = "pipelines"
# 1. Obtener Pipelines
pipelines = ghl_client.get_pipelines(token, location_id)
sync_step = "opportunities"
# 2. Obtener Oportunidades
opportunities = ghl_client.get_all_opportunities(token, location_id)
sync_step = "contacts"
# 3. Obtener Contactos
contacts = ghl_client.get_all_contacts(token, location_id)
sync_step = "synthetic_pipelines"
# 4. Fallback sintético de pipelines si GHL API devolvió []
if not pipelines and opportunities:
print(f"-> GHL devolvió 0 pipelines para {location_id}. Sintetizando desde oportunidades...")
stages_map = {}
for o in opportunities:
p_id = o.get("pipelineId")
s_id = o.get("pipelineStageId")
if p_id and s_id:
if p_id not in stages_map:
stages_map[p_id] = set()
stages_map[p_id].add(s_id)
pipelines = []
for p_id, stages_set in stages_map.items():
stages_list = [
{"id": s_id, "name": f"Etapa {s_id[:6]}...", "order": idx + 1}
for idx, s_id in enumerate(sorted(stages_set))
]
pipelines.append({
"id": p_id,
"name": f"Pipeline Sintético ({p_id[:6]})",
"stages": stages_list
})
sync_step = "save_to_sqlite"
# 5. Guardar en SQLite en bloque transaccional
db.save_pipelines(location_id, pipelines)
db.save_opportunities(location_id, opportunities)
db.save_contacts(location_id, contacts)
# 6. Actualizar log
db.update_sync_log(log_id, "success", len(contacts), len(opportunities))
print(f"[{datetime.now()}] Sincronización exitosa para {location_id}. Contactos: {len(contacts)}, Opps: {len(opportunities)}")
return True, len(contacts), len(opportunities), None
except Exception as e:
error_msg = str(e)
error_id = error_logging.log_error("sync_account_failed", e, {
"location_id": location_id,
"sync_step": locals().get("sync_step", "unknown"),
})
db.update_sync_log(log_id, "failed", 0, 0, error_msg)
print(f"[{datetime.now()}] Error sincronizando cuenta {location_id}: {error_msg} | error_id={error_id}")
return False, 0, 0, error_msg
def sync_account_metadata(location_id, token):
"""
Sincroniza schemas dinamicos de contact y opportunity en SQLite.
"""
for object_key in ("contact", "opportunity"):
fields = ghl_client.get_object_schema_fields(token, location_id, object_key)
if not fields:
print(f"Advertencia: schema vacio para {location_id}/{object_key}; se conserva metadata local existente.")
continue
db.save_object_schema(location_id, object_key, fields)
return True
# --- MOTOR DE SINCRONIZACIÓN GLOBAL ---
_sync_status = {
"is_running": False,
"total": 0,
"done": 0,
"success_count": 0,
"failed_count": 0,
"max_workers": 0,
"current_branch": ""
}
def get_sync_progress():
global _sync_status
return _sync_status
def sync_all_accounts():
"""
Sincroniza todas las cuentas cargadas en el CSV en paralelo con concurrencia regulada.
"""
global _sync_status
if _sync_status["is_running"]:
return {"message": "Sincronización global ya se encuentra en ejecución."}
# Inicializar Base de Datos
db.init_db()
try:
accounts = parse_accounts_csv()
# Guardar / Actualizar catálogo de cuentas en SQLite
db.save_accounts(accounts)
except Exception as e:
error_id = error_logging.log_error("sync_all_parse_accounts_failed", e)
print(f"No se pudo cargar el catálogo de cuentas: {e} | error_id={error_id}")
return {"error": f"No se pudo cargar el catálogo de cuentas: {str(e)}"}
total_accounts = len(accounts)
_sync_status.update({
"is_running": True,
"total": total_accounts,
"done": 0,
"success_count": 0,
"failed_count": 0,
"max_workers": 0,
"current_branch": "Inicializando..."
})
# Ejecutar en segundo plano en un hilo separado para no bloquear FastAPI
def run_in_background():
global _sync_status
# Al ser el límite de ráfaga de GHL por ubicación individual (recurso),
# podemos procesar múltiples sucursales en paralelo sin que interfieran entre sí.
concurrency = min(total_accounts, get_sync_max_workers()) if total_accounts else 1
_sync_status["max_workers"] = concurrency
print(f"Sync global usando hasta {concurrency} cuentas en paralelo.")
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
future_to_acc = {
executor.submit(sync_account, acc['location_id'], acc['token']): acc
for acc in accounts
}
for future in concurrent.futures.as_completed(future_to_acc):
acc = future_to_acc[future]
_sync_status["current_branch"] = acc['nombre']
try:
success, c_count, o_count, err = future.result()
if success:
_sync_status["success_count"] += 1
else:
_sync_status["failed_count"] += 1
except Exception as e:
_sync_status["failed_count"] += 1
error_id = error_logging.log_error("sync_thread_fatal", e, {
"location_id": acc.get("location_id"),
"account_name": acc.get("nombre"),
})
print(f"Excepción fatal en hilo de sync para {acc['nombre']}: {e} | error_id={error_id}")
_sync_status["done"] += 1
# Finalizado
_sync_status["is_running"] = False
_sync_status["current_branch"] = "Completado"
# Lanzar hilo secundario
import threading
t = threading.Thread(target=run_in_background)
t.daemon = True
t.start()
return {"message": "Sincronización global iniciada en segundo plano."}
def sync_all_metadata():
"""
Sincroniza solo schemas/metadata de todas las cuentas en paralelo.
"""
global _sync_status
if _sync_status["is_running"]:
return {"message": "Ya hay una sincronización global en ejecución."}
db.init_db()
try:
accounts = parse_accounts_csv()
db.save_accounts(accounts)
except Exception as e:
error_id = error_logging.log_error("sync_metadata_parse_accounts_failed", e)
print(f"No se pudo cargar el catálogo de cuentas para metadata: {e} | error_id={error_id}")
return {"error": f"No se pudo cargar el catálogo de cuentas: {str(e)}"}
total_accounts = len(accounts)
_sync_status.update({
"is_running": True,
"total": total_accounts,
"done": 0,
"success_count": 0,
"failed_count": 0,
"max_workers": 0,
"current_branch": "Inicializando metadata..."
})
def run_in_background():
global _sync_status
concurrency = min(total_accounts, get_sync_max_workers()) if total_accounts else 1
_sync_status["max_workers"] = concurrency
print(f"Sync metadata global usando hasta {concurrency} cuentas en paralelo.")
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
future_to_acc = {
executor.submit(sync_account_metadata, acc['location_id'], acc['token']): acc
for acc in accounts
}
for future in concurrent.futures.as_completed(future_to_acc):
acc = future_to_acc[future]
_sync_status["current_branch"] = acc['nombre']
try:
future.result()
_sync_status["success_count"] += 1
except Exception as e:
_sync_status["failed_count"] += 1
error_id = error_logging.log_error("sync_account_metadata_failed", e, {
"location_id": acc.get("location_id"),
"account_name": acc.get("nombre"),
})
print(f"Error sincronizando metadata para {acc['nombre']}: {e} | error_id={error_id}")
finally:
_sync_status["done"] += 1
_sync_status["is_running"] = False
_sync_status["current_branch"] = "Metadata completada"
import threading
t = threading.Thread(target=run_in_background)
t.daemon = True
t.start()
return {"message": "Sincronización de metadata iniciada en segundo plano."}