CHANGE: Enable training models
All checks were successful
Build docker container / Build image (push) Successful in 10m29s

This commit is contained in:
2025-12-21 00:26:15 +01:00
parent 2389e17b4c
commit ff51af276d
8 changed files with 225 additions and 87 deletions

View File

@@ -13,7 +13,7 @@ user=root ; default user
file = /tmp/supervisor.sock ; supervisor unix http server sock file file = /tmp/supervisor.sock ; supervisor unix http server sock file
[program:microservicios] [program:microservicios]
command=python3 flask.py command=python3 app.py
directory=/app directory=/app
stdout_logfile=/dev/fd/1 stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0 stdout_logfile_maxbytes=0

94
src/app.py Normal file
View File

@@ -0,0 +1,94 @@
from flask import Flask, jsonify, request
import joblib
import os
from datetime import date
from db import fetch_data
import threading
from model_registry import register, load_meta
from models_train import train_demand_model, train_nulo_model
app = Flask(__name__)
# Directory where trained model artifacts (joblib files) and the registry live.
MODEL_DIR = "models"
def load_model(model_type):
    """Load the currently registered model for *model_type* ("demand"/"nulo").

    Returns None when no model of that type has been registered yet, or when
    the registered file is missing from disk (e.g. the container was rebuilt
    but metadata.json survived) -- previously that second case crashed in
    joblib.load.
    """
    meta = load_meta()
    file = meta["current"].get(model_type)
    if not file:
        return None
    path = os.path.join(MODEL_DIR, file)
    # Registry entry may point at a file that no longer exists; treat it
    # the same as "no model" instead of raising.
    if not os.path.exists(path):
        return None
    return joblib.load(path)
def background_train():
    """Fetch fresh data and retrain both models, registering the results.

    Runs inside a worker thread started by POST /train, so an uncaught
    exception would otherwise disappear silently -- failures are caught and
    logged here instead.
    """
    try:
        print("Fetching data...")
        df = fetch_data()
        print(f"Data fetched: {len(df)} rows")
        # fetch_data() returns an empty DataFrame on DB errors (see db.py);
        # training on it would crash, so bail out early.
        if df.empty:
            print("No training data available; skipping training run.")
            return
        today = date.today().isoformat()
        os.makedirs(MODEL_DIR, exist_ok=True)

        print("Training demand model...")
        demand_model = train_demand_model(df)
        demand_file = f"demand_xgb_{today}.joblib"
        joblib.dump(demand_model, os.path.join(MODEL_DIR, demand_file))
        register("demand", demand_file, len(df))
        print("Demand model trained and saved.")

        print("Training nulo model...")
        nulo_model = train_nulo_model(df)
        nulo_file = f"nulo_xgb_{today}.joblib"
        joblib.dump(nulo_model, os.path.join(MODEL_DIR, nulo_file))
        register("nulo", nulo_file, len(df))
        print("Nulo model trained and saved.")
    except Exception as exc:
        # Surface worker-thread failures in the container logs.
        print(f"Training failed: {exc}")
@app.route("/train", methods=["POST"])
def train():
threading.Thread(target=background_train).start()
return jsonify({"status": "training started"})
@app.route("/predict_demand", methods=["GET"])
def predict_demand():
h3 = int(request.args["h3"])
week = int(request.args["week"])
dow = int(request.args["dow"])
hour = int(request.args["hour"])
model = load_model("demand")
X = [[h3, week, dow, hour]]
pred = model.predict(X)[0]
return jsonify({"expected_demand": float(pred)})
@app.route("/predict_nulo", methods=["GET"])
def predict_nulo():
h3 = int(request.args["h3"])
week = int(request.args["week"])
dow = int(request.args["dow"])
hour = int(request.args["hour"])
model = load_model("nulo")
X = [[h3, week, dow, hour]]
prob = model.predict_proba(X)[0][1]
return jsonify({"nulo_probability": float(prob)})
@app.route("/predict", methods=["GET"])
def predict_legacy():
h3 = int(request.args["h3"])
week = int(request.args["week"])
dow = int(request.args["dow"])
hour = int(request.args["hour"])
model = load_model("nulo")
X = [[h3, week, dow, hour]]
prob = model.predict_proba(X)[0][1]
return jsonify({
"predicted_nulo_prob": float(prob)
})
@app.route("/models", methods=["GET"])
def models():
return jsonify(load_meta())
if __name__ == "__main__":
app.run(host="0.0.0.0", port=5000)

37
src/db.py Normal file
View File

@@ -0,0 +1,37 @@
from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError
import pandas as pd
import os
# Connection settings (overridable via environment variables).
# NOTE(review): the password falls back to a hard-coded credential committed
# to the repo -- rotate it and require DB_PASSWORD from the environment.
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "tfmuocdfcarvajal")
DB_HOST = os.getenv("DB_HOST", "10.10.5.32")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "postgres")
DATABASE_URL = f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
# Create the SQLAlchemy engine (shared, module-level; created at import time).
engine = create_engine(DATABASE_URL, echo=False, future=True)
def fetch_data():
    """Load the hourly H3 demand training set into a DataFrame.

    Best-effort: on any SQLAlchemy failure the error is logged and an empty
    DataFrame is returned instead of raising.
    """
    query = """
    SELECT
        h3,
        week,
        dow,
        hour,
        total_events,
        total_nulos,
        nulo_rate
    FROM demanda_h3_hour_ml
    """
    try:
        with engine.connect() as conn:
            return pd.read_sql(text(query), conn)
    except SQLAlchemyError as e:
        print(f"Error al ejecutar la consulta: {e}")
        return pd.DataFrame()

28
src/db_warning.py Normal file
View File

@@ -0,0 +1,28 @@
import psycopg2
import pandas as pd
import os
# psycopg2 connection settings, overridable via environment variables.
# NOTE(review): hard-coded password fallback committed to the repo -- rotate
# it and require DB_PASSWORD from the environment.
DB_CONFIG = {
    'dbname': os.getenv('DB_NAME', 'postgres'),
    'user': os.getenv('DB_USER', 'postgres'),
    'password': os.getenv('DB_PASSWORD', 'tfmuocdfcarvajal'),
    'host': os.getenv('DB_HOST', '10.10.5.32'),
    'port': os.getenv('DB_PORT', '5432')
}
def fetch_data():
    """Load the hourly H3 demand training set via a raw psycopg2 connection.

    Returns a DataFrame with one row per (h3, week, dow, hour) cell.
    The connection is now closed in a finally block so a failing query no
    longer leaks it (the original only closed on success).
    """
    conn = psycopg2.connect(**DB_CONFIG)
    query = """
    SELECT
        h3,
        week,
        dow,
        hour,
        total_events,
        total_nulos,
        nulo_rate
    FROM demanda_h3_hour_ml
    """
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Close even when read_sql raises, so connections are not leaked.
        conn.close()
    return df

View File

@@ -1,85 +0,0 @@
from flask import Flask, jsonify, request
import os
import psycopg2
import pandas as pd
import xgboost as xgb
import joblib
app = Flask(__name__)
# Database configuration (overridable via environment variables).
# NOTE(review): hard-coded password fallback committed to the repo.
DB_CONFIG = {
    'dbname': os.getenv('DB_NAME', 'postgres'),
    'user': os.getenv('DB_USER', 'postgres'),
    'password': os.getenv('DB_PASSWORD', 'tfmuocdfcarvajal'),
    'host': os.getenv('DB_HOST', '10.10.5.32'),
    'port': os.getenv('DB_PORT', '5432')
}
# Path where the trained model is persisted / loaded from.
MODEL_PATH = 'modelo_xgb.joblib'
def get_db_connection():
    """Open and return a new psycopg2 connection built from DB_CONFIG."""
    return psycopg2.connect(**DB_CONFIG)
def fetch_data():
    """Read the aggregated demand table into a DataFrame.

    The connection is closed in a finally block so a failing query does not
    leak it (the original only closed on success).
    """
    conn = get_db_connection()
    query = """
        SELECT h3, hour, dow, total_events, total_nulos, nulo_rate
        FROM demanda_h3_hour
    """
    try:
        df = pd.read_sql(query, conn)
    finally:
        conn.close()
    return df
def train_model(df):
    """Train and persist a binary 'nulo' classifier.

    Features: hour, dow, total_events. Target: 1 when nulo_rate > 0, else 0.
    The fitted model is dumped to MODEL_PATH and returned.
    """
    X = df[['hour', 'dow', 'total_events']]
    # Binarize the target: any non-zero nulo rate counts as positive.
    y = (df['nulo_rate'] > 0).astype(int)
    # use_label_encoder was deprecated in XGBoost 1.3 and removed in 2.0;
    # passing it breaks on current versions, so it is dropped here.
    model = xgb.XGBClassifier(
        max_depth=4, n_estimators=100, learning_rate=0.1, eval_metric='logloss'
    )
    model.fit(X, y)
    joblib.dump(model, MODEL_PATH)
    return model
def load_model():
    """Load the persisted model, training a fresh one if loading fails.

    Narrower than the original bare `except:`, which also swallowed
    KeyboardInterrupt/SystemExit; any genuine load failure (missing or
    corrupt file) still falls back to training from scratch.
    """
    try:
        return joblib.load(MODEL_PATH)
    except Exception:
        df = fetch_data()
        return train_model(df)
# NOTE(review): loads -- and possibly trains -- the model at import time,
# hitting the database before the server starts; verify this is intended.
model = load_model()
@app.route('/predict', methods=['GET'])
def predict():
    """Predict the 'nulo' probability for given hour/dow/total_events.

    Query params: hour, dow (required ints), total_events (optional, default 1).
    """
    try:
        hour = int(request.args['hour'])
        dow = int(request.args['dow'])
        total_events = int(request.args.get('total_events', 1))  # default kept
    except (KeyError, ValueError):
        # A missing param used to raise int(None) -> TypeError and surface
        # as a 500; report a client error instead.
        return jsonify({'error': 'hour and dow query parameters must be integers'}), 400
    X_pred = pd.DataFrame([[hour, dow, total_events]], columns=['hour', 'dow', 'total_events'])
    prob = model.predict_proba(X_pred)[0][1]  # probability of the positive (nulo) class
    return jsonify({
        'hour': hour,
        'dow': dow,
        'total_events': total_events,
        'predicted_nulo_prob': float(prob)
    })
@app.route('/demand', methods=['GET'])
def demand():
    """Score every row of the demand table and return the records as JSON."""
    df = fetch_data()
    features = df[['hour', 'dow', 'total_events']]
    # Per-row P(nulo), cast to plain floats for JSON serialization.
    df['predicted_nulo_prob'] = model.predict_proba(features)[:, 1].astype(float)
    return jsonify(df.to_dict(orient='records'))
if __name__ == '__main__':
    # Bind all interfaces so the containerized service is reachable.
    app.run(host='0.0.0.0', port=5000)

30
src/model_registry.py Normal file
View File

@@ -0,0 +1,30 @@
import json
import os
from datetime import datetime
MODEL_DIR = "models"
META_FILE = os.path.join(MODEL_DIR, "metadata.json")
def load_meta():
    """Return registry metadata, or a fresh empty structure when absent.

    Shape: {"current": {type: file}, "history": [entries]}.
    """
    if not os.path.exists(META_FILE):
        return {"current": {}, "history": []}
    # Explicit encoding so reads do not depend on the platform default.
    with open(META_FILE, encoding="utf-8") as f:
        return json.load(f)
def save_meta(meta):
    """Persist registry metadata as pretty-printed JSON, creating MODEL_DIR if needed."""
    os.makedirs(MODEL_DIR, exist_ok=True)
    # Explicit encoding so writes do not depend on the platform default.
    with open(META_FILE, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)
def register(model_type, file_name, rows):
    """Record a newly trained model file and mark it as the current one.

    Args:
        model_type: registry key, e.g. "demand" or "nulo".
        file_name: joblib file name inside MODEL_DIR.
        rows: number of training rows used.

    Returns:
        The metadata entry appended to the history.
    """
    from datetime import timezone  # local import; module imports untouched

    meta = load_meta()
    entry = {
        "type": model_type,
        "file": file_name,
        # datetime.utcnow() is deprecated (Python 3.12); produce the same
        # naive-UTC "...Z" timestamp from an aware datetime instead.
        "trained_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "rows": rows,
    }
    meta["history"].append(entry)
    meta["current"][model_type] = file_name
    save_meta(meta)
    return entry

33
src/models_train.py Normal file
View File

@@ -0,0 +1,33 @@
import xgboost as xgb
def train_demand_model(df):
    """Train an XGBoost regressor predicting total_events per (h3, week, dow, hour).

    Args:
        df: DataFrame with columns h3 (hex string), week, dow, hour, total_events.

    Returns:
        A fitted xgb.XGBRegressor.

    Unlike the original, the caller's DataFrame is not mutated: the derived
    h3_int column is added on a copy via DataFrame.assign.
    """
    # H3 cell ids are hex strings; convert to int so XGBoost can use them.
    X = df.assign(h3_int=df["h3"].apply(lambda x: int(x, 16)))[
        ["h3_int", "week", "dow", "hour"]
    ]
    y = df["total_events"]
    model = xgb.XGBRegressor(
        n_estimators=300,
        max_depth=8,
        learning_rate=0.05,
        objective="reg:squarederror"
    )
    model.fit(X, y)
    return model
def train_nulo_model(df):
    """Train an XGBoost classifier for 'nulo' events per (h3, week, dow, hour).

    Args:
        df: DataFrame with columns h3 (hex string), week, dow, hour, nulo_rate.

    Returns:
        A fitted xgb.XGBClassifier; target is 1 when nulo_rate > 0, else 0.

    Unlike the original, the caller's DataFrame is not mutated: the derived
    h3_int column is added on a copy via DataFrame.assign.
    """
    # H3 cell ids are hex strings; convert to int so XGBoost can use them.
    X = df.assign(h3_int=df["h3"].apply(lambda x: int(x, 16)))[
        ["h3_int", "week", "dow", "hour"]
    ]
    # Binarize the target: any non-zero nulo rate counts as positive.
    y = (df["nulo_rate"] > 0).astype(int)
    model = xgb.XGBClassifier(
        n_estimators=200,
        max_depth=5,
        learning_rate=0.1,
        eval_metric="logloss"
    )
    model.fit(X, y)
    return model

View File

@@ -16,6 +16,7 @@ authors = [
dependencies = [ dependencies = [
'flask', 'flask',
'psycopg2-binary', 'psycopg2-binary',
'sqlalchemy',
'pandas', 'pandas',
'xgboost', 'xgboost',
'scikit-learn', 'scikit-learn',
@@ -24,7 +25,7 @@ dependencies = [
[tool.setuptools] [tool.setuptools]
include-package-data = true include-package-data = true
py-modules = ["flask"] py-modules = ["app", "db", "model_registry", "models_train"]
[tool.setuptools.packages] [tool.setuptools.packages]
find = {} find = {}