TP4 - Build a Prediction API with Flask
Objectives
By the end of this lab, you will be able to:
- Build a Flask application that serves ML predictions
- Parse and validate JSON request data manually
- Implement prediction and health check endpoints
- Handle errors gracefully with proper HTTP status codes
- Compare your Flask implementation with the FastAPI version from TP3
Prerequisites
- Completed TP2 (serialized model `model_v1.joblib`)
- Completed TP3 (for comparison)
- Python 3.10+ installed
This lab builds the exact same API as TP3 (same endpoints, same input/output format) but uses Flask instead of FastAPI. This lets you directly compare both approaches.
Architecture Overview
Step 1 — Project Setup
1.1 Create the Project Structure
mkdir -p flask-ml-api/app
mkdir -p flask-ml-api/models
cd flask-ml-api
1.2 Create a Virtual Environment
python -m venv venv
# Windows
venv\Scripts\activate
# macOS/Linux
source venv/bin/activate
1.3 Install Dependencies
pip install flask flask-cors scikit-learn joblib numpy
Create requirements.txt:
flask>=3.0.0
flask-cors>=4.0.0
scikit-learn>=1.3.0
joblib>=1.3.0
numpy>=1.24.0
1.4 Copy Your Model
cp /path/to/tp2/model_v1.joblib models/model_v1.joblib
Step 2 — Create the Input Validator
Since Flask doesn't have built-in validation like Pydantic, create app/validators.py:
def validate_prediction_input(data):
    """
    Validate prediction input data.

    Parameters
    ----------
    data : dict | None
        Parsed JSON request body; ``None`` means JSON parsing failed upstream.

    Returns
    -------
    tuple
        ``(validated_data, None)`` on success or ``(None, errors)`` on
        failure, where ``errors`` is a list of dicts describing each problem.
    """
    if data is None:
        return None, [{"message": "Request body must be valid JSON"}]

    errors = []
    validated = {}

    # Declarative schema: field -> type/required/range rules.
    # NOTE(review): "min": 0 allows zero even though the descriptions for
    # income/loan_amount say "> 0" -- confirm which is intended.
    schema = {
        "age": {
            "type": int,
            "required": True,
            "min": 18,
            "max": 120,
            "description": "Applicant age (18-120)",
        },
        "income": {
            "type": float,
            "required": True,
            "min": 0,
            "description": "Annual income (> 0)",
        },
        "credit_score": {
            "type": int,
            "required": True,
            "min": 300,
            "max": 850,
            "description": "Credit score (300-850)",
        },
        "employment_years": {
            "type": float,
            "required": True,
            "min": 0,
            "description": "Years of employment (>= 0)",
        },
        "loan_amount": {
            "type": float,
            "required": True,
            "min": 0,
            "description": "Loan amount (> 0)",
        },
    }

    for field, rules in schema.items():
        if field not in data:
            if rules["required"]:
                errors.append({
                    "field": field,
                    "message": f"Missing required field: {field}",
                    "expected": rules["description"],
                })
            continue

        raw = data[field]

        # bool is a subclass of int, so int(True) == 1 and float(True) == 1.0
        # would otherwise pass type conversion and numeric validation
        # silently; reject booleans explicitly.
        if isinstance(raw, bool):
            errors.append({
                "field": field,
                "message": f"Must be {rules['type'].__name__}",
                "received": str(raw),
            })
            continue

        try:
            value = rules["type"](raw)
        except (ValueError, TypeError):
            errors.append({
                "field": field,
                "message": f"Must be {rules['type'].__name__}",
                "received": str(raw),
            })
            continue

        # Range checks; the field is only accepted when all checks pass.
        if "min" in rules and value < rules["min"]:
            errors.append({
                "field": field,
                "message": f"Must be >= {rules['min']}",
                "received": value,
            })
        elif "max" in rules and value > rules["max"]:
            errors.append({
                "field": field,
                "message": f"Must be <= {rules['max']}",
                "received": value,
            })
        else:
            validated[field] = value

    if errors:
        return None, errors
    return validated, None
In TP3, all this validation logic was handled by a single Pydantic model (~20 lines). Here, you need ~70 lines of manual validation code. This is the key trade-off of Flask.
Step 3 — Create the ML Service
Create app/ml_service.py (identical to TP3):
import joblib
import numpy as np
from pathlib import Path
class MLService:
    """Loads a joblib-serialized model and serves single predictions."""

    def __init__(self):
        # No model until load_model() succeeds; predict() guards on this.
        self.model = None
        self.model_version = "unknown"

    def load_model(self, model_path: str) -> None:
        """Load the model from *model_path*; the file stem becomes the version."""
        path = Path(model_path)
        if not path.exists():
            raise FileNotFoundError(f"Model not found: {model_path}")
        self.model = joblib.load(path)
        self.model_version = path.stem
        print(f"[MLService] Model loaded: {self.model_version}")

    def predict(self, features: dict) -> dict:
        """Run one prediction from a validated feature dict.

        Raises RuntimeError when no model has been loaded.
        """
        if self.model is None:
            raise RuntimeError("Model is not loaded")
        # Column order must match the order the model was trained with.
        feature_order = (
            "age",
            "income",
            "credit_score",
            "employment_years",
            "loan_amount",
        )
        row = np.array([[features[name] for name in feature_order]])
        label = self.model.predict(row)[0]
        proba = self.model.predict_proba(row)[0]
        return {
            "prediction": "approved" if label == 1 else "denied",
            "probability": round(float(max(proba)), 4),
            "model_version": self.model_version,
        }

    @property
    def is_ready(self) -> bool:
        """True once a model has been loaded."""
        return self.model is not None


# Module-level singleton shared by the Flask app.
ml_service = MLService()
Step 4 — Build the Flask Application
Create app/main.py:
from flask import Flask, request, jsonify
from flask_cors import CORS
from datetime import datetime
from app.ml_service import ml_service
from app.validators import validate_prediction_input
def create_app():
    """Application factory: configure CORS, load the model, register routes."""
    app = Flask(__name__)

    # Allow the local frontend dev server to call the API.
    CORS(app, origins=["http://localhost:3000"])

    # Load the model once at startup; a missing model file puts the service
    # into degraded mode instead of crashing the process.
    try:
        ml_service.load_model("models/model_v1.joblib")
    except FileNotFoundError as exc:
        print(f"[WARNING] {exc}. Starting in degraded mode.")

    def error_response(code, message, status, **extra):
        """Build the uniform JSON error payload used by every error path."""
        body = {"error_code": code, "message": message}
        body.update(extra)
        return jsonify(body), status

    # --- Routes ---
    @app.route("/", methods=["GET"])
    def root():
        """Service metadata and a pointer to the health endpoint."""
        return jsonify({
            "service": "Loan Prediction API (Flask)",
            "version": "1.0.0",
            "health": "/health",
        })

    @app.route("/health", methods=["GET"])
    def health():
        """Readiness info for monitoring: degraded when no model is loaded."""
        ready = ml_service.is_ready
        return jsonify({
            "status": "healthy" if ready else "degraded",
            "model_loaded": ready,
            "model_version": ml_service.model_version,
            "timestamp": datetime.utcnow().isoformat(),
        })

    @app.route("/api/v1/predict", methods=["POST"])
    def predict():
        """Validate the JSON body and return a model prediction."""
        # silent=True -> None instead of an exception on a bad/missing body;
        # the validator turns None into a 422.
        payload = request.get_json(silent=True)

        validated, errors = validate_prediction_input(payload)
        if errors:
            return error_response(
                "VALIDATION_ERROR", "Invalid input data", 422, details=errors
            )

        if not ml_service.is_ready:
            return error_response(
                "MODEL_NOT_LOADED",
                "Model is not available. Service is degraded.",
                503,
            )

        try:
            result = ml_service.predict(validated)
            return jsonify({
                "prediction": result["prediction"],
                "probability": result["probability"],
                "model_version": result["model_version"],
                "timestamp": datetime.utcnow().isoformat(),
            })
        except Exception as exc:
            return error_response(
                "PREDICTION_FAILED", f"Prediction failed: {str(exc)}", 500
            )

    # --- Error Handlers (keep JSON responses instead of Flask's HTML pages) ---
    @app.errorhandler(404)
    def not_found(error):
        return error_response(
            "NOT_FOUND", "The requested endpoint does not exist", 404
        )

    @app.errorhandler(405)
    def method_not_allowed(error):
        return error_response(
            "METHOD_NOT_ALLOWED",
            "This HTTP method is not allowed for this endpoint",
            405,
        )

    @app.errorhandler(500)
    def internal_error(error):
        return error_response(
            "INTERNAL_ERROR", "An unexpected error occurred", 500
        )

    return app
Create run.py at the project root:
from app.main import create_app

# Build the WSGI application via the factory so extensions/routes are wired up.
app = create_app()

if __name__ == "__main__":
    # Development server only: debug=True enables the reloader and the
    # interactive debugger and must not be used in production.
    # host="0.0.0.0" listens on all interfaces.
    app.run(debug=True, host="0.0.0.0", port=5000)
Step 5 — Run and Test
5.1 Start the Server
python run.py
You should see:
[MLService] Model loaded: model_v1
* Running on http://0.0.0.0:5000
* Restarting with stat
* Debugger is active!
5.2 Test the Health Endpoint
curl http://localhost:5000/health
Expected:
{
"status": "healthy",
"model_loaded": true,
"model_version": "model_v1",
"timestamp": "2026-02-23T14:30:00.000000"
}
5.3 Test Prediction
curl -X POST http://localhost:5000/api/v1/predict \
-H "Content-Type: application/json" \
-d '{
"age": 35,
"income": 55000,
"credit_score": 720,
"employment_years": 8.5,
"loan_amount": 25000
}'
5.4 Test Validation Errors
# Missing fields
curl -X POST http://localhost:5000/api/v1/predict \
-H "Content-Type: application/json" \
-d '{"age": 35}'
# Invalid values
curl -X POST http://localhost:5000/api/v1/predict \
-H "Content-Type: application/json" \
-d '{"age": -5, "income": 55000, "credit_score": 720, "employment_years": 8, "loan_amount": 25000}'
# Not JSON
curl -X POST http://localhost:5000/api/v1/predict \
-H "Content-Type: text/plain" \
-d 'this is not json'
Step 6 — Compare TP3 (FastAPI) vs TP4 (Flask)
Now that you've built the same API in both frameworks, compare them:
| Aspect | TP3 (FastAPI) | TP4 (Flask) |
|---|---|---|
| Total files | 3 (main, schemas, ml_service) | 4 (main, validators, ml_service, run) |
| Validation code | ~25 lines (Pydantic model) | ~70 lines (manual validator) |
| Route code | ~15 lines per route | ~25 lines per route |
| Swagger docs | Auto-generated at /docs | Not available (needs extension) |
| Server command | uvicorn app.main:app --reload | python run.py |
| Error format | Auto-generated 422 details | Custom error format |
| Type safety | Full runtime validation | Manual type conversion |
| Startup | Lifespan context manager | Inside create_app() factory |
Verification Checklist
- Flask server starts on port 5000
- `GET /health` returns healthy status
- `POST /api/v1/predict` with valid data returns a prediction
- Missing fields return 422 with specific error details
- Invalid values (negative age, credit_score=1000) return 422
- Non-JSON body returns 422
- Requesting a non-existent endpoint returns 404
Bonus Challenges
Challenge 1: Add Flask-RESTX for Swagger docs
pip install flask-restx
Replace the basic Flask app with Flask-RESTX to get Swagger docs at /docs:
from flask_restx import Api, Resource, fields

# Wrap the existing Flask app so Flask-RESTX can serve Swagger UI at /docs.
api = Api(app, title="Loan Prediction API", version="1.0",
doc="/docs")
ns = api.namespace("api/v1", description="Predictions")

# Declarative input schema: replaces the manual validator and also feeds
# the generated Swagger documentation.
input_model = api.model("PredictionInput", {
    "age": fields.Integer(required=True, min=18, max=120),
    "income": fields.Float(required=True, min=0),
    "credit_score": fields.Integer(required=True, min=300, max=850),
    "employment_years": fields.Float(required=True, min=0),
    "loan_amount": fields.Float(required=True, min=0),
})

@ns.route("/predict")
class Predict(Resource):
    # validate=True makes Flask-RESTX reject bodies that violate input_model
    # before post() runs.
    @ns.expect(input_model, validate=True)
    def post(self):
        """Get a loan approval prediction."""
        data = api.payload
        result = ml_service.predict(data)
        return result
Challenge 2: Add request logging
import logging
from datetime import datetime

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ml-api")

@app.before_request
def log_request():
    # Runs before every request: log the HTTP method and path.
    logger.info(f"[{datetime.utcnow()}] {request.method} {request.path}")

@app.after_request
def log_response(response):
    # Runs after every request: log the status code. An after_request hook
    # must return the response object or the request breaks.
    logger.info(f"[{datetime.utcnow()}] Response: {response.status_code}")
    return response
Common Issues
| Issue | Solution |
|---|---|
| `ImportError: cannot import name 'create_app'` | Make sure `app/__init__.py` exists |
| `jsonify` returns HTML error page | You forgot the custom error handlers |
| `request.get_json()` returns `None` | Missing `Content-Type: application/json` header |
| Port 5000 conflict (macOS) | macOS Monterey+ uses port 5000 for AirPlay. Change `port=5000` to `port=5001` in `run.py` |
| Model not found | Run python run.py from the project root (where models/ is) |