MCP Error Handling Best Practices (2025 Guide)

In production, things break. APIs go down, databases time out, users provide invalid input, and edge cases multiply. Good error handling is what separates a prototype from a production-ready MCP server.

This guide covers error handling patterns that make your MCP servers reliable and helpful. When errors occur, the AI using your tools should understand what went wrong and how to recover.

The Golden Rule

Never let exceptions bubble up unhandled. An unhandled exception in an MCP tool can crash the server or leave the AI confused. Always catch exceptions and return meaningful error messages.

# ❌ Bad - exceptions can crash or confuse
@mcp.tool()
async def fetch_data(url: str) -> str:
    """Fetch the 'data' field from a JSON endpoint -- deliberately unguarded.

    Anti-pattern shown for contrast: nothing here catches timeouts,
    connection failures, non-JSON bodies, or a missing 'data' key, so any of
    them escapes the tool as a raw exception.
    """
    # httpx.get() is synchronous and its Response cannot be awaited; async
    # requests have to go through AsyncClient.
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
    return response.json()["data"]

# ✅ Good - always handle errors
@mcp.tool()
async def fetch_data(url: str) -> str:
    """Fetch the 'data' field of the JSON document served at ``url``.

    Args:
        url: Endpoint to GET. The response body is expected to be a JSON
            object containing a 'data' key.

    Returns:
        The 'data' value as a string (non-string values are JSON-encoded so
        the declared ``str`` return type always holds), or a message starting
        with "Error:" that describes what went wrong. Never raises.
    """
    import json  # local import keeps this snippet self-contained

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(url)
            response.raise_for_status()
            payload = response.json()
    except httpx.TimeoutException:
        return "Error: Request timed out after 30 seconds. The server may be slow or unreachable."
    except httpx.HTTPStatusError as e:
        return f"Error: Server returned status {e.response.status_code}. URL may be invalid or access denied."
    except ValueError:
        # response.json() raises a ValueError subclass on a non-JSON body.
        return "Error: Response was not valid JSON. The URL may not point to a JSON API."
    except Exception as e:
        return f"Error: Unexpected failure - {type(e).__name__}: {str(e)}"

    # A JSON array/scalar has no 'data' key either; report it the same way
    # instead of letting the subscript raise TypeError.
    if not isinstance(payload, dict) or "data" not in payload:
        return "Error: Response didn't contain expected 'data' field. API format may have changed."

    data = payload["data"]
    return data if isinstance(data, str) else json.dumps(data)

Error Categories

Different errors need different handling strategies:

1. Input Validation Errors

Catch bad input before doing any work. Validate early, fail fast.

from typing import Literal

@mcp.tool()
def create_task(
    title: str,
    priority: str = "medium",
    due_date: str = None
) -> str:
    """
    Create a new task.

    All inputs are validated before any work is done, and every problem found
    is reported in a single response so the caller can fix them in one pass.

    Args:
        title: Task title (required, 1-200 characters after trimming
            surrounding whitespace).
        priority: Priority level (low, medium, high, urgent).
        due_date: Due date in YYYY-MM-DD format.

    Returns:
        The result of create_task_in_db() on success, or a bulleted
        "Validation failed" message listing every invalid argument.
    """
    from datetime import datetime

    errors = []

    # Validate the trimmed title: that is the value that gets stored, so the
    # 200-character limit must not count surrounding whitespace.
    clean_title = title.strip() if title else ""
    if not clean_title:
        errors.append("Title is required and cannot be empty")
    elif len(clean_title) > 200:
        errors.append(f"Title too long ({len(clean_title)} chars). Maximum is 200 characters")

    # Validate priority
    valid_priorities = ("low", "medium", "high", "urgent")
    if priority not in valid_priorities:
        errors.append(f"Invalid priority '{priority}'. Must be one of: {', '.join(valid_priorities)}")

    # Validate date format (an empty/None due_date means "no due date")
    if due_date:
        try:
            datetime.strptime(due_date, "%Y-%m-%d")
        except ValueError:
            errors.append(f"Invalid date format '{due_date}'. Use YYYY-MM-DD (e.g., 2026-02-15)")

    # Return all validation errors at once
    if errors:
        return "Validation failed:\n• " + "\n• ".join(errors)

    # Proceed with creation...
    return create_task_in_db(clean_title, priority, due_date)

2. Network and External Service Errors

External services fail. Plan for it with retries and timeouts:

import asyncio
from typing import Awaitable, Callable, TypeVar

import httpx

T = TypeVar('T')

async def retry_with_backoff(
    func: Callable[[], Awaitable[T]],
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 30.0
) -> T:
    """Retry an async callable with exponential backoff.

    Only transient transport failures (httpx.TimeoutException and
    httpx.NetworkError) are retried; any other exception propagates
    immediately.

    Args:
        func: Zero-argument callable returning an awaitable; it is called
            again for each attempt.
        max_retries: Total number of attempts (must be >= 1).
        base_delay: Delay in seconds before the second attempt; doubles
            after every further failure.
        max_delay: Upper bound in seconds for any single delay.

    Returns:
        Whatever ``func`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        httpx.TimeoutException, httpx.NetworkError: The failure from the
            last attempt, once all attempts are exhausted.
    """
    # Without this guard the loop below never runs and the function would
    # reach `raise None`, which is a confusing TypeError.
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    last_exception = None

    for attempt in range(max_retries):
        try:
            return await func()
        except (httpx.TimeoutException, httpx.NetworkError) as e:
            last_exception = e
            # No point sleeping after the final attempt.
            if attempt < max_retries - 1:
                delay = min(base_delay * (2 ** attempt), max_delay)
                await asyncio.sleep(delay)

    raise last_exception


@mcp.tool()
async def fetch_weather(city: str) -> str:
    """Get current weather for a city.

    Args:
        city: City name. Surrounding whitespace is ignored.

    Returns:
        A one-line weather summary, or a message starting with "Error:" that
        explains the failure. Failures are reported through the return value;
        this tool does not raise.
    """
    from urllib.parse import quote

    city = city.strip() if city else ""
    if not city:
        return "Error: City name is required and cannot be empty."

    # Percent-encode the name so spaces, '/', '?' or '#' in it cannot change
    # the path or query of the request URL.
    url = f"https://wttr.in/{quote(city, safe='')}?format=j1"

    async def _fetch():
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(
                url,
                headers={"User-Agent": "MCP-Server/1.0"}
            )
            response.raise_for_status()
            return response.json()

    try:
        data = await retry_with_backoff(_fetch, max_retries=3)
        current = data["current_condition"][0]
        return f"Weather in {city}: {current['temp_F']}°F, {current['weatherDesc'][0]['value']}"

    except httpx.TimeoutException:
        return "Error: Weather service timed out after 3 retries. Try again later."
    except httpx.NetworkError:
        # retry_with_backoff re-raises this once its attempts are used up.
        return "Error: Could not reach the weather service after 3 attempts. Check connectivity and try again later."
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            return f"Error: City '{city}' not found. Check spelling or try a larger nearby city."
        return f"Error: Weather service returned {e.response.status_code}. Service may be temporarily unavailable."
    except (KeyError, IndexError, TypeError):
        # The JSON parsed, but did not have the fields read above.
        return "Error: Weather service returned data in an unexpected format. API format may have changed."
    except Exception as e:
        return f"Error: Could not fetch weather - {str(e)}"

3. Resource Not Found Errors

When an item doesn't exist, provide context and suggestions:

@mcp.tool()
def get_project(project_id: int) -> str:
    """Look up a project by ID and return its formatted details.

    When the ID is unknown, the error message lists up to three existing
    projects (or explains how to create the first one) so the caller has an
    obvious next step.
    """
    project = db.get_project(project_id)
    if project:
        return format_project(project)

    # Unknown ID: fetch a handful of existing projects to offer as hints.
    candidates = db.list_projects(limit=5)
    if not candidates:
        return (
            f"Error: Project with ID {project_id} not found.\n"
            f"No projects exist yet. Use create_project(name='...') to create one."
        )

    labels = [f"'{p['name']}' (id={p['id']})" for p in candidates[:3]]
    suggestions = ", ".join(labels)
    return (
        f"Error: Project with ID {project_id} not found.\n"
        f"Available projects: {suggestions}\n"
        f"Use list_projects() to see all projects."
    )

4. Permission and Authorization Errors

Be clear about why an operation was refused and what confirmation or permission is needed to proceed:

@mcp.tool()
def delete_project(project_id: int, confirm: bool = False) -> str:
    """
    Delete a project and all its tasks.

    Args:
        project_id: Project to delete.
        confirm: Must be True to confirm deletion.

    Returns:
        A confirmation message naming the deleted project and its task
        count, or a message starting with "Error:" when confirmation is
        missing, the project does not exist, or the project is archived.
    """
    if not confirm:
        return (
            "Error: Deletion requires confirmation.\n"
            "This will permanently delete the project and ALL its tasks.\n"
            # Must be an f-string so the hint shows the real ID rather than
            # the literal text "{project_id}".
            f"To confirm, call: delete_project(project_id={project_id}, confirm=True)"
        )

    project = db.get_project(project_id)
    if not project:
        return f"Error: Project {project_id} not found."

    if project["status"] == "archived":
        return (
            f"Error: Cannot delete archived project '{project['name']}'.\n"
            "Archived projects are protected. Contact an admin to delete."
        )

    # Count before deleting so the message can report how many tasks went
    # away with the project.
    task_count = db.count_tasks(project_id)
    db.delete_project(project_id)

    return f"Deleted project '{project['name']}' and {task_count} associated tasks."

Structured Error Responses

For complex tools, return structured error objects that the AI can parse:

import json
from dataclasses import dataclass, asdict
from typing import Optional, List


@dataclass
class ToolError:
    """Machine-readable description of a failed tool call."""

    # Short human-readable summary of the failure.
    error: str
    # Stable identifier for the failure category (e.g. "VALIDATION_ERROR").
    code: str
    # Optional longer explanation.
    details: Optional[str] = None
    # Optional list of next steps the caller can take.
    suggestions: Optional[List[str]] = None
    # Whether the caller can reasonably retry or correct the call.
    recoverable: bool = True

    def to_json(self) -> str:
        """Return all fields as a JSON object string, indented by two spaces."""
        payload = asdict(self)
        return json.dumps(payload, indent=2)


@dataclass
class ToolSuccess:
    """Machine-readable description of a successful tool call."""

    # Result payload of the operation.
    data: dict
    # Optional human-readable note accompanying the payload.
    message: Optional[str] = None

    def to_json(self) -> str:
        """Return all fields as a JSON object string, indented by two spaces."""
        payload = asdict(self)
        return json.dumps(payload, indent=2)


@mcp.tool()
async def complex_operation(param: str) -> str:
    """A tool with structured responses.

    Args:
        param: Non-empty input for the operation.

    Returns:
        A JSON string: a ToolSuccess payload when the operation completes,
        otherwise a ToolError payload whose ``code`` identifies the failure
        category. Never raises.
    """

    # Validation error
    if not param:
        return ToolError(
            error="Missing required parameter",
            code="VALIDATION_ERROR",
            details="The 'param' argument is required",
            suggestions=["Provide a non-empty string for 'param'"],
            recoverable=True
        ).to_json()

    try:
        result = await do_operation(param)
        return ToolSuccess(
            data=result,
            message="Operation completed successfully"
        ).to_json()

    except RateLimitError:
        return ToolError(
            error="Rate limit exceeded",
            code="RATE_LIMIT",
            details="Too many requests in the past minute",
            suggestions=[
                "Wait 60 seconds before retrying",
                "Reduce request frequency"
            ],
            recoverable=True
        ).to_json()

    except ServiceUnavailableError:
        return ToolError(
            error="Service temporarily unavailable",
            code="SERVICE_DOWN",
            details="The backend service is not responding",
            suggestions=[
                "Try again in a few minutes",
                "Check service status page"
            ],
            recoverable=True
        ).to_json()

    except Exception as e:
        # Last-resort boundary: anything unanticipated (including a result
        # that cannot be serialized to JSON) is still reported as a
        # structured error instead of escaping the tool.
        return ToolError(
            error="Unexpected failure",
            code="INTERNAL_ERROR",
            details=f"{type(e).__name__}: {e}",
            suggestions=["Retry once; if the failure persists, report it to the server operator"],
            recoverable=False
        ).to_json()

Logging for Debugging

Good logging helps you debug issues in production:

import logging
import sys
from datetime import datetime

# Send INFO-and-above records to both stderr and a log file. stdout is left
# alone on purpose: MCP uses stdout for protocol.
logging.basicConfig(
    handlers=[
        logging.StreamHandler(sys.stderr),
        logging.FileHandler("mcp-server.log"),
    ],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("mcp-server")


def log_tool_call(tool_name: str):
    """Decorator factory that logs each call to an async tool.

    For every call the wrapper logs the keyword arguments, then either a
    preview of the result (first 200 characters) or the exception with its
    traceback. Exceptions are re-raised unchanged, and the wrapped function's
    return value is passed through untouched.

    Args:
        tool_name: Name to show in the log lines.

    Returns:
        A decorator for ``async def`` functions.
    """
    import functools

    def decorator(func):
        # wraps() keeps the original name, docstring and signature on the
        # wrapper; without it, anything that introspects the decorated
        # function only sees a generic "wrapper(*args, **kwargs)".
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # HHMMSS plus the first four microsecond digits: enough to pair
            # up the log lines belonging to one call.
            call_id = datetime.now().strftime("%H%M%S%f")[:10]
            logger.info(f"[{call_id}] {tool_name} called with kwargs={kwargs}")

            try:
                result = await func(*args, **kwargs)
            except Exception as e:
                logger.error(f"[{call_id}] {tool_name} failed: {e}", exc_info=True)
                raise

            # Log truncated result. Non-string results are shown via repr()
            # so that building the preview can never fail the call itself.
            text = result if isinstance(result, str) else repr(result)
            result_preview = text[:200] + "..." if len(text) > 200 else text
            logger.info(f"[{call_id}] {tool_name} returned: {result_preview}")
            return result

        return wrapper
    return decorator


# Decorators apply bottom-up: log_tool_call wraps the function first, so the
# logging wrapper is what gets passed to mcp.tool().
@mcp.tool()
@log_tool_call("fetch_data")
async def fetch_data(url: str) -> str:
    """Fetch data from URL.

    Placeholder: the real implementation is elided in this example.
    """
    # ... implementation

Graceful Degradation

When part of a tool fails, return partial results rather than nothing:

@mcp.tool()
async def get_city_info(city: str) -> str:
    """Get comprehensive info about a city (weather, time, population).

    Each lookup is attempted independently; a failing lookup is listed in a
    trailing note instead of failing the whole call.

    Args:
        city: City name passed to each lookup.

    Returns:
        A multi-line report containing whatever data could be fetched,
        followed by a note about any parts that were unavailable.
    """
    results = {"city": city}
    errors = []

    # Try weather - don't fail if this breaks
    try:
        weather = await fetch_weather(city)
        # fetch_weather reports failures by returning an "Error: ..." string
        # rather than raising, so that has to be detected here; otherwise the
        # error text would be displayed as if it were the weather.
        if isinstance(weather, str) and weather.startswith("Error:"):
            errors.append(f"Weather unavailable: {weather[len('Error:'):].strip()}")
            weather = None
        results["weather"] = weather
    except Exception as e:
        errors.append(f"Weather unavailable: {str(e)}")
        results["weather"] = None

    # Try timezone
    try:
        results["local_time"] = await fetch_timezone(city)
    except Exception as e:
        errors.append(f"Timezone unavailable: {str(e)}")
        results["local_time"] = None

    # Try population
    try:
        results["population"] = await fetch_population(city)
    except Exception as e:
        errors.append(f"Population unavailable: {str(e)}")
        results["population"] = None

    # Format response with partial data
    output = [f"Information for {city}:"]

    if results["weather"]:
        output.append(f"  Weather: {results['weather']}")
    if results["local_time"]:
        output.append(f"  Local time: {results['local_time']}")
    population = results["population"]
    if population:
        # The thousands separator only applies to numbers; anything else is
        # shown as-is rather than crashing the formatter.
        if isinstance(population, (int, float)):
            output.append(f"  Population: {population:,}")
        else:
            output.append(f"  Population: {population}")

    if errors:
        output.append("\nNote: Some data unavailable:")
        for err in errors:
            output.append(f"  • {err}")

    return "\n".join(output)

Testing Error Handling

Write tests specifically for error cases:

import pytest
from unittest.mock import patch, AsyncMock


class TestErrorHandling:
    """Error-path tests for the tools.

    All HTTP traffic is mocked so the tests are fast and do not depend on
    the network or on the live weather service.
    """

    @pytest.mark.asyncio
    async def test_handles_timeout(self):
        """Tool returns helpful message on timeout."""
        with patch('httpx.AsyncClient.get', side_effect=httpx.TimeoutException("timeout")):
            # Skip the real backoff delays between retry attempts.
            with patch('asyncio.sleep', new_callable=AsyncMock):
                result = await fetch_weather("London")
        assert "timed out" in result.lower()
        assert "try again" in result.lower()

    @pytest.mark.asyncio
    async def test_handles_invalid_city(self):
        """Tool provides suggestions for invalid city."""
        # A canned 404 response; the request must be attached so that
        # raise_for_status() can build its HTTPStatusError.
        request = httpx.Request("GET", "https://wttr.in/NotARealCity12345")
        not_found = httpx.Response(404, request=request)
        with patch('httpx.AsyncClient.get', new_callable=AsyncMock, return_value=not_found):
            result = await fetch_weather("NotARealCity12345")
        assert "not found" in result.lower()

    def test_validates_empty_title(self):
        """Rejects empty task title with clear message."""
        result = create_task(title="", priority="high")
        assert "required" in result.lower()
        assert "title" in result.lower()

    def test_validates_invalid_priority(self):
        """Lists valid options when priority is invalid."""
        result = create_task(title="Test", priority="super-urgent")
        assert "invalid" in result.lower()
        assert "low" in result.lower()  # Shows valid options
        assert "high" in result.lower()

📚 Related Tutorials

Summary

Error handling in MCP servers is about helping the AI recover gracefully. When something goes wrong:

Catch all exceptions — Never let errors bubble up unhandled
Be specific — Tell the AI exactly what went wrong
Suggest fixes — Provide actionable next steps
Validate early — Catch bad input before doing work
Retry transient failures — Use exponential backoff for network issues
Degrade gracefully — Return partial results when possible
Log everything — You'll need it for debugging

Build these patterns into your MCP servers from the start, and they'll be production-ready from day one.

Questions? Reach out on Twitter or email kai@kaigritun.com.

MCP Error Handling Best Practices