Skip to content
← System Design · intermediate · 15 min · 07 / 26

Rate Limiting

Implement token bucket and sliding window rate limiters with Redis for production API protection.

Tags: rate limiting · token bucket · sliding window · Redis · middleware

Why Rate Limiting?

Without rate limiting, a single user or bot can overwhelm your API, deny service to everyone else, and rack up infrastructure costs. Rate limiting controls how many requests a client can make in a time window.

Real-World Analogy

Like an ATM daily withdrawal limit — you can only withdraw a fixed amount per day to prevent abuse. Hit the limit, and you’re told to try again tomorrow.

Rate Limiter as Middleware
Client ---> Rate Limiter (Allow / Deny) ---> API Server

Algorithms Compared

| Algorithm | Pros | Cons |
| --- | --- | --- |
| Token Bucket | Smooth, allows bursts | Complex to implement correctly |
| Sliding Window | Precise, no boundary issues | Higher memory usage |
| Fixed Window | Simple | Allows up to 2x burst at window edges |
| Leaky Bucket | Smooth output rate | Can’t handle legitimate bursts |

Production Rate Limiter with Redis

import http from "node:http";
import Redis from "ioredis";

// Shared Redis client used by both limiters below. Connects to REDIS_URL when
// that environment variable is set to a non-empty value, otherwise to a Redis
// instance on localhost:6379.
const redis = new Redis(process.env.REDIS_URL || "redis://localhost:6379");

// --- Sliding Window Rate Limiter ---
/**
 * Sliding-window-log rate limiter backed by a Redis sorted set.
 *
 * Every request is stored under `rl:<key>` as one sorted-set member whose
 * score is its arrival time in milliseconds. A request is allowed while fewer
 * than `maxRequests` members remain inside the trailing `windowMs` window.
 */
class SlidingWindowRateLimiter {
  constructor(
    private readonly maxRequests: number,
    private readonly windowMs: number
  ) {}

  /**
   * Records a request for `key` and reports whether it fits in the window.
   *
   * @param key - Client identifier; stored in Redis as `rl:<key>`.
   * @returns `allowed`; `remaining` requests left in the window; `resetAt`
   *   as Unix time in milliseconds; `retryAfter` in whole seconds (0 when
   *   allowed, at least 1 when denied).
   */
  async isAllowed(key: string): Promise<{
    allowed: boolean;
    remaining: number;
    resetAt: number;
    retryAfter: number;
  }> {
    const now = Date.now();
    const windowStart = now - this.windowMs;
    const redisKey = `rl:${key}`;
    // Unique member so that this exact entry can be rolled back if denied.
    const member = `${now}:${Math.random()}`;

    // MULTI/EXEC makes trim + count + add one atomic unit. A plain pipeline
    // only batches round trips; other clients' commands can interleave.
    const tx = redis.multi();

    // Remove expired entries
    tx.zremrangebyscore(redisKey, 0, windowStart);

    // Count entries still inside the window (before adding this request)
    tx.zcard(redisKey);

    // Add current request (rolled back below if denied)
    tx.zadd(redisKey, now, member);

    // Let idle keys disappear once their whole window has passed
    tx.pexpire(redisKey, this.windowMs);

    const results = await tx.exec();
    // A missing or non-numeric count is treated as 0, i.e. the request is let
    // through rather than rejected on a malformed reply.
    const rawCount = results?.[1]?.[1];
    const currentCount = typeof rawCount === "number" ? rawCount : 0;

    if (currentCount >= this.maxRequests) {
      // Over limit: remove only the entry added above. Removing by score
      // (now..now) would also delete other requests recorded in the same
      // millisecond, including ones that were legitimately allowed.
      await redis.zrem(redisKey, member);

      // The window frees a slot when its oldest entry expires
      const oldest = await redis.zrange(redisKey, 0, 0, "WITHSCORES");
      const oldestScore = oldest.length >= 2 ? Number(oldest[1]) : NaN;
      const oldestTime = Number.isFinite(oldestScore) ? oldestScore : now;
      const resetAt = oldestTime + this.windowMs;
      const retryAfter = Math.ceil((resetAt - now) / 1000);

      return {
        allowed: false,
        remaining: 0,
        resetAt,
        retryAfter: Math.max(retryAfter, 1),
      };
    }

    return {
      allowed: true,
      remaining: this.maxRequests - currentCount - 1,
      resetAt: now + this.windowMs,
      retryAfter: 0,
    };
  }
}

// --- Token Bucket Rate Limiter ---
/**
 * Token bucket rate limiter whose state lives in a Redis hash at
 * `rl:tb:<key>` (fields `tokens` and `last_refill`).
 *
 * The bucket starts full, refills continuously at `refillRate` tokens per
 * second up to `capacity`, and each allowed request spends one token. The
 * whole read-refill-spend-write cycle runs inside one Lua script, so
 * concurrent requests cannot interleave between the check and the decrement.
 */
class TokenBucketRateLimiter {
  // Built once instead of on every call.
  // KEYS[1] = bucket key; ARGV = capacity, refill rate (tokens/s), now (ms).
  // Returns {allowed (0|1), whole tokens left, ms until the next token}.
  private static readonly SCRIPT = `
      local key = KEYS[1]
      local capacity = tonumber(ARGV[1])
      local refill_rate = tonumber(ARGV[2])
      local now = tonumber(ARGV[3])

      local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
      local tokens = tonumber(bucket[1])
      local last_refill = tonumber(bucket[2])

      if tokens == nil or last_refill == nil then
        tokens = capacity
        last_refill = now
      end

      -- Refill tokens based on elapsed time. "now" comes from the caller's
      -- clock, so clamp at zero: a caller whose clock is behind must not
      -- drain tokens or move last_refill backwards.
      local elapsed = math.max(0, now - last_refill) / 1000
      tokens = math.min(capacity, tokens + (elapsed * refill_rate))
      last_refill = math.max(last_refill, now)

      local allowed = 0
      if tokens >= 1 then
        tokens = tokens - 1
        allowed = 1
      end

      redis.call('HMSET', key, 'tokens', tokens, 'last_refill', last_refill)
      redis.call('PEXPIRE', key, math.ceil(capacity / refill_rate) * 1000)

      -- Time until the fractional balance reaches one whole token
      local wait_ms = 0
      if allowed == 0 then
        wait_ms = math.ceil((1 - tokens) / refill_rate * 1000)
      end

      return {allowed, math.floor(tokens), wait_ms}
    `;

  /**
   * @param capacity - Maximum number of tokens the bucket can hold.
   * @param refillRate - Tokens added per second; must be finite and > 0.
   * @throws RangeError when `refillRate` is not a positive finite number
   *   (the script divides by it to compute the key's expiry).
   */
  constructor(
    private readonly capacity: number,       // max tokens
    private readonly refillRate: number,      // tokens added per second
  ) {
    if (!Number.isFinite(refillRate) || refillRate <= 0) {
      throw new RangeError("refillRate must be a positive, finite number");
    }
  }

  /**
   * Tries to spend one token from the bucket for `key`.
   *
   * @param key - Client identifier; stored in Redis as `rl:tb:<key>`.
   * @returns `allowed`; `remaining` whole tokens after this call;
   *   `retryAfter` in whole seconds (0 when allowed, at least 1 when denied).
   */
  async isAllowed(key: string): Promise<{
    allowed: boolean;
    remaining: number;
    retryAfter: number;
  }> {
    const redisKey = `rl:tb:${key}`;
    const now = Date.now();

    const reply = (await redis.eval(
      TokenBucketRateLimiter.SCRIPT, 1, redisKey,
      this.capacity, this.refillRate, now
    )) as number[];

    const allowed = reply[0] === 1;
    const remaining = reply[1] ?? 0;
    const waitMs = reply[2] ?? 0;

    return {
      allowed,
      remaining,
      // Derived from the actual token deficit rather than a full 1/refillRate
      retryAfter: allowed ? 0 : Math.max(1, Math.ceil(waitMs / 1000)),
    };
  }
}

// --- Rate Limit Middleware ---
/** Quota for one pricing plan: at most `maxRequests` per `windowMs` milliseconds. */
type RateLimitTier = {
  maxRequests: number;
  windowMs: number;
};

/** Plan name -> quota. Every plan uses a one-hour window and differs only in volume. */
const tiers: Record<string, RateLimitTier> = (() => {
  const hourMs = 60 * 60 * 1000;
  const perHour = (maxRequests: number): RateLimitTier => ({
    maxRequests,
    windowMs: hourMs,
  });

  return {
    free: perHour(100),         // 100/hour
    pro: perHour(1000),         // 1000/hour
    enterprise: perHour(10000), // 10000/hour
  };
})();

/**
 * Resolves the pricing plan that applies to a request.
 *
 * Placeholder: the request is not inspected and every caller is treated as
 * "free". In production the plan would be looked up from the auth token.
 */
function getUserTier(_req: http.IncomingMessage): string {
  const defaultPlan = "free";
  return defaultPlan;
}

/**
 * Builds the rate-limit bucket key for a request.
 *
 * A non-empty `x-api-key` header yields `api:<key>`; otherwise the socket's
 * remote address yields `ip:<address>`. When the address is unavailable the
 * key is `ip:unknown` instead of the interpolated string "ip:undefined".
 *
 * NOTE(review): the header value is used as-is, without checking that it is a
 * real key. If keys are not validated elsewhere, a client can obtain a fresh
 * bucket by sending a different header value. Confirm against the auth layer.
 */
function getClientKey(req: http.IncomingMessage): string {
  // Header values are typed string | string[] | undefined; take the first
  // entry of an array rather than interpolating it comma-joined.
  const header = req.headers["x-api-key"];
  const apiKey = Array.isArray(header) ? header[0] : header;
  if (apiKey) return `api:${apiKey}`;

  return `ip:${req.socket.remoteAddress ?? "unknown"}`;
}

/**
 * Applies the caller's tier quota to a request.
 *
 * Sets the `X-RateLimit-*` headers on every checked response. When the caller
 * is over quota it also writes the complete 429 response (with `Retry-After`)
 * and resolves to `false`; the caller must then stop handling the request.
 *
 * Fails open: if the limiter itself throws (for example Redis is
 * unreachable), the error is logged and the request is allowed without
 * rate-limit headers, so a limiter outage does not take the API down.
 *
 * @returns `true` when the request may proceed, `false` when a 429 was sent.
 */
async function rateLimitMiddleware(
  req: http.IncomingMessage,
  res: http.ServerResponse
): Promise<boolean> {
  // Unknown plan names fall back to the most restrictive tier instead of
  // dereferencing undefined.
  const tier = tiers[getUserTier(req)] ?? tiers["free"];
  const limiter = new SlidingWindowRateLimiter(tier.maxRequests, tier.windowMs);
  const key = getClientKey(req);

  let result: Awaited<ReturnType<SlidingWindowRateLimiter["isAllowed"]>>;
  try {
    result = await limiter.isAllowed(key);
  } catch (err: unknown) {
    console.error("Rate limit error:", err);
    return true; // fail open
  }

  // Always set rate limit headers
  res.setHeader("X-RateLimit-Limit", tier.maxRequests);
  res.setHeader("X-RateLimit-Remaining", result.remaining);
  // resetAt is in milliseconds; the header carries Unix seconds
  res.setHeader("X-RateLimit-Reset", Math.ceil(result.resetAt / 1000));

  if (!result.allowed) {
    res.setHeader("Retry-After", result.retryAfter);
    res.writeHead(429, { "Content-Type": "application/json" });
    res.end(JSON.stringify({
      error: "Too many requests",
      retryAfter: result.retryAfter,
    }));
    return false;
  }

  return true;
}

// --- Server ---
// HTTP server: every request passes through the rate limiter before the
// (demo) handler runs. The async listener is wrapped in try/catch because
// http.createServer does not observe the returned promise; an uncaught
// rejection would otherwise leave the request hanging and surface as an
// unhandled rejection.
const server = http.createServer(async (req, res) => {
  try {
    const allowed = await rateLimitMiddleware(req, res);
    if (!allowed) return; // 429 already written by the middleware

    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ message: "OK", timestamp: new Date().toISOString() }));
  } catch (err: unknown) {
    console.error("Unhandled request error:", err);
    if (res.headersSent) {
      // A response was already started; just terminate it
      res.end();
      return;
    }
    res.writeHead(500, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ error: "Internal server error" }));
  }
});

server.listen(3000, () => console.log("Server with rate limiting on :3000"));
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"net"
	"net/http"
	"strconv"
	"time"

	"github.com/redis/go-redis/v9"
)

// rdb is the shared Redis client used by every limiter in this file. It
// targets a Redis instance at localhost:6379.
var rdb = redis.NewClient(&redis.Options{Addr: "localhost:6379"})

// --- Sliding Window Rate Limiter ---
// SlidingWindowLimiter allows at most MaxRequests requests per key within any
// trailing Window. Request timestamps are kept in a Redis sorted set.
type SlidingWindowLimiter struct {
	// MaxRequests is the number of requests permitted inside one window.
	MaxRequests int
	// Window is the length of the trailing time window.
	Window      time.Duration
}

// RateLimitResult is the outcome of one SlidingWindowLimiter.IsAllowed call.
type RateLimitResult struct {
	// Allowed reports whether the request may proceed.
	Allowed    bool
	// Remaining is the number of requests still available in the window
	// (0 when the request was denied).
	Remaining  int
	// ResetAt is a Unix timestamp in milliseconds.
	ResetAt    int64
	// RetryAfter is the suggested wait in whole seconds; it is only set
	// when the request was denied.
	RetryAfter int
}

// IsAllowed records a request for key and reports whether it fits inside the
// trailing window.
//
// Entries older than the window are trimmed, the remaining entries are
// counted and the new request is added, all in one MULTI/EXEC transaction so
// concurrent callers cannot interleave between those steps. When the count
// already equals or exceeds MaxRequests, the entry just added is removed again
// and the result carries the time at which the oldest entry leaves the
// window.
//
// An error is returned only when the transaction itself fails. Failures while
// cleaning up a denied request are logged and do not change the decision.
func (l *SlidingWindowLimiter) IsAllowed(ctx context.Context, key string) (*RateLimitResult, error) {
	now := time.Now().UnixMilli()
	windowMs := l.Window.Milliseconds()
	redisKey := "rl:" + key
	// Unique member so that exactly this entry can be rolled back if denied.
	member := fmt.Sprintf("%d:%d", now, time.Now().UnixNano())

	pipe := rdb.TxPipeline()
	pipe.ZRemRangeByScore(ctx, redisKey, "0", strconv.FormatInt(now-windowMs, 10))
	countCmd := pipe.ZCard(ctx, redisKey)
	pipe.ZAdd(ctx, redisKey, redis.Z{Score: float64(now), Member: member})
	pipe.PExpire(ctx, redisKey, l.Window)

	if _, err := pipe.Exec(ctx); err != nil {
		return nil, fmt.Errorf("pipeline: %w", err)
	}

	// count is the number of entries in the window before this request.
	count := int(countCmd.Val())

	if count < l.MaxRequests {
		return &RateLimitResult{
			Allowed: true, Remaining: l.MaxRequests - count - 1,
			ResetAt: now + windowMs,
		}, nil
	}

	// Over limit: remove only the member added above. Removing by score
	// (now..now) would also delete other requests recorded in the same
	// millisecond, including ones that were allowed.
	if err := rdb.ZRem(ctx, redisKey, member).Err(); err != nil {
		log.Printf("rate limit: rollback of denied entry failed: %v", err)
	}

	// A slot frees up when the oldest entry leaves the window.
	retryAfter := 1
	resetAt := now + windowMs
	oldest, err := rdb.ZRangeWithScores(ctx, redisKey, 0, 0).Result()
	if err != nil {
		log.Printf("rate limit: reading oldest entry failed: %v", err)
	} else if len(oldest) > 0 {
		resetAt = int64(oldest[0].Score) + windowMs
		// Round up to whole seconds without over-counting exact multiples.
		retryAfter = int((resetAt - now + 999) / 1000)
		if retryAfter < 1 {
			retryAfter = 1
		}
	}

	return &RateLimitResult{
		Allowed: false, Remaining: 0,
		ResetAt: resetAt, RetryAfter: retryAfter,
	}, nil
}

// --- Token Bucket (Lua-based, atomic) ---
// TokenBucketLimiter is a token bucket evaluated atomically in Redis: each
// allowed request spends one token, and tokens are added back over time.
type TokenBucketLimiter struct {
	// Capacity is the maximum number of tokens the bucket can hold.
	Capacity   int
	// RefillRate is the number of tokens added per second.
	RefillRate float64 // tokens per second
}

// tokenBucketScript performs one read-refill-spend-write cycle on a bucket
// stored as a Redis hash with fields "tokens" and "last_refill".
//
// KEYS[1] is the bucket key. ARGV is capacity, refill rate in tokens per
// second, and the caller's current time in milliseconds. It returns
// {allowed (0 or 1), whole tokens left}.
var tokenBucketScript = redis.NewScript(`
	local key = KEYS[1]
	local capacity = tonumber(ARGV[1])
	local refill_rate = tonumber(ARGV[2])
	local now = tonumber(ARGV[3])

	local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
	local tokens = tonumber(bucket[1])
	local last_refill = tonumber(bucket[2])

	-- A missing or partially written bucket starts full
	if tokens == nil or last_refill == nil then
		tokens = capacity
		last_refill = now
	end

	-- "now" comes from the caller's clock, so clamp at zero: a caller whose
	-- clock is behind must not drain tokens or move last_refill backwards.
	local elapsed = math.max(0, now - last_refill) / 1000
	tokens = math.min(capacity, tokens + (elapsed * refill_rate))
	last_refill = math.max(last_refill, now)

	local allowed = 0
	if tokens >= 1 then
		tokens = tokens - 1
		allowed = 1
	end

	redis.call('HMSET', key, 'tokens', tokens, 'last_refill', last_refill)
	-- Expire after the time an empty bucket needs to refill completely
	redis.call('PEXPIRE', key, math.ceil(capacity / refill_rate) * 1000)

	return {allowed, math.floor(tokens)}
`)

// IsAllowed tries to spend one token from the bucket stored at "rl:tb:"+key.
//
// It returns whether the request is allowed and the number of whole tokens
// left afterwards. An error is returned when RefillRate is not positive (the
// script divides by it to compute the key's expiry), when the script fails,
// or when the script's reply does not contain both values.
func (l *TokenBucketLimiter) IsAllowed(ctx context.Context, key string) (bool, int, error) {
	// Written as !(x > 0) so that NaN is rejected as well.
	if !(l.RefillRate > 0) {
		return false, 0, fmt.Errorf("token bucket: refill rate must be positive, got %g", l.RefillRate)
	}

	result, err := tokenBucketScript.Run(ctx, rdb, []string{"rl:tb:" + key},
		l.Capacity, l.RefillRate, time.Now().UnixMilli(),
	).Int64Slice()
	if err != nil {
		return false, 0, err
	}
	if len(result) < 2 {
		return false, 0, fmt.Errorf("token bucket: unexpected script reply %v", result)
	}
	return result[0] == 1, int(result[1]), nil
}

// --- Tier-based middleware ---
// RateLimitTier is the quota attached to one pricing plan: at most
// MaxRequests requests per Window.
type RateLimitTier struct {
	MaxRequests int
	Window      time.Duration
}

// tiers maps a plan name to its quota. Every plan uses a one-hour window and
// differs only in the number of requests allowed.
var tiers = map[string]RateLimitTier{
	"free":       {MaxRequests: 100, Window: time.Hour},
	"pro":        {MaxRequests: 1000, Window: time.Hour},
	"enterprise": {MaxRequests: 10000, Window: time.Hour},
}

// rateLimitMiddleware wraps next with a sliding-window rate limit.
//
// Requests are bucketed by the X-API-Key header when present ("api:<key>"),
// otherwise by client IP ("ip:<host>"). Every checked response carries the
// X-RateLimit-* headers; over-quota requests receive a 429 JSON body with
// Retry-After and never reach next. If the limiter returns an error the
// request is passed through (fail open) and the error is logged.
func rateLimitMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		tier := tiers["free"] // In production: from auth token
		limiter := &SlidingWindowLimiter{MaxRequests: tier.MaxRequests, Window: tier.Window}

		// Use API key or IP. RemoteAddr is "host:port" and the port changes
		// with every new connection, so it must be stripped; otherwise each
		// connection would get its own bucket.
		host, _, splitErr := net.SplitHostPort(r.RemoteAddr)
		if splitErr != nil {
			host = r.RemoteAddr // not in host:port form; use it unchanged
		}
		key := "ip:" + host
		if apiKey := r.Header.Get("X-API-Key"); apiKey != "" {
			key = "api:" + apiKey
		}

		result, err := limiter.IsAllowed(r.Context(), key)
		if err != nil {
			log.Printf("Rate limit error: %v", err)
			next.ServeHTTP(w, r) // fail open
			return
		}

		w.Header().Set("X-RateLimit-Limit", strconv.Itoa(tier.MaxRequests))
		w.Header().Set("X-RateLimit-Remaining", strconv.Itoa(result.Remaining))
		// ResetAt is in milliseconds; round up to Unix seconds so the
		// advertised reset time is never earlier than the real one.
		w.Header().Set("X-RateLimit-Reset", strconv.FormatInt((result.ResetAt+999)/1000, 10))

		if !result.Allowed {
			w.Header().Set("Retry-After", strconv.Itoa(result.RetryAfter))
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(http.StatusTooManyRequests)
			json.NewEncoder(w).Encode(map[string]interface{}{
				"error": "Too many requests", "retryAfter": result.RetryAfter,
			})
			return
		}

		next.ServeHTTP(w, r)
	})
}

// main serves a single JSON endpoint on :3000 behind the rate-limit
// middleware.
func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]string{"message": "OK"})
	})

	// An explicit http.Server is used instead of http.ListenAndServe so that
	// a header-read timeout can be set; the package-level helper has none,
	// which lets slow clients hold connections open indefinitely.
	srv := &http.Server{
		Addr:              ":3000",
		Handler:           rateLimitMiddleware(mux),
		ReadHeaderTimeout: 5 * time.Second,
	}

	log.Println("Server with rate limiting on :3000")
	log.Fatal(srv.ListenAndServe())
}

Key Takeaways

  • Sliding window is the most accurate algorithm — no boundary burst issues
  • Token bucket is best when you want to allow short bursts above the average rate
  • Use Lua scripts for atomic Redis operations — prevents race conditions between check and increment
  • Always return X-RateLimit-* and Retry-After headers so clients can self-throttle
  • Fail open on rate limiter errors — don’t block all traffic because Redis is momentarily down

Real-World Usage

  • GitHub’s API allows 5,000 requests/hour for authenticated users and returns X-RateLimit-* headers
  • Stripe rate limits per API key with tiered limits based on account type
  • Cloudflare processes rate limiting at their edge network to block abuse before it reaches origin servers
  • Every public API needs rate limiting — it’s not optional in production