Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/zhcndoc/bun/llms.txt

Use this file to discover all available pages before exploring further.

Bun implements the HTMLRewriter API for parsing and transforming HTML documents using CSS selectors. It’s based on Cloudflare Workers’ HTMLRewriter and uses the lol-html parser.

Basic Usage

const rewriter = new HTMLRewriter();

rewriter.on("p", {
  element(element) {
    element.setInnerContent("Hello World!");
  },
});

const input = "<html><body><p>Original</p></body></html>";
const output = rewriter.transform(new Response(input));

console.log(await output.text());
// <html><body><p>Hello World!</p></body></html>

Selecting Elements

CSS Selectors

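Select elements with CSS selectors. The underlying lol-html parser supports a subset of CSS (tag, class, ID, and attribute selectors, descendant and child combinators, and pseudo-classes such as :first-child, :nth-child() and :not()):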
const rewriter = new HTMLRewriter();

// Tag selectors
rewriter.on("p", handlers);
rewriter.on("div", handlers);

// Class selectors
rewriter.on(".my-class", handlers);

// ID selectors
rewriter.on("#header", handlers);

// Attribute selectors
rewriter.on('[data-user-id="123"]', handlers);
rewriter.on('a[href^="https://"]', handlers);

// Combinators
rewriter.on("div > p", handlers); // Direct child
rewriter.on("div p", handlers);   // Descendant
// Note: sibling combinators ("h1 + p", "h1 ~ p") are not supported by the
// selector engine; registering such a selector throws.

// Pseudo-classes
rewriter.on("li:first-child", handlers);
rewriter.on("p:not(.ignore)", handlers);

Multiple Selectors

const rewriter = new HTMLRewriter();

// Different handlers for different selectors
rewriter.on("h1", {
  element(el) {
    el.setAttribute("class", "title");
  },
});

rewriter.on("p", {
  element(el) {
    el.setAttribute("class", "paragraph");
  },
});

Element Handlers

Element Handler

Called when an element is encountered:
rewriter.on("div", {
  element(element) {
    console.log("Found div:", element.tagName);
  },
});

Text Handler

Called for text nodes:
rewriter.on("p", {
  text(text) {
    if (text.text.includes("TODO")) {
      text.replace("[PENDING]");
    }
  },
});

Comment Handler

Called for HTML comments:
rewriter.on("div", {
  comments(comment) {
    if (comment.text.includes("debug")) {
      comment.remove();
    }
  },
});

Modifying Elements

Attributes

rewriter.on("img", {
  element(element) {
    // Get attribute
    const src = element.getAttribute("src");
    
    // Set attribute
    element.setAttribute("loading", "lazy");
    
    // Remove attribute
    element.removeAttribute("width");
    
    // Check if attribute exists
    if (element.hasAttribute("alt")) {
      console.log("Has alt text");
    }
  },
});

Content

rewriter.on("div", {
  element(element) {
    // Replace all content
    element.setInnerContent("New content");
    
    // Replace with HTML
    element.setInnerContent("<p>Paragraph</p>", { html: true });
    
    // Prepend content
    element.prepend("Start: ");
    element.prepend("<b>Bold</b>", { html: true });
    
    // Append content
    element.append(" :End");
    element.append("<i>Italic</i>", { html: true });
  },
});

Insertion

rewriter.on("h1", {
  element(element) {
    // Insert before element
    element.before("<nav>Navigation</nav>", { html: true });
    
    // Insert after element
    element.after("<hr>", { html: true });
  },
});

Removal

rewriter.on(".ad", {
  element(element) {
    // Remove element and its content
    element.remove();
  },
});

rewriter.on(".strip", {
  element(element) {
    // Remove element but keep content
    element.removeAndKeepContent();
  },
});

Text Manipulation

Text Nodes

// Showcase of the operations available on a text chunk. Each call below is an
// independent example; a real handler would normally use only one of them.
rewriter.on("p", {
  text(text) {
    // Get the text content of this chunk
    console.log(text.text);
    
    // Check if this is the last chunk of the current text node
    // (a text node may be delivered to the handler in several chunks)
    if (text.lastInTextNode) {
      text.after(" (end)");
    }
    
    // Replace text
    text.replace(text.text.toUpperCase());
    
    // Insert before/after
    text.before("[");
    text.after("]");
    
    // Remove text
    text.remove();
  },
});

Text Processing

rewriter.on("code", {
  text(text) {
    // Escape HTML entities by hand
    const escaped = text.text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

    // Insert the result as raw HTML. Without `html: true` the rewriter treats
    // the content as plain text and escapes it a second time ("&lt;" would
    // become "&amp;lt;").
    text.replace(escaped, { html: true });
  },
});

Document Handlers

Document-Level Events

rewriter.onDocument({
  doctype(doctype) {
    console.log("DOCTYPE:", doctype.name);
  },
  
  comments(comment) {
    // Handle document-level comments
    if (comment.text.includes("remove")) {
      comment.remove();
    }
  },
  
  text(text) {
    // Handle document-level text (outside elements)
  },
  
  end(end) {
    // Called at end of document
    console.log("Document processing complete");
  },
});

Transforming Responses

Basic Transform

const rewriter = new HTMLRewriter();

rewriter.on("title", {
  element(element) {
    element.setInnerContent("New Title");
  },
});

const response = await fetch("https://example.com");
const transformed = rewriter.transform(response);

const html = await transformed.text();

Streaming Transform

const rewriter = new HTMLRewriter();

rewriter.on("img", {
  element(element) {
    element.setAttribute("loading", "lazy");
  },
});

const response = await fetch("https://example.com");
const stream = rewriter.transform(response).body;

// Stream chunks as they're transformed
for await (const chunk of stream) {
  console.log("Chunk:", chunk.length, "bytes");
}

Common Use Cases

Add Analytics

const rewriter = new HTMLRewriter();

rewriter.on("head", {
  element(element) {
    element.append(
      `<script>
        // Analytics code
        gtag('config', 'GA_MEASUREMENT_ID');
      </script>`,
      { html: true },
    );
  },
});

Lazy Load Images

const rewriter = new HTMLRewriter();

rewriter.on("img", {
  element(element) {
    const src = element.getAttribute("src");
    
    if (src && !src.startsWith("data:")) {
      element.setAttribute("loading", "lazy");
      element.setAttribute("decoding", "async");
    }
  },
});

External Links

const rewriter = new HTMLRewriter();

rewriter.on('a[href^="http://"]', {
  element(element) {
    const href = element.getAttribute("href");
    
    // Upgrade to HTTPS
    if (href) {
      element.setAttribute("href", href.replace("http://", "https://"));
    }
    
    // Add external link indicator
    element.setAttribute("target", "_blank");
    element.setAttribute("rel", "noopener noreferrer");
  },
});

Content Security

const rewriter = new HTMLRewriter();

// Remove scripts from untrusted HTML
rewriter.on("script", {
  element(element) {
    element.remove();
  },
});

// Remove inline event handlers.
// Every attribute whose name starts with "on" (onclick, onfocus, onsubmit, ...)
// is stripped; a fixed allow/deny list would silently miss the handlers it
// does not mention. Note this alone is not a complete HTML sanitizer (for
// example, it does not touch `javascript:` URLs).
rewriter.on("*", {
  element(element) {
    // Collect names first so attributes are not removed while iterating.
    const inlineHandlers: string[] = [];
    for (const [name] of element.attributes) {
      if (name.toLowerCase().startsWith("on")) {
        inlineHandlers.push(name);
      }
    }

    for (const name of inlineHandlers) {
      element.removeAttribute(name);
    }
  },
});

Extract Metadata

// Collected page metadata, keyed by "title", "description" or the Open Graph
// property name (e.g. "og:image").
const metadata: Record<string, string> = {};

const rewriter = new HTMLRewriter();

rewriter.on("title", {
  text(text) {
    // Text is delivered in chunks, and a later chunk may be empty, so append
    // instead of assigning; otherwise the last chunk overwrites the title.
    metadata.title = (metadata.title ?? "") + text.text;
  },
});

rewriter.on('meta[name="description"]', {
  element(element) {
    const content = element.getAttribute("content");
    if (content !== null) {
      metadata.description = content;
    }
  },
});

rewriter.on('meta[property^="og:"]', {
  element(element) {
    const property = element.getAttribute("property");
    const content = element.getAttribute("content");
    // getAttribute() returns null for a missing attribute; skip incomplete tags
    // rather than storing a "null" key or a null value.
    if (property !== null && content !== null) {
      metadata[property] = content;
    }
  },
});

const response = await fetch(url);
// The transformed output is discarded; consuming the body is what drives the
// handlers above.
await rewriter.transform(response).arrayBuffer();

console.log(metadata);

Syntax Highlighting

import { highlight } from "./highlighter";

const rewriter = new HTMLRewriter();

// Source text of the <code> element currently being processed. It lives
// outside the handlers because both `element` and `text` need access to it.
// (Assumes <code> elements are not nested.)
let code = "";

rewriter.on("code", {
  element(element) {
    const lang = element.getAttribute("class")?.replace("language-", "");

    // Start collecting afresh for this element
    code = "";

    // By the time the end tag is reached all text chunks have been seen, so
    // emit the highlighted markup right before the closing tag.
    element.onEndTag(endTag => {
      const highlighted = highlight(code, lang);
      endTag.before(highlighted, { html: true });
    });
  },

  text(text) {
    // Buffer the original text and drop it from the output; it is replaced
    // by the highlighted version inserted at the end tag.
    code += text.text;
    text.remove();
  },
});

Localization

// Translation tables keyed by language code, then by message key.
// Typed as a string-indexed record so lookups with an arbitrary `lang`
// type-check under `strict` and simply yield `undefined` when missing.
const translations: Record<string, Record<string, string>> = {
  en: { hello: "Hello", goodbye: "Goodbye" },
  es: { hello: "Hola", goodbye: "Adiós" },
};

/**
 * Builds a rewriter that replaces the content of every `[data-i18n]` element
 * with the translation of its key.
 *
 * @param lang Language code to look up in `translations`.
 * @returns A rewriter with the localization handler registered. Unknown
 *   languages or keys fall back to the key itself.
 */
function localize(lang: string) {
  const rewriter = new HTMLRewriter();

  rewriter.on('[data-i18n]', {
    element(element) {
      const key = element.getAttribute("data-i18n");
      if (!key) return; // attribute missing or empty: leave the element alone

      // `??` only falls back when no translation exists
      const text = translations[lang]?.[key] ?? key;
      element.setInnerContent(text);
    },
  });

  return rewriter;
}

const spanish = localize("es");
// <p data-i18n="hello">Hello</p> -> <p data-i18n="hello">Hola</p>

Performance

Streaming Benefits

HTMLRewriter processes HTML as it streams:
const rewriter = new HTMLRewriter();

rewriter.on("img", {
  element(element) {
    element.setAttribute("loading", "lazy");
  },
});

// Starts transforming before entire HTML is downloaded
const response = await fetch("https://example.com/large.html");
const transformed = rewriter.transform(response);

// Can start reading output immediately
const reader = transformed.body.getReader();
const { value } = await reader.read();
console.log("First chunk:", value);

Memory Efficiency

// Bad - loads entire HTML into memory
const html = await response.text();
const modified = html.replace(/<img/g, '<img loading="lazy"');

// Good - streams and transforms
const transformed = new HTMLRewriter()
  .on("img", {
    element(el) {
      el.setAttribute("loading", "lazy");
    },
  })
  .transform(response);

Error Handling

const rewriter = new HTMLRewriter();

rewriter.on("img", {
  element(element) {
    try {
      const src = element.getAttribute("src");
      if (!src) {
        console.warn("Image missing src attribute");
        return;
      }
      
      // Transform src
      element.setAttribute("src", transformUrl(src));
    } catch (err) {
      console.error("Error processing image:", err);
    }
  },
});

try {
  const response = await fetch(url);
  const transformed = rewriter.transform(response);
  const html = await transformed.text();
} catch (err) {
  console.error("Transform failed:", err);
}

Best Practices

  1. Use specific selectors
    // Good - specific
    rewriter.on("nav.main-menu > li > a", handlers);
    
    // Bad - too broad
    rewriter.on("a", handlers);
    
  2. Avoid blocking operations
    // Bad - blocks streaming
    rewriter.on("img", {
      async element(element) {
        const data = await fetch(element.getAttribute("src"));
        // ...
      },
    });
    
    // Good - non-blocking
    rewriter.on("img", {
      element(element) {
        const src = element.getAttribute("src");
        processLater(src);
      },
    });
    
  3. Minimize transformations
    // Good - single rewriter
    const rewriter = new HTMLRewriter()
      .on("img", imgHandler)
      .on("a", linkHandler);
    
    // Bad - multiple passes
    let html = original;
    html = new HTMLRewriter().on("img", imgHandler).transform(html);
    html = new HTMLRewriter().on("a", linkHandler).transform(html);
    
  4. Handle missing attributes
    rewriter.on("a", {
      element(element) {
        const href = element.getAttribute("href");
        if (!href) return; // Skip links without href
        
        // Process href
      },
    });
    

API Reference

HTMLRewriter

const rewriter = new HTMLRewriter();

rewriter.on()

rewriter.on(selector: string, handlers: ElementHandlers)

rewriter.onDocument()

rewriter.onDocument(handlers: DocumentHandlers)

rewriter.transform()

rewriter.transform(response: Response): Response

Element

/** An element matched by a selector, as passed to the `element` handler. */
interface Element {
  /** Tag name of the element, e.g. "div". */
  tagName: string;
  /** Iterates over the element's `[name, value]` attribute pairs. */
  readonly attributes: IterableIterator<[string, string]>;
  /** Whether the element has been removed or replaced by a handler. */
  readonly removed: boolean;
  /** Returns the attribute value, or `null` when the attribute is absent. */
  getAttribute(name: string): string | null;
  hasAttribute(name: string): boolean;
  setAttribute(name: string, value: string): void;
  removeAttribute(name: string): void;
  /** Inserts content immediately before the element's start tag. */
  before(content: string, options?: ContentOptions): void;
  /** Inserts content immediately after the element's end tag. */
  after(content: string, options?: ContentOptions): void;
  /** Inserts content at the start of the element's children. */
  prepend(content: string, options?: ContentOptions): void;
  /** Inserts content at the end of the element's children. */
  append(content: string, options?: ContentOptions): void;
  /** Replaces the element, including its tags, with the given content. */
  replace(content: string, options?: ContentOptions): void;
  /** Replaces the element's children with the given content. */
  setInnerContent(content: string, options?: ContentOptions): void;
  /** Removes the element together with its content. */
  remove(): void;
  /** Removes the element's tags but keeps its content. */
  removeAndKeepContent(): void;
  /** Registers a handler that runs when the element's end tag is reached. */
  onEndTag(handler: (tag: EndTag) => void | Promise<void>): void;
}

/** Options accepted by the content-inserting methods. */
interface ContentOptions {
  /** When true, content is inserted as raw HTML; otherwise it is escaped as text. */
  html?: boolean;
}

/** End tag passed to a handler registered with `Element.onEndTag()`. */
interface EndTag {
  /** Tag name of the end tag. */
  name: string;
  before(content: string, options?: ContentOptions): void;
  after(content: string, options?: ContentOptions): void;
  remove(): void;
}

Text

/** A chunk of text passed to a `text` handler. */
interface Text {
  /** Text content of this chunk. */
  text: string;
  /** Whether this chunk is the last one of its text node. */
  lastInTextNode: boolean;
  /** Inserts content before this chunk. */
  before(content: string, options?: ContentOptions): void;
  /** Inserts content after this chunk. */
  after(content: string, options?: ContentOptions): void;
  /** Replaces this chunk with the given content. */
  replace(content: string, options?: ContentOptions): void;
  /** Removes this chunk from the output. */
  remove(): void;
}

Platform Support

HTMLRewriter works on:
  • Bun
  • Cloudflare Workers
  • Any environment with a compatible polyfill

Differences from Cloudflare Workers

Bun’s implementation is fully compatible with Cloudflare Workers’ HTMLRewriter API.

Build docs developers (and LLMs) love