Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/sepinf-inc/IPED/llms.txt

Use this file to discover all available pages before exploring further.

Practical examples demonstrating how to integrate with the IPED Web API using various programming languages and tools.

Setup

Start the IPED Web API server (the command reads the sources.json file shown below, so create that file first):
# Run iped.jar in Web API mode on localhost:8080, passing sources.json as the source list.
java -jar iped.jar --webapi \
  --host=localhost \
  --port=8080 \
  --sources=sources.json
Create sources.json, listing the id and path of each case to expose:
[
  {
    "id": "case1",
    "path": "/data/iped-cases/case-2024-001"
  }
]

Python Examples

Basic Search and Download

import requests
import os

class IPEDClient:
    """Minimal client for the IPED Web API search and document endpoints.

    Args:
        base_url: Root URL of the Web API server. A trailing slash is
            stripped so that built URLs never contain "//".
        timeout: Seconds to wait on each request. Without a timeout a
            stalled server would block the caller indefinitely.
    """

    def __init__(self, base_url="http://localhost:8080", timeout=30):
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    def _doc_url(self, source_id, item_id, suffix=""):
        """Return the URL of a document resource.

        Both IDs are percent-encoded so that characters such as "/", "?"
        or spaces cannot change which endpoint is addressed.
        """
        from urllib.parse import quote

        source = quote(str(source_id), safe='')
        item = quote(str(item_id), safe='')
        return f"{self.base_url}/sources/{source}/docs/{item}{suffix}"

    def search(self, query, source_id=None):
        """Search for items.

        Args:
            query: Query string sent as the ``q`` parameter.
            source_id: Optional source to restrict the search to, sent as
                ``sourceID``.

        Returns:
            The ``docs`` entry of the JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        params = {'q': query}
        if source_id:
            params['sourceID'] = source_id

        response = requests.get(
            f"{self.base_url}/search",
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()['docs']

    def get_properties(self, source_id, item_id):
        """Get item properties as the decoded JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(
            self._doc_url(source_id, item_id),
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def download_content(self, source_id, item_id, output_path):
        """Download item content to ``output_path`` in 8 KiB chunks.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # With stream=True the connection stays checked out until the
        # response is closed, so the request is used as a context manager.
        with requests.get(
            self._doc_url(source_id, item_id, "/content"),
            stream=True,
            timeout=self.timeout,
        ) as response:
            response.raise_for_status()

            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)

    def get_text(self, source_id, item_id):
        """Get extracted text as a string.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(
            self._doc_url(source_id, item_id, "/text"),
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.text

# Usage
client = IPEDClient()

# Search for PDFs
results = client.search("type:pdf")
print(f"Found {len(results)} PDF documents")

# Get properties and download first result
if results:
    doc = results[0]
    props = client.get_properties(doc['source'], doc['id'])

    # The name is taken from the examined evidence, so it is untrusted:
    # keep only its last path component so that a value such as
    # "../../etc/cron.d/x" cannot write outside the working directory.
    filename = os.path.basename(props['properties']['name'][0])
    if filename in ('', '.', '..'):
        # Nothing usable is left after stripping; fall back to the item id.
        filename = f"item_{doc['id']}"
    print(f"Downloading: {filename}")

    client.download_content(doc['source'], doc['id'], filename)
    print(f"Downloaded to: {filename}")

Advanced Search with Filtering

import requests
from datetime import datetime
import json

class AdvancedIPEDClient:
    """IPED Web API client with filter-building search and CSV export.

    All requests go through one ``requests.Session`` so connections to the
    server are reused.
    """

    def __init__(self, base_url="http://localhost:8080"):
        self.base_url = base_url
        self.session = requests.Session()

    @staticmethod
    def _build_query(query, min_size=None, max_size=None,
                     file_types=None, categories=None):
        """Combine the base query and the optional filters into one query string."""
        # Parenthesize the caller's query: without this, an OR inside it
        # would mix with the AND-ed filters ("a OR b AND length:[...]")
        # and the filters would no longer apply to the whole query.
        query_parts = [f"({query})"] if query else []

        # Add size filters
        if min_size is not None:
            query_parts.append(f"length:[{min_size} TO *]")
        if max_size is not None:
            query_parts.append(f"length:[* TO {max_size}]")

        # Add file type filter
        if file_types:
            type_query = " OR ".join(f"type:{t}" for t in file_types)
            query_parts.append(f"({type_query})")

        # Add category filter
        if categories:
            cat_query = " OR ".join(f"category:{c}" for c in categories)
            query_parts.append(f"({cat_query})")

        return " AND ".join(query_parts)

    @staticmethod
    def _first_value(props, key):
        """Return the first value stored under ``key``, or '' if missing or empty."""
        values = props.get(key)
        return values[0] if values else ''

    def search_with_filters(self, query, min_size=None, max_size=None,
                           file_types=None, categories=None):
        """Search with additional filters.

        Args:
            query: Base query; may be empty when only filters are wanted.
            min_size: Lower bound for the ``length`` field, inclusive.
            max_size: Upper bound for the ``length`` field, inclusive.
            file_types: Values OR-ed together on the ``type`` field.
            categories: Values OR-ed together on the ``category`` field.

        Returns:
            The ``docs`` entry of the JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        final_query = self._build_query(
            query, min_size, max_size, file_types, categories
        )

        response = self.session.get(
            f"{self.base_url}/search",
            params={'q': final_query}
        )
        response.raise_for_status()
        return response.json()['docs']

    def get_document_details(self, docs):
        """Get detailed properties for multiple documents.

        Documents whose request fails are reported on stdout and left out
        of the result, so the returned list may be shorter than ``docs``.
        """
        details = []

        for doc in docs:
            try:
                response = self.session.get(
                    f"{self.base_url}/sources/{doc['source']}/docs/{doc['id']}"
                )
                response.raise_for_status()
                details.append(response.json())
            except requests.RequestException as e:
                print(f"Error fetching doc {doc['id']}: {e}")

        return details

    def export_results_to_csv(self, docs, output_file):
        """Export search results to CSV.

        Fetches the properties of every document in ``docs`` and writes one
        row per document to ``output_file`` (UTF-8). A property that is
        missing or has no values is written as an empty cell.
        """
        import csv

        details = self.get_document_details(docs)

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Source', 'ID', 'Name', 'Type', 'Size', 'Hash', 'Path'])

            for detail in details:
                props = detail['properties']
                writer.writerow([
                    detail['source'],
                    detail['id'],
                    self._first_value(props, 'name'),
                    self._first_value(props, 'type'),
                    self._first_value(props, 'length'),
                    self._first_value(props, 'hash'),
                    self._first_value(props, 'path'),
                ])

# Usage: find large documents mentioning "confidential" and export them.
client = AdvancedIPEDClient()

# Filters for PDFs and Word documents of at least 1,000,000 bytes
search_filters = {
    'min_size': 1000000,  # > 1MB
    'file_types': ['pdf', 'doc', 'docx'],
}
results = client.search_with_filters("content:confidential", **search_filters)

print(f"Found {len(results)} large documents with 'confidential'")

# Write one CSV row per matching document
csv_path = 'confidential_docs.csv'
client.export_results_to_csv(results, csv_path)
print("Exported to confidential_docs.csv")

Bookmark Management

import requests

class BookmarkManager:
    """Client for the bookmark endpoints of the IPED Web API.

    Every method raises ``requests.HTTPError`` when the server answers
    with an error status.
    """

    def __init__(self, base_url="http://localhost:8080"):
        self.base_url = base_url

    def _bookmark_url(self, *segments):
        """Return ``<base_url>/bookmarks/<segments...>`` with each segment encoded.

        Bookmark names are free text; percent-encoding them keeps "/", "?",
        "#" and spaces from being read as URL structure, which would send
        the request to a different route.
        """
        from urllib.parse import quote

        encoded = "/".join(quote(str(segment), safe='') for segment in segments)
        return f"{self.base_url}/bookmarks/{encoded}"

    def list_bookmarks(self):
        """List all bookmarks (the ``data`` entry of the JSON response)."""
        response = requests.get(f"{self.base_url}/bookmarks")
        response.raise_for_status()
        return response.json()['data']

    def create_bookmark(self, name):
        """Create a new bookmark called ``name``."""
        response = requests.post(self._bookmark_url(name))
        response.raise_for_status()

    def add_to_bookmark(self, bookmark_name, docs):
        """Add documents to bookmark; ``docs`` is sent as the JSON body."""
        response = requests.put(
            self._bookmark_url(bookmark_name, "add"),
            json=docs
        )
        response.raise_for_status()

    def get_bookmark_items(self, bookmark_name):
        """Get all items in a bookmark (the ``docs`` entry of the response)."""
        response = requests.get(self._bookmark_url(bookmark_name))
        response.raise_for_status()
        return response.json()['docs']

    def remove_from_bookmark(self, bookmark_name, docs):
        """Remove documents from bookmark; ``docs`` is sent as the JSON body."""
        response = requests.put(
            self._bookmark_url(bookmark_name, "remove"),
            json=docs
        )
        response.raise_for_status()

    def delete_bookmark(self, bookmark_name):
        """Delete a bookmark."""
        response = requests.delete(self._bookmark_url(bookmark_name))
        response.raise_for_status()

    def rename_bookmark(self, old_name, new_name):
        """Rename a bookmark from ``old_name`` to ``new_name``."""
        response = requests.put(
            self._bookmark_url(old_name, "rename", new_name)
        )
        response.raise_for_status()

# Usage: bookmark executables that mention passwords or encryption.
bm = BookmarkManager()

# Create bookmark for suspicious files
target_bookmark = "suspicious"
bm.create_bookmark(target_bookmark)

# Search and tag suspicious executables
client = IPEDClient()
suspicious_query = "type:exe AND (content:password OR content:encrypted)"
results = client.search(suspicious_query)

if results:
    print(f"Found {len(results)} suspicious executables")
    bm.add_to_bookmark(target_bookmark, results)
    print("Added to 'suspicious' bookmark")

# List bookmarks
bookmarks = bm.list_bookmarks()
print(f"Available bookmarks: {', '.join(bookmarks)}")

JavaScript/Node.js Examples

Basic Client

const axios = require('axios');
const fs = require('fs');
const path = require('path');

/**
 * Minimal IPED Web API client built on an axios instance.
 * Every method rejects when the underlying request fails.
 */
class IPEDClient {
  /**
   * @param {string} baseURL Root URL of the Web API server.
   */
  constructor(baseURL = 'http://localhost:8080') {
    this.client = axios.create({ baseURL });
  }

  /**
   * Build the path of a document resource. Both IDs are percent-encoded so
   * that "/", "?" or spaces in them cannot address a different endpoint.
   * @param {string} sourceID
   * @param {string|number} itemID
   * @param {string} [suffix] Sub-resource such as "/content" or "/text".
   * @returns {string}
   */
  static docPath(sourceID, itemID, suffix = '') {
    const source = encodeURIComponent(sourceID);
    const item = encodeURIComponent(itemID);
    return `/sources/${source}/docs/${item}${suffix}`;
  }

  /**
   * Search for items.
   * @param {string} query Sent as the `q` parameter.
   * @param {?string} sourceID Optional source restriction.
   * @returns {Promise<Array>} The `docs` entry of the response.
   */
  async search(query, sourceID = null) {
    const params = { q: query };
    if (sourceID) params.sourceID = sourceID;

    const response = await this.client.get('/search', { params });
    return response.data.docs;
  }

  /**
   * Fetch the properties of one item.
   * @returns {Promise<Object>} The decoded response body.
   */
  async getProperties(sourceID, itemID) {
    const response = await this.client.get(
      IPEDClient.docPath(sourceID, itemID)
    );
    return response.data;
  }

  /**
   * Stream the content of one item into `outputPath`.
   * @returns {Promise<void>} Resolves once the file has been fully written.
   */
  async downloadContent(sourceID, itemID, outputPath) {
    // Required here so the method does not depend on an extra top-level import.
    const { pipeline } = require('stream');

    const response = await this.client.get(
      IPEDClient.docPath(sourceID, itemID, '/content'),
      { responseType: 'stream' }
    );

    // pipeline() forwards errors from BOTH streams and destroys them on
    // failure. A bare pipe() only reports writer errors, so a connection
    // dropped mid-download would leave the promise pending forever.
    return new Promise((resolve, reject) => {
      pipeline(response.data, fs.createWriteStream(outputPath), (err) => {
        if (err) reject(err);
        else resolve();
      });
    });
  }

  /**
   * Fetch the extracted text of one item.
   * @returns {Promise<string>} The response body.
   */
  async getText(sourceID, itemID) {
    const response = await this.client.get(
      IPEDClient.docPath(sourceID, itemID, '/text')
    );
    return response.data;
  }
}

// Usage
(async () => {
  const client = new IPEDClient();

  // Search for images
  const results = await client.search('category:images');
  console.log(`Found ${results.length} images`);

  // Download first image
  if (results.length > 0) {
    const doc = results[0];
    const props = await client.getProperties(doc.source, doc.id);

    // The name comes from the examined evidence, so it is untrusted: keep
    // only the last path component so it cannot point outside the current
    // directory, and fall back to the item id if nothing usable is left.
    let filename = path.basename(String(props.properties.name[0]));
    if (filename === '' || filename === '.' || filename === '..') {
      filename = `item_${doc.id}`;
    }

    await client.downloadContent(doc.source, doc.id, filename);
    console.log(`Downloaded: ${filename}`);
  }
})().catch((error) => {
  // Without this handler a failed request is an unhandled promise rejection.
  console.error(`Request failed: ${error.message}`);
  process.exitCode = 1;
});

Express.js Proxy Server

const express = require('express');
const axios = require('axios');
const app = express();

const IPED_API = 'http://localhost:8080';

app.use(express.json());

// Search endpoint with caching
const cache = new Map();

/**
 * GET /api/search?q=...&sourceID=...
 * Forwards the search to IPED and keeps each distinct response in memory
 * for five minutes.
 */
app.get('/api/search', async (req, res) => {
  try {
    const { q, sourceID } = req.query;
    // JSON-encode the pair: with a plain "q-sourceID" key, different
    // requests such as q="a-b" and q="a"&sourceID="b-undefined" would
    // share one cache entry.
    const cacheKey = JSON.stringify([q, sourceID]);

    // Check cache
    if (cache.has(cacheKey)) {
      return res.json(cache.get(cacheKey));
    }

    // Query IPED
    const response = await axios.get(`${IPED_API}/search`, {
      params: { q, sourceID }
    });

    // Cache for 5 minutes
    cache.set(cacheKey, response.data);
    setTimeout(() => cache.delete(cacheKey), 5 * 60 * 1000);

    res.json(response.data);
  } catch (error) {
    // Pass the upstream status through (e.g. 400 for a bad query) rather
    // than reporting every failure as 500. Errors without an HTTP
    // response, such as a refused connection, still map to 500.
    const status = error.response ? error.response.status : 500;
    res.status(status).json({ error: error.message });
  }
});

// Document details with enrichment
/**
 * GET /api/documents/:source/:id
 * Returns the IPED document properties plus an `enriched` object holding a
 * readable size and bookmark/selection flags.
 */
app.get('/api/documents/:source/:id', async (req, res) => {
  try {
    const { source, id } = req.params;

    // Route params arrive already decoded, so an encoded "/" in the request
    // would become a real path separator here. Re-encode both values so the
    // caller can only ever reach the document endpoint.
    const response = await axios.get(
      `${IPED_API}/sources/${encodeURIComponent(source)}/docs/${encodeURIComponent(id)}`
    );

    const doc = response.data;

    // Property values may be strings rather than numbers, so convert
    // before formatting; a missing or non-numeric length yields null.
    const lengthValues = (doc.properties && doc.properties.length) || [];
    const sizeBytes = lengthValues.length > 0 ? Number(lengthValues[0]) : NaN;

    // Add enrichment
    doc.enriched = {
      sizeReadable: Number.isFinite(sizeBytes) ? formatBytes(sizeBytes) : null,
      hasBookmarks: Array.isArray(doc.bookmarks) && doc.bookmarks.length > 0,
      isSelected: doc.selected
    };

    res.json(doc);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * Format a byte count with a binary unit, rounded to two decimals.
 *
 * @param {number|string} bytes Byte count; numeric strings are accepted.
 * @returns {string} For example "1.5 KB". Zero, negative and non-numeric
 *   input all yield "0 Bytes".
 */
function formatBytes(bytes) {
  const value = Number(bytes);
  if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
  // Clamp the unit index: values below 1 would give a negative index and
  // values beyond the last unit would index past the end of the table.
  const exponent = Math.floor(Math.log(value) / Math.log(k));
  const i = Math.min(sizes.length - 1, Math.max(0, exponent));
  return Math.round(value / Math.pow(k, i) * 100) / 100 + ' ' + sizes[i];
}

// Start the proxy on port 3000 and log its address once it is listening.
function announceReady() {
  console.log('Proxy server running on http://localhost:3000');
}

app.listen(3000, announceReady);

Java Examples

Complete Client Implementation

import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonArray;

/**
 * Minimal IPED Web API client based on {@code java.net.http.HttpClient} and Gson.
 *
 * <p>Every request method throws {@code java.io.IOException} when the server
 * answers with a non-2xx status, so an error body is never parsed as a result
 * or stored as item content.
 */
public class IPEDWebClient {
    
    private final String baseUrl;
    private final HttpClient client;
    private final Gson gson;
    
    /**
     * @param baseUrl root URL of the Web API server, e.g. {@code http://localhost:8080}
     */
    public IPEDWebClient(String baseUrl) {
        this.baseUrl = baseUrl;
        this.client = HttpClient.newHttpClient();
        this.gson = new Gson();
    }
    
    /** Searches all sources; equivalent to {@code search(query, null)}. */
    public List<DocRef> search(String query) throws Exception {
        return search(query, null);
    }
    
    /**
     * Searches for items.
     *
     * @param query    query string sent as the {@code q} parameter
     * @param sourceID source to restrict the search to, or {@code null} for all
     * @return the entries of the {@code docs} array in the response
     */
    public List<DocRef> search(String query, String sourceID) throws Exception {
        String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8);
        String url = baseUrl + "/search?q=" + encodedQuery;
        
        if (sourceID != null) {
            // Encode this value as well: a raw "&" or "#" in it would
            // otherwise alter the query string.
            url += "&sourceID=" + URLEncoder.encode(sourceID, StandardCharsets.UTF_8);
        }
        
        JsonObject json = gson.fromJson(fetchString(url), JsonObject.class);
        JsonArray docs = json.getAsJsonArray("docs");
        
        return gson.fromJson(docs, 
            new com.google.gson.reflect.TypeToken<List<DocRef>>(){}.getType());
    }
    
    /** Fetches the properties of one item. */
    public DocumentProperties getProperties(String sourceID, int itemID) 
            throws Exception {
        String body = fetchString(docUrl(sourceID, itemID, ""));
        return gson.fromJson(body, DocumentProperties.class);
    }
    
    /**
     * Downloads the content of one item into {@code outputPath}.
     *
     * <p>If the server answers with an error status, the file is removed and
     * an exception is thrown.
     */
    public void downloadContent(String sourceID, int itemID, Path outputPath) 
            throws Exception {
        String url = docUrl(sourceID, itemID, "/content");
        
        HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create(url))
            .GET()
            .build();
        
        // ofFile(Path) alone opens the file with CREATE and WRITE only, so a
        // longer pre-existing file would keep its old tail. Truncate explicitly.
        HttpResponse<Path> response = client.send(
            request,
            HttpResponse.BodyHandlers.ofFile(
                outputPath,
                java.nio.file.StandardOpenOption.CREATE,
                java.nio.file.StandardOpenOption.WRITE,
                java.nio.file.StandardOpenOption.TRUNCATE_EXISTING
            )
        );
        
        if (!isSuccess(response.statusCode())) {
            // The body written so far is the error page, not the item.
            Files.deleteIfExists(outputPath);
            throw httpError(response.statusCode(), url);
        }
    }
    
    /** Fetches the extracted text of one item. */
    public String getText(String sourceID, int itemID) throws Exception {
        return fetchString(docUrl(sourceID, itemID, "/text"));
    }
    
    /** Builds the URL of a document resource, encoding the source ID as a path segment. */
    private String docUrl(String sourceID, int itemID, String suffix) {
        // URLEncoder produces form encoding, where a space becomes "+";
        // in a URL path a space has to be "%20".
        String encodedSource = URLEncoder.encode(sourceID, StandardCharsets.UTF_8)
            .replace("+", "%20");
        return String.format(
            "%s/sources/%s/docs/%d%s",
            baseUrl, encodedSource, itemID, suffix
        );
    }
    
    /** Performs a GET and returns the body, failing on a non-2xx status. */
    private String fetchString(String url) throws Exception {
        HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create(url))
            .GET()
            .build();
        
        HttpResponse<String> response = client.send(
            request,
            HttpResponse.BodyHandlers.ofString()
        );
        
        if (!isSuccess(response.statusCode())) {
            throw httpError(response.statusCode(), url);
        }
        return response.body();
    }
    
    /** Returns whether {@code status} is in the 2xx range. */
    private static boolean isSuccess(int status) {
        return status >= 200 && status < 300;
    }
    
    /** Creates the exception reported for a failed request. */
    private static java.io.IOException httpError(int status, String url) {
        return new java.io.IOException(
            "IPED Web API returned HTTP " + status + " for " + url);
    }
    
    // Data classes
    
    /** Reference to one item, as found in a search response. */
    public static class DocRef {
        public String source;
        public int id;
    }
    
    /** Item properties, as returned by the document endpoint. */
    public static class DocumentProperties {
        public String source;
        public int id;
        public int luceneId;
        public java.util.Map<String, String[]> properties;
        public String[] bookmarks;
        public boolean selected;
    }
    
    // Usage example
    public static void main(String[] args) throws Exception {
        IPEDWebClient client = new IPEDWebClient("http://localhost:8080");
        
        // Search for PDFs
        List<DocRef> results = client.search("type:pdf");
        System.out.println("Found " + results.size() + " PDFs");
        
        // Get properties and download first result
        if (!results.isEmpty()) {
            DocRef doc = results.get(0);
            DocumentProperties props = client.getProperties(
                doc.source, doc.id
            );
            
            // The name comes from the examined evidence, so it is untrusted:
            // keep only the last path element so it cannot point outside the
            // working directory. getFileName() is null for a root path, and
            // "." / ".." are not usable as file names either.
            Path target = Paths.get(props.properties.get("name")[0]).getFileName();
            String filename = target == null ? "" : target.toString();
            if (filename.isEmpty() || filename.equals(".") || filename.equals("..")) {
                filename = "item_" + doc.id;
            }
            System.out.println("Downloading: " + filename);
            
            client.downloadContent(
                doc.source, 
                doc.id, 
                Paths.get(filename)
            );
            
            System.out.println("Downloaded to: " + filename);
        }
    }
}

Bash/cURL Examples

Batch Download Script

#!/bin/bash
# Download the content of every item matching a query into a directory.
# Usage: <script> <query> <output_dir>
# Requires curl and jq.

API_URL="http://localhost:8080"
QUERY="$1"
OUTPUT_DIR="$2"

if [ -z "$QUERY" ] || [ -z "$OUTPUT_DIR" ]; then
    echo "Usage: $0 <query> <output_dir>"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

# Search
echo "Searching for: $QUERY"
# -G with --data-urlencode percent-encodes the query, so spaces, "&" and
# quotes in it reach the server intact. -f makes curl fail on HTTP errors
# instead of handing an error page to jq.
RESULTS=$(curl -sf -G --data-urlencode "q=${QUERY}" "${API_URL}/search") || {
    echo "Search request failed" >&2
    exit 1
}

# Parse and download each result
printf '%s\n' "$RESULTS" | jq -r '.docs[] | "\(.source) \(.id)"' | while read -r source id; do
    echo "Downloading item $id from $source..."
    
    # Get filename
    PROPS=$(curl -sf "${API_URL}/sources/${source}/docs/${id}") || {
        echo "  Skipped: could not fetch properties" >&2
        continue
    }
    FILENAME=$(printf '%s\n' "$PROPS" | jq -r '.properties.name[0]')
    
    # The name comes from the examined evidence, so it is untrusted: keep
    # only the last path component so it cannot escape OUTPUT_DIR, and fall
    # back to the item id when nothing usable is left.
    FILENAME=$(basename -- "$FILENAME")
    case "$FILENAME" in
        ''|.|..|null|/) FILENAME="item_${id}" ;;
    esac
    
    # Download content
    if curl -sf "${API_URL}/sources/${source}/docs/${id}/content" \
        -o "${OUTPUT_DIR}/${FILENAME}"; then
        echo "  Saved: ${FILENAME}"
    else
        echo "  Failed: ${FILENAME}" >&2
    fi
done

echo "Download complete!"

Report Generation Script

#!/bin/bash
# Build report.html with one table row per item matching a query.
# Usage: <script> <query>
# Requires curl and jq.

API_URL="http://localhost:8080"
QUERY="$1"
REPORT_FILE="report.html"

if [ -z "$QUERY" ]; then
    echo "Usage: $0 <query>"
    exit 1
fi

# Escape a value for use in HTML text. The query and all item properties
# come from outside this script (the properties from the examined
# evidence), so writing them raw would allow markup injection in the report.
html_escape() {
    jq -rn --arg v "$1" '$v | @html'
}

QUERY_HTML=$(html_escape "$QUERY")

cat > "$REPORT_FILE" << EOF
<!DOCTYPE html>
<html>
<head>
    <title>IPED Search Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #4CAF50; color: white; }
        tr:nth-child(even) { background-color: #f2f2f2; }
    </style>
</head>
<body>
    <h1>Search Results: $QUERY_HTML</h1>
    <table>
        <tr>
            <th>Source</th>
            <th>ID</th>
            <th>Name</th>
            <th>Type</th>
            <th>Size</th>
            <th>Hash</th>
        </tr>
EOF

# Search and build report
# -G with --data-urlencode percent-encodes the query, so spaces, "&" and
# quotes in it reach the server intact.
curl -s -G --data-urlencode "q=${QUERY}" "${API_URL}/search" | \
    jq -r '.docs[] | "\(.source) \(.id)"' | \
    while read -r source id; do
        PROPS=$(curl -s "${API_URL}/sources/${source}/docs/${id}")
        
        # @html escapes <, >, &, ' and " in each value.
        NAME=$(printf '%s\n' "$PROPS" | jq -r '.properties.name[0] | @html')
        TYPE=$(printf '%s\n' "$PROPS" | jq -r '.properties.type[0] | @html')
        SIZE=$(printf '%s\n' "$PROPS" | jq -r '.properties.length[0] | @html')
        HASH=$(printf '%s\n' "$PROPS" | jq -r '.properties.hash[0] | @html')
        SOURCE_HTML=$(html_escape "$source")
        ID_HTML=$(html_escape "$id")
        
        cat >> "$REPORT_FILE" << EOF
        <tr>
            <td>$SOURCE_HTML</td>
            <td>$ID_HTML</td>
            <td>$NAME</td>
            <td>$TYPE</td>
            <td>$SIZE</td>
            <td>$HASH</td>
        </tr>
EOF
    done

cat >> "$REPORT_FILE" << EOF
    </table>
</body>
</html>
EOF

echo "Report generated: $REPORT_FILE"

See Also

Build docs developers (and LLMs) love