Search images using AI-generated summaries and vision embeddings
The Search Thing indexes images by generating structured AI summaries using a vision model, then creating searchable embeddings. This enables natural language search over visual content.
Summaries include multiple dimensions of information:
# From backend/indexer/image_indexer.py:182def _normalize_summary_content(content_str: str) -> dict: parsed_obj = json.loads(text) return { "summary": parsed_obj.get("summary"), # Natural language description "objects": parsed_obj.get("objects", []), # Detected objects "actions": parsed_obj.get("actions", []), # Activities or actions "setting": parsed_obj.get("setting", ""), # Location or scene type "ocr": parsed_obj.get("ocr", ""), # Visible text in image "quality": parsed_obj.get("quality", ""), # Image quality assessment }
Example image summary
{ "summary": "A laptop computer on a wooden desk with a cup of coffee and notebook.", "objects": ["laptop", "desk", "coffee cup", "notebook", "pen"], "actions": ["working", "studying"], "setting": "home office or workspace", "ocr": "MacBook Pro", "quality": "good"}
The structured summary is converted to a flat text representation for embedding:
# From backend/indexer/image_indexer.py:240def _build_embedding_text(summary: dict) -> str: parts: list[str] = [] def add(label: str, value: object) -> None: if value is None: return if isinstance(value, list): value = ", ".join([str(v) for v in value if v]) if isinstance(value, str): value = value.strip() if value: parts.append(f"{label}: {value}") add("summary", summary.get("summary")) add("objects", summary.get("objects")) add("actions", summary.get("actions")) add("setting", summary.get("setting")) add("ocr", summary.get("ocr")) add("quality", summary.get("quality")) return " | ".join(parts)
Example embedding text
summary: A laptop computer on a wooden desk with a cup of coffee and notebook. | objects: laptop, desk, coffee cup, notebook, pen | actions: working, studying | setting: home office or workspace | ocr: MacBook Pro | quality: good
import asyncio

from backend.indexer.image_indexer import img_indexer


async def main() -> None:
    """Index a single image, then a batch, and print the batch outcome.

    ``img_indexer`` is awaited, so the calls must run inside a coroutine;
    a bare top-level ``await`` is a SyntaxError in a regular script.
    """
    # Index single image
    results = await img_indexer("/path/to/image.jpg")

    # Index multiple images
    results = await img_indexer([
        "/path/to/image1.jpg",
        "/path/to/image2.png",
        "/path/to/screenshots/error.png",
    ])

    # Each result is read as a mapping with "indexed" and "path", plus
    # "image_id" on success or an optional "error" on failure.
    for result in results:
        if result["indexed"]:
            print(f"✓ {result['path']} (ID: {result['image_id']})")
        else:
            print(f"✗ {result['path']} — {result.get('error', 'Unknown error')}")


if __name__ == "__main__":
    asyncio.run(main())