Method Signature
client.ocr.process(
document: OCRDocumentParam,
model: Optional[str] = None
) -> OCRResponse
Parameters
document (OCRDocumentParam, required): Document input for OCR processing.
model (Optional[str], default None): Optional model identifier to use for OCR processing.
Response
model: The model used for OCR processing.
Usage statistics for the OCR request.
Examples
from dedalus_labs import DedalusLabs
import base64

client = DedalusLabs()

# Process a PDF document
# The file is read in binary mode and sent inline as a base64 data URL.
with open("document.pdf", "rb") as f:
    pdf_data = base64.b64encode(f.read()).decode("utf-8")

response = client.ocr.process(
    document={
        "document_url": f"data:application/pdf;base64,{pdf_data}"
    }
)

# Print the markdown of each returned page, followed by a separator line.
for page in response.pages:
    print(f"Page {page.index}:")
    print(page.markdown)
    print("---")
# Process an image file
# Read the PNG as bytes and base64-encode it so it can be embedded in a data URL.
with open("receipt.png", "rb") as f:
    image_data = base64.b64encode(f.read()).decode("utf-8")

response = client.ocr.process(
    document={
        "document_url": f"data:image/png;base64,{image_data}",
        "type": "receipt"
    }
)

# Only the first page of the response is shown here.
print("Extracted text:")
print(response.pages[0].markdown)
# Process multiple pages and save markdown output
with open("report.pdf", "rb") as f:
    pdf_data = base64.b64encode(f.read()).decode("utf-8")

response = client.ocr.process(
    document={
        "document_url": f"data:application/pdf;base64,{pdf_data}"
    }
)

# Write with an explicit UTF-8 encoding: the platform default encoding may
# not be able to represent every character in the extracted markdown.
with open("output.md", "w", encoding="utf-8") as f:
    for page in response.pages:
        # The heading uses page.index + 1 as the displayed page number.
        f.write(f"# Page {page.index + 1}\n\n")
        f.write(page.markdown)
        f.write("\n\n")

print(f"Processed {len(response.pages)} pages")
print(f"Model used: {response.model}")
# Process a JPEG image
import base64
from pathlib import Path

# Load the scan from disk and turn its bytes into base64 text.
image_path = Path("scan.jpg")
raw_bytes = image_path.read_bytes()
image_data = base64.b64encode(raw_bytes).decode("utf-8")

# Build the document payload first, then request OCR with an explicit model.
jpeg_document = {"document_url": f"data:image/jpeg;base64,{image_data}"}
response = client.ocr.process(
    document=jpeg_document,
    model="mistral-ocr-latest",
)

markdown_text = response.pages[0].markdown
print(markdown_text)
# Helper function to process any file
def process_document_ocr(file_path: str) -> str:
    """Process a document file with OCR and return markdown text.

    The file is read from disk, base64-encoded, and passed to
    ``client.ocr.process`` as a ``data:`` URL. ``client`` is taken from the
    enclosing module scope.

    Args:
        file_path: Path of the file to submit for OCR.

    Returns:
        The markdown of every page in the response, joined by blank lines.
    """
    from pathlib import Path
    import base64
    import mimetypes

    path = Path(file_path)
    # guess_type yields None for an unrecognised extension, so fall back to
    # a generic binary MIME type.
    mime_type = mimetypes.guess_type(file_path)[0] or "application/octet-stream"
    file_data = base64.b64encode(path.read_bytes()).decode("utf-8")

    response = client.ocr.process(
        document={
            "document_url": f"data:{mime_type};base64,{file_data}"
        }
    )

    return "\n\n".join(page.markdown for page in response.pages)
# Use the helper function
invoice_path = "invoice.pdf"
text = process_document_ocr(invoice_path)
print(text)