OpenAI models currently require fence_output=True and use_schema_constraints=False because LangExtract doesn’t yet implement schema constraints for OpenAI.
# Reasoning models (o1 family): fenced output is required and schema
# constraints must be off; reasoning_effort tunes how much the model thinks.
result = lx.extract(
    text="Complex problem requiring reasoning",
    model_id="o1",
    api_key=os.environ.get('OPENAI_API_KEY'),
    prompt_description="Extract and reason about entities",
    examples=[...],
    fence_output=True,
    use_schema_constraints=False,
    reasoning_effort="high",  # or "medium", "low"
)
The OpenAI provider automatically parallelizes multiple prompts:
# Long documents are chunked and the chunks are processed in parallel;
# max_workers bounds concurrency, max_chunk_size bounds chunk length.
result = lx.extract(
    text="Your long document",
    model_id="gpt-4o-mini",
    api_key=os.environ.get('OPENAI_API_KEY'),
    prompt_description="Extract entities",
    examples=[...],
    fence_output=True,
    use_schema_constraints=False,
    max_workers=20,       # Process up to 20 chunks in parallel
    max_chunk_size=3000,  # Split document into 3000-char chunks
)
import langextract as lx
import os

# Define your task
prompt = "Extract person names, locations, and dates in order of appearance."
examples = [
    lx.data.ExampleData(
        text="Dr. Jane Smith visited Paris on March 15, 2024.",
        extractions=[
            lx.data.Extraction(
                extraction_class="person",
                extraction_text="Dr. Jane Smith",
                attributes={"title": "Dr."},
            ),
            lx.data.Extraction(
                extraction_class="location",
                extraction_text="Paris",
                attributes={"type": "city"},
            ),
            lx.data.Extraction(
                extraction_class="date",
                extraction_text="March 15, 2024",
                attributes={"format": "full date"},
            ),
        ],
    )
]

# Run extraction
result = lx.extract(
    text="Prof. John Doe traveled to London on April 20, 2024.",
    model_id="gpt-4o-mini",
    api_key=os.environ.get('OPENAI_API_KEY'),
    prompt_description=prompt,
    examples=examples,
    fence_output=True,
    use_schema_constraints=False,
)

print(f"Found {len(result.extractions)} extractions")
for ext in result.extractions:
    print(f"{ext.extraction_class}: {ext.extraction_text}")
import langextract as lx
import os

# Process a long document with optimal settings
result = lx.extract(
    text="https://example.com/long-document.txt",  # Or pass text directly
    model_id="gpt-4o-mini",
    api_key=os.environ.get('OPENAI_API_KEY'),
    prompt_description="Extract all medication mentions",
    examples=[...],
    fence_output=True,
    use_schema_constraints=False,
    # Chunking and parallelization:
    max_chunk_size=3000,      # Smaller chunks for accuracy
    max_workers=20,           # High parallelism for speed
    extraction_passes=3,      # Multiple passes for recall
    # Provider configuration:
    temperature=0.0,          # Deterministic output
    max_output_tokens=2000,   # Allow longer responses
)
from langextract.providers.openai import OpenAILanguageModel

# Instantiate the provider directly to target a custom endpoint
# (e.g. an Azure OpenAI deployment) or to set an organization ID.
model = OpenAILanguageModel(
    model_id="gpt-4o",
    api_key="your-key",
    base_url="https://your-azure-endpoint.openai.azure.com/",
    organization="your-org-id",  # Optional
)

# Use with lx.extract by passing the model instance
result = lx.extract(
    text="Your text",
    model=model,
    prompt_description="Extract data",
    examples=[...],
    fence_output=True,
    use_schema_constraints=False,
)