Steps
Execute predefined workflow steps for document processing
Steps
Steps are preconfigured document processing pipelines. Each step defines a specific processing workflow — you provide a document and DocuTray executes the step's pipeline, returning structured results. Steps are always executed asynchronously.
Quick Start
from pathlib import Path
from docutray import Client
client = Client(api_key="YOUR_API_KEY")
# Execute a step
status = client.steps.run_async(
step_id="step_abc123",
file=Path("document.pdf")
)
# Wait for completion
result = status.wait()
if result.is_success():
print(result.data)

import DocuTray from 'docutray';
import { readFileSync } from 'fs';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
// Execute a step
const status = await client.steps.runAsync({
stepId: 'step_abc123',
file: readFileSync('document.pdf'),
filename: 'document.pdf',
});
// Wait for completion
const result = await status.wait();
if (result.isSuccess()) {
console.log(result.data);
}

# Start step execution
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
-H "Authorization: Bearer YOUR_API_KEY" \
-F "image=@document.pdf"
# Response:
# {
# "execution_id": "exec_abc123",
# "status": "ENQUEUED"
# }
# Poll for status
curl https://app.docutray.com/api/steps-async/status/exec_abc123 \
-H "Authorization: Bearer YOUR_API_KEY"

Response
# status is a StepExecutionStatus
print(status.execution_id) # "exec_abc123"
print(status.status) # "SUCCESS"
print(status.data) # Extracted data
print(status.original_filename) # "document.pdf"
print(status.request_timestamp) # When execution started
print(status.response_timestamp) # When execution completed

// status is a StepExecutionStatus
console.log(status.id); // "exec_abc123"
console.log(status.status); // "SUCCESS"
console.log(status.data); // Extracted data
console.log(status.original_filename); // "document.pdf"
console.log(status.request_timestamp); // When execution started
console.log(status.response_timestamp); // When execution completed

{
"execution_id": "exec_abc123",
"status": "SUCCESS",
"data": {
"invoice_number": "INV-2024-001",
"total": 1160.00
},
"original_filename": "document.pdf",
"request_timestamp": "2024-01-15T10:30:00.000Z",
"response_timestamp": "2024-01-15T10:30:45.000Z"
}

Polling and Status
Steps are always asynchronous. You can poll for status manually or use the SDK's built-in wait() method.
Using wait() (Recommended)
status = client.steps.run_async(
step_id="step_abc123",
file=Path("document.pdf")
)
# Wait with automatic polling
result = status.wait()
if result.is_success():
print("Step completed successfully")
print(result.data)
elif result.is_error():
print(f"Step failed: {result.error}")

const status = await client.steps.runAsync({
stepId: 'step_abc123',
file: readFileSync('document.pdf'),
});
// Wait with status callback
const result = await status.wait({
onStatus: (s) => console.log(`Status: ${s.status}`),
pollInterval: 2000,
timeout: 300_000,
});
if (result.isSuccess()) {
console.log('Step completed successfully');
console.log(result.data);
} else if (result.isFailed()) {
console.log(`Step failed: ${result.error}`);
}

Manual Polling
status = client.steps.get_status("exec_abc123")
if status.is_success():
print(status.data)
elif status.status == "ENQUEUED" or status.status == "PROCESSING":
print("Still processing...")

const status = await client.steps.getStatus('exec_abc123');
if (status.isSuccess()) {
console.log(status.data);
} else if (status.status === 'ENQUEUED' || status.status === 'PROCESSING') {
console.log('Still processing...');
}

curl https://app.docutray.com/api/steps-async/status/exec_abc123 \
-H "Authorization: Bearer YOUR_API_KEY"
# Status transitions: ENQUEUED → PROCESSING → SUCCESS | ERROR

Input Methods
Steps support the same input methods as Convert and Identify: file upload, URL, and base64.
# File upload
status = client.steps.run_async(
step_id="step_abc123",
file=Path("document.pdf")
)
# URL
status = client.steps.run_async(
step_id="step_abc123",
url="https://example.com/document.pdf"
)
# Base64
import base64
with open("document.pdf", "rb") as f:
encoded = base64.b64encode(f.read()).decode()
status = client.steps.run_async(
step_id="step_abc123",
file_base64=encoded,
content_type="application/pdf"
)

// File upload
const status = await client.steps.runAsync({
stepId: 'step_abc123',
file: readFileSync('document.pdf'),
filename: 'document.pdf',
});
// URL
const status2 = await client.steps.runAsync({
stepId: 'step_abc123',
url: 'https://example.com/document.pdf',
});
// Base64
const encoded = readFileSync('document.pdf').toString('base64');
const status3 = await client.steps.runAsync({
stepId: 'step_abc123',
base64: encoded,
contentType: 'application/pdf',
});

# File upload
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
-H "Authorization: Bearer YOUR_API_KEY" \
-F "image=@document.pdf"
# URL
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{"image_url": "https://example.com/document.pdf"}'
# Base64
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"image_base64\": \"$(base64 -i document.pdf)\", \"image_content_type\": \"application/pdf\"}"

Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| step_id / stepId | string | Yes | ID of the step to execute |
| file | File | No | File to process (path, bytes, or file object) |
| url | string | No | Public URL of the document |
| file_base64 / base64 | string | No | Base64-encoded document content |
| content_type / contentType | string | No | MIME type (auto-detected if not provided) |
| document_metadata / documentMetadata | object | No | Custom metadata returned in status responses |
You must provide exactly one of file, url, or file_base64/base64.
Complete Code
from pathlib import Path
from docutray import Client, NotFoundError, DocuTrayError
client = Client(api_key="YOUR_API_KEY")
try:
# Execute a step with metadata
status = client.steps.run_async(
step_id="step_abc123",
file=Path("invoice.pdf"),
document_metadata={"source": "email", "customer_id": "cust_456"}
)
print(f"Execution started: {status.execution_id}")
# Wait for completion
result = status.wait()
if result.is_success():
print("Step completed!")
print(f"Result: {result.data}")
elif result.is_error():
print(f"Step failed: {result.error}")
except NotFoundError:
print("Step not found — check the step ID")
except DocuTrayError as e:
print(f"Error: {e.message}")
finally:
client.close()

import DocuTray, { NotFoundError, DocuTrayError } from 'docutray';
import { readFileSync } from 'fs';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
try {
// Execute a step with metadata
const status = await client.steps.runAsync({
stepId: 'step_abc123',
file: readFileSync('invoice.pdf'),
filename: 'invoice.pdf',
documentMetadata: { source: 'email', customer_id: 'cust_456' },
});
console.log(`Execution started: ${status.id}`);
// Wait for completion
const result = await status.wait({
onStatus: (s) => console.log(`Status: ${s.status}`),
});
if (result.isSuccess()) {
console.log('Step completed!');
console.log('Result:', result.data);
} else if (result.isFailed()) {
console.log(`Step failed: ${result.error}`);
}
} catch (error) {
if (error instanceof NotFoundError) {
console.error('Step not found — check the step ID');
} else if (error instanceof DocuTrayError) {
console.error(`Error: ${error.message}`);
}
}

SDK Reference
For detailed class and method documentation: