Steps

Execute predefined workflow steps for document processing

Steps

Steps are preconfigured document processing pipelines. Each step defines a specific processing workflow — you provide a document and DocuTray executes the step's pipeline, returning structured results. Steps are always executed asynchronously.

Quick Start

from pathlib import Path
from docutray import Client

client = Client(api_key="YOUR_API_KEY")

# Execute a step
status = client.steps.run_async(
    step_id="step_abc123",
    file=Path("document.pdf")
)

# Wait for completion
result = status.wait()

if result.is_success():
    print(result.data)
import DocuTray from 'docutray';
import { readFileSync } from 'fs';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

// Execute a step
const status = await client.steps.runAsync({
  stepId: 'step_abc123',
  file: readFileSync('document.pdf'),
  filename: 'document.pdf',
});

// Wait for completion
const result = await status.wait();

if (result.isSuccess()) {
  console.log(result.data);
}
# Start step execution
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -F "image=@document.pdf"

# Response:
# {
#   "execution_id": "exec_abc123",
#   "status": "ENQUEUED"
# }

# Poll for status
curl https://app.docutray.com/api/steps-async/status/exec_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY"

Response

# status is a StepExecutionStatus
print(status.execution_id)        # "exec_abc123"
print(status.status)              # "SUCCESS"
print(status.data)                # Extracted data
print(status.original_filename)   # "document.pdf"
print(status.request_timestamp)   # When execution started
print(status.response_timestamp)  # When execution completed
// status is a StepExecutionStatus
console.log(status.id);                   // "exec_abc123"
console.log(status.status);               // "SUCCESS"
console.log(status.data);                 // Extracted data
console.log(status.original_filename);    // "document.pdf"
console.log(status.request_timestamp);    // When execution started
console.log(status.response_timestamp);   // When execution completed
{
  "execution_id": "exec_abc123",
  "status": "SUCCESS",
  "data": {
    "invoice_number": "INV-2024-001",
    "total": 1160.00
  },
  "original_filename": "document.pdf",
  "request_timestamp": "2024-01-15T10:30:00.000Z",
  "response_timestamp": "2024-01-15T10:30:45.000Z"
}

Polling and Status

Steps are always asynchronous. You can poll for status manually or use the SDK's built-in wait() method.

status = client.steps.run_async(
    step_id="step_abc123",
    file=Path("document.pdf")
)

# Wait with automatic polling
result = status.wait()

if result.is_success():
    print("Step completed successfully")
    print(result.data)
elif result.is_error():
    print(f"Step failed: {result.error}")
const status = await client.steps.runAsync({
  stepId: 'step_abc123',
  file: readFileSync('document.pdf'),
});

// Wait with status callback
const result = await status.wait({
  onStatus: (s) => console.log(`Status: ${s.status}`),
  pollInterval: 2000,
  timeout: 300_000,
});

if (result.isSuccess()) {
  console.log('Step completed successfully');
  console.log(result.data);
} else if (result.isFailed()) {
  console.log(`Step failed: ${result.error}`);
}

Manual Polling

status = client.steps.get_status("exec_abc123")

if status.is_success():
    print(status.data)
elif status.status == "ENQUEUED" or status.status == "PROCESSING":
    print("Still processing...")
const status = await client.steps.getStatus('exec_abc123');

if (status.isSuccess()) {
  console.log(status.data);
} else if (status.status === 'ENQUEUED' || status.status === 'PROCESSING') {
  console.log('Still processing...');
}
curl https://app.docutray.com/api/steps-async/status/exec_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY"

# Status transitions: ENQUEUED → PROCESSING → SUCCESS | ERROR

Input Methods

Steps support the same input methods as Convert and Identify: file upload, URL, and base64.

# File upload
status = client.steps.run_async(
    step_id="step_abc123",
    file=Path("document.pdf")
)

# URL
status = client.steps.run_async(
    step_id="step_abc123",
    url="https://example.com/document.pdf"
)

# Base64
import base64
with open("document.pdf", "rb") as f:
    encoded = base64.b64encode(f.read()).decode()

status = client.steps.run_async(
    step_id="step_abc123",
    file_base64=encoded,
    content_type="application/pdf"
)
// File upload
const status = await client.steps.runAsync({
  stepId: 'step_abc123',
  file: readFileSync('document.pdf'),
  filename: 'document.pdf',
});

// URL
const status2 = await client.steps.runAsync({
  stepId: 'step_abc123',
  url: 'https://example.com/document.pdf',
});

// Base64
const encoded = readFileSync('document.pdf').toString('base64');
const status3 = await client.steps.runAsync({
  stepId: 'step_abc123',
  base64: encoded,
  contentType: 'application/pdf',
});
# File upload
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -F "image=@document.pdf"

# URL
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"image_url": "https://example.com/document.pdf"}'

# Base64
curl -X POST https://app.docutray.com/api/steps-async/step_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d "{\"image_base64\": \"$(base64 -i document.pdf)\", \"image_content_type\": \"application/pdf\"}"

Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| step_id / stepId | string | Yes | ID of the step to execute |
| file | File | No | File to process (path, bytes, or file object) |
| url | string | No | Public URL of the document |
| file_base64 / base64 | string | No | Base64-encoded document content |
| content_type / contentType | string | No | MIME type (auto-detected if not provided) |
| document_metadata / documentMetadata | object | No | Custom metadata returned in status responses |

You must provide exactly one input source per request: file, url, or base64 content (file_base64 in the Python SDK, base64 in the JavaScript SDK).

Complete Code

from pathlib import Path
from docutray import Client, NotFoundError, DocuTrayError

client = Client(api_key="YOUR_API_KEY")

try:
    # Execute a step with metadata
    status = client.steps.run_async(
        step_id="step_abc123",
        file=Path("invoice.pdf"),
        document_metadata={"source": "email", "customer_id": "cust_456"}
    )

    print(f"Execution started: {status.execution_id}")

    # Wait for completion
    result = status.wait()

    if result.is_success():
        print("Step completed!")
        print(f"Result: {result.data}")
    elif result.is_error():
        print(f"Step failed: {result.error}")

except NotFoundError:
    print("Step not found — check the step ID")
except DocuTrayError as e:
    print(f"Error: {e.message}")
finally:
    client.close()
import DocuTray, { NotFoundError, DocuTrayError } from 'docutray';
import { readFileSync } from 'fs';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

try {
  // Execute a step with metadata
  const status = await client.steps.runAsync({
    stepId: 'step_abc123',
    file: readFileSync('invoice.pdf'),
    filename: 'invoice.pdf',
    documentMetadata: { source: 'email', customer_id: 'cust_456' },
  });

  console.log(`Execution started: ${status.id}`);

  // Wait for completion
  const result = await status.wait({
    onStatus: (s) => console.log(`Status: ${s.status}`),
  });

  if (result.isSuccess()) {
    console.log('Step completed!');
    console.log('Result:', result.data);
  } else if (result.isFailed()) {
    console.log(`Step failed: ${result.error}`);
  }
} catch (error) {
  if (error instanceof NotFoundError) {
    console.error('Step not found — check the step ID');
  } else if (error instanceof DocuTrayError) {
    console.error(`Error: ${error.message}`);
  }
}

SDK Reference

For detailed class and method documentation, see the SDK reference for your language.

On this page