Document Types

List, create, update, inspect, and validate document types and their schemas

Document Types

Document types define what data DocuTray extracts from your documents. Each type has a JSON schema that describes the fields to extract. Use the Document Types API to list available types, create custom types, update existing ones, inspect their schemas, and validate extracted data.

List Document Types

Retrieve all document types accessible to your organization, including public types and your custom types.

from docutray import Client

client = Client(api_key="YOUR_API_KEY")

# List all document types
page = client.document_types.list()

for doc_type in page.data:
    print(f"{doc_type.codeType}: {doc_type.name}")

# Auto-paginate through all results
for doc_type in client.document_types.list().auto_paging_iter():
    print(f"{doc_type.codeType}: {doc_type.name}")
import DocuTray from 'docutray';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

// List all document types
const page = await client.documentTypes.list();

for (const docType of page.data) {
  console.log(`${docType.codeType}: ${docType.name}`);
}

// Auto-paginate through all results
for await (const docType of client.documentTypes.list().autoPagingIter()) {
  console.log(`${docType.codeType}: ${docType.name}`);
}
curl https://app.docutray.com/api/document-types \
  -H "Authorization: Bearer YOUR_API_KEY"

# With search and pagination
curl "https://app.docutray.com/api/document-types?search=invoice&page=1&limit=20" \
  -H "Authorization: Bearer YOUR_API_KEY"

Response

# page is a Page[DocumentType]
print(f"Total: {page.pagination.total}")
print(f"Page: {page.pagination.page}")

for dt in page.data:
    print(f"  {dt.name} ({dt.codeType})")
    print(f"  Public: {dt.isPublic}, Draft: {dt.isDraft}")
// page is a Page<DocumentType>
console.log(`Total: ${page.pagination.total}`);
console.log(`Page: ${page.pagination.page}`);

for (const dt of page.data) {
  console.log(`  ${dt.name} (${dt.codeType})`);
  console.log(`  Public: ${dt.isPublic}, Draft: ${dt.isDraft}`);
}
{
  "data": [
    {
      "id": "cm5vm9hx30001m5cgh0p9v8qa",
      "name": "Invoice",
      "codeType": "invoice",
      "description": "Standard invoice document",
      "isPublic": true,
      "isDraft": false,
      "createdAt": "2024-01-15T10:30:00.000Z",
      "updatedAt": "2024-01-15T10:30:00.000Z"
    }
  ],
  "pagination": {
    "total": 50,
    "page": 1,
    "limit": 20
  }
}

Search and Pagination

# Search by name or code
page = client.document_types.list(search="invoice")

# Manual pagination
page = client.document_types.list(page=1, limit=10)

# Iterate through all pages
for page_chunk in client.document_types.list().iter_pages():
    print(f"Page {page_chunk.pagination.page}: {len(page_chunk.data)} items")
// Search by name or code
const page = await client.documentTypes.list({ search: 'invoice' });

// Manual pagination
const page2 = await client.documentTypes.list({ page: 1, limit: 10 });

// Iterate through all pages
for await (const pageChunk of client.documentTypes.list().iterPages()) {
  console.log(`Page: ${pageChunk.data.length} items`);
}
# Search by name
curl "https://app.docutray.com/api/document-types?search=invoice" \
  -H "Authorization: Bearer YOUR_API_KEY"

# Page 2 with 10 results per page
curl "https://app.docutray.com/api/document-types?page=2&limit=10" \
  -H "Authorization: Bearer YOUR_API_KEY"

Get Document Type

Retrieve a specific document type by ID, including its full JSON schema.

doc_type = client.document_types.get("dt_abc123")

print(f"Name: {doc_type.name}")
print(f"Code: {doc_type.codeType}")
print(f"Description: {doc_type.description}")
print(f"Schema: {doc_type.schema_}")
const docType = await client.documentTypes.get('dt_abc123');

console.log(`Name: ${docType.name}`);
console.log(`Code: ${docType.codeType}`);
console.log(`Description: ${docType.description}`);
console.log('Schema:', JSON.stringify(docType.schema, null, 2));
curl https://app.docutray.com/api/document-types/dt_abc123 \
  -H "Authorization: Bearer YOUR_API_KEY"

Create Document Type

Create a new document type with a JSON schema that defines the fields to extract.

from docutray import Client

client = Client(api_key="YOUR_API_KEY")

doc_type = client.document_types.create(
    name="Purchase Order",
    code_type="myorg_purchase_order",
    description="Standard purchase order document",
    json_schema={
        "type": "object",
        "properties": {
            "po_number": {"type": "string", "description": "Purchase order number"},
            "vendor": {"type": "string", "description": "Vendor name"},
            "total": {"type": "number", "description": "Total amount"},
        },
    },
    is_draft=True,
    conversion_mode="json",
)

print(f"Created: {doc_type.name} ({doc_type.codeType})")
print(f"Status: {doc_type.status}")
import DocuTray from 'docutray';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

const docType = await client.documentTypes.create({
  name: 'Purchase Order',
  codeType: 'myorg_purchase_order',
  description: 'Standard purchase order document',
  jsonSchema: {
    type: 'object',
    properties: {
      po_number: { type: 'string', description: 'Purchase order number' },
      vendor: { type: 'string', description: 'Vendor name' },
      total: { type: 'number', description: 'Total amount' },
    },
  },
  isDraft: true,
  conversionMode: 'json',
});

console.log(`Created: ${docType.name} (${docType.codeType})`);
console.log(`Status: ${docType.status}`);
curl -X POST https://app.docutray.com/api/document-types \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Purchase Order",
    "codeType": "myorg_purchase_order",
    "description": "Standard purchase order document",
    "jsonSchema": {
      "type": "object",
      "properties": {
        "po_number": {"type": "string", "description": "Purchase order number"},
        "vendor": {"type": "string", "description": "Vendor name"},
        "total": {"type": "number", "description": "Total amount"}
      }
    },
    "isDraft": true,
    "conversionMode": "json"
  }'

Response

{
  "data": {
    "id": "cm5vm9hx30001m5cgh0p9v8qa",
    "codeType": "myorg_purchase_order",
    "name": "Purchase Order",
    "description": "Standard purchase order document",
    "isPublic": false,
    "isDraft": true,
    "status": "draft",
    "createdAt": "2024-03-15T10:30:00.000Z",
    "updatedAt": "2024-03-15T10:30:00.000Z"
  }
}

Admin vs Non-Admin Behavior

| Behavior | Non-Admin Users | Admin Users |
| --- | --- | --- |
| codeType prefix | Must start with org slug (e.g., myorg_) | No prefix required |
| codeType min length | At least 3 characters after prefix | No minimum after prefix |
| isPublic | Forced to false | Can set to true |
| source | Set to USER | Defaults to ADMIN |

Error Handling

from docutray import Client, ConflictError, BadRequestError, PermissionDeniedError

client = Client(api_key="YOUR_API_KEY")

try:
    doc_type = client.document_types.create(
        name="Purchase Order",
        code_type="myorg_purchase_order",
        description="Standard purchase order document",
        json_schema={"type": "object", "properties": {}},
    )
except ConflictError:
    # 409: codeType already exists
    print("A document type with this codeType already exists")
except BadRequestError as e:
    # 400: Invalid request body
    print(f"Validation error: {e.message}")
except PermissionDeniedError:
    # 403: Insufficient permissions
    print("You don't have permission to create this document type")
import DocuTray, { ConflictError, BadRequestError, PermissionDeniedError } from 'docutray';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

try {
  const docType = await client.documentTypes.create({
    name: 'Purchase Order',
    codeType: 'myorg_purchase_order',
    description: 'Standard purchase order document',
    jsonSchema: { type: 'object', properties: {} },
  });
} catch (error) {
  if (error instanceof ConflictError) {
    // 409: codeType already exists
    console.error('A document type with this codeType already exists');
  } else if (error instanceof BadRequestError) {
    // 400: Invalid request body
    console.error(`Validation error: ${error.message}`);
  } else if (error instanceof PermissionDeniedError) {
    // 403: Insufficient permissions
    console.error("You don't have permission to create this document type");
  }
}

Update Document Type

Update an existing document type. All fields are optional. The codeType field is immutable and will be ignored if provided.

from docutray import Client

client = Client(api_key="YOUR_API_KEY")

doc_type = client.document_types.update(
    "cm5vm9hx30001m5cgh0p9v8qa",
    name="Updated Purchase Order",
    description="Updated description",
    is_draft=False,  # Publish the document type
    prompt_hints="Focus on extracting line items and totals",
)

print(f"Updated: {doc_type.name}")
print(f"Status: {doc_type.status}")  # "published" when isDraft=False
import DocuTray from 'docutray';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

const docType = await client.documentTypes.update('cm5vm9hx30001m5cgh0p9v8qa', {
  name: 'Updated Purchase Order',
  description: 'Updated description',
  isDraft: false, // Publish the document type
  promptHints: 'Focus on extracting line items and totals',
});

console.log(`Updated: ${docType.name}`);
console.log(`Status: ${docType.status}`); // "published" when isDraft=false
curl -X PUT https://app.docutray.com/api/document-types/cm5vm9hx30001m5cgh0p9v8qa \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Updated Purchase Order",
    "description": "Updated description",
    "isDraft": false,
    "promptHints": "Focus on extracting line items and totals"
  }'

Response

{
  "data": {
    "id": "cm5vm9hx30001m5cgh0p9v8qa",
    "codeType": "myorg_purchase_order",
    "name": "Updated Purchase Order",
    "description": "Updated description",
    "isPublic": false,
    "isDraft": false,
    "status": "published",
    "createdAt": "2024-03-15T10:30:00.000Z",
    "updatedAt": "2024-03-16T14:20:00.000Z"
  }
}

Permissions

  • Non-admin users can only update document types they created.
  • Non-admin users cannot set isPublic to true.
  • When isDraft changes, the status field is automatically updated.
  • A version snapshot is created before each update.

Error Handling

from docutray import Client, NotFoundError, PermissionDeniedError, BadRequestError

client = Client(api_key="YOUR_API_KEY")

try:
    doc_type = client.document_types.update(
        "cm5vm9hx30001m5cgh0p9v8qa",
        name="Updated Name",
    )
except NotFoundError:
    # 404: Document type not found
    print("Document type not found")
except PermissionDeniedError:
    # 403: Can only update your own document types
    print("You don't have permission to update this document type")
except BadRequestError as e:
    # 400: Invalid request body
    print(f"Validation error: {e.message}")
import DocuTray, { NotFoundError, PermissionDeniedError, BadRequestError } from 'docutray';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

try {
  const docType = await client.documentTypes.update('cm5vm9hx30001m5cgh0p9v8qa', {
    name: 'Updated Name',
  });
} catch (error) {
  if (error instanceof NotFoundError) {
    // 404: Document type not found
    console.error('Document type not found');
  } else if (error instanceof PermissionDeniedError) {
    // 403: Can only update your own document types
    console.error("You don't have permission to update this document type");
  } else if (error instanceof BadRequestError) {
    // 400: Invalid request body
    console.error(`Validation error: ${error.message}`);
  }
}

Validate Data

Validate extracted data against a document type's schema. Useful for checking data quality after conversion or before submitting to downstream systems.

result = client.document_types.validate(
    "dt_invoice",
    {"invoice_number": "INV-001", "total": 100}
)

if result.is_valid():
    print("Data is valid!")
else:
    for error in result.errors.messages:
        print(f"Error: {error}")
    for warning in result.warnings.messages:
        print(f"Warning: {warning}")
const result = await client.documentTypes.validate('dt_invoice', {
  invoice_number: 'INV-001',
  total: 100,
});

if (result.errors.count === 0) {
  console.log('Data is valid!');
} else {
  for (const error of result.errors.messages) {
    console.log(`Error: ${error}`);
  }
}

if (result.warnings?.count > 0) {
  for (const warning of result.warnings.messages) {
    console.log(`Warning: ${warning}`);
  }
}

Understanding Schemas

Each document type has a JSON schema that defines the fields DocuTray extracts. For example, an invoice schema might look like:

{
  "type": "object",
  "properties": {
    "invoice_number": {
      "type": "string",
      "description": "Invoice number or identifier"
    },
    "issue_date": {
      "type": "string",
      "format": "date",
      "description": "Date the invoice was issued"
    },
    "vendor_name": {
      "type": "string",
      "description": "Name of the issuing company"
    },
    "line_items": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "description": { "type": "string" },
          "quantity": { "type": "number" },
          "unit_price": { "type": "number" },
          "amount": { "type": "number" }
        }
      }
    },
    "subtotal": { "type": "number" },
    "tax": { "type": "number" },
    "total": { "type": "number" }
  }
}

The schema determines which fields are extracted during conversion and what data types they should have.

Parameters

List Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| search | string | No | Search by name, code, or description |
| page | integer | No | Page number (default: 1) |
| limit | integer | No | Items per page, 1-100 (default: 20) |

Get Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| id | string | Yes | Document type ID |

Create Parameters (Request Body)

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| name | string | Yes | Document type name (min 2 characters) |
| codeType | string | Yes | Unique code identifier (^[a-z0-9_]+$). Non-admin users must prefix with org slug. |
| description | string | Yes | Document type description |
| jsonSchema | object | Yes | JSON Schema for document validation |
| isDraft | boolean | No | Whether the document type is a draft (default: true) |
| promptHints | string | No | Hints for the OCR prompt |
| identifyPromptHints | string | No | Hints for the document identification prompt |
| conversionMode | string | No | Conversion mode: json, toon, or multi_prompt (default: json) |
| keepPropertyOrdering | boolean | No | Preserve property ordering in schema (default: false) |
| isPublic | boolean | No | Whether the document type is public (admin only) |

Update Parameters

Path Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| id | string | Yes | Document type ID |

Request Body

All fields are optional. The codeType field is immutable and cannot be changed.

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| name | string | No | Document type name |
| description | string | No | Document type description |
| jsonSchema | object | No | JSON Schema for document validation |
| isDraft | boolean | No | Whether the document type is a draft |
| promptHints | string | No | Hints for the OCR prompt |
| identifyPromptHints | string | No | Hints for the document identification prompt |
| conversionMode | string | No | Conversion mode: json, toon, or multi_prompt |
| keepPropertyOrdering | boolean | No | Preserve property ordering in schema |
| isPublic | boolean | No | Whether the document type is public (admin only) |

Complete Code

from docutray import Client, NotFoundError, ConflictError, DocuTrayError

client = Client(api_key="YOUR_API_KEY")

try:
    # List available document types
    print("Available document types:")
    for doc_type in client.document_types.list().auto_paging_iter():
        status = "published" if not doc_type.isDraft else "draft"
        scope = "public" if doc_type.isPublic else "private"
        print(f"  [{status}/{scope}] {doc_type.name} ({doc_type.codeType})")

    # Get details for a specific type
    invoice_type = client.document_types.get("dt_abc123")
    print(f"\nSchema for {invoice_type.name}:")
    print(invoice_type.schema_)

    # Create a new document type
    new_type = client.document_types.create(
        name="Purchase Order",
        code_type="myorg_purchase_order",
        description="Standard purchase order document",
        json_schema={
            "type": "object",
            "properties": {
                "po_number": {"type": "string"},
                "total": {"type": "number"},
            },
        },
    )
    print(f"\nCreated: {new_type.name} ({new_type.codeType})")

    # Update the document type
    updated = client.document_types.update(
        new_type.id,
        name="Updated Purchase Order",
        is_draft=False,
    )
    print(f"Updated: {updated.name}, Status: {updated.status}")

except ConflictError:
    print("Document type with this codeType already exists")
except NotFoundError:
    print("Document type not found")
except DocuTrayError as e:
    print(f"Error: {e.message}")
finally:
    client.close()
import DocuTray, { NotFoundError, ConflictError, DocuTrayError } from 'docutray';

const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });

try {
  // List available document types
  console.log('Available document types:');
  for await (const docType of client.documentTypes.list().autoPagingIter()) {
    const status = docType.isDraft ? 'draft' : 'published';
    const scope = docType.isPublic ? 'public' : 'private';
    console.log(`  [${status}/${scope}] ${docType.name} (${docType.codeType})`);
  }

  // Get details for a specific type
  const invoiceType = await client.documentTypes.get('dt_abc123');
  console.log(`\nSchema for ${invoiceType.name}:`);
  console.log(JSON.stringify(invoiceType.schema, null, 2));

  // Create a new document type
  const newType = await client.documentTypes.create({
    name: 'Purchase Order',
    codeType: 'myorg_purchase_order',
    description: 'Standard purchase order document',
    jsonSchema: {
      type: 'object',
      properties: {
        po_number: { type: 'string' },
        total: { type: 'number' },
      },
    },
  });
  console.log(`\nCreated: ${newType.name} (${newType.codeType})`);

  // Update the document type
  const updated = await client.documentTypes.update(newType.id, {
    name: 'Updated Purchase Order',
    isDraft: false,
  });
  console.log(`Updated: ${updated.name}, Status: ${updated.status}`);
} catch (error) {
  if (error instanceof ConflictError) {
    console.error('Document type with this codeType already exists');
  } else if (error instanceof NotFoundError) {
    console.error('Document type not found');
  } else if (error instanceof DocuTrayError) {
    console.error(`Error: ${error.message}`);
  }
}

SDK Reference

For detailed class and method documentation:

On this page