Document Types
List, create, update, inspect, and validate document types and their schemas
Document Types
Document types define what data DocuTray extracts from your documents. Each type has a JSON schema that describes the fields to extract. Use the Document Types API to list available types, create custom types, update existing ones, inspect their schemas, and validate extracted data.
List Document Types
Retrieve all document types accessible to your organization, including public types and your custom types.
from docutray import Client
client = Client(api_key="YOUR_API_KEY")
# List all document types
page = client.document_types.list()
for doc_type in page.data:
print(f"{doc_type.codeType}: {doc_type.name}")
# Auto-paginate through all results
for doc_type in client.document_types.list().auto_paging_iter():
print(f"{doc_type.codeType}: {doc_type.name}")
import DocuTray from 'docutray';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
// List all document types
const page = await client.documentTypes.list();
for (const docType of page.data) {
console.log(`${docType.codeType}: ${docType.name}`);
}
// Auto-paginate through all results
for await (const docType of client.documentTypes.list().autoPagingIter()) {
console.log(`${docType.codeType}: ${docType.name}`);
}
curl https://app.docutray.com/api/document-types \
-H "Authorization: Bearer YOUR_API_KEY"
# With search and pagination
curl "https://app.docutray.com/api/document-types?search=invoice&page=1&limit=20" \
-H "Authorization: Bearer YOUR_API_KEY"
Response
# page is a Page[DocumentType]
print(f"Total: {page.pagination.total}")
print(f"Page: {page.pagination.page}")
for dt in page.data:
print(f" {dt.name} ({dt.codeType})")
print(f" Public: {dt.isPublic}, Draft: {dt.isDraft}")
// page is a Page<DocumentType>
console.log(`Total: ${page.pagination.total}`);
console.log(`Page: ${page.pagination.page}`);
for (const dt of page.data) {
console.log(` ${dt.name} (${dt.codeType})`);
console.log(` Public: ${dt.isPublic}, Draft: ${dt.isDraft}`);
}
{
"data": [
{
"id": "cm5vm9hx30001m5cgh0p9v8qa",
"name": "Invoice",
"codeType": "invoice",
"description": "Standard invoice document",
"isPublic": true,
"isDraft": false,
"createdAt": "2024-01-15T10:30:00.000Z",
"updatedAt": "2024-01-15T10:30:00.000Z"
}
],
"pagination": {
"total": 50,
"page": 1,
"limit": 20
}
}
Search and Pagination
# Search by name or code
page = client.document_types.list(search="invoice")
# Manual pagination
page = client.document_types.list(page=1, limit=10)
# Iterate through all pages
for page_chunk in client.document_types.list().iter_pages():
print(f"Page {page_chunk.page}: {len(page_chunk.data)} items")
// Search by name or code
const page = await client.documentTypes.list({ search: 'invoice' });
// Manual pagination
const page2 = await client.documentTypes.list({ page: 1, limit: 10 });
// Iterate through all pages
for await (const pageChunk of client.documentTypes.list().iterPages()) {
console.log(`Page: ${pageChunk.data.length} items`);
}
# Search by name
curl "https://app.docutray.com/api/document-types?search=invoice" \
-H "Authorization: Bearer YOUR_API_KEY"
# Page 2 with 10 results per page
curl "https://app.docutray.com/api/document-types?page=2&limit=10" \
-H "Authorization: Bearer YOUR_API_KEY"
Get Document Type
Retrieve a specific document type by ID, including its full JSON schema.
doc_type = client.document_types.get("dt_abc123")
print(f"Name: {doc_type.name}")
print(f"Code: {doc_type.codeType}")
print(f"Description: {doc_type.description}")
print(f"Schema: {doc_type.schema_}")
const docType = await client.documentTypes.get('dt_abc123');
console.log(`Name: ${docType.name}`);
console.log(`Code: ${docType.codeType}`);
console.log(`Description: ${docType.description}`);
console.log('Schema:', JSON.stringify(docType.schema, null, 2));
curl https://app.docutray.com/api/document-types/dt_abc123 \
-H "Authorization: Bearer YOUR_API_KEY"
Create Document Type
Create a new document type with a JSON schema that defines the fields to extract.
from docutray import Client
client = Client(api_key="YOUR_API_KEY")
doc_type = client.document_types.create(
name="Purchase Order",
code_type="myorg_purchase_order",
description="Standard purchase order document",
json_schema={
"type": "object",
"properties": {
"po_number": {"type": "string", "description": "Purchase order number"},
"vendor": {"type": "string", "description": "Vendor name"},
"total": {"type": "number", "description": "Total amount"},
},
},
is_draft=True,
conversion_mode="json",
)
print(f"Created: {doc_type.name} ({doc_type.codeType})")
print(f"Status: {doc_type.status}")
import DocuTray from 'docutray';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
const docType = await client.documentTypes.create({
name: 'Purchase Order',
codeType: 'myorg_purchase_order',
description: 'Standard purchase order document',
jsonSchema: {
type: 'object',
properties: {
po_number: { type: 'string', description: 'Purchase order number' },
vendor: { type: 'string', description: 'Vendor name' },
total: { type: 'number', description: 'Total amount' },
},
},
isDraft: true,
conversionMode: 'json',
});
console.log(`Created: ${docType.name} (${docType.codeType})`);
console.log(`Status: ${docType.status}`);
curl -X POST https://app.docutray.com/api/document-types \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"name": "Purchase Order",
"codeType": "myorg_purchase_order",
"description": "Standard purchase order document",
"jsonSchema": {
"type": "object",
"properties": {
"po_number": {"type": "string", "description": "Purchase order number"},
"vendor": {"type": "string", "description": "Vendor name"},
"total": {"type": "number", "description": "Total amount"}
}
},
"isDraft": true,
"conversionMode": "json"
}'
Response
{
"data": {
"id": "cm5vm9hx30001m5cgh0p9v8qa",
"codeType": "myorg_purchase_order",
"name": "Purchase Order",
"description": "Standard purchase order document",
"isPublic": false,
"isDraft": true,
"status": "draft",
"createdAt": "2024-03-15T10:30:00.000Z",
"updatedAt": "2024-03-15T10:30:00.000Z"
}
}
Admin vs Non-Admin Behavior
| Behavior | Non-Admin Users | Admin Users |
|---|---|---|
| `codeType` prefix | Must start with org slug (e.g., `myorg_`) | No prefix required |
| `codeType` min length | At least 3 characters after prefix | No minimum after prefix |
| `isPublic` | Forced to `false` | Can set to `true` |
| `source` | Set to `USER` | Defaults to `ADMIN` |
Error Handling
from docutray import Client, ConflictError, BadRequestError, PermissionDeniedError
client = Client(api_key="YOUR_API_KEY")
try:
doc_type = client.document_types.create(
name="Purchase Order",
code_type="myorg_purchase_order",
description="Standard purchase order document",
json_schema={"type": "object", "properties": {}},
)
except ConflictError:
# 409: codeType already exists
print("A document type with this codeType already exists")
except BadRequestError as e:
# 400: Invalid request body
print(f"Validation error: {e.message}")
except PermissionDeniedError:
# 403: Insufficient permissions
print("You don't have permission to create this document type")
import DocuTray, { ConflictError, BadRequestError, PermissionDeniedError } from 'docutray';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
try {
const docType = await client.documentTypes.create({
name: 'Purchase Order',
codeType: 'myorg_purchase_order',
description: 'Standard purchase order document',
jsonSchema: { type: 'object', properties: {} },
});
} catch (error) {
if (error instanceof ConflictError) {
// 409: codeType already exists
console.error('A document type with this codeType already exists');
} else if (error instanceof BadRequestError) {
// 400: Invalid request body
console.error(`Validation error: ${error.message}`);
} else if (error instanceof PermissionDeniedError) {
// 403: Insufficient permissions
console.error("You don't have permission to create this document type");
}
}
Update Document Type
Update an existing document type. All fields are optional. The codeType field is immutable and will be ignored if provided.
from docutray import Client
client = Client(api_key="YOUR_API_KEY")
doc_type = client.document_types.update(
"cm5vm9hx30001m5cgh0p9v8qa",
name="Updated Purchase Order",
description="Updated description",
is_draft=False, # Publish the document type
prompt_hints="Focus on extracting line items and totals",
)
print(f"Updated: {doc_type.name}")
print(f"Status: {doc_type.status}") # "published" when isDraft=False
import DocuTray from 'docutray';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
const docType = await client.documentTypes.update('cm5vm9hx30001m5cgh0p9v8qa', {
name: 'Updated Purchase Order',
description: 'Updated description',
isDraft: false, // Publish the document type
promptHints: 'Focus on extracting line items and totals',
});
console.log(`Updated: ${docType.name}`);
console.log(`Status: ${docType.status}`); // "published" when isDraft=false
curl -X PUT https://app.docutray.com/api/document-types/cm5vm9hx30001m5cgh0p9v8qa \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"name": "Updated Purchase Order",
"description": "Updated description",
"isDraft": false,
"promptHints": "Focus on extracting line items and totals"
}'
Response
{
"data": {
"id": "cm5vm9hx30001m5cgh0p9v8qa",
"codeType": "myorg_purchase_order",
"name": "Updated Purchase Order",
"description": "Updated description",
"isPublic": false,
"isDraft": false,
"status": "published",
"createdAt": "2024-03-15T10:30:00.000Z",
"updatedAt": "2024-03-16T14:20:00.000Z"
}
}
Permissions
- Non-admin users can only update document types they created.
- Non-admin users cannot set `isPublic` to `true`.
- When `isDraft` changes, the `status` field is automatically updated.
- A version snapshot is created before each update.
Error Handling
from docutray import Client, NotFoundError, PermissionDeniedError, BadRequestError
client = Client(api_key="YOUR_API_KEY")
try:
doc_type = client.document_types.update(
"cm5vm9hx30001m5cgh0p9v8qa",
name="Updated Name",
)
except NotFoundError:
# 404: Document type not found
print("Document type not found")
except PermissionDeniedError:
# 403: Can only update your own document types
print("You don't have permission to update this document type")
except BadRequestError as e:
# 400: Invalid request body
print(f"Validation error: {e.message}")
import DocuTray, { NotFoundError, PermissionDeniedError, BadRequestError } from 'docutray';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
try {
const docType = await client.documentTypes.update('cm5vm9hx30001m5cgh0p9v8qa', {
name: 'Updated Name',
});
} catch (error) {
if (error instanceof NotFoundError) {
// 404: Document type not found
console.error('Document type not found');
} else if (error instanceof PermissionDeniedError) {
// 403: Can only update your own document types
console.error("You don't have permission to update this document type");
} else if (error instanceof BadRequestError) {
// 400: Invalid request body
console.error(`Validation error: ${error.message}`);
}
}
Validate Data
Validate extracted data against a document type's schema. Useful for checking data quality after conversion or before submitting to downstream systems.
result = client.document_types.validate(
"dt_invoice",
{"invoice_number": "INV-001", "total": 100}
)
if result.is_valid():
print("Data is valid!")
else:
for error in result.errors.messages:
print(f"Error: {error}")
for warning in result.warnings.messages:
print(f"Warning: {warning}")
const result = await client.documentTypes.validate('dt_invoice', {
invoice_number: 'INV-001',
total: 100,
});
if (result.errors.count === 0) {
console.log('Schema is valid!');
} else {
for (const error of result.errors.messages) {
console.log(`Error: ${error}`);
}
}
if (result.warnings?.count > 0) {
for (const warning of result.warnings.messages) {
console.log(`Warning: ${warning}`);
}
}
Understanding Schemas
Each document type has a JSON schema that defines the fields DocuTray extracts. For example, an invoice schema might look like:
{
"type": "object",
"properties": {
"invoice_number": {
"type": "string",
"description": "Invoice number or identifier"
},
"issue_date": {
"type": "string",
"format": "date",
"description": "Date the invoice was issued"
},
"vendor_name": {
"type": "string",
"description": "Name of the issuing company"
},
"line_items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"description": { "type": "string" },
"quantity": { "type": "number" },
"unit_price": { "type": "number" },
"amount": { "type": "number" }
}
}
},
"subtotal": { "type": "number" },
"tax": { "type": "number" },
"total": { "type": "number" }
}
}
The schema determines which fields are extracted during conversion and what data types they should have.
Parameters
List Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| `search` | string | No | Search by name, code, or description |
| `page` | integer | No | Page number (default: 1) |
| `limit` | integer | No | Items per page, 1-100 (default: 20) |
Get Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| `id` | string | Yes | Document type ID |
Create Parameters (Request Body)
| Parameter | Type | Required | Description |
|---|---|---|---|
| `name` | string | Yes | Document type name (min 2 characters) |
| `codeType` | string | Yes | Unique code identifier (`^[a-z0-9_]+$`). Non-admin users must prefix with org slug. |
| `description` | string | Yes | Document type description |
| `jsonSchema` | object | Yes | JSON Schema for document validation |
| `isDraft` | boolean | No | Whether the document type is a draft (default: `true`) |
| `promptHints` | string | No | Hints for the OCR prompt |
| `identifyPromptHints` | string | No | Hints for the document identification prompt |
| `conversionMode` | string | No | Conversion mode: `json`, `toon`, or `multi_prompt` (default: `json`) |
| `keepPropertyOrdering` | boolean | No | Preserve property ordering in schema (default: `false`) |
| `isPublic` | boolean | No | Whether the document type is public (admin only) |
Update Parameters
Path Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| `id` | string | Yes | Document type ID |
Request Body
All fields are optional. The codeType field is immutable and cannot be changed.
| Parameter | Type | Required | Description |
|---|---|---|---|
| `name` | string | No | Document type name |
| `description` | string | No | Document type description |
| `jsonSchema` | object | No | JSON Schema for document validation |
| `isDraft` | boolean | No | Whether the document type is a draft |
| `promptHints` | string | No | Hints for the OCR prompt |
| `identifyPromptHints` | string | No | Hints for the document identification prompt |
| `conversionMode` | string | No | Conversion mode: `json`, `toon`, or `multi_prompt` |
| `keepPropertyOrdering` | boolean | No | Preserve property ordering in schema |
| `isPublic` | boolean | No | Whether the document type is public (admin only) |
Complete Code
from docutray import Client, NotFoundError, ConflictError, DocuTrayError
client = Client(api_key="YOUR_API_KEY")
try:
# List available document types
print("Available document types:")
for doc_type in client.document_types.list().auto_paging_iter():
status = "published" if not doc_type.isDraft else "draft"
scope = "public" if doc_type.isPublic else "private"
print(f" [{status}/{scope}] {doc_type.name} ({doc_type.codeType})")
# Get details for a specific type
invoice_type = client.document_types.get("dt_abc123")
print(f"\nSchema for {invoice_type.name}:")
print(invoice_type.schema_)
# Create a new document type
new_type = client.document_types.create(
name="Purchase Order",
code_type="myorg_purchase_order",
description="Standard purchase order document",
json_schema={
"type": "object",
"properties": {
"po_number": {"type": "string"},
"total": {"type": "number"},
},
},
)
print(f"\nCreated: {new_type.name} ({new_type.codeType})")
# Update the document type
updated = client.document_types.update(
new_type.id,
name="Updated Purchase Order",
is_draft=False,
)
print(f"Updated: {updated.name}, Status: {updated.status}")
except ConflictError:
print("Document type with this codeType already exists")
except NotFoundError:
print("Document type not found")
except DocuTrayError as e:
print(f"Error: {e.message}")
finally:
client.close()
import DocuTray, { NotFoundError, ConflictError, DocuTrayError } from 'docutray';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
try {
// List available document types
console.log('Available document types:');
for await (const docType of client.documentTypes.list().autoPagingIter()) {
const status = docType.isDraft ? 'draft' : 'published';
const scope = docType.isPublic ? 'public' : 'private';
console.log(` [${status}/${scope}] ${docType.name} (${docType.codeType})`);
}
// Get details for a specific type
const invoiceType = await client.documentTypes.get('dt_abc123');
console.log(`\nSchema for ${invoiceType.name}:`);
console.log(JSON.stringify(invoiceType.schema, null, 2));
// Create a new document type
const newType = await client.documentTypes.create({
name: 'Purchase Order',
codeType: 'myorg_purchase_order',
description: 'Standard purchase order document',
jsonSchema: {
type: 'object',
properties: {
po_number: { type: 'string' },
total: { type: 'number' },
},
},
});
console.log(`\nCreated: ${newType.name} (${newType.codeType})`);
// Update the document type
const updated = await client.documentTypes.update(newType.id, {
name: 'Updated Purchase Order',
isDraft: false,
});
console.log(`Updated: ${updated.name}, Status: ${updated.status}`);
} catch (error) {
if (error instanceof ConflictError) {
console.error('Document type with this codeType already exists');
} else if (error instanceof NotFoundError) {
console.error('Document type not found');
} else if (error instanceof DocuTrayError) {
console.error(`Error: ${error.message}`);
}
}
SDK Reference
For detailed class and method documentation: