Identify Documents
Automatically detect document types using AI classification
Identify Documents
The Identify operation automatically detects which type of document you have. Given a document and a list of possible types, DocuTray returns the best match with a confidence score and ranked alternatives.
Quick Start
from pathlib import Path
from docutray import Client
client = Client(api_key="YOUR_API_KEY")
result = client.identify.run(
file=Path("document.pdf"),
document_type_code_options=["invoice", "receipt", "contract"]
)
print(f"Type: {result.document_type.name}")
print(f"Confidence: {result.document_type.confidence:.0%}")
import DocuTray from 'docutray';
import { readFileSync } from 'fs';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
const result = await client.identify.run({
file: readFileSync('document.pdf'),
filename: 'document.pdf',
documentTypeCodeOptions: ['invoice', 'receipt', 'contract'],
});
console.log(`Type: ${result.document_type.name}`);
console.log(`Confidence: ${(result.document_type.confidence * 100).toFixed(0)}%`);
curl -X POST https://app.docutray.com/api/identify \
-H "Authorization: Bearer YOUR_API_KEY" \
-F "image=@document.pdf" \
-F 'document_type_code_options=["invoice", "receipt", "contract"]'
Response
# result is an IdentificationResult
print(result.document_type.code) # "invoice"
print(result.document_type.name) # "Invoice"
print(result.document_type.confidence) # 0.95
# View alternatives ranked by confidence
for alt in result.alternatives:
print(f" {alt.name}: {alt.confidence:.0%}")
// result is an IdentificationResult
console.log(result.document_type.code); // "invoice"
console.log(result.document_type.name); // "Invoice"
console.log(result.document_type.confidence); // 0.95
// View alternatives ranked by confidence
for (const alt of result.alternatives) {
console.log(` ${alt.name}: ${(alt.confidence * 100).toFixed(0)}%`);
}
{
"document_type": {
"code": "invoice",
"name": "Invoice",
"confidence": 0.95
},
"alternatives": [
{
"code": "receipt",
"name": "Receipt",
"confidence": 0.04
},
{
"code": "contract",
"name": "Contract",
"confidence": 0.01
}
]
}
Async Identification
For large documents, use async identification to process them in the background.
# Start async identification
status = client.identify.run_async(
file=Path("document.pdf"),
document_type_code_options=["invoice", "receipt"]
)
# Wait for completion
result = status.wait()
if result.is_success():
print(f"Type: {result.document_type.code}")
// Start async identification
const status = await client.identify.runAsync({
file: readFileSync('document.pdf'),
filename: 'document.pdf',
documentTypeCodeOptions: ['invoice', 'receipt'],
});
// Wait for completion
const result = await status.wait({
onStatus: (s) => console.log(`Status: ${s.status}`),
});
if (result.isSuccess()) {
console.log(`Type: ${result.document_type.code}`);
}
# Start async identification
curl -X POST https://app.docutray.com/api/identify-async \
-H "Authorization: Bearer YOUR_API_KEY" \
-F "image=@document.pdf" \
-F 'document_type_code_options=["invoice", "receipt"]'
# Poll for status
curl https://app.docutray.com/api/identify-async/status/IDENTIFICATION_ID \
-H "Authorization: Bearer YOUR_API_KEY"
Identify Then Convert
A common pattern is to first identify a document, then convert it using the detected type. This is useful when you receive documents of unknown types.
from pathlib import Path
from docutray import Client
client = Client(api_key="YOUR_API_KEY")
document = Path("unknown_document.pdf")
# Step 1: Identify the document type
identification = client.identify.run(
file=document,
document_type_code_options=["invoice", "receipt", "contract"]
)
detected_type = identification.document_type.code
confidence = identification.document_type.confidence
print(f"Detected: {detected_type} ({confidence:.0%})")
# Step 2: Convert using the detected type
if confidence > 0.8:
result = client.convert.run(
file=document,
document_type_code=detected_type
)
print(result.data)
else:
print("Low confidence — review manually")
import DocuTray from 'docutray';
import { readFileSync } from 'fs';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
const document = readFileSync('unknown_document.pdf');
// Step 1: Identify the document type
const identification = await client.identify.run({
file: document,
documentTypeCodeOptions: ['invoice', 'receipt', 'contract'],
});
const detectedType = identification.document_type.code;
const confidence = identification.document_type.confidence;
console.log(`Detected: ${detectedType} (${(confidence * 100).toFixed(0)}%)`);
// Step 2: Convert using the detected type
if (confidence > 0.8) {
const result = await client.convert.run({
file: document,
documentTypeCode: detectedType,
});
console.log(result.data);
} else {
console.log('Low confidence — review manually');
}
# Step 1: Identify the document type
IDENTIFY_RESULT=$(curl -s -X POST https://app.docutray.com/api/identify \
-H "Authorization: Bearer YOUR_API_KEY" \
-F "image=@unknown_document.pdf" \
-F 'document_type_code_options=["invoice", "receipt", "contract"]')
# Extract the detected type code
DOC_TYPE=$(echo $IDENTIFY_RESULT | jq -r '.document_type.code')
echo "Detected type: $DOC_TYPE"
# Step 2: Convert using the detected type
curl -X POST https://app.docutray.com/api/convert \
-H "Authorization: Bearer YOUR_API_KEY" \
-F "image=@unknown_document.pdf" \
-F "document_type_code=$DOC_TYPE"
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| file | File | No | File to identify (path, bytes, or file object) |
| url | string | No | Public URL of the document to download and identify |
| file_base64 / base64 | string | No | Base64-encoded document content |
| document_type_code_options | string[] | Yes | List of document type codes to consider |
| content_type | string | No | MIME type of the document (auto-detected if not provided) |
| document_metadata | object | No | Custom metadata to attach to the identification |
You must provide exactly one of file, url, or file_base64/base64.
Complete Code
End-to-end example with the identify-then-convert pattern and error handling.
from pathlib import Path
from docutray import Client, NotFoundError, DocuTrayError
client = Client(api_key="YOUR_API_KEY")
DOCUMENT_TYPES = ["invoice", "receipt", "contract", "id_card"]
try:
document = Path("incoming_document.pdf")
# Identify document type
identification = client.identify.run(
file=document,
document_type_code_options=DOCUMENT_TYPES
)
best_match = identification.document_type
print(f"Identified as: {best_match.name} ({best_match.confidence:.0%})")
# Show alternatives if confidence is moderate
if best_match.confidence < 0.9:
print("Alternatives:")
for alt in identification.alternatives:
print(f" - {alt.name}: {alt.confidence:.0%}")
# Convert if confidence is sufficient
if best_match.confidence >= 0.7:
result = client.convert.run(
file=document,
document_type_code=best_match.code
)
print(f"Extracted {len(result.data)} fields")
else:
print("Confidence too low for automatic conversion")
except NotFoundError:
print("One or more document types not found")
except DocuTrayError as e:
print(f"Error: {e.message}")
finally:
client.close()
import DocuTray, { NotFoundError, DocuTrayError } from 'docutray';
import { readFileSync } from 'fs';
const client = new DocuTray({ apiKey: 'YOUR_API_KEY' });
const DOCUMENT_TYPES = ['invoice', 'receipt', 'contract', 'id_card'];
try {
const document = readFileSync('incoming_document.pdf');
// Identify document type
const identification = await client.identify.run({
file: document,
documentTypeCodeOptions: DOCUMENT_TYPES,
});
const bestMatch = identification.document_type;
console.log(`Identified as: ${bestMatch.name} (${(bestMatch.confidence * 100).toFixed(0)}%)`);
// Show alternatives if confidence is moderate
if (bestMatch.confidence < 0.9) {
console.log('Alternatives:');
for (const alt of identification.alternatives) {
console.log(` - ${alt.name}: ${(alt.confidence * 100).toFixed(0)}%`);
}
}
// Convert if confidence is sufficient
if (bestMatch.confidence >= 0.7) {
const result = await client.convert.run({
file: document,
documentTypeCode: bestMatch.code,
});
console.log(`Extracted ${Object.keys(result.data).length} fields`);
} else {
console.log('Confidence too low for automatic conversion');
}
} catch (error) {
if (error instanceof NotFoundError) {
console.error('One or more document types not found');
} else if (error instanceof DocuTrayError) {
console.error(`Error: ${error.message}`);
}
}
SDK Reference
For detailed class and method documentation: