SDK Reference
Data structures and types for plugin development
Overview
The Plugin SDK provides standardized data structures for plugin output. Using the SDK ensures your plugin's output matches what MachineFabric expects.
Available SDKs
| Rust | machfab-plugin-sdk | Reference implementation |
| Go | machfab-plugin-sdk-go | Go module |
| Objective-C | MACINAPluginSDK | Swift Package |
Core types
- FileMetadata - Document metadata (title, authors, page count)
- DocumentOutline - Table of contents / hierarchical structure
- DisboundPage[] - Extracted text content by page (array)
- ExtractionInfo - Metadata about the extraction process
- CapManifest - Plugin manifest with capabilities
FileMetadata
Represents metadata extracted from a document. Used by the
extract-metadata capability.
Fields
FileMetadata {
// Required
file_path: String, // Path to source file
file_size_bytes: u64, // File size
document_type: String, // "pdf", "epub", "xyz"
// Common metadata
title: Option<String>,
authors: Vec<String>,
contributors: Vec<String>,
keywords: Vec<String>,
// Dates (ISO 8601 format)
creation_date: Option<String>,
modification_date: Option<String>,
// Content info
page_count: Option<usize>,
chapter_count: Option<usize>,
word_count: Option<usize>,
character_count: Option<usize>,
// Format-specific
mime_type: Option<String>,
encoding: Option<String>,
format_version: Option<String>,
language: Option<String>,
// PDF-specific
pdf_version: Option<String>,
has_forms: bool,
is_encrypted: bool,
is_linearized: bool,
attachment_count: usize,
// EPUB-specific
epub_version: Option<String>,
publisher: Option<String>,
publication_date: Option<String>,
rights: Option<String>,
has_drm: bool,
// Extensible
extended_metadata: HashMap<String, Value>,
}
Rust usage
use machfab_plugin_sdk::FileMetadata;
// Create with required fields
let mut metadata = FileMetadata::new(
"/path/to/file.xyz".to_string(),
"xyz".to_string(),
1024 * 1024, // 1 MB
);
// Set optional fields
metadata.title = Some("Document Title".into());
metadata.add_author("Author One");
metadata.add_author("Author Two");
metadata.add_keyword("tag1");
metadata.page_count = Some(42);
// Add custom fields
metadata.set_extended("custom_field", json!("value"));
metadata.set_extended("nested", json!({"key": "value"}));
// Serialize to JSON
let json = serde_json::to_string_pretty(&metadata)?;
JSON output
{
"file_path": "/path/to/file.xyz",
"file_size_bytes": 1048576,
"document_type": "xyz",
"title": "Document Title",
"authors": ["Author One", "Author Two"],
"keywords": ["tag1"],
"page_count": 42,
"extended_metadata": {
"custom_field": "value",
"nested": {"key": "value"}
}
}
DocumentOutline
Represents a hierarchical table of contents. Used by the
extract-outline capability.
Fields
DocumentOutline {
source_file: String,
document_title: Option<String>,
document_type: String,
total_pages: usize,
entries: Vec<OutlineEntry>,
has_outline: bool,
extraction_info: ExtractionInfo,
}
OutlineEntry {
title: String,
level: usize, // 0 = top level
page: Option<usize>, // 1-indexed page number
source_ref: Option<String>, // Anchor, ID, or filename
children: Vec<OutlineEntry>, // Nested entries
}
Rust usage
use machfab_plugin_sdk::{DocumentOutline, OutlineEntry, ExtractionInfo};
// Create outline
let mut outline = DocumentOutline::new(
"/path/to/file.xyz",
"xyz",
100, // total pages
);
outline.document_title = Some("Book Title".into());
// Add entries with nesting
let mut chapter1 = OutlineEntry::new("Chapter 1: Introduction", 0)
.with_page(1);
let section1 = OutlineEntry::new("1.1 Background", 1)
.with_page(3);
let section2 = OutlineEntry::new("1.2 Overview", 1)
.with_page(8);
chapter1.add_child(section1);
chapter1.add_child(section2);
outline.add_entry(chapter1);
let chapter2 = OutlineEntry::new("Chapter 2: Methods", 0)
.with_page(15);
outline.add_entry(chapter2);
// Set extraction info
outline.extraction_info = ExtractionInfo::new("myplugin", "1.0.0");
JSON output
{
"source_file": "/path/to/file.xyz",
"document_title": "Book Title",
"document_type": "xyz",
"total_pages": 100,
"has_outline": true,
"entries": [
{
"title": "Chapter 1: Introduction",
"level": 0,
"page": 1,
"children": [
{
"title": "1.1 Background",
"level": 1,
"page": 3,
"children": []
},
{
"title": "1.2 Overview",
"level": 1,
"page": 8,
"children": []
}
]
},
{
"title": "Chapter 2: Methods",
"level": 0,
"page": 15,
"children": []
}
],
"extraction_info": {
"extractor_name": "myplugin",
"extractor_version": "1.0.0"
}
}
DisboundPage (Array)
Contains extracted text content organized by page. Used by the
disbind capability. Output is a simple array of DisboundPage objects.
Fields
DisboundPage {
order_index: usize, // 1-indexed page number
text_content: String,
source_ref: Option<String>, // Section name or anchor
word_count: Option<usize>, // Auto-calculated
character_count: Option<usize>,
}
Rust usage
use machfab_plugin_sdk::DisboundPage;
// Create pages as a simple Vec
let mut pages: Vec<DisboundPage> = Vec::new();
// Add pages
let page1 = DisboundPage::new_with_text(
1,
"This is the content of page 1. It contains multiple sentences.".into(),
);
pages.push(page1);
let page2 = DisboundPage::new_with_text(
2,
"Page 2 has different content...".into(),
);
pages.push(page2);
// Word/character counts are available per page
let page1_words = pages[0].word_count();
let page1_chars = pages[0].character_count();
JSON output
[
{
"order_index": 1,
"text_content": "This is the content of page 1...",
"word_count": 11,
"character_count": 62
},
{
"order_index": 2,
"text_content": "Page 2 has different content...",
"word_count": 5,
"character_count": 31
}
]
ExtractionInfo
Metadata about the extraction process itself. Include this in DocumentOutline output.
Fields
ExtractionInfo {
extractor_name: String, // Plugin name
extractor_version: String, // Plugin version
extracted_at: Option<String>, // ISO 8601 timestamp
warnings: Vec<String>, // Non-fatal issues
}
Rust usage
use machfab_plugin_sdk::ExtractionInfo;
let mut info = ExtractionInfo::new("myplugin", "1.0.0");
info.extracted_at = Some(chrono::Utc::now().to_rfc3339());
info.add_warning("Page 3 contained unreadable text");
info.add_warning("OCR confidence low for images");
CapManifest
The plugin manifest returned by the manifest command.
Describes the plugin and its capabilities.
Fields
CapManifest {
name: String,
version: String,
description: String,
authors: Vec<String>,
caps: Vec<Cap>,
}
Rust usage
use capdag::{Cap, CapManifest, CapUrnBuilder};
// Build capabilities
let extract_metadata_cap = Cap::new(
CapUrnBuilder::new()
.tag("action", "extract")
.tag("format", "xyz")
.tag("target", "metadata")
.build()?,
"Extract XYZ Metadata".into(),
"extract-metadata".into(),
);
// Create manifest
let manifest = CapManifest::new(
"myplugin".into(),
env!("CARGO_PKG_VERSION").into(),
"Processes XYZ files".into(),
vec![extract_metadata_cap],
).with_author("Your Name".into());
Standard Caps
The SDK provides helpers to create standard capability definitions.
Standard capability URNs
// Extract metadata
cap:op=extract;format={ext};target=metadata
// Extract outline
cap:op=extract;format={ext};target=outline
// Disbind (extract pages; exposed as `Grind` in the Go and Swift constants)
cap:op=extract;format={ext};target=pages
// Generate thumbnail
cap:op=generate;format={ext};target=thumbnail
Rust helpers
use capdag::{
extract_metadata_cap,
extract_outline_cap,
disbind_cap,
generate_thumbnail_cap,
};
// These fetch full capability definitions from the registry
let cap = extract_metadata_cap(registry.clone(), Some("xyz")).await?;
let cap = extract_outline_cap(registry.clone(), Some("xyz")).await?;
let cap = disbind_cap(registry.clone(), Some("xyz")).await?;
let cap = generate_thumbnail_cap(registry.clone(), Some("xyz")).await?;
Go constants
import sdk "github.com/machfab/machfab-plugin-sdk-go"
sdk.StandardCaps.ExtractMetadata // "extract-metadata"
sdk.StandardCaps.ExtractOutline // "extract-outline"
sdk.StandardCaps.Grind // "grind"
sdk.StandardCaps.GenerateThumbnail // "generate-thumbnail"
Go SDK
The Go SDK provides the same data structures with Go-native APIs.
Installation
go get github.com/machfab/machfab-plugin-sdk-go
Usage
package main
import (
"encoding/json"
"fmt"
sdk "github.com/machfab/machfab-plugin-sdk-go"
)
func main() {
// Create metadata
metadata := sdk.NewFileMetadata("/path/to/file.xyz", "xyz", 1024)
metadata.Title = "Document Title"
metadata.AddAuthor("Author Name")
metadata.PageCount = 42
// Create outline
outline := sdk.NewDocumentOutline("/path/to/file.xyz", "xyz", 100)
entry := sdk.NewOutlineEntry("Chapter 1", 0).WithPage(1)
outline.AddEntry(entry)
// Create pages (simple slice)
var pages []sdk.DisboundPage
page := sdk.NewDisboundPageWithText(1, "Content...")
pages = append(pages, *page)
// Serialize
output, _ := json.MarshalIndent(metadata, "", " ")
fmt.Println(string(output))
}
Plugin registry
import sdk "github.com/machfab/machfab-plugin-sdk-go"
// Create registry
registry := sdk.NewPluginRegistry()
// Check if capability is available
caller, err := registry.Can("cap:op=extract;format=xyz;target=metadata")
if err != nil {
log.Fatal(err)
}
// Call the capability
response, err := caller.Call(ctx, []string{"/path/to/file.xyz"}, nil, nil)
if err != nil {
log.Fatal(err)
}
// Parse response
var metadata sdk.FileMetadata
response.AsType(&metadata)
Objective-C SDK
The Objective-C SDK provides Swift-compatible types for macOS plugins.
Installation
// Package.swift
dependencies: [
.package(url: "https://github.com/machfab/machfab-plugin-sdk-objc", from: "1.0.0")
]
Swift usage
import MACINAPluginSDK
// Create metadata
let metadata = MACINADocumentMetadata()
metadata.filePath = "/path/to/file.xyz"
metadata.documentType = "xyz"
metadata.fileSizeBytes = 1024
metadata.title = "Document Title"
metadata.authors.add("Author Name")
// Create outline
let outline = MACINADocumentOutline()
outline.sourceFile = "/path/to/file.xyz"
outline.documentType = "xyz"
outline.totalPages = 100
let entry = MACINAOutlineEntry()
entry.title = "Chapter 1"
entry.level = 0
entry.page = 1
outline.outlineEntries.add(entry)
// Create pages (simple array)
var pages = [MACINADisboundPage]()
let page = MACINADisboundPage(orderIndex: 1)
page.textContent = "Content..."
pages.append(page)
Standard caps
import MACINAPluginSDK
// Get standard cap URN strings
let extractMetadata = MACINAStandardizedCaps.extractMetadata
let extractOutline = MACINAStandardizedCaps.extractOutline
let grind = MACINAStandardizedCaps.grind
let generateThumbnail = MACINAStandardizedCaps.generateThumbnail