External Tool Validation
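Validate DBOM documents against the published Makoto schema using five popular tools across four ecosystems (shell, Python, Node.js, Go) — no Makoto SDK required. The JSON Schema examples (Python jsonschema, Node.js ajv, Go) fetch the schema at runtime with an offline fallback; the jq and pydantic examples encode the rules locally and need no network access.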
On this page
Sample DBOM Document
Save this as example.dbom.json — all examples below use it.
example.dbom.json
{
"schema_version": "0.1",
"id": "dbom-550e8400-e29b-41d4-a716-446655440000",
"created_at": "2024-01-15T10:30:00Z",
"source": {
"uri": "s3://my-bucket/data/customers-2024-01.parquet",
"hash": {
"algorithm": "sha256",
"value": "a3f5c2d1e8b4907634f12e9a0c5d8b72e1f4a6c9d2e7b0f3a8c5d1e4b7f2a9c6"
},
"format": "parquet"
},
"signature": {
"algorithm": "sha256",
"value": "b4e6d3c2f9a1805745023fba1d6e9c83f2a5b7da3f8c1e4b7f2a9c6d1e4b7f2a",
"signer": "github:data-platform-bot"
},
"lineage": [
{
"step": 1,
"description": "Raw ingestion from CRM export",
"tool": "acme-ingester/2.1.0",
"input_hash": "n/a",
"output_hash": "a3f5c2d1e8b4907634f12e9a0c5d8b72e1f4a6c9d2e7b0f3a8c5d1e4b7f2a9c6"
},
{
"step": 2,
"description": "PII redaction via presidio",
"tool": "presidio/2.2.354",
"input_hash": "a3f5c2d1e8b4907634f12e9a0c5d8b72e1f4a6c9d2e7b0f3a8c5d1e4b7f2a9c6",
"output_hash": "c5f7e4d3b2a1908856134acb2e7f0d94a3b6c8eb4a9d2f5c8f3b0a4a7e1b8a3b"
}
]
}
bash jq
validate-dbom.sh
#!/usr/bin/env bash
# validate-dbom.sh — exits 0 on valid, 1 on any violation
DBOM_FILE="${1:-example.dbom.json}"
# jq doesn't implement full JSON Schema, but enforces all required fields
# and key constraints as a lightweight pre-flight check.
# For full JSON Schema compliance, use the Python or Node.js examples below.
jq --exit-status '
has("schema_version") and
has("id") and
has("created_at") and
has("source") and
has("signature") and
has("lineage") and
.schema_version == "0.1" and
(.id | startswith("dbom-")) and
(.source | has("uri") and has("hash") and has("format")) and
(.source.hash.algorithm == "sha256") and
(.source.hash.value | test("^[a-f0-9]{64}$")) and
(.signature | has("algorithm") and has("value") and has("signer")) and
(.signature.algorithm == "sha256") and
(.lineage | length >= 1) and
(.lineage | all(
has("step") and has("description") and
has("tool") and has("input_hash") and has("output_hash")
))
' "$DBOM_FILE" > /dev/null
if [ $? -eq 0 ]; then
echo "✓ $DBOM_FILE is valid"
else
echo "✗ $DBOM_FILE failed validation"
exit 1
fi
chmod +x validate-dbom.sh
./validate-dbom.sh example.dbom.json
# → ✓ example.dbom.json is valid
# Use in a pipeline
for f in *.dbom.json; do ./validate-dbom.sh "$f" || exit 1; done
Note:
jq doesn't implement the full JSON Schema spec — no format validation on date-time, no $ref resolution. Use it for fast field-presence checks; use the Python jsonschema, Node.js, or Go examples for full compliance.
Python jsonschema
validate_dbom.py
#!/usr/bin/env python3
"""Validates a DBOM JSON file against the published Makoto schema."""
import json, sys
from pathlib import Path
import jsonschema, requests
SCHEMA_URL = "https://usemakoto.dev/schema/v0.1.json"
# Offline? Pre-download: curl -o schema.json https://usemakoto.dev/schema/v0.1.json
OFFLINE_SCHEMA = Path("schema.json")
def fetch_schema(url, timeout=5):
    """Return the parsed JSON schema from *url*, falling back to a local copy.

    Args:
        url: Location of the published schema.
        timeout: Seconds passed to ``requests.get`` as the request timeout.

    Returns:
        The schema decoded from JSON.

    Raises:
        RuntimeError: If the download fails and no local ``schema.json``
            exists. The original network error is chained as the cause.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        return r.json()
    # RequestException covers connection, timeout and HTTP-status failures;
    # ValueError covers a response body that is not valid JSON.
    except (requests.RequestException, ValueError) as e:
        if OFFLINE_SCHEMA.exists():
            print(f" Warning: network error ({e}), using local schema.json", file=sys.stderr)
            # JSON is UTF-8 by specification; do not depend on the locale.
            return json.loads(OFFLINE_SCHEMA.read_text(encoding="utf-8"))
        raise RuntimeError(
            f"Cannot fetch schema and no local schema.json found: {e}"
        ) from e
def validate(dbom_path):
    """Validate the DBOM file at *dbom_path* against the Makoto schema.

    Prints a one-line verdict, followed by one line per schema violation
    when the document is invalid.

    Args:
        dbom_path: Path of the DBOM JSON file to check.

    Returns:
        True when the document satisfies the schema. False when it violates
        the schema, cannot be read, or is not well-formed JSON.
    """
    schema = fetch_schema(SCHEMA_URL)
    try:
        # JSON is UTF-8 by specification; do not depend on the locale.
        dbom = json.loads(Path(dbom_path).read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        # An unreadable or malformed file is an invalid DBOM, not a crash.
        print(f"✗ {dbom_path} failed validation: {e}")
        return False
    v = jsonschema.Draft202012Validator(schema, format_checker=jsonschema.FormatChecker())
    errors = list(v.iter_errors(dbom))
    if not errors:
        print(f"✓ {dbom_path} is valid")
        return True
    print(f"✗ {dbom_path} failed validation ({len(errors)} error(s)):")
    for err in errors:
        path = " → ".join(str(p) for p in err.absolute_path) or "(root)"
        print(f" [{path}] {err.message}")
    return False
if __name__ == "__main__":
path = sys.argv[1] if len(sys.argv) > 1 else "example.dbom.json"
sys.exit(0 if validate(path) else 1)
python validate_dbom.py example.dbom.json
# → ✓ example.dbom.json is valid
# In CI
python validate_dbom.py artifact.dbom.json || exit 1
Python pydantic
dbom_model.py
#!/usr/bin/env python3
"""Pydantic models for DBOM v0.1 — parse-and-validate with typed result."""
import re, sys, json
from datetime import datetime
from pathlib import Path
from typing import Literal
from pydantic import BaseModel, field_validator, model_validator
SHA256_RE = re.compile(r"^[a-f0-9]{64}$")
class HashInfo(BaseModel):
    """A content digest: the algorithm name and its hex-encoded value."""

    # Only "sha256" is accepted.
    algorithm: Literal["sha256"]
    # Digest value; must be exactly 64 lowercase hex characters.
    value: str

    @field_validator("value")
    @classmethod
    def must_be_hex64(cls, v):
        """Reject any value that is not exactly 64 lowercase hex characters."""
        # fullmatch, not match: with match() the pattern's trailing "$" also
        # matches just before a final newline, so "<64 hex>\n" would pass.
        if not SHA256_RE.fullmatch(v):
            raise ValueError("hash value must be 64 lowercase hex characters")
        return v
class Source(BaseModel):
    """Location, content hash, and format of the dataset a DBOM describes."""
    # URI of the dataset; any string is accepted (no scheme check here).
    uri: str
    # Digest of the dataset contents, validated by HashInfo.
    hash: HashInfo
    # Format label; any string is accepted.
    format: str
class Signature(BaseModel):
    """The DBOM's signature block: algorithm, hex value, and signer identity."""

    # Only "sha256" is accepted.
    algorithm: Literal["sha256"]
    # Signature value; must be exactly 64 lowercase hex characters.
    value: str
    # Identifier of the signing party; any string is accepted.
    signer: str

    @field_validator("value")
    @classmethod
    def must_be_hex64(cls, v):
        """Reject any value that is not exactly 64 lowercase hex characters."""
        # fullmatch, not match: with match() the pattern's trailing "$" also
        # matches just before a final newline, so "<64 hex>\n" would pass.
        if not SHA256_RE.fullmatch(v):
            raise ValueError("signature value must be 64 lowercase hex characters")
        return v
class LineageStep(BaseModel):
    """One entry in a DBOM's lineage list."""

    # 1-based step number (checked by step_positive).
    step: int
    description: str
    tool: str
    # Hashes are kept as plain strings; no format check is applied here.
    input_hash: str
    output_hash: str

    @model_validator(mode="after")
    def step_positive(self):
        """Accept the step only when its number is at least 1."""
        if self.step >= 1:
            return self
        raise ValueError("step must be >= 1")
class DBOM(BaseModel):
    """Top-level DBOM v0.1 document."""

    # Only schema version "0.1" is accepted.
    schema_version: Literal["0.1"]
    # Document identifier; must carry the "dbom-" prefix.
    id: str
    created_at: datetime
    source: Source
    signature: Signature
    # Processing history; must hold at least one step.
    lineage: list[LineageStep]

    @field_validator("id")
    @classmethod
    def id_prefix(cls, v):
        """Require the identifier to begin with "dbom-"."""
        if v.startswith("dbom-"):
            return v
        raise ValueError('id must start with "dbom-"')

    @field_validator("lineage")
    @classmethod
    def lineage_nonempty(cls, v):
        """Require at least one lineage step."""
        if not v:
            raise ValueError("lineage must contain at least one step")
        return v
def load_dbom(path: str) -> DBOM:
    """Parse and validate the DBOM file at *path*.

    Args:
        path: Path of the DBOM JSON file.

    Returns:
        The validated ``DBOM`` model.

    Raises:
        pydantic.ValidationError: If the document violates the model.
        OSError: If the file cannot be read.
        ValueError: If the file is not UTF-8 or not well-formed JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    raw = Path(path).read_text(encoding="utf-8")
    return DBOM.model_validate(json.loads(raw))
if __name__ == "__main__":
path = sys.argv[1] if len(sys.argv) > 1 else "example.dbom.json"
try:
dbom = load_dbom(path)
print(f"✓ {path} is valid")
print(f" id: {dbom.id}")
print(f" created: {dbom.created_at.isoformat()}")
print(f" source: {dbom.source.uri}")
print(f" steps: {len(dbom.lineage)}")
except Exception as e:
print(f"✗ {path} failed validation:\n {e}", file=sys.stderr)
sys.exit(1)
python dbom_model.py example.dbom.json
# → ✓ example.dbom.json is valid
# id: dbom-550e8400-...
# created: 2024-01-15T10:30:00+00:00
# source: s3://my-bucket/data/customers-2024-01.parquet
# steps: 2
JS Node.js · ajv
validate-dbom.mjs
#!/usr/bin/env node
// Usage: node validate-dbom.mjs example.dbom.json
// Offline? Pre-download: curl -o schema.json https://usemakoto.dev/schema/v0.1.json
import Ajv2020 from "ajv/dist/2020.js";
import addFormats from "ajv-formats";
import { readFileSync, existsSync } from "fs";
const SCHEMA_URL = "https://usemakoto.dev/schema/v0.1.json";
const OFFLINE_SCHEMA = "schema.json";
/**
 * Download the schema from `url`, or fall back to the local schema.json.
 *
 * The request is aborted after 5 seconds. Any failure (network, non-2xx
 * status, bad JSON body) triggers the fallback.
 *
 * @param {string} url - Location of the published schema.
 * @returns {Promise<object>} The parsed schema.
 * @throws {Error} If the download fails and no local schema.json exists.
 */
async function fetchSchema(url) {
  try {
    const response = await fetch(url, { signal: AbortSignal.timeout(5000) });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    return await response.json();
  } catch (err) {
    // No local copy to fall back on: surface the original failure.
    if (!existsSync(OFFLINE_SCHEMA)) {
      throw new Error(`Cannot fetch schema and no local schema.json found: ${err.message}`);
    }
    console.warn(` Warning: network error (${err.message}), using local schema.json`);
    const raw = readFileSync(OFFLINE_SCHEMA, "utf8");
    return JSON.parse(raw);
  }
}
/**
 * Validate the DBOM file at `dbomPath` against the Makoto schema.
 *
 * Logs a one-line verdict, plus one line per violation when invalid.
 *
 * @param {string} dbomPath - Path of the DBOM JSON file.
 * @returns {Promise<boolean>} true when the document satisfies the schema.
 */
async function validate(dbomPath) {
  const schema = await fetchSchema(SCHEMA_URL);
  const dbom = JSON.parse(readFileSync(dbomPath, "utf8"));

  // allErrors: collect every violation rather than stopping at the first.
  const ajv = new Ajv2020({ allErrors: true });
  addFormats(ajv); // enables "date-time" format

  if (!ajv.validate(schema, dbom)) {
    const problems = ajv.errors;
    console.error(`✗ ${dbomPath} failed validation (${problems.length} error(s)):`);
    problems.forEach((problem) => {
      console.error(` [${problem.instancePath || "(root)"}] ${problem.message}`);
    });
    return false;
  }

  console.log(`✓ ${dbomPath} is valid`);
  return true;
}
const path = process.argv[2] ?? "example.dbom.json";
process.exit((await validate(path)) ? 0 : 1);
node validate-dbom.mjs example.dbom.json
# → ✓ example.dbom.json is valid
# Validate all DBOMs in a directory
for f in *.dbom.json; do node validate-dbom.mjs "$f" || exit 1; done
Go santhosh-tekuri/jsonschema
validate_dbom.go
// Usage: go run validate_dbom.go example.dbom.json
// Offline? Pre-download: curl -o schema.json https://usemakoto.dev/schema/v0.1.json
package main
import (
"fmt"
"io"
"net/http"
"os"
"time"
"github.com/santhosh-tekuri/jsonschema/v6"
)
const schemaURL = "https://usemakoto.dev/schema/v0.1.json"
const offlineFile = "schema.json"
// fetchSchema downloads the published schema with a 5-second timeout and
// returns the response body. The caller must close the returned reader.
// Any status other than 200 is reported as an error.
func fetchSchema() (io.ReadCloser, error) {
	httpClient := &http.Client{Timeout: 5 * time.Second}

	response, err := httpClient.Get(schemaURL)
	if err != nil {
		return nil, err
	}
	if response.StatusCode == http.StatusOK {
		return response.Body, nil
	}

	// Close the body on the error path so the connection is not leaked.
	response.Body.Close()
	return nil, fmt.Errorf("HTTP %d", response.StatusCode)
}
// compileSchema loads the Makoto schema and compiles it. The schema is
// fetched from schemaURL; when that fails, the local offlineFile is used.
//
// It returns an error when neither source is available, when the schema
// is not well-formed JSON, or when compilation fails.
func compileSchema() (*jsonschema.Schema, error) {
	r, err := fetchSchema()
	if err != nil {
		fmt.Fprintf(os.Stderr, " Warning: network error (%v), trying local %s\n", err, offlineFile)
		f, ferr := os.Open(offlineFile)
		if ferr != nil {
			return nil, fmt.Errorf("cannot fetch schema and no local %s found: %w", offlineFile, err)
		}
		r = f
	}
	defer r.Close()

	// jsonschema v6: AddResource takes a decoded JSON value, not a reader,
	// so the stream must be decoded with UnmarshalJSON first.
	doc, err := jsonschema.UnmarshalJSON(r)
	if err != nil {
		return nil, fmt.Errorf("parsing schema: %w", err)
	}

	c := jsonschema.NewCompiler()
	// For draft 2020-12 "format" is annotation-only by default. Turn on
	// assertion so date-time values are actually checked.
	c.AssertFormat()
	if err := c.AddResource(schemaURL, doc); err != nil {
		return nil, fmt.Errorf("adding schema: %w", err)
	}
	return c.Compile(schemaURL)
}
// validate checks the DBOM file at dbomPath against the compiled Makoto
// schema. It returns nil when the document is valid; otherwise it returns
// the schema, file, parse or validation error.
func validate(dbomPath string) error {
	compiled, err := compileSchema()
	if err != nil {
		return fmt.Errorf("compiling schema: %w", err)
	}

	file, err := os.Open(dbomPath)
	if err != nil {
		return fmt.Errorf("opening file: %w", err)
	}
	defer file.Close()

	document, err := jsonschema.UnmarshalJSON(file)
	if err != nil {
		return fmt.Errorf("parsing JSON: %w", err)
	}

	return compiled.Validate(document)
}
func main() {
path := "example.dbom.json"
if len(os.Args) > 1 {
path = os.Args[1]
}
if err := validate(path); err != nil {
fmt.Fprintf(os.Stderr, "✗ %s failed validation:\n %v\n", path, err)
os.Exit(1)
}
fmt.Printf("✓ %s is valid\n", path)
}
go run validate_dbom.go example.dbom.json
# → ✓ example.dbom.json is valid
# Build a static binary for CI containers
go build -o dbom-validate validate_dbom.go
./dbom-validate artifact.dbom.json
Comparison
| Tool | JSON Schema 2020-12 | date-time format | Offline fallback | Best for |
|---|---|---|---|---|
| jq (CLI) | ✗ field checks only | ✗ | ✓ | Shell pipelines, smoke tests |
| Python jsonschema | ✓ | ✓ (with [format-nongpl]) | ✓ | CI gates, scripting |
| Python pydantic | ✓ (via model) | ✓ | ✓ | Typed ingestion into downstream code |
| Node.js ajv | ✓ | ✓ (with ajv-formats) | ✓ | JS/TS services, tooling |
| Go jsonschema | ✓ | ✓ | ✓ | Long-running services, static binaries |
CI Integration
Validate every DBOM file on push with GitHub Actions:
.github/workflows/validate-dbom.yml
# Runs the Python jsonschema validator over every DBOM file in the repository
# whenever a push touches a *.dbom.json file.
name: Validate DBOM
on:
  push:
    paths: ["**/*.dbom.json"]
jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install Python deps
        # Quoted so the shell never treats the [extra] brackets as a glob.
        run: pip install "jsonschema[format-nongpl]" requests
      - name: Validate all DBOM files
        # NUL-delimited names with `read -r` keep paths containing spaces,
        # backslashes or newlines intact. The first failure ends the loop
        # with a non-zero status, which fails the step.
        run: |
          find . -name "*.dbom.json" -print0 | while IFS= read -r -d '' f; do
            python validate_dbom.py "$f" || exit 1
          done