diff --git a/docs/html-protocol.md b/docs/html-protocol.md index 0de34eb..2b93022 100644 --- a/docs/html-protocol.md +++ b/docs/html-protocol.md @@ -96,39 +96,95 @@ Both forms are valid. Verifying clients should handle either case. Before hashing, content MUST be canonicalized: -1. Strip all HTML tags (extract text content only) -2. Collapse all whitespace sequences to a single space -3. Trim leading and trailing whitespace -4. Encode as UTF-8 +1. Parse the HTML and extract text nodes in document order +2. Strip all HTML markup (tags and attributes); only the text content contributes to the hash +3. Collapse all whitespace sequences to a single space +4. Trim leading and trailing whitespace +5. Apply the text normalization defined by the `@htmltrust/canonicalization` library (NFKC, quote normalization, dash normalization, invisible character stripping) +6. Encode as UTF-8 -The resulting string is hashed with SHA-256 and prefixed: `sha256:`. +The resulting string is hashed with SHA-256 and expressed as `sha256:`, where `` is the unpadded Base64 encoding of the 32-byte digest. + +### Text-only scope + +The canonicalization hashes **text content only**, not the HTML markup or attributes that surround it. This means an adversary with possession of signed text MAY: + +- Rewrap the text in misleading block elements (e.g., change an `

` to a `<s>` strikethrough) +- Alter link destinations (`href` values) on `<a>` elements surrounding the signed text +- Introduce, remove, or swap images and other media around the signed text + +These are **semantic integrity concerns**, not cryptographic ones. HTMLTrust addresses them through a layered design: + +1. **Domain binding** (see Signature Data Format below): signatures bind the content to a specific publication origin. A reader or crawler encountering signed content at an unexpected origin is alerted by signature check failure. +2. **Research and reputation path**: crawlers and researchers can trace signed content back to its canonical publication origin through the trust directory, flag imposter copies, and mark manipulated surrounding context. Over time the directory's reputation and reports surface altered copies to any consumer whose trust policy considers them. + +The layered design keeps cryptographic verification simple and portable across language implementations, while delegating semantic-integrity detection to the research ecosystem where it can evolve without breaking existing signatures. + +**Open design question**: a future revision MAY extend the hash to cover particularly meaningful attributes, especially `href` on `<a>` elements (since link-swap within the original publication origin is a phishing vector that domain-binding and research cannot address alone). Feedback on which attributes to cover is explicitly welcome. 
## Signature Data Format -The signature binds three values, concatenated with `:` separators: +The signature binds four values, concatenated with `:` separators: ``` -{contentHash}:{domain}:{authorId} +{content-hash}:{claims-hash}:{domain}:{signed-at} ``` +- `content-hash` — hash of the canonicalized text content (see above) +- `claims-hash` — SHA-256 hash of the canonical serialization of all inner `<meta>` claim elements, ordered lexically by name (ensures tamper-evident claim metadata) +- `domain` — the origin where the content is authoritatively published (anti-theft binding) +- `signed-at` — the ISO-8601 timestamp from the `<meta name="signed-at">` element + For example: ``` -sha256:a591a6d40bf420404a...146e:example.com:123e4567-e89b-12d3-a456-426614174000 +sha256:RAyBCvKT...:sha256:eFgHiJkL...:example.com:2025-05-01T10:30:00Z ``` -This string is signed with the author's private key using the specified algorithm. +The author's identity is **not** included in the binding because it is implicit in the keyid resolution step: any attempt to claim a signature under a different identity would resolve to a different public key and fail verification. This string is signed with the author's private key using the algorithm declared in the `algorithm` attribute. + +**Hash encoding (open feedback)**: hashes are encoded as unpadded Base64, which is shorter than hexadecimal by roughly one-third. Community feedback on alternative encodings (hex, Base32) for ecosystem alignment is welcome. ## Verification Flow -A verifying client (browser extension, crawler, etc.) performs these steps: +HTMLTrust separates verification into two distinct layers, per the specification: + +### Layer 1: Cryptographic verification (local, deterministic) + +A verifying client (browser extension, crawler, library) performs these steps **locally**, with no network calls beyond the key resolution step: 1. **Discover** `<signed-section>` elements in the page DOM 2. **Read** the `signature`, `keyid`, `algorithm`, and `content-hash` attributes -3. 
**Fetch** the author's public key from the URL in `keyid` -4. **Canonicalize** the adjacent or wrapped content and compute its SHA-256 hash -5. **Compare** the computed hash with `content-hash` (integrity check) -6. **Verify** the cryptographic signature against the public key (authenticity check) -7. **Optionally** query a trust directory for the author's reputation and endorsements +3. **Resolve** the `keyid` to a public key. The `keyid` may be a DID (e.g., `did:web:author.example`), a direct URL to a public key JSON document, or a trust directory reference. Implementations MUST accept multiple resolution methods. +4. **Canonicalize** the inner text content per the rules above and compute its hash +5. **Compare** the computed hash with the `content-hash` attribute (content integrity check) +6. **Compute** the `claims-hash` from the canonical serialization of inner `<meta>` claim elements +7. **Construct** the binding string `{content-hash}:{claims-hash}:{domain}:{signed-at}` +8. **Verify** the cryptographic signature over the binding string using the resolved public key and the declared `algorithm` + +This layer produces a deterministic yes/no result: either the signature is cryptographically valid or it is not. No server or directory is required for this step beyond whatever key resolution demands. + +### Layer 2: Trust decision (client policy) + +Given a cryptographically valid signature, the client then applies the **user's trust policy** to decide how to present the content. This layer is entirely client-side and may draw on: + +- A personal list of trusted keyids (option A) +- Trusted origin domains (option B) +- Endorsements from designated third parties (fetched from trust directories and independently verified) +- Reputation scores from one or more user-selected trust directories +- Local or cached revocation state +- Any combination of the above, weighted as the user configures + +The output is a trust score or ranking, **not** a binary verdict. 
User interfaces SHOULD present the outcome as a graduated signal (for example a red/yellow/green score) with hover or detail views exposing which inputs contributed to the final score. + +### Optional directory queries + +In addition to the two layers above, a client MAY query one or more trust directories for: + +- Author reputation (signer-level trust, ongoing curatorial opinion) +- Content endorsements (point-in-time attestations from third parties) +- Key revocation and reports + +These queries enrich the trust decision but are never required for signature verification itself. ## Multiple Signatures diff --git a/wordpress/admin/class-content-signing-admin-author-profiles.php b/wordpress/admin/class-content-signing-admin-author-profiles.php index 936a93f..3420f0d 100644 --- a/wordpress/admin/class-content-signing-admin-author-profiles.php +++ b/wordpress/admin/class-content-signing-admin-author-profiles.php @@ -73,7 +73,12 @@ public function render_page() { // Show the authors list $this->render_authors_list($authors); } -/** + ?> + + - - +`sign_post()`: + +1. WordPress fires `publish_post` (or `transition_post_status`). +2. `ContentSigning_Hooks::on_publish_post` -> `Signing_Service::process_post` + -> `Signing_Service::sign_post`. +3. `prepare_content_data()` strips HTML, runs the canonicalization PHP + binding (`HTMLTrust\Canonicalization\Canonicalize::normalize`), computes + `sha256:`, attaches the post author's claims. +4. `ContentSigning_API_Client::sign_content()` POSTs `{contentHash, domain, + claims}` to `/api/content/sign` with the author's API key + in `X-AUTHOR-API-KEY`. +5. The trust server holds the author's signing key, signs server-side, and + returns `{signature, ...}`. +6. We persist the signature row in `wp_content_signing_signatures` and + (separately, via the public/display class) emit a `` + wrapper around the content on render. 
+ +### What flows to the trust server + +Per request: content hash, domain, claims object, author API key (bearer +auth). The trust server learns: + +- That this author is publishing right now. +- The hash (and indirectly the size) of the content being published. +- The claim values for this content. +- Implicitly, the publication cadence and topic distribution of every + author on every site that uses this plugin. + +The trust server signs with a key it owns on behalf of the author. The +author never sees the private key. If the trust server is compromised, an +attacker can forge signatures from any of its authors, and there is no +way for the author to prove they did not author a forged piece. + +### Where the keys live + +In the trust server's database. The plugin only stores an encrypted *API +key* (`api_key_encrypted` / `author_api_key_encrypted` columns), which is +the bearer token used to authenticate signing requests, not the signing +key itself. + +## Spec Gap + +§2.2 (Identity and Key Resolution) makes the trust directory's role purely +optional convenience: `keyid` is pluggable across DIDs, direct URLs, and +trust-directory references; "none" is canonical; verification is local. + +§3.1 (Browser Behavior) reinforces that cryptographic verification is a +local operation in the user agent with no required network calls beyond +key resolution. A symmetric reading -- and the design intent -- is that +**signing is also a local operation**, performed by the author (or the +author's tooling) using a key the author controls. Anything else recreates +exactly the central-authority pattern the spec is built to avoid. + +The plugin as shipped today fails this test in three ways: + +1. **Key custody is wrong.** Authors do not hold their own keys; the trust + server does. +2. **Signing has a remote-call dependency.** Publishing fails closed if + the trust server is unreachable, even though the spec contemplates no + such dependency. +3. 
**The trust server learns about every publication.** That coupling is + incompatible with the "MAY submit to one or more trust directories" + language in §2.4 and the federated-by-design framing throughout. + +## Proposed Design + +### End-state: the editor signs + +Signing happens at publish time, in PHP on the WordPress server, using a +private key that the author (or the site) controls and that the plugin +loads from a configured key store. The trust directory is contacted only +if the site has opted in to publication notification, and only with the +already-signed blob. + +### Where the private key lives (three options, listed in deployment +preference order) + +**1. WebAuthn / hardware-backed key (best UX-to-security ratio for +single-author sites).** The author registers a hardware key (TouchID, +Yubikey, platform authenticator) via a JS flow in the post editor. The +key never leaves the device. Signing happens in the browser via the +WebAuthn `sign` operation, and the signed blob is POSTed back to the +WordPress REST API as part of the publish payload. PHP server-side never +sees the private key. + +Limitations: requires a recent browser; doesn't compose with non-browser +publishing paths (XML-RPC, mobile app, REST clients, scheduled posts, +cron-driven imports). For those, fall back to (2) or (3). + +**2. Encrypted key file in `wp-content/uploads/htmltrust/` (server-side +fallback).** The site admin generates an Ed25519 keypair via the plugin +UI or the bundled CLI tool, stores the encrypted private key on disk +(passphrase-derived KEK held in a `wp-config.php` constant, *not* the DB), +and configures author->key mappings in plugin settings. PHP loads the key +on demand for signing. + +Limitations: trust boundary is "anyone with filesystem access to +wp-content can attempt offline brute force on the passphrase". Acceptable +for managed VPS / single-tenant; not great for shared hosting. 
Mitigate +with sodium `crypto_secretbox` + Argon2id, and document that the +passphrase must not live in the database. + +**3. CLI / external signer (escape hatch).** Author runs an `htmltrust +sign` CLI tool on their laptop against an exported draft, pastes the +resulting signed blob into the post. No keys on the server at all. +Suitable for highly security-conscious authors and for the initial +phase-1 rollout where the editor integration doesn't exist yet. + +### Where canonicalization runs + +The PHP canonicalization binding (`@htmltrust/canonicalization` / +`HTMLTrust\Canonicalization\Canonicalize`) already exists and is used in +`Signing_Service::normalize_content`. **Keep it.** PHP-side +canonicalization is the canonical (sic) place to compute the content hash +because: + +- WordPress mutates content in non-trivial ways at publish time (shortcode + expansion, oEmbed substitution, `the_content` filters). The hash must + match what's actually rendered. +- A JS-side canonicalizer in the editor can compute a *preview* hash for + WebAuthn-style signing, but the authoritative hash is what PHP computes + after all filters have run, and that's what must be signed. + +For the WebAuthn path specifically, this means signing has to happen in +two passes: + +1. PHP server runs filters, computes canonical hash, returns hash to the + browser. +2. Browser presents the hash to the WebAuthn authenticator, gets a + signature, posts the signature back. +3. PHP server stores the signature alongside the hash and emits the + `` wrapper. + +This is fiddly but workable; the alternative (signing arbitrary +client-rendered preview) lets a malicious filter or plugin inject +unsigned content. + +### Publish-time flow (proposed) + +The proposed `wp_insert_post` / `transition_post_status` flow becomes: + +1. WordPress applies all `the_content` filters, shortcode expansion, etc. + (We piggyback on `the_content` to capture the rendered HTML at the + right moment, not the raw post body.) +2. 
PHP canonicalizes the rendered content; computes `content-hash`. +3. PHP gathers the claims (default + post-specific, same as today). +4. PHP builds the **claims-binding** structure (the canonicalized + key-sorted serialization of `{contentHash, domain, claims, signedAt, + keyid}` per the canonicalization spec). +5. Local signer signs the claims-binding: + - WebAuthn path: PHP returns the binding bytes to the editor JS, JS + calls `navigator.credentials.get(...)`, posts signature back. + - Server-key path: PHP loads encrypted key, decrypts with KEK from + `wp-config`, signs with `sodium_crypto_sign_detached`. + - CLI path: blob already signed at draft import time; PHP just stores + it. +6. PHP stores `{signature, keyid, content-hash, signed-at, claims}` in + post meta (one row per signature in the existing + `wp_content_signing_signatures` table; schema is already mostly + right). +7. The public-facing display layer reads post meta and emits the + `` wrapper at render time. (This already exists; + `ContentSigning_Display` just needs to read from a different source.) +8. *Optional*: notify a configured trust directory by POSTing + `{contentHash, signature, keyid, domain}` to its + `/api/content/notify` endpoint. The notification is fire-and-forget; + publish does not block on it. + +### What the trust directory still receives + +In the proposed design the trust directory is reduced from "co-signer" to +"index" -- and only when the site opts in. It receives: + +- The signed blob (already-signed; the directory cannot forge or alter + it). +- Optionally: a publication URL so the directory can crawl and verify. +- Optionally: claim values, for indexing/search. + +It does **not** receive private keys, API keys with signing authority, +content prior to signing, or anything that would let it impersonate the +author. + +The plugin should be able to operate with the trust directory disabled +entirely. 
+ +## Risks and Open Questions + +- **Key storage UX is the hardest problem.** Authors don't want to think + about keys. The CLI escape hatch is for power users; the WebAuthn path + is for typical authors; the server-key path is for everyone who can't + use either. We will ship all three but we need a clear default. +- **Multi-author sites: per-author keys vs. site key?** §2.2 implies + per-author; in practice, many WP sites have one editor publishing on + behalf of many "authors". Recommendation: support both, with explicit + UI for the choice. The site-key case is functionally an endorsement + pattern (site endorses author) -- align with §2.5. +- **Key rotation.** If a key is rotated, all previously-signed content + remains validly signed under the old key (signatures don't expire just + because the key did). We need (a) a way to publish key-rotation + metadata at the keyid resolution endpoint, and (b) UI to make this + obvious to authors. +- **Backup and recovery.** WebAuthn keys can't be exported; if the + hardware is lost, signed history remains valid but new content can't + be signed under that key. Server-key path needs documented backup + procedure (encrypted key file + KEK passphrase recovery sheet). +- **Mobile app authoring (Jetpack, WP mobile app).** These use the REST + API and don't have access to a local signer. Options: (a) have the + mobile app implement WebAuthn-equivalent signing; (b) accept that + mobile-published posts get signed server-side with a per-site key; (c) + delay signing until the next desktop edit. Probably (b) with explicit + UI labeling. +- **REST API authoring (headless, Gutenberg over REST).** Same shape as + mobile. Recommendation: REST publishers must include a `signature` + field in the publish payload, OR opt in to server-key signing. Reject + unsigned publishes only if "strict signing" is enabled. +- **What happens on republish / edit?** Re-signs with current timestamp. 
+ The previous signature row is preserved (we already have a one-row-per- + attempt schema). UI should expose the signature history. +- **Compatibility with existing installs.** Anyone who already created + trust-server-side keys via the current plugin will need a migration + path: (a) generate a new local key, (b) re-sign their existing content + under the new key, (c) deprecate the old server-side key. Make this a + one-click flow in the admin UI. + +## Suggested Rollout + +### Phase 1 — Ship a CLI signer (size: S) + +Build a standalone PHP/Node CLI tool (`htmltrust-sign`) that takes a file +or URL, canonicalizes the content, prompts for a passphrase, signs with +a local Ed25519 key, and emits a `` blob. The plugin +gains a "paste signed blob" workflow in the post meta box. + +This lets us validate the signed-blob shape end-to-end without touching +the trust-server signing endpoint at all, and gives security-conscious +authors a path that doesn't trust the server with anything. + +**Done when:** CLI exists, plugin accepts pasted blobs, e2e test passes +with CLI-signed content. + +### Phase 2 — Server-key signing (size: M) + +Implement option (2) from "where the private key lives": encrypted key +file in `wp-content/uploads/htmltrust/`, KEK from `wp-config.php` +constant, plugin settings UI for key generation / per-author mapping. + +The trust-server signing call becomes opt-in (off by default), and the +plugin signs locally with `sodium_crypto_sign_detached`. Schema changes: +add `key_id` and `key_fingerprint` columns to +`wp_content_signing_authors`; deprecate `author_api_key_encrypted`. + +Migration: existing installs get a one-click "switch to local signing" +flow that generates a new key and re-signs the latest revision of each +post. + +**Done when:** A site with no trust-server configured can publish signed +content; the trust-server signing endpoint is deprecated in admin UI. 
+ +### Phase 3 — WebAuthn-attested keys (size: M) + +Editor-side JS integration: WebAuthn registration flow in user profile, +WebAuthn signing flow at publish time. PHP-side: receive +`{publicKeyCredential, signature}` from the editor, verify the signature +against the stored credential, store the result. + +This is the best end-state UX for typical authors. It requires the +two-pass canonicalization flow described above. + +**Done when:** A user can register a hardware key via the WP admin UI +and publish a post that is signed entirely client-side, with no signing +key ever present on the server. + +### Phase 4 — Deprecate the trust-server signing endpoint (size: S) + +Remove `Signing_Service::sign_post`'s call to +`api_client->sign_content()`. Keep `api_client->sign_content` as a +client method (it's still useful for the trust server's own admin +tooling) but no plugin code path calls it. Mark +`X-AUTHOR-API-KEY`-bearing flows as deprecated in plugin docs. + +Trust-directory notification (`/api/content/notify`-style) replaces it, +and is opt-in per author. + +**Done when:** Removing the trust server entirely from a site's config +breaks nothing in the publish path. + +## Effort Summary + +| Phase | Description | Size | Notes | +| ----- | ------------------------------------ | ---- | ------------------------------------------------------ | +| 1 | CLI signer + paste-blob workflow | S | No infra changes; validates the wire format | +| 2 | Server-key signing (PHP-local) | M | Schema migration + UI + crypto; meaningful test surface | +| 3 | WebAuthn editor integration | M | New JS surface; two-pass canonicalization | +| 4 | Deprecate trust-server signing | S | Mostly removal + docs | + +Total: roughly two-to-three sprints of focused work, gated on Jason's +approval at each phase boundary. + +## Recommendation + +Approve phases 1 and 2 as a unit (they're a coherent migration and worth +shipping together). 
Phase 3 is the right end-state but introduces +non-trivial JS-side complexity; defer the go/no-go decision until phase +2 is in production for a few weeks. Phase 4 is mechanical once 1-3 are +in. + +Do **not** start work on any phase from this PR. This document exists +specifically so the redesign can be argued about before code moves. diff --git a/wordpress/includes/class-content-signing-activator.php b/wordpress/includes/class-content-signing-activator.php index 1b2d4c7..a1e47ed 100644 --- a/wordpress/includes/class-content-signing-activator.php +++ b/wordpress/includes/class-content-signing-activator.php @@ -16,11 +16,23 @@ class ContentSigning_Activator { * * Creates the necessary database tables for the plugin. * + * Idempotent by construction: + * - create_database_tables() uses dbDelta(), which diffs and ALTERs to + * match the desired schema rather than failing on existing tables. + * - set_default_options() uses add_option(), which is a no-op when the + * option already exists. + * + * Safe to call repeatedly. Also called lazily from + * ContentSigning_Plugin::maybe_install_schema() to recover from edge + * cases where the activation hook never fired (manual installs, site + * clones, etc.). + * * @since 1.0.0 */ public static function activate() { self::create_database_tables(); self::set_default_options(); + update_option('content_signing_db_version', CONTENT_SIGNING_VERSION); } /** diff --git a/wordpress/includes/class-content-signing-plugin.php b/wordpress/includes/class-content-signing-plugin.php index 7ebaf68..d17b45b 100644 --- a/wordpress/includes/class-content-signing-plugin.php +++ b/wordpress/includes/class-content-signing-plugin.php @@ -107,6 +107,15 @@ private function __construct() { $this->load_dependencies(); } + /** + * Track whether components have been initialized to avoid double-init. + * + * @since 1.0.1 + * @access private + * @var bool + */ + private $initialized = false; + /** * Load the required dependencies for this plugin. 
* @@ -144,57 +153,116 @@ private function load_dependencies() { /** * Run the plugin. * + * Defers all DB-touching component initialization to the WordPress 'init' + * action. This is important because the plugin file is loaded on every + * request -- including the activation request -- and at plugin-load time + * our custom tables (wp_content_signing_servers et al.) may not exist yet. + * + * Historical bug: init_components() previously ran synchronously here and + * called $db->get_default_server(), which executes a SELECT against + * wp_content_signing_servers. On a freshly-installed site, the activator + * has not yet created that table when this code path is reached, causing + * a fatal "table doesn't exist" error and breaking `wp plugin activate`. + * + * Deferring to 'init' means components are built only after WordPress is + * fully bootstrapped and after register_activation_hook has had a chance + * to run dbDelta. Hook *registration* (which is metadata-only and does + * not query the DB) happens immediately so that we don't miss the early + * action ordering that some hooks depend on -- but the registration + * itself is just attaching a closure to 'init', not building anything. + * * @since 1.0.0 * @return void */ public function run() { - // Initialize components - $this->init_components(); - - // Register hooks - $this->hooks->register_hooks(); - $this->scheduler->register_hooks(); - $this->public->register_hooks(); + // Defer real component construction until WP is ready and our tables + // are guaranteed to exist (post-activation). Priority 5 so we are + // ready before most other 'init' callers, but after WP core init. + add_action('init', array($this, 'init_components'), 5); } /** * Initialize the plugin components. * + * Public so it can be wired as an 'init' action callback. Idempotent: + * safe to call multiple times -- subsequent calls are no-ops. 
+ * * @since 1.0.0 - * @access private * @return void */ - private function init_components() { - // Initialize database + public function init_components() { + if ($this->initialized) { + return; + } + $this->initialized = true; + + // Defensive: make sure tables exist before we ever touch them. This + // protects against edge cases where the plugin file loads in a + // request context that bypassed normal activation (e.g. a manual + // 'must-use' install, or a site clone where the activation hook + // never fired). Cheap because we gate on a version option. + $this->maybe_install_schema(); + + // Initialize database (constructor only stores wpdb refs / table names; + // does not query). $this->db = new ContentSigning_DB(); - - // Initialize scheduler + + // Initialize scheduler (constructor stores db ref only). $this->scheduler = new ContentSigning_Scheduler($this->db); - - // Get default server + + // Get default server -- this DOES query the DB. Now safe because + // we are inside 'init' and tables exist. $default_server = $this->db->get_default_server(); $api_url = ''; $general_api_key = ''; - + if ($default_server) { $api_url = $default_server->api_url; $general_api_key = $this->db->decrypt($default_server->api_key_encrypted); } - + // Initialize API client $this->api_client = new ContentSigning_API_Client($api_url, $general_api_key, $this->db); - + // Initialize signing service $this->signing_service = new ContentSigning_Signing_Service($this->db, $this->api_client, $this->scheduler); - + // Initialize admin $this->admin = new ContentSigning_Admin($this->db, $this->api_client); - + // Initialize public $this->public = new ContentSigning_Public($this->db, $this->api_client); - + // Initialize hooks $this->hooks = new ContentSigning_Hooks($this->signing_service, $this->admin); + + // Register the per-component hooks now that components exist. 
+ $this->hooks->register_hooks(); + $this->scheduler->register_hooks(); + $this->public->register_hooks(); + } + + /** + * Ensure the plugin's schema is present, idempotently. + * + * Gated on the 'content_signing_db_version' option so we only re-run + * dbDelta when the bundled version differs from the installed version. + * dbDelta itself is idempotent (it diffs and ALTERs to match), so + * calling this on every load would be safe but wasteful. + * + * @since 1.0.1 + * @access private + * @return void + */ + private function maybe_install_schema() { + $installed = get_option('content_signing_db_version'); + if ($installed === CONTENT_SIGNING_VERSION) { + return; + } + + require_once CONTENT_SIGNING_PLUGIN_DIR . 'includes/class-content-signing-activator.php'; + ContentSigning_Activator::activate(); + update_option('content_signing_db_version', CONTENT_SIGNING_VERSION); } /**