From 9d82515dc76a855940c679ede1554105deb28611 Mon Sep 17 00:00:00 2001 From: erseco Date: Mon, 22 Jun 2026 09:12:57 +0100 Subject: [PATCH 1/6] Add content-based package_probe for legacy migration detection Introduce a shared classifier/resolver that decides migratability by the eXeLearning source a stored legacy package actually carries, rather than by filename or assumption. A package is migratable when content.xml (ODE 2.0) sits at the archive root (a native .elpx, a content.xml zip or an IMS export) or when it embeds exactly one safe .elpx; legacy .elp (contentv3.xml), source-less SCORM or web exports, multiple embedded .elpx, and corrupt archives are not. Detection reads only the ZIP central directory (preflight-cheap) and reuses the existing zip_utils path-traversal/symlink defences. Add helper_trait builders for the new package shapes and a unit test covering every branch. --- .../local/migration/source/package_probe.php | 155 +++++++++++++++ tests/classes/helper_trait.php | 48 +++++ tests/local/migration/package_probe_test.php | 183 ++++++++++++++++++ 3 files changed, 386 insertions(+) create mode 100644 classes/local/migration/source/package_probe.php create mode 100644 tests/local/migration/package_probe_test.php diff --git a/classes/local/migration/source/package_probe.php b/classes/local/migration/source/package_probe.php new file mode 100644 index 0000000..d7a227a --- /dev/null +++ b/classes/local/migration/source/package_probe.php @@ -0,0 +1,155 @@ +. + +/** + * Content-based detection of a migratable eXeLearning source inside a stored + * legacy package (issue #13 #3, DEC-0050). + * + * @package mod_exelearning + * @copyright 2026 ATE (Área de Tecnología Educativa) + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ + +namespace mod_exelearning\local\migration\source; + +use mod_exelearning\local\zip_utils; + +/** + * Decides whether a stored legacy package carries a migratable eXeLearning source + * and resolves it to a temporary path for installation. + * + * The migratability marker is an eXeLearning ODE 2.0 `content.xml`. A package is + * migratable when: + * - `content.xml` sits at the archive root — a native `.elpx`, a content.xml + * `.zip`, an IMS Content Package, or an eXeLearning web export that bundles its + * source (resolved by installing the whole archive); or + * - the archive embeds exactly one safe `.elpx` — an eXeLearning SCORM export + * wrapping its editable source (resolved by extracting only that entry). + * + * Everything else is not migratable: legacy `.elp` (which carries `contentv3.xml`, + * not `content.xml`), a source-less SCORM package, a plain web export with no + * bundled source, more than one embedded `.elpx` (ambiguous), or a corrupt/ + * unreadable archive. The caller leaves the legacy activity untouched — the + * migration never deletes the source, so a skipped package loses no data. + * + * Both source handlers (mod_exeweb, mod_exescorm) share this single detector so + * the rule lives in exactly one place. Detection reads only the ZIP central + * directory (no extraction), keeping the preflight pass cheap. + */ +final class package_probe { + /** + * The ODE 2.0 source marker expected at the archive root. + * + * Mirrors {@see \mod_exelearning\local\package_manager::validate_content_xml()}, + * the same root-level `content.xml` test the upload form uses to accept a package. + * + * @var string + */ + private const CONTENT_XML = 'content.xml'; + + /** + * Classifies a stored package by the eXeLearning source it carries. + * + * Never extracts and never throws: an unreadable archive is downgraded to a + * nosource classification, matching the source_interface contract. + * + * @param \stored_file $pkg The stored legacy package (any zip/.elpx). + * @param int|null $itemid Resolved package itemid, threaded back into the + * classification for the mod_exeweb revision fallback. + * @return classification + */ + public static function classify(\stored_file $pkg, ?int $itemid = null): classification { + // Read only the central directory (preflight-cheap): no extraction. + $entries = $pkg->list_files(get_file_packer('application/zip')); + if (!is_array($entries)) { + // Corrupt or unreadable archive: nothing we can recover. + return classification::nosource(); + } + + $elpx = []; + foreach ($entries as $entry) { + if (!empty($entry->is_directory)) { + continue; + } + // A content.xml at the archive root is the genuine ODE 2.0 marker and + // takes precedence: the whole archive is installable as-is, exactly + // like a native .elpx (which is itself a zip with content.xml at root). + if ($entry->pathname === self::CONTENT_XML) { + return classification::ok(null, $itemid); + } + if (str_ends_with(strtolower($entry->pathname), '.elpx')) { + // The entry name is attacker-influenced (an uploaded SCORM zip can + // embed an .elpx under a path-traversal / absolute / backslash / + // stream-wrapper name). Drop any unsafe entry so it is never + // selected for extraction; an otherwise-fine package then degrades + // to nosource, exactly as if it carried no usable .elpx at all. + if (zip_utils::is_unsafe_zip_entry($entry->pathname)) { + continue; + } + $elpx[] = $entry->pathname; + } + } + + return match (count($elpx)) { + 0 => classification::nosource(), + 1 => classification::ok($elpx[0], $itemid), + default => classification::ambiguoussource(), + }; + } + + /** + * Resolves a classified package to a readable temporary path. + * + * Returns null when the verdict is not migratable. For a root-content.xml + * package the whole archive is copied out verbatim (install_package() extracts + * and validates it downstream). For an embedded .elpx only that single entry is + * extracted, with the same path-traversal / symlink defences the rest of the + * plugin uses. + * + * @param \stored_file $pkg The stored legacy package. + * @param classification $verdict The verdict returned by classify(). + * @return string|null Absolute path to a temporary package, or null. + */ + public static function resolve(\stored_file $pkg, classification $verdict): ?string { + if (!$verdict->is_ok()) { + return null; + } + $tmpdir = make_request_directory(); + if ($verdict->elpxentry === null) { + // Direct package (native .elpx or content.xml-bearing zip): copy it out + // verbatim. install_package() extracts and validates it downstream. + $tmp = $tmpdir . '/source.elpx'; + $pkg->copy_content_to($tmp); + return $tmp; + } + // Defence in depth: classify() already drops unsafe entries, but re-check + // here so resolve() never extracts a hostile name even if reached directly. + if (zip_utils::is_unsafe_zip_entry($verdict->elpxentry)) { + return null; + } + // Extract ONLY the embedded entry, not the whole archive. The packer drops + // the $onlyfiles filter when handed a stored_file, so copy the archive out + // first and extract from the path (cheap: one small entry). + $ziptmp = $tmpdir . '/package.zip'; + $pkg->copy_content_to($ziptmp); + get_file_packer('application/zip')->extract_to_pathname($ziptmp, $tmpdir, [$verdict->elpxentry]); + // Verify nothing escaped $tmpdir (no symlinks, every materialised path stays + // inside it) before trusting the resolved path. + zip_utils::assert_extraction_contained($tmpdir, 'migrateextractfailed'); + $path = $tmpdir . '/' . $verdict->elpxentry; + return is_file($path) ? $path : null; + } +} diff --git a/tests/classes/helper_trait.php b/tests/classes/helper_trait.php index 0283a69..01c58bb 100644 --- a/tests/classes/helper_trait.php +++ b/tests/classes/helper_trait.php @@ -105,6 +105,54 @@ protected function make_scorm_zip(array $elpxentries): string { return $zip; } + /** + * Builds a zip carrying a root content.xml (a genuine ODE 2.0 source) plus an + * index.html, with NO embedded .elpx. + * + * This is the shape of an eXeLearning content .zip / IMS export / web export + * that bundles its source: not named .elpx and not wrapping an .elpx, but still + * a migratable eXeLearning package because content.xml sits at the root. + * + * @return string Absolute path to the built zip. + */ + protected function make_content_xml_zip(): string { + $stage = make_request_directory(); + file_put_contents( + $stage . '/content.xml', + '' + . '' + ); + file_put_contents($stage . '/index.html', 'webweb export'); + $zip = make_request_directory() . '/content-xml.zip'; + get_file_packer('application/zip')->archive_to_pathname([ + 'content.xml' => $stage . '/content.xml', + 'index.html' => $stage . '/index.html', + ], $zip); + return $zip; + } + + /** + * Builds a legacy eXeLearning .elp zip: a contentv3.xml source with neither a + * content.xml nor an embedded .elpx. + * + * Used to assert that legacy .elp projects are out of scope (only content.xml + * packages migrate) and are reported nosource rather than migrated. + * + * @return string Absolute path to the built zip. + */ + protected function make_legacy_elp_zip(): string { + $stage = make_request_directory(); + file_put_contents( + $stage . '/contentv3.xml', + '' + ); + $zip = make_request_directory() . '/legacy.elp'; + get_file_packer('application/zip')->archive_to_pathname([ + 'contentv3.xml' => $stage . '/contentv3.xml', + ], $zip); + return $zip; + } + /** * Creates a fake sibling activity so list_sources() can be tested without the * sibling plugin installed: a minimal sibling table, a {modules} registration, a diff --git a/tests/local/migration/package_probe_test.php b/tests/local/migration/package_probe_test.php new file mode 100644 index 0000000..78a9a85 --- /dev/null +++ b/tests/local/migration/package_probe_test.php @@ -0,0 +1,183 @@ +. + +namespace mod_exelearning\local\migration; + +use advanced_testcase; +use mod_exelearning\local\migration\source\classification; +use mod_exelearning\local\migration\source\package_probe; +use mod_exelearning\tests\helper_trait; + +defined('MOODLE_INTERNAL') || die(); + +global $CFG; +require_once($CFG->dirroot . '/mod/exelearning/lib.php'); + +/** + * Unit tests for the shared content-based package probe (issue #13 #3, DEC-0050). + * + * The probe is the single source of truth for what is migratable: a package with a + * root content.xml (a native .elpx, a content.xml zip or an IMS export) or one + * embedding exactly one .elpx. Legacy .elp (contentv3.xml), source-less SCORM/web + * exports, several embedded .elpx, and corrupt archives are not migratable. + * + * @package mod_exelearning + * @category test + * @copyright 2026 ATE (Área de Tecnología Educativa) + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + * @covers \mod_exelearning\local\migration\source\package_probe + */ +final class package_probe_test extends advanced_testcase { + use helper_trait; + + /** + * The fixture .elpx path (a genuine package with content.xml at its root). + * + * @return string + */ + private function fixture(): string { + global $CFG; + return $CFG->dirroot . '/mod/exelearning/research/fixtures/elpx/actividad-evaluable.elpx'; + } + + /** + * Stores a file in a fresh module context and returns it as a stored_file. + * + * @param string $srcpath Path to the file to store. + * @param string $filename Stored file name. + * @return \stored_file + */ + private function stored(string $srcpath, string $filename): \stored_file { + [, $ctxid] = $this->create_empty_target(); + return $this->store_sibling_package($ctxid, 'mod_exescorm', $srcpath, $filename, 0); + } + + /** + * A native .elpx (content.xml at its root) is a direct, migratable package. + */ + public function test_native_elpx_is_direct(): void { + $pkg = $this->stored($this->fixture(), 'project.elpx'); + + $verdict = package_probe::classify($pkg); + + $this->assertTrue($verdict->is_ok()); + $this->assertNull($verdict->elpxentry); + $resolved = package_probe::resolve($pkg, $verdict); + $this->assertNotNull($resolved); + $this->assertFileExists($resolved); + } + + /** + * A non-.elpx zip carrying content.xml at its root (eXeLearning content zip / + * IMS / web export with source) is direct and migratable. + */ + public function test_root_content_xml_zip_is_direct(): void { + $pkg = $this->stored($this->make_content_xml_zip(), 'web-export.zip'); + + $verdict = package_probe::classify($pkg); + + $this->assertTrue($verdict->is_ok()); + $this->assertNull($verdict->elpxentry); + $this->assertFileExists(package_probe::resolve($pkg, $verdict)); + } + + /** + * A SCORM zip embedding exactly one .elpx is migratable via that single entry. + */ + public function test_single_embedded_elpx_is_ok(): void { + $pkg = $this->stored($this->make_scorm_zip(['content/elp.elpx']), 'scorm.zip'); + + $verdict = package_probe::classify($pkg); + + $this->assertTrue($verdict->is_ok()); + $this->assertSame('content/elp.elpx', $verdict->elpxentry); + $this->assertFileExists(package_probe::resolve($pkg, $verdict)); + } + + /** + * A SCORM zip embedding more than one .elpx is ambiguous (manual migration). + */ + public function test_multiple_embedded_elpx_is_ambiguous(): void { + $pkg = $this->stored($this->make_scorm_zip(['content/a.elpx', 'backup/b.elpx']), 'scorm.zip'); + + $this->assertSame( + migration_result::STATUS_AMBIGUOUSSOURCE, + package_probe::classify($pkg)->status + ); + $this->assertNull(package_probe::resolve($pkg, package_probe::classify($pkg))); + } + + /** + * A source-less SCORM zip (manifest only, no content.xml, no .elpx) is nosource. + */ + public function test_sourceless_scorm_is_nosource(): void { + $pkg = $this->stored($this->make_scorm_zip([]), 'scorm.zip'); + + $this->assertSame(migration_result::STATUS_NOSOURCE, package_probe::classify($pkg)->status); + $this->assertNull(package_probe::resolve($pkg, package_probe::classify($pkg))); + } + + /** + * A legacy .elp (contentv3.xml only) is out of scope and reported nosource. + */ + public function test_legacy_elp_is_nosource(): void { + $pkg = $this->stored($this->make_legacy_elp_zip(), 'legacy.elp'); + + $this->assertSame(migration_result::STATUS_NOSOURCE, package_probe::classify($pkg)->status); + } + + /** + * A corrupt (unreadable) archive is reported nosource, not an exception. + */ + public function test_corrupt_archive_is_nosource(): void { + $broken = make_request_directory() . '/broken.zip'; + file_put_contents($broken, 'this is not a real zip'); + $pkg = $this->stored($broken, 'broken.zip'); + + $this->assertSame(migration_result::STATUS_NOSOURCE, package_probe::classify($pkg)->status); + // The packer logs a developer-only debugging message for the unreadable zip. + $this->assertDebuggingCalled(); + } + + /** + * The resolved itemid is threaded back into the classification (exeweb fallback). + */ + public function test_itemid_is_threaded_into_classification(): void { + $pkg = $this->stored($this->fixture(), 'project.elpx'); + + $this->assertSame(7, package_probe::classify($pkg, 7)->itemid); + } + + /** + * resolve() returns null for a blocked verdict without touching the package. + */ + public function test_resolve_returns_null_for_blocked_verdict(): void { + $pkg = $this->stored($this->fixture(), 'project.elpx'); + + $this->assertNull(package_probe::resolve($pkg, classification::nosource())); + $this->assertNull(package_probe::resolve($pkg, classification::ambiguoussource())); + } + + /** + * resolve() refuses a hostile embedded entry name even if reached directly, + * as defence in depth behind classify()'s own filtering. + */ + public function test_resolve_rejects_unsafe_embedded_entry(): void { + $pkg = $this->stored($this->make_scorm_zip(['content/elp.elpx']), 'scorm.zip'); + + $this->assertNull(package_probe::resolve($pkg, classification::ok('../evil.elpx'))); + } +} From 1fc0448874a5a15e4718a3435840c1b6c45b4dbe Mon Sep 17 00:00:00 2001 From: erseco Date: Mon, 22 Jun 2026 09:13:13 +0100 Subject: [PATCH 2/6] Route exescorm_source through package_probe Replace the .elpx-filename + embedded-.elpx scan with the shared content-based probe (keeping the external/aiccurl/localsync short-circuit). A non-.elpx package carrying content.xml at its root (an eXeLearning content zip or IMS export) is now migratable as a direct package instead of being reported nosource; a single embedded .elpx still resolves as before. Add tests for the content.xml zip case and the legacy .elp exclusion. --- .../migration/source/exescorm_source.php | 67 ++++--------------- .../local/migration/exescorm_source_test.php | 34 ++++++++++ 2 files changed, 46 insertions(+), 55 deletions(-) diff --git a/classes/local/migration/source/exescorm_source.php b/classes/local/migration/source/exescorm_source.php index b113f0c..612eb87 100644 --- a/classes/local/migration/source/exescorm_source.php +++ b/classes/local/migration/source/exescorm_source.php @@ -108,7 +108,14 @@ public function list_sources(): array { } /** - * Classifies a source by its exescormtype and the layout of its stored package. + * Classifies a source by its exescormtype and the eXeLearning source its + * stored package carries. + * + * External / AICC-URL / synchronized types keep no migratable local snapshot + * and are unsupported up front. Otherwise the stored package is handed to the + * shared {@see package_probe}, which is migratable when it holds a root + * content.xml (a native .elpx, a content.xml zip, or an IMS export) or embeds + * exactly one .elpx. * * @param \stdClass $source A row from list_sources(). * @return classification @@ -125,40 +132,12 @@ public function classify(\stdClass $source): classification { return classification::nosource(); } - if (str_ends_with(strtolower($pkg->get_filename()), '.elpx')) { - // The package is itself the editable .elpx (embedded editor export). - return classification::ok(null); - } - - // SCORM zip: read only the central directory, no extraction (preflight-cheap). - $entries = $pkg->list_files(get_file_packer('application/zip')); - if (!is_array($entries)) { - // Corrupt or unreadable zip. - return classification::nosource(); - } - $elpx = []; - foreach ($entries as $entry) { - if (empty($entry->is_directory) && str_ends_with(strtolower($entry->pathname), '.elpx')) { - // The entry name is attacker-influenced (an uploaded SCORM zip can embed - // an .elpx under a path-traversal / absolute / backslash / stream-wrapper - // name). Drop any unsafe entry so it is never selected for extraction; - // an otherwise-fine package then degrades to nosource, exactly as if it - // carried no usable .elpx at all. - if (\mod_exelearning\local\zip_utils::is_unsafe_zip_entry($entry->pathname)) { - continue; - } - $elpx[] = $entry->pathname; - } - } - return match (count($elpx)) { - 0 => classification::nosource(), - 1 => classification::ok($elpx[0]), - default => classification::ambiguoussource(), - }; + return package_probe::classify($pkg); } /** - * Resolves a readable .elpx temp path: the package itself, or the single embedded entry. + * Resolves a readable package temp path: the package itself, or its single + * embedded .elpx entry. * * @param \stdClass $source A row from list_sources(). * @return string|null @@ -172,29 +151,7 @@ public function resolve_elpx(\stdClass $source): ?string { if (!$pkg) { return null; } - $tmpdir = make_request_directory(); - if ($verdict->elpxentry === null) { - // Direct .elpx package: copy it out verbatim. - $tmp = $tmpdir . '/source.elpx'; - $pkg->copy_content_to($tmp); - return $tmp; - } - // Defence in depth: classify() already drops unsafe entries, but re-check here - // so resolve_elpx() never extracts a hostile name even if reached directly. - if (\mod_exelearning\local\zip_utils::is_unsafe_zip_entry($verdict->elpxentry)) { - return null; - } - // Extract ONLY the embedded entry, not the whole SCORM. The packer drops the - // $onlyfiles filter when handed a stored_file, so copy the zip out first and - // extract from the path (cheap: one small entry instead of the whole package). - $ziptmp = $tmpdir . '/scorm.zip'; - $pkg->copy_content_to($ziptmp); - get_file_packer('application/zip')->extract_to_pathname($ziptmp, $tmpdir, [$verdict->elpxentry]); - // Verify nothing escaped $tmpdir (no symlinks, every materialised path stays - // inside it) before trusting the resolved path. - \mod_exelearning\local\zip_utils::assert_extraction_contained($tmpdir, 'migrateextractfailed'); - $path = $tmpdir . '/' . $verdict->elpxentry; - return is_file($path) ? $path : null; + return package_probe::resolve($pkg, $verdict); } /** diff --git a/tests/local/migration/exescorm_source_test.php b/tests/local/migration/exescorm_source_test.php index 8731d81..736937a 100644 --- a/tests/local/migration/exescorm_source_test.php +++ b/tests/local/migration/exescorm_source_test.php @@ -101,6 +101,40 @@ public function test_zip_without_elpx_is_nosource(): void { $this->assertNull($src->resolve_elpx($source)); } + /** + * A non-.elpx package carrying content.xml at its root (an eXeLearning content + * .zip / IMS export) is now migratable as a direct package. The old handler only + * matched a .elpx filename or an embedded .elpx, so this previously reported + * nosource; content-based detection recovers it. + */ + public function test_content_xml_zip_without_elpx_is_ok(): void { + [, $ctxid] = $this->create_empty_target(); + $zip = $this->make_content_xml_zip(); + $this->store_sibling_package($ctxid, 'mod_exescorm', $zip, 'content-export.zip', 0); + $source = $this->make_source_row(['contextid' => $ctxid, 'exescormtype' => 'local']); + + $src = new exescorm_source(); + $verdict = $src->classify($source); + + $this->assertTrue($verdict->is_ok()); + $this->assertNull($verdict->elpxentry); + $this->assertFileExists($src->resolve_elpx($source)); + } + + /** + * A legacy .elp (contentv3.xml, no content.xml) is out of scope: nosource. + */ + public function test_legacy_elp_is_nosource(): void { + [, $ctxid] = $this->create_empty_target(); + $zip = $this->make_legacy_elp_zip(); + $this->store_sibling_package($ctxid, 'mod_exescorm', $zip, 'legacy.elp', 0); + $source = $this->make_source_row(['contextid' => $ctxid, 'exescormtype' => 'local']); + + $src = new exescorm_source(); + $this->assertSame(migration_result::STATUS_NOSOURCE, $src->classify($source)->status); + $this->assertNull($src->resolve_elpx($source)); + } + /** * A SCORM zip embedding more than one .elpx is ambiguous (manual migration). */ From e250694418bb2341b2dd3daa40e5de9fa14df285 Mon Sep 17 00:00:00 2001 From: erseco Date: Mon, 22 Jun 2026 09:13:13 +0100 Subject: [PATCH 3/6] Route exeweb_source through package_probe Locate the stored package (revision itemid, with the restored-backup fallback scan) and defer migratability to the shared content-based probe. A package with no recoverable content.xml source is now reported nosource cleanly instead of classifying ok and failing at extraction. Add tests for the content.xml gate. --- .../local/migration/source/exeweb_source.php | 81 +++++++++++-------- tests/local/migration/exeweb_source_test.php | 33 ++++++++ 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/classes/local/migration/source/exeweb_source.php b/classes/local/migration/source/exeweb_source.php index 1b46bdf..c61a571 100644 --- a/classes/local/migration/source/exeweb_source.php +++ b/classes/local/migration/source/exeweb_source.php @@ -25,7 +25,7 @@ namespace mod_exelearning\local\migration\source; /** - * Treats mod_exeweb activities as read-only sources of native .elpx packages. + * Treats mod_exeweb activities as read-only sources of eXeLearning packages. * * mod_exeweb stores its package at itemid = {exeweb}.revision (see * mod_exeweb/classes/exeweb_package.php save_draft_file(), which calls @@ -34,6 +34,11 @@ * import_service read itemid 0 unconditionally and reported every real exeweb * activity as nosource; this handler fixes that, with a fallback scan for revision * drift (e.g. restored backups). + * + * Once located, the stored package is handed to the shared {@see package_probe} so + * migratability is decided by content (a recoverable eXeLearning content.xml) + * rather than assumed: a legacy package without an ODE source is reported nosource + * instead of being created as a degraded activity. */ final class exeweb_source implements source_interface { /** @@ -81,12 +86,48 @@ public function list_sources(): array { } /** - * Classifies a source: the package must exist in the `package` filearea. + * Classifies a source: locate the stored package, then defer to the shared + * content-based probe (migratable only when an eXeLearning content.xml is + * recoverable). The resolved itemid is threaded through so resolve_elpx() does + * not have to re-derive it. * * @param \stdClass $source A row from list_sources(). * @return classification */ public function classify(\stdClass $source): classification { + [$pkg, $itemid] = $this->locate_package($source); + if (!$pkg) { + return classification::nosource(); + } + return package_probe::classify($pkg, $itemid); + } + + /** + * Resolves the package (or its embedded .elpx) to a temporary path. + * + * @param \stdClass $source A row from list_sources(). + * @return string|null + */ + public function resolve_elpx(\stdClass $source): ?string { + [$pkg, $itemid] = $this->locate_package($source); + if (!$pkg) { + return null; + } + return package_probe::resolve($pkg, package_probe::classify($pkg, $itemid)); + } + + /** + * Locates the stored mod_exeweb package and the itemid it lives at. + * + * mod_exeweb stores the package at itemid = {exeweb}.revision and wipes the + * filearea on each save, so the documented location is tried first and a + * filearea scan (newest itemid wins) covers revision drift, e.g. restored + * backups. + * + * @param \stdClass $source A row from list_sources(). + * @return array{0:\stored_file|null,1:int} The package file (or null) and its itemid. + */ + private function locate_package(\stdClass $source): array { $fs = get_file_storage(); // Primary: the documented location, itemid = {exeweb}.revision. $files = $fs->get_area_files( @@ -98,7 +139,7 @@ public function classify(\stdClass $source): classification { false ); if ($files) { - return classification::ok(null, (int) $source->revision); + return [reset($files), (int) $source->revision]; } // Fallback: scan every itemid (covers revision drift, e.g. restored backups). $all = $fs->get_area_files( @@ -110,39 +151,11 @@ public function classify(\stdClass $source): classification { false ); if (!$all) { - return classification::nosource(); + return [null, 0]; } // The filearea is wiped on each save, so >1 file means drift: newest itemid wins. - return classification::ok(null, (int) reset($all)->get_itemid()); - } - - /** - * Copies the native .elpx out to a temporary path. - * - * @param \stdClass $source A row from list_sources(). - * @return string|null - */ - public function resolve_elpx(\stdClass $source): ?string { - $verdict = $this->classify($source); - if (!$verdict->is_ok()) { - return null; - } - $fs = get_file_storage(); - $files = $fs->get_area_files( - (int) $source->contextid, - 'mod_exeweb', - 'package', - $verdict->itemid, - 'id ASC', - false - ); - $pkg = reset($files); - if (!$pkg) { - return null; - } - $tmp = make_request_directory() . '/source.elpx'; - $pkg->copy_content_to($tmp); - return $tmp; + $pkg = reset($all); + return [$pkg, (int) $pkg->get_itemid()]; } /** diff --git a/tests/local/migration/exeweb_source_test.php b/tests/local/migration/exeweb_source_test.php index 977c4ea..0d34f56 100644 --- a/tests/local/migration/exeweb_source_test.php +++ b/tests/local/migration/exeweb_source_test.php @@ -98,6 +98,39 @@ public function test_empty_area_is_nosource(): void { $this->assertNull($src->resolve_elpx($source)); } + /** + * A content.xml zip (not named .elpx) stored by mod_exeweb is migratable: the + * handler classifies by content, not by filename. + */ + public function test_content_xml_zip_is_ok(): void { + [, $ctxid] = $this->create_empty_target(); + $this->store_sibling_package($ctxid, 'mod_exeweb', $this->make_content_xml_zip(), 'web.zip', 2); + $source = $this->make_source_row(['contextid' => $ctxid, 'revision' => 2]); + + $src = new exeweb_source(); + $verdict = $src->classify($source); + + $this->assertTrue($verdict->is_ok()); + $this->assertSame(2, $verdict->itemid); + $this->assertFileExists($src->resolve_elpx($source)); + } + + /** + * A stored package with no recoverable content.xml source (here a legacy .elp + * carrying contentv3.xml) is now reported nosource instead of being classified + * migratable and failing at extraction. The old handler returned ok whenever any + * file existed in the filearea, without inspecting it. + */ + public function test_package_without_content_xml_is_nosource(): void { + [, $ctxid] = $this->create_empty_target(); + $this->store_sibling_package($ctxid, 'mod_exeweb', $this->make_legacy_elp_zip(), 'legacy.elp', 1); + $source = $this->make_source_row(['contextid' => $ctxid, 'revision' => 1]); + + $src = new exeweb_source(); + $this->assertSame(migration_result::STATUS_NOSOURCE, $src->classify($source)->status); + $this->assertNull($src->resolve_elpx($source)); + } + /** * list_sources() enumerates site-wide exeweb activities with their revision. */ From f2e3614bcd1457ee70f8e3ed4af65cd4b1299c91 Mon Sep 17 00:00:00 2001 From: erseco Date: Mon, 22 Jun 2026 09:13:13 +0100 Subject: [PATCH 4/6] Document the content.xml migration criterion in lang strings Refine migratescormnote and migratestatus_nosource to state that activities are migrated only when an eXeLearning content.xml source can be recovered (root content.xml or a single embedded .elpx), and that legacy .elp and source-less SCORM/web exports are skipped. Reuses the existing nosource bucket; no new keys. --- lang/en/exelearning.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lang/en/exelearning.php b/lang/en/exelearning.php index a85447d..3ef4369 100644 --- a/lang/en/exelearning.php +++ b/lang/en/exelearning.php @@ -264,12 +264,12 @@ $string['migratepreflightready'] = 'Ready to migrate: {$a}'; $string['migratepreflightsummary'] = 'Total: {$a->total}. Already migrated: {$a->alreadymigrated}. Ready: {$a->migratable}. Blocked: {$a->blocked}.'; $string['migrateprogress'] = 'Migrating {$a->done}/{$a->total}: {$a->name}'; -$string['migratescormnote'] = 'SCORM activities are migrated only when an editable eXeLearning source can be recovered: either the stored package is itself an .elpx, or the SCORM zip embeds exactly one .elpx. Their grades are copied to the overall grade. Packages with no embedded source, with several embedded .elpx files, hosted externally, or kept in sync with an external URL are skipped.'; +$string['migratescormnote'] = 'SCORM activities are migrated only when an editable eXeLearning source can be recovered: the stored package contains content.xml at its root (a .elpx, an eXeLearning content .zip or an IMS export), or the SCORM zip embeds exactly one .elpx. Their grades are copied to the overall grade. Packages with no recoverable content.xml source (including legacy .elp projects and source-less SCORM or web exports), with several embedded .elpx files, hosted externally, or kept in sync with an external URL are skipped.'; $string['migratestatus_alreadymigrated'] = 'Already migrated'; $string['migratestatus_ambiguoussource'] = 'Skipped (multiple embedded .elpx files — migrate manually)'; $string['migratestatus_error'] = 'Error'; $string['migratestatus_migrated'] = 'Migrated'; -$string['migratestatus_nosource'] = 'Skipped (no importable source)'; +$string['migratestatus_nosource'] = 'Skipped (no eXeLearning content.xml source to import)'; $string['migratestatus_unsupported'] = 'Skipped (externally hosted or synchronized package)'; $string['migratesummary'] = 'Migrated: {$a->migrated}. Already migrated: {$a->alreadymigrated}. Skipped (no source): {$a->nosource}. Skipped (ambiguous): {$a->ambiguoussource}. Skipped (unsupported): {$a->unsupported}. Errors: {$a->error}.'; $string['migratetitle'] = 'Migrate to eXeLearning'; From 93f4bf8a75bee2e4297206b0a6e8bf5a3d8ea8d0 Mon Sep 17 00:00:00 2001 From: erseco Date: Mon, 22 Jun 2026 09:13:13 +0100 Subject: [PATCH 5/6] Test end-to-end install of a content.xml zip via exescorm_source Cover the detection gap through resolution and installation: a content.xml package stored under a non-.elpx name is detected, resolved and installed into a servable, graded activity. --- .../migration/migration_service_test.php | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/local/migration/migration_service_test.php b/tests/local/migration/migration_service_test.php index 1e8aaaa..91a2918 100644 --- a/tests/local/migration/migration_service_test.php +++ b/tests/local/migration/migration_service_test.php @@ -18,6 +18,7 @@ use advanced_testcase; use mod_exelearning\local\migration\source\classification; +use mod_exelearning\local\migration\source\exescorm_source; use mod_exelearning\local\migration\source\source_interface; use mod_exelearning\tests\helper_trait; use mod_exelearning\tests\stub_source; @@ -104,6 +105,48 @@ public function test_install_package_extracts_and_syncs_grade_items(): void { )); } + /** + * End-to-end: a content.xml package stored under a non-.elpx name is detected by + * exescorm_source, resolved, and installs into a servable, graded activity. + * + * Covers the detection gap (a content.xml package that is neither named .elpx nor + * embeds an .elpx) through resolution and installation: the old handler reported + * it nosource and never reached install_package(). + */ + public function test_content_xml_zip_resolves_and_installs(): void { + global $DB; + [$instance, $ctxid] = $this->create_empty_target(); + + // A separate source context holds the genuine package under a .zip name. + [, $srcctxid] = $this->create_empty_target(); + $this->store_sibling_package($srcctxid, 'mod_exescorm', $this->fixture(), 'content-export.zip', 0); + $source = $this->make_source_row(['contextid' => $srcctxid, 'exescormtype' => 'local']); + + $src = new exescorm_source(); + $verdict = $src->classify($source); + $this->assertTrue($verdict->is_ok()); + $this->assertNull($verdict->elpxentry); + + $path = $src->resolve_elpx($source); + $this->assertNotNull($path); + + migration_service::install_package($path, $instance, $ctxid); + + $fs = get_file_storage(); + $this->assertNotEmpty($fs->get_file( + $ctxid, + 'mod_exelearning', + 'content', + (int) $instance->revision, + '/', + 'index.html' + )); + $this->assertSame(2, $DB->count_records( + 'exelearning_grade_item', + ['exelearningid' => $instance->id, 'deleted' => 0] + )); + } + /** * install_package() rejects a corrupt package instead of recording an empty shell. */ From 80c3990bdd53433618b76fc13ce0178c5aed4ee3 Mon Sep 17 00:00:00 2001 From: erseco Date: Mon, 22 Jun 2026 09:13:13 +0100 Subject: [PATCH 6/6] Add research note AN-015 on content.xml migration detection Spanish research note documenting the package variants each legacy plugin stores, the content.xml migratability criterion and verdict table, why legacy .elp and source-less SCORM/web exports are excluded (no data loss; source kept), and the test matrix. --- .../AN-015-deteccion-content-xml-migracion.md | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 research/analisis/notas/AN-015-deteccion-content-xml-migracion.md diff --git a/research/analisis/notas/AN-015-deteccion-content-xml-migracion.md b/research/analisis/notas/AN-015-deteccion-content-xml-migracion.md new file mode 100644 index 0000000..d1a710c --- /dev/null +++ b/research/analisis/notas/AN-015-deteccion-content-xml-migracion.md @@ -0,0 +1,128 @@ +--- +id: AN-015 +titulo: "Detección por contenido en la migración legacy: criterio content.xml" +fecha: 2026-06-22 +fuentes: + - REPO-001 + - REPO-002 + - REPO-004 + - REPO-005 +relacionados: + - AN-004 + - DEC-0026 + - DEC-0050 +herramienta_ia: + interfaz: claude-code + modelo: claude-opus-4-8 +--- + +## Resumen + +Decisión explícita del producto (@erseco, 2026-06-22): la migración desde +`mod_exeweb` y `mod_exescorm` hacia `mod_exelearning` migra **únicamente los +paquetes de los que se puede recuperar un `content.xml` (ODE 2.0) de +eXeLearning**. Los proyectos `.elp` legacy (que llevan `contentv3.xml`, no +`content.xml`) y los paquetes SCORM/web "finales" sin fuente eXeLearning **no se +migran**: se detectan y se reportan, y la actividad legacy original se conserva +intacta (la migración nunca borra el origen, así que omitir un paquete no pierde +datos). Esto extiende AN-004 al lado de la migración: el plugin trabaja con la +fuente ODE `content.xml`, y el editor embebido, la sincronización de items +calificables y la validación del paquete dependen de ella. + +## Hechos citados (análisis de código) + +- **mod_exeweb** (REPO-002): una sola tabla `exeweb`; el paquete se guarda en la + filearea `package` con `itemid = {exeweb}.revision`, y los ficheros extraídos en + `content`. La validación de subida exige un fichero que case + `/^content(v\d+)?\.xml$/` (admite `content.xml` y el legacy `contentv3.xml`). + No hay campo de tipo de paquete: el origen (local/embedded/exeonline) no se + persiste. El fichero de arranque se guarda en `entrypath` + `entryname`. +- **mod_exescorm** (REPO-001): fork de `mod_scorm`. El campo `exescormtype` + distingue `local`, `localsync`, `embedded` (un `.elpx` guardado tal cual en + `package`, sin parsear como SCORM), `external` y `aiccurl` (URLs remotas, sin + copia local). El paquete local se extrae a la filearea `content`; el SCO de + arranque se resuelve desde `imsmanifest.xml` y se guarda en + `exescorm_scoes.launch`. +- **Exports de eXeLearning v4** (REPO-005): `.elpx`, IMS y export web "con + fuente" llevan `content.xml` en la raíz; los exports SCORM 1.2/2004 llevan + `imsmanifest.xml` y **no** `content.xml`; el export web simple lleva + `index.html` y **no** `content.xml`; el `.elp` legacy lleva `contentv3.xml`. El + marcador fiable y estable es `content.xml` (raíz) ``. +- **mod_exelearning** (estado previo): el motor de extracción + (`package_manager::extract_stored()`) ya es agnóstico al tipo — su única guarda + es un `index.html` en la raíz del contenido extraído. La carencia estaba en la + **detección de origen**: + - `exescorm_source` solo consideraba migrable un paquete cuyo nombre acababa en + `.elpx` o un ZIP SCORM que embebía un `.elpx`; un `.zip` con `content.xml` en + la raíz se reportaba `nosource`. + - `exeweb_source` no inspeccionaba el contenido: devolvía `ok` con que + existiera cualquier fichero en `package`, de modo que un paquete sin fuente + fallaba en la extracción (`STATUS_ERROR`). + +## Regla unificada (única fuente de verdad) + +Inspeccionando solo el directorio central del ZIP (sin extraer): + +| Forma del paquete | Veredicto | Resolución | +|---|---|---| +| `content.xml` en la **raíz** (`.elpx`, `.zip` con fuente, IMS, export web con fuente) | `ok` (directo) | copiar el paquete completo | +| exactamente **un** `*.elpx` embebido y seguro | `ok` (embebido) | extraer solo esa entrada | +| **más de un** `*.elpx` embebido | `ambiguoussource` | — | +| ninguno: `.elp` legacy (`contentv3.xml`), SCORM/web sin fuente, ZIP corrupto | `nosource` | — | + +Precedencia: un `content.xml` en raíz gana sobre el rastreo de `.elpx` embebidos. +Un `.elpx` nativo es un ZIP con `content.xml` en su raíz, así que resuelve a +`ok` directo igual que hoy — **sin regresión** en la migración `.elpx` existente. + +## [INTERPRETACION] Por qué se excluyen .elp legacy y SCORM/web sin fuente + +- Sin `content.xml` no hay proyecto ODE editable: el editor embebido no podría + reabrir la actividad y la detección de items calificables (`grade_sync`, que + parsea `content.xml`) no tendría nada que leer. Migrar tal paquete crearía una + actividad degradada. +- La migración es **no destructiva** (DEC-0026/DEC-0050): la actividad legacy + permanece operativa. Omitir un paquete no migrable no pierde datos; el + administrador sigue usando el plugin legacy o reexporta con fuente. +- El `.elp` legacy queda fuera de alcance por decisión de producto (coherente con + AN-004): no se contempla soporte del formato Python v2/v3. + +## Cambios en el código + +- Nuevo `classes/local/migration/source/package_probe.php`: clasificador y + resolución basados en contenido, compartidos por ambos handlers + (`classify()` + `resolve()`). Reutiliza `zip_utils` para las defensas de + path-traversal / symlink ya existentes. +- `exescorm_source` y `exeweb_source` delegan en `package_probe` + (`exescorm` mantiene el atajo `external`/`aiccurl`/`localsync` → `unsupported`; + `exeweb` mantiene la resolución de `itemid` por revisión y su fallback). +- `lang/en/exelearning.php`: se afinan `migratescormnote` y + `migratestatus_nosource` para nombrar el criterio `content.xml` (se reutiliza el + bucket `nosource`; sin estados/constantes nuevos). + +## Matriz de tests + +| Caso | Origen | Resultado esperado | +|---|---|---| +| `.elpx` nativo | exeweb / exescorm | migrado (directo) | +| `.zip` con `content.xml` en raíz (export web/IMS con fuente) | exeweb / exescorm | migrado (directo) — **caso nuevo** | +| ZIP SCORM con 1 `.elpx` embebido | exescorm | migrado (embebido) | +| ZIP SCORM con >1 `.elpx` | exescorm | `ambiguoussource` | +| SCORM sin fuente (solo `imsmanifest.xml`) | exescorm | `nosource` | +| `.elp` legacy (`contentv3.xml`) | exeweb / exescorm | `nosource` | +| Tipos `external` / `aiccurl` / `localsync` | exescorm | `unsupported` | +| ZIP corrupto | exeweb / exescorm | `nosource` (sin excepción) | + +Cubierta por `package_probe_test`, `exeweb_source_test`, `exescorm_source_test`, +`exescorm_source_security_test` y un test extremo a extremo en +`migration_service_test`. + +## [PENDIENTE] + +- La validación del `.elpx` embebido se basa en su extensión (no se abre el ZIP + anidado en preflight por coste); la guarda de extracción (`index.html`) actúa de + red de seguridad. Verificar `content.xml` dentro del `.elpx` anidado quedaría + para una iteración futura si aparecieran paquetes patológicos. +- Los paquetes SCORM/web sin fuente eXeLearning no se preservan como contenido + estático. Si en el futuro se quisiera ofrecer esa preservación, sería una + decisión de producto aparte (no contemplada aquí).