Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 12 additions & 55 deletions classes/local/migration/source/exescorm_source.php
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,14 @@ public function list_sources(): array {
}

/**
* Classifies a source by its exescormtype and the layout of its stored package.
* Classifies a source by its exescormtype and the eXeLearning source its
* stored package carries.
*
* External / AICC-URL / synchronized types keep no migratable local snapshot
* and are unsupported up front. Otherwise the stored package is handed to the
* shared {@see package_probe}, which is migratable when it holds a root
* content.xml (a native .elpx, a content.xml zip, or an IMS export) or embeds
* exactly one .elpx.
*
* @param \stdClass $source A row from list_sources().
* @return classification
Expand All @@ -125,40 +132,12 @@ public function classify(\stdClass $source): classification {
return classification::nosource();
}

if (str_ends_with(strtolower($pkg->get_filename()), '.elpx')) {
// The package is itself the editable .elpx (embedded editor export).
return classification::ok(null);
}

// SCORM zip: read only the central directory, no extraction (preflight-cheap).
$entries = $pkg->list_files(get_file_packer('application/zip'));
if (!is_array($entries)) {
// Corrupt or unreadable zip.
return classification::nosource();
}
$elpx = [];
foreach ($entries as $entry) {
if (empty($entry->is_directory) && str_ends_with(strtolower($entry->pathname), '.elpx')) {
// The entry name is attacker-influenced (an uploaded SCORM zip can embed
// an .elpx under a path-traversal / absolute / backslash / stream-wrapper
// name). Drop any unsafe entry so it is never selected for extraction;
// an otherwise-fine package then degrades to nosource, exactly as if it
// carried no usable .elpx at all.
if (\mod_exelearning\local\zip_utils::is_unsafe_zip_entry($entry->pathname)) {
continue;
}
$elpx[] = $entry->pathname;
}
}
return match (count($elpx)) {
0 => classification::nosource(),
1 => classification::ok($elpx[0]),
default => classification::ambiguoussource(),
};
return package_probe::classify($pkg);
}

/**
* Resolves a readable .elpx temp path: the package itself, or the single embedded entry.
* Resolves a readable package temp path: the package itself, or its single
* embedded .elpx entry.
*
* @param \stdClass $source A row from list_sources().
* @return string|null
Expand All @@ -172,29 +151,7 @@ public function resolve_elpx(\stdClass $source): ?string {
if (!$pkg) {
return null;
}
$tmpdir = make_request_directory();
if ($verdict->elpxentry === null) {
// Direct .elpx package: copy it out verbatim.
$tmp = $tmpdir . '/source.elpx';
$pkg->copy_content_to($tmp);
return $tmp;
}
// Defence in depth: classify() already drops unsafe entries, but re-check here
// so resolve_elpx() never extracts a hostile name even if reached directly.
if (\mod_exelearning\local\zip_utils::is_unsafe_zip_entry($verdict->elpxentry)) {
return null;
}
// Extract ONLY the embedded entry, not the whole SCORM. The packer drops the
// $onlyfiles filter when handed a stored_file, so copy the zip out first and
// extract from the path (cheap: one small entry instead of the whole package).
$ziptmp = $tmpdir . '/scorm.zip';
$pkg->copy_content_to($ziptmp);
get_file_packer('application/zip')->extract_to_pathname($ziptmp, $tmpdir, [$verdict->elpxentry]);
// Verify nothing escaped $tmpdir (no symlinks, every materialised path stays
// inside it) before trusting the resolved path.
\mod_exelearning\local\zip_utils::assert_extraction_contained($tmpdir, 'migrateextractfailed');
$path = $tmpdir . '/' . $verdict->elpxentry;
return is_file($path) ? $path : null;
return package_probe::resolve($pkg, $verdict);
}

/**
Expand Down
81 changes: 47 additions & 34 deletions classes/local/migration/source/exeweb_source.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
namespace mod_exelearning\local\migration\source;

/**
* Treats mod_exeweb activities as read-only sources of native .elpx packages.
* Treats mod_exeweb activities as read-only sources of eXeLearning packages.
*
* mod_exeweb stores its package at itemid = {exeweb}.revision (see
* mod_exeweb/classes/exeweb_package.php save_draft_file(), which calls
Expand All @@ -34,6 +34,11 @@
* import_service read itemid 0 unconditionally and reported every real exeweb
* activity as nosource; this handler fixes that, with a fallback scan for revision
* drift (e.g. restored backups).
*
* Once located, the stored package is handed to the shared {@see package_probe} so
* migratability is decided by content (a recoverable eXeLearning content.xml)
* rather than assumed: a legacy package without an ODE source is reported nosource
* instead of being created as a degraded activity.
*/
final class exeweb_source implements source_interface {
/**
Expand Down Expand Up @@ -81,12 +86,48 @@ public function list_sources(): array {
}

/**
* Classifies a source: the package must exist in the `package` filearea.
* Classifies a source: locate the stored package, then defer to the shared
* content-based probe (migratable only when an eXeLearning content.xml is
* recoverable). The resolved itemid is threaded through so resolve_elpx() does
* not have to re-derive it.
*
* @param \stdClass $source A row from list_sources().
* @return classification
*/
public function classify(\stdClass $source): classification {
    // Find the stored package first; the itemid it was found at is passed on
    // to the shared probe together with the file.
    [$package, $packageitemid] = $this->locate_package($source);

    // No stored package means there is nothing to probe.
    return $package
        ? package_probe::classify($package, $packageitemid)
        : classification::nosource();
}

/**
* Resolves the package (or its embedded .elpx) to a temporary path.
*
* @param \stdClass $source A row from list_sources().
* @return string|null
*/
public function resolve_elpx(\stdClass $source): ?string {
    [$package, $packageitemid] = $this->locate_package($source);
    if ($package) {
        // Classify the located package, then let the shared probe turn that
        // verdict into a temporary path (or null).
        $verdict = package_probe::classify($package, $packageitemid);
        return package_probe::resolve($package, $verdict);
    }
    // No stored package could be located.
    return null;
}

/**
* Locates the stored mod_exeweb package and the itemid it lives at.
*
* mod_exeweb stores the package at itemid = {exeweb}.revision and wipes the
* filearea on each save, so the documented location is tried first and a
* filearea scan (newest itemid wins) covers revision drift, e.g. restored
* backups.
*
* @param \stdClass $source A row from list_sources().
* @return array{0:\stored_file|null,1:int} The package file (or null) and its itemid.
*/
private function locate_package(\stdClass $source): array {
$fs = get_file_storage();
// Primary: the documented location, itemid = {exeweb}.revision.
$files = $fs->get_area_files(
Expand All @@ -98,7 +139,7 @@ public function classify(\stdClass $source): classification {
false
);
if ($files) {
return classification::ok(null, (int) $source->revision);
return [reset($files), (int) $source->revision];
}
// Fallback: scan every itemid (covers revision drift, e.g. restored backups).
$all = $fs->get_area_files(
Expand All @@ -110,39 +151,11 @@ public function classify(\stdClass $source): classification {
false
);
if (!$all) {
return classification::nosource();
return [null, 0];
}
// The filearea is wiped on each save, so >1 file means drift: newest itemid wins.
return classification::ok(null, (int) reset($all)->get_itemid());
}

/**
* Copies the native .elpx out to a temporary path.
*
* @param \stdClass $source A row from list_sources().
* @return string|null
*/
public function resolve_elpx(\stdClass $source): ?string {
$verdict = $this->classify($source);
if (!$verdict->is_ok()) {
return null;
}
$fs = get_file_storage();
$files = $fs->get_area_files(
(int) $source->contextid,
'mod_exeweb',
'package',
$verdict->itemid,
'id ASC',
false
);
$pkg = reset($files);
if (!$pkg) {
return null;
}
$tmp = make_request_directory() . '/source.elpx';
$pkg->copy_content_to($tmp);
return $tmp;
$pkg = reset($all);
return [$pkg, (int) $pkg->get_itemid()];
}

/**
Expand Down
155 changes: 155 additions & 0 deletions classes/local/migration/source/package_probe.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
* Content-based detection of a migratable eXeLearning source inside a stored
* legacy package (issue #13 #3, DEC-0050).
*
* @package mod_exelearning
* @copyright 2026 ATE (Área de Tecnología Educativa)
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

namespace mod_exelearning\local\migration\source;

use mod_exelearning\local\zip_utils;

/**
* Decides whether a stored legacy package carries a migratable eXeLearning source
* and resolves it to a temporary path for installation.
*
* The migratability marker is an eXeLearning ODE 2.0 `content.xml`. A package is
* migratable when:
* - `content.xml` sits at the archive root — a native `.elpx`, a content.xml
* `.zip`, an IMS Content Package, or an eXeLearning web export that bundles its
* source (resolved by installing the whole archive); or
* - the archive embeds exactly one safe `.elpx` — an eXeLearning SCORM export
* wrapping its editable source (resolved by extracting only that entry).
*
* Everything else is not migratable: legacy `.elp` (which carries `contentv3.xml`,
* not `content.xml`), a source-less SCORM package, a plain web export with no
* bundled source, more than one embedded `.elpx` (ambiguous), or a corrupt/
* unreadable archive. The caller leaves the legacy activity untouched — the
* migration never deletes the source, so a skipped package loses no data.
*
* Both source handlers (mod_exeweb, mod_exescorm) share this single detector so
* the rule lives in exactly one place. Detection reads only the ZIP central
* directory (no extraction), keeping the preflight pass cheap.
*/
final class package_probe {
    /**
     * The ODE 2.0 source marker expected at the archive root.
     *
     * Mirrors {@see \mod_exelearning\local\package_manager::validate_content_xml()},
     * the same root-level `content.xml` test the upload form uses to accept a package.
     *
     * @var string
     */
    private const CONTENT_XML = 'content.xml';

    /**
     * Classifies a stored package by the eXeLearning source it carries.
     *
     * Never extracts: only the archive listing is read. An unreadable archive is
     * downgraded to a nosource classification, matching the source_interface
     * contract.
     *
     * @param \stored_file $pkg The stored legacy package (any zip/.elpx).
     * @param int|null $itemid Resolved package itemid, threaded back into the
     *                         classification for the mod_exeweb revision fallback.
     * @return classification ok(null, $itemid) for a root content.xml, ok($entry, $itemid)
     *                        for exactly one safe embedded .elpx, ambiguoussource() for
     *                        several, nosource() otherwise.
     */
    public static function classify(\stored_file $pkg, ?int $itemid = null): classification {
        // Read only the central directory (preflight-cheap): no extraction.
        $entries = $pkg->list_files(get_file_packer('application/zip'));
        if (!is_array($entries)) {
            // Corrupt or unreadable archive: nothing we can recover.
            return classification::nosource();
        }

        $elpx = [];
        foreach ($entries as $entry) {
            if (!empty($entry->is_directory)) {
                continue;
            }
            // A content.xml at the archive root is the genuine ODE 2.0 marker and
            // takes precedence: the whole archive is installable as-is, exactly
            // like a native .elpx (which is itself a zip with content.xml at root).
            if ($entry->pathname === self::CONTENT_XML) {
                return classification::ok(null, $itemid);
            }
            if (str_ends_with(strtolower($entry->pathname), '.elpx')) {
                // The entry name is attacker-influenced (an uploaded SCORM zip can
                // embed an .elpx under a path-traversal / absolute / backslash /
                // stream-wrapper name). Drop any unsafe entry so it is never
                // selected for extraction; an otherwise-fine package then degrades
                // to nosource, exactly as if it carried no usable .elpx at all.
                if (zip_utils::is_unsafe_zip_entry($entry->pathname)) {
                    continue;
                }
                $elpx[] = $entry->pathname;
            }
        }

        return match (count($elpx)) {
            0 => classification::nosource(),
            1 => classification::ok($elpx[0], $itemid),
            default => classification::ambiguoussource(),
        };
    }

    /**
     * Resolves a classified package to a readable temporary path.
     *
     * Returns null when the verdict is not migratable, when the stored package
     * cannot be copied out of the file storage, or when the embedded entry did
     * not materialise. For a root-content.xml package the whole archive is copied
     * out verbatim (install_package() extracts and validates it downstream). For
     * an embedded .elpx only that single entry is extracted, with the same
     * path-traversal / symlink defences the rest of the plugin uses.
     *
     * @param \stored_file $pkg The stored legacy package.
     * @param classification $verdict The verdict returned by classify().
     * @return string|null Absolute path to a temporary package, or null.
     */
    public static function resolve(\stored_file $pkg, classification $verdict): ?string {
        if (!$verdict->is_ok()) {
            return null;
        }
        $tmpdir = make_request_directory();
        if ($verdict->elpxentry === null) {
            // Direct package (native .elpx or content.xml-bearing zip): copy it out
            // verbatim. install_package() extracts and validates it downstream.
            $tmp = $tmpdir . '/source.elpx';
            // A failed copy must not hand the caller a path to a file that was
            // never written; only an explicit false is treated as failure.
            if ($pkg->copy_content_to($tmp) === false) {
                return null;
            }
            return $tmp;
        }
        // Defence in depth: classify() already drops unsafe entries, but re-check
        // here so resolve() never extracts a hostile name even if reached directly.
        if (zip_utils::is_unsafe_zip_entry($verdict->elpxentry)) {
            return null;
        }
        // Extract ONLY the embedded entry, not the whole archive. The packer drops
        // the $onlyfiles filter when handed a stored_file, so copy the archive out
        // first and extract from the path (cheap: one small entry).
        $ziptmp = $tmpdir . '/package.zip';
        // Without the copied archive there is nothing to extract from.
        if ($pkg->copy_content_to($ziptmp) === false) {
            return null;
        }
        get_file_packer('application/zip')->extract_to_pathname($ziptmp, $tmpdir, [$verdict->elpxentry]);
        // Verify nothing escaped $tmpdir (no symlinks, every materialised path stays
        // inside it) before trusting the resolved path.
        zip_utils::assert_extraction_contained($tmpdir, 'migrateextractfailed');
        $path = $tmpdir . '/' . $verdict->elpxentry;
        return is_file($path) ? $path : null;
    }
}
4 changes: 2 additions & 2 deletions lang/en/exelearning.php
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,12 @@
$string['migratepreflightready'] = 'Ready to migrate: {$a}';
$string['migratepreflightsummary'] = 'Total: {$a->total}. Already migrated: {$a->alreadymigrated}. Ready: {$a->migratable}. Blocked: {$a->blocked}.';
$string['migrateprogress'] = 'Migrating {$a->done}/{$a->total}: {$a->name}';
$string['migratescormnote'] = 'SCORM activities are migrated only when an editable eXeLearning source can be recovered: either the stored package is itself an .elpx, or the SCORM zip embeds exactly one .elpx. Their grades are copied to the overall grade. Packages with no embedded source, with several embedded .elpx files, hosted externally, or kept in sync with an external URL are skipped.';
$string['migratescormnote'] = 'SCORM activities are migrated only when an editable eXeLearning source can be recovered: the stored package contains content.xml at its root (a .elpx, an eXeLearning content .zip or an IMS export), or the SCORM zip embeds exactly one .elpx. Their grades are copied to the overall grade. Packages with no recoverable content.xml source (including legacy .elp projects and source-less SCORM or web exports), with several embedded .elpx files, hosted externally, or kept in sync with an external URL are skipped.';
$string['migratestatus_alreadymigrated'] = 'Already migrated';
$string['migratestatus_ambiguoussource'] = 'Skipped (multiple embedded .elpx files — migrate manually)';
$string['migratestatus_error'] = 'Error';
$string['migratestatus_migrated'] = 'Migrated';
$string['migratestatus_nosource'] = 'Skipped (no importable source)';
$string['migratestatus_nosource'] = 'Skipped (no eXeLearning content.xml source to import)';
$string['migratestatus_unsupported'] = 'Skipped (externally hosted or synchronized package)';
$string['migratesummary'] = 'Migrated: {$a->migrated}. Already migrated: {$a->alreadymigrated}. Skipped (no source): {$a->nosource}. Skipped (ambiguous): {$a->ambiguoussource}. Skipped (unsupported): {$a->unsupported}. Errors: {$a->error}.';
$string['migratetitle'] = 'Migrate to eXeLearning';
Expand Down
Loading
Loading