diff --git a/.dockerignore b/.dockerignore
index 5518e60..79d8c95 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -4,7 +4,11 @@
 .gitignore
 .gitlab-ci.yml
 .idea
+.ipython
 .pre-commit-config.yaml
 .readthedocs.yml
 .travis.yml
+.continue
+.envs/.local/.django
+temp
 venv
diff --git a/.envs/.local/.django b/.envs/.local/.django
index 168e012..53f6444 100644
--- a/.envs/.local/.django
+++ b/.envs/.local/.django
@@ -15,3 +15,13 @@ CELERY_BROKER_URL=redis://redis:6379/0
 # ------------------------------------------------------------------------------
 CELERY_FLOWER_USER=PhFRdLexbrsBvrrbSXxjcMMOcVOavCrZ
 CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xbVYZbrwR
+
+# OpenSearch
+# ------------------------------------------------------------------------------
+OPENSEARCH_URL=http://172.20.0.1:9200
+OPENSEARCH_BASIC_AUTH=admin,UmaSenhaForte123!
+OPENSEARCH_VERIFY_CERTS=False
+
+# SciELO Books API
+# ------------------------------------------------------------------------------
+SCIELO_BOOKS_BASE_URL=http://192.168.2.154:31735
diff --git a/.gitignore b/.gitignore
index 6342047..dd2c92d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,4 +169,11 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
+
+# Local agent/editor state
+.continue/
+temp/
+.envs/.local/.django
+start-dev.sh
+opencode.json
diff --git a/.pylintrc b/.pylintrc
deleted file mode 100644
index 55509fe..0000000
--- a/.pylintrc
+++ /dev/null
@@ -1,14 +0,0 @@
-[MASTER]
-load-plugins=pylint_django, pylint_celery
-django-settings-module=config.settings.base
-[FORMAT]
-max-line-length=120
-
-[MESSAGES CONTROL]
-disable=missing-docstring,invalid-name
-
-[DESIGN]
-max-parents=13
-
-[TYPECHECK]
-generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete
diff --git a/.readthedocs.yml b/.readthedocs.yml
deleted file mode 100644
index b4cf0c0..0000000
--- a/.readthedocs.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-version: 2
-
-sphinx:
-  configuration: docs/conf.py
-
-build:
-  image: testing
-
-python:
-  version: 3.9
-  install:
-    - requirements: requirements/local.txt
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..1f28439
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,83 @@
+# AGENTS.md
+
+## Project
+
+Django 5.2 + Wagtail 7.3 + Celery app that ingests SciELO access logs, validates them, and exports COUNTER-5 metrics to OpenSearch with monthly indices and daily nested metrics.
+
+## Key commands
+
+All commands run inside Docker via the `local.yml` compose file unless noted.
+
+```bash
+make build                  # build images
+make up                     # start all services (django, postgres, redis, celery worker+beat, mailhog)
+make django_shell           # Django shell via docker compose
+make django_test            # run full test suite (pytest)
+make django_fast            # tests with --failfast
+make django_migrate         # apply migrations
+make django_makemigrations  # generate new migrations
+make django_createsuperuser # create Wagtail admin user
+```
+
+**Run a single test file/path:**
+```bash
+docker compose -f local.yml run --rm django pytest path/to/test_file.py
+```
+
+**Without Docker** (rare): use `start-dev.sh` after adjusting the ethernet interface name.
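+
+For a manual run outside the Wagtail admin, a hedged sketch of kicking off the daily pipeline by name from `make django_shell` (the registered Celery task name is assumed to match the admin label; translation may change it):
+
+```python
+# Send the scheduled pipeline task by its registered name. Assumption: the
+# registered (possibly translated) name matches the label shown in the admin.
+from config import celery_app
+
+result = celery_app.send_task("[Log Pipeline] Daily Routine (Auto)")
+print(result.id)  # follow progress in the celery worker logs
+```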
+ +## Architecture + +- **Wagtail admin**: `http://localhost:8009/admin` (NOT Django admin at `/django-admin/`) +- **Django apps** (top-level dirs): `core` (Wagtail pages, users, utilities, collectors), `collection`, `log_manager`, `log_manager_config`, `metrics`, `document`, `reports`, `resources`, `source`, `tracker`, `core_settings` +- **`core/`** contains utilities, shared models, Wagtail hooks, templates, and the `collectors/` subpackage. `config/` is the Django project package (settings, urls, celery_app, wsgi). +- **Celery pipeline**: `task_daily_log_ingestion_pipeline` (auto-scheduled) chains Search -> Validate -> Parse -> Export using Celery chords. Individual steps can be triggered manually via Wagtail admin. +- **Task names** use translatable strings, e.g. `_[Log Pipeline] 1. Search Logs (Manual)` — do not rename these casually, it breaks the schedule. + +## Settings + +- `DJANGO_SETTINGS_MODULE` defaults to `config.settings.local` +- Tests use `config.settings.test` (set via `pytest.ini` `--ds=config.settings.test`) +- Env files live in `.envs/.local/` (local) and `.envs/.production/` (production) +- **`config/settings/test.py`** is minimal — it extends `base.py` and does NOT load local.py. If a test needs a setting that only exists in local.py, it must be added to test.py or set in the test directly. + +## Testing + +- Framework: **pytest** (not Django's `TestCase` runner), with `--reuse-db` by default +- Config: `pytest.ini` sets `--ds=config.settings.test --reuse-db` +- Both `unittest.TestCase` (Django-style) and pytest-style tests coexist; `pytest` is the runner +- CI runs: `build -> makemigrations -> migrate -> pytest` +- Shared fixtures in `core/conftest.py` (autouse `media_storage`, `user` fixture via factory-boy) + +## Linting & formatting + +- **black** (line length 120 implied by flake8 config; black defaults to 88 — pre-commit config pins it) +- **isort** (black profile via `line_length=88`) +- **flake8** (max-line-length=120 via setup.cfg) +- Pre-commit runs all three on commit. Configuration in `setup.cfg` (flake8, isort, mypy) and `.pre-commit-config.yaml`. + +## Local dev quirks + +- Two SciELO libs (`scielo_log_validator`, `scielo_usage_counter`) are installed from local repos mounted at `/app/scielo_log_validator` and `/app/scielo_usage_counter` when `USE_LOCAL_SCIELO_LIBS=1`. The local Dockerfile strips these from `base.txt` during build and installs them from the mounted volumes via the entrypoint script. +- Log files volume: `/mnt/pidata2/pi/scl/logs:/app/logs` (host-specific, may not exist on all machines) +- Mailhog UI at `http://localhost:8029` +- `manage.py` appends `core/` to `sys.path` so `from core.utils import ...` and `from utils import ...` both resolve. + +## OpenSearch + +- Client configured via `OPENSEARCH_URL`, `OPENSEARCH_BASIC_AUTH`, `OPENSEARCH_VERIFY_CERTS` +- Index naming: `usage_monthly_{collection}_{year}` (e.g. `usage_monthly_books_2026`) +- Upserts use Painless scripts for idempotent daily metric merging +- `OPENSEARCH_INDEX_NAME` (default `usage`) and `OPENSEARCH_API_KEY` are defined in base settings but not widely used + +## MCP tools + +- When you need to search framework/library docs (Django, Wagtail, Celery, OpenSearch, etc.), use `context7` tools. +- When you need to find code examples or patterns from open-source projects, use `gh_grep` tools. 
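+
+## Example: OpenSearch monthly upsert (reference sketch)
+
+A minimal sketch of the idempotent upsert pattern described in the OpenSearch section above, using `opensearch-py`. The index name follows the documented pattern; the document id and the `daily_metrics` shape are illustrative assumptions, not the production mapping.
+
+```python
+# Hedged sketch of the export step: one document per PID per month, with the
+# daily numbers merged in by a Painless script so re-processing the same log
+# is idempotent. Field names and document shape are assumptions.
+from opensearchpy import OpenSearch
+
+client = OpenSearch(
+    hosts=["http://172.20.0.1:9200"],          # OPENSEARCH_URL
+    http_auth=("admin", "UmaSenhaForte123!"),  # OPENSEARCH_BASIC_AUTH split on ","
+    verify_certs=False,                        # OPENSEARCH_VERIFY_CERTS
+)
+
+pid = "S0100-19652026000100001"     # hypothetical PID
+index = "usage_monthly_scl_2026"    # usage_monthly_{collection}_{year}
+doc_id = f"{pid}-2026-01"           # hypothetical per-PID monthly id
+day = "2026-01-15"
+metrics = {"total_item_requests": 12, "unique_item_requests": 9}
+
+client.update(
+    index=index,
+    id=doc_id,
+    body={
+        "script": {
+            "lang": "painless",
+            # Merge or overwrite the metrics for a single day inside the
+            # monthly document, without touching the other days.
+            "source": (
+                "if (ctx._source.daily_metrics == null) { ctx._source.daily_metrics = [:]; } "
+                "ctx._source.daily_metrics[params.date] = params.metrics;"
+            ),
+            "params": {"date": day, "metrics": metrics},
+        },
+        "upsert": {"pid": pid, "collection": "scl", "daily_metrics": {day: metrics}},
+    },
+)
+```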
+ +## Wagtail-specific notes + +- Multi-language: `pt-br` (default), `en`, `es` +- Wagtail URL prefixes disabled (`prefix_default_language=False`) +- After adding a language, run `make wagtail_sync` and `make wagtail_update_translation_field` +- `wagtail-modeladmin` is used for managing pipeline entities in admin diff --git a/COPYING b/COPYING deleted file mode 100644 index 94a9ed0..0000000 --- a/COPYING +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. 
- - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. 
A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. 
If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). 
To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. 
- - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. 
- - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. diff --git a/README.md b/README.md index 5347d9d..4dd7a22 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,125 @@ -## SciELO Usage +# SciELO Usage Metrics Pipeline -## Dev Installation +A modernized platform for processing and indexing SciELO usage logs into OpenSearch, adhering to COUNTER R5.1 standards. -To build and run the application, being at the root of the project, you can follow these steps: +**Version**: 2.0.0 + +## Quick Start (Dev Installation) + +To build and run the application locally: 1. `make build compose=local.yml` -2. `make django_makemigrations` -3. `make django_migrate` -4. `make django_createsuperuser` -5. `make up` - -After the fifth step, the application should be functional and accessible at http://0.0.0.0:8009/admin - -### Additional notes: - -* The instructions assume that you have a working installation of Docker and `make`. -* The `make` commands use the `compose` file `local.yml` to start the application containers. -* The `django_makemigrations` and `django_migrate` commands are used to create and apply database migrations. 
-* The `django_createsuperuser` command is used to create a superuser account for the application.
-* The `make up` command starts the application containers in the background.
-* The application is accessible at http://0.0.0.0:8009/admin.
-* To log in to the admin panel, you will need to use the superuser credentials that you created with the `django_createsuperuser` command.
-* The `Log Manager` tool can be used to view log files and manage application configurations.
-* To test the application, you will need to add some content, such as a list of collections and configurations.
+2. `make django_migrate`
+3. `make django_createsuperuser`
+4. `make up`
+
+The application will be accessible at [http://localhost:8009/admin](http://localhost:8009/admin).
+
+---
+
+## Key Commands
+
+All commands run inside Docker via the `local.yml` compose file unless noted.
+
+```bash
+make build                  # build images
+make up                     # start all services (django, postgres, redis, celery worker+beat, mailhog)
+make django_shell           # Django shell via docker compose
+make django_test            # run full test suite (pytest)
+make django_fast            # tests with --failfast
+make django_migrate         # apply migrations
+make django_makemigrations  # generate new migrations
+make django_createsuperuser # create Wagtail admin user
+```
+
+**Run a single test file/path:**
+```bash
+docker compose -f local.yml run --rm django pytest path/to/test_file.py
+```
+
+## Architecture & Data Pipeline
+
+### Apps
+
+| App | Purpose |
+|---|---|
+| `log_manager` | Log file discovery, validation, and status tracking |
+| `log_manager_config` | Collection-specific configuration (paths, emails, expected logs/day) |
+| `metrics` | Daily metric jobs, OpenSearch export, COUNTER R5.1 aggregation |
+| `document` | Unified metadata model for articles, books, chapters, datasets, and preprints |
+| `source` | Journal, book, preprint server, and data repository metadata |
+| `reports` | Weekly, monthly, and yearly log processing reports |
+| `resources` | Robot user-agent patterns and GeoIP MMDB management |
+| `tracker` | Discarded line tracking and error logging |
+| `core` | Wagtail pages, users, shared utilities, and external API collectors |
+| `collection` | SciELO collection management |
+
+### Core Collectors (`core/collectors/`)
+
+| Collector | Source |
+|---|---|
+| `articlemeta.py` | ArticleMeta REST/Thrift API |
+| `opac.py` | SciELO OPAC endpoint |
+| `preprints.py` | SciELO Preprints OAI-PMH |
+| `dataverse.py` | SciELO Data (Dataverse) |
+| `scielo_books.py` | SciELO Books CouchDB changes feed |
+
+### Log Ingestion Pipeline
+
+The ingestion is fully automated via the **`[Log Pipeline] Daily Routine (Auto)`** task. It follows a strictly ordered sequence using Celery chords:
+
+- **Search**: Scans configured directories for new `.log` or `.gz` files.
+- **Validate**: Performs statistical sampling to ensure log integrity and detect the usage date.
+- **Parse**: Extracts metrics using `scielo_usage_counter`, performs URL translation, and aggregates data.
+- **Export**: Pushes results to OpenSearch using idempotent upsert scripts.
+
+### Metadata Synchronization
+
+Metadata is kept in sync with SciELO sources (ArticleMeta, OPAC, Books, etc.) via the **`[Metadata] Daily Sync Routine (Auto)`** task, which runs parallel workers to keep documents and sources up to date.
+
+## Supported Log Formats
+
+| Format | Description |
+|---|---|
+| NCSA Extended | Standard Apache combined log format with optional domain prefix and IP list fields. |
+| BunnyCDN | Pipe-delimited format with Unix timestamps (7 or 10 digits), country codes, and request IDs. |
+
+## Environment Variables
+
+| Variable | Default | Description |
+|---|---|---|
+| `OPENSEARCH_URL` | — | OpenSearch cluster URL |
+| `OPENSEARCH_BASIC_AUTH` | — | OpenSearch basic auth credentials (`user,pass`, as in `.envs/.local/.django`) |
+| `OPENSEARCH_VERIFY_CERTS` | `False` | Verify SSL certificates for OpenSearch connections |
+| `USE_LOCAL_SCIELO_LIBS` | `0` | Mount local `scielo_log_validator` and `scielo_usage_counter` repos for development |
+| `DJANGO_SETTINGS_MODULE` | `config.settings.local` | Django settings module |
+| `REDIS_URL` | — | Redis connection URL for Celery |
+
+## OpenSearch Storage Strategy (Hybrid Monthly)
+
+To optimize storage and performance, this system employs a **Hybrid Granularity** approach in OpenSearch:
+
+- **Monthly Partitioning**: Data is partitioned by month inside yearly, per-collection indices (e.g., `usage_monthly_books_2026`).
+- **One Document per Month**: Each article/PID has exactly **one document per month**, drastically reducing the total document count (up to 30x reduction).
+- **Daily Nested Metrics**: Daily granularity is preserved inside each monthly document using a `daily_metrics` object.
+- **Atomic Upserts**: Data is merged using OpenSearch **Painless Scripts**, allowing multiple logs for the same day/month to be processed without data duplication or loss.
+
+## Management & Monitoring
+
+All pipelines can be monitored through the **Wagtail Admin**:
+
+- **Log Manager**: Monitor the status of individual log files (`QUEUED`, `PARSING`, `PROCESSED`).
+- **Daily Metric Jobs**: Track the history of daily processing and OpenSearch export attempts.
+- **Log Config**: Manage collection-specific settings, log paths, and notification emails.
+
+### Useful Commands
+
+- `make django_shell`: Access the Django interactive shell.
+- `docker logs -f scielo_usage_local_celeryworker`: Monitor real-time task execution.
+
+## Dependencies
+
+- [scielo_log_validator](https://github.com/scieloorg/scielo_log_validator) — log file validation
+- [scielo_usage_counter](https://github.com/scieloorg/scielo_usage_counter) — COUNTER R5.1 metrics extraction
+- [device_detector](https://github.com/thinkwelltwd/device_detector) — client name/version detection
+- [opensearch-py](https://github.com/opensearch-project/opensearch-py) — OpenSearch client
diff --git a/VERSION b/VERSION
index 850e742..227cea2 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.14.0
+2.0.0
diff --git a/article/admin.py b/article/admin.py
deleted file mode 100644
index 8c38f3f..0000000
--- a/article/admin.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.
diff --git a/article/management/commands/load_articles_by_year.py b/article/management/commands/load_articles_by_year.py deleted file mode 100644 index 335598e..0000000 --- a/article/management/commands/load_articles_by_year.py +++ /dev/null @@ -1,80 +0,0 @@ -from django.core.management.base import BaseCommand - -from article.tasks import task_load_article_from_opac, task_load_article_from_article_meta - - -class Command(BaseCommand): - help = 'Generate task requests for loading article data from Article Meta for each year from 1900 to 2025' - - def add_arguments(self, parser): - parser.add_argument( - '--start-year', - type=int, - default=1990, - help='Start year (default: 1990)' - ) - parser.add_argument( - '--end-year', - type=int, - default=2025, - help='End year (default: 2025)' - ) - parser.add_argument( - '--collection', - type=str, - default='scl', - help='Collection code (default: scl)' - ) - parser.add_argument( - '--task', - choices=['load_article_from_opac', 'load_article_from_article_meta'], - default='load_article_from_opac', - help='Task to execute (default: load_article_from_opac)', - ) - - def handle(self, *args, **options): - start_year = options['start_year'] - end_year = options['end_year'] - collection = options['collection'] - - self.stdout.write( - self.style.SUCCESS( - f'Generating task requests from {start_year} to {end_year} for collection: {collection}' - ) - ) - - total_tasks = 0 - - for year in range(start_year, end_year + 1): - from_date = f'{year}-01-01' - until_date = f'{year}-12-31' - - self.stdout.write(f'Queuing task for year {year}...') - - # Queue the task for each year - if options['task'] == 'load_article_from_article_meta': - task_result = task_load_article_from_article_meta.delay( - from_date=from_date, - until_date=until_date, - collection=collection - ) - else: - task_result = task_load_article_from_opac.delay( - from_date=from_date, - until_date=until_date, - collection=collection - ) - - total_tasks += 1 - - self.stdout.write( - self.style.SUCCESS( - f'✓ Task queued for year {year}: {from_date} to {until_date} (Task ID: {task_result.id})' - ) - ) - - self.stdout.write( - self.style.SUCCESS( - f'\nCompleted! {total_tasks} tasks have been queued successfully.' 
- ) - ) diff --git a/article/migrations/0001_initial.py b/article/migrations/0001_initial.py deleted file mode 100644 index 816d61e..0000000 --- a/article/migrations/0001_initial.py +++ /dev/null @@ -1,137 +0,0 @@ -# Generated by Django 5.0.7 on 2025-02-07 17:50 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - initial = True - - dependencies = [ - ("collection", "0001_initial"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name="Article", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), - ), - ( - "updated", - models.DateTimeField( - auto_now=True, verbose_name="Last update date" - ), - ), - ( - "scielo_issn", - models.CharField( - db_index=True, max_length=9, verbose_name="SciELO ISSN" - ), - ), - ( - "pid_v2", - models.CharField( - db_index=True, max_length=23, verbose_name="PID V2" - ), - ), - ( - "pid_v3", - models.CharField( - blank=True, - db_index=True, - max_length=23, - null=True, - verbose_name="PID V3", - ), - ), - ( - "pdfs", - models.JSONField( - blank=True, - default=dict, - null=True, - verbose_name="Format with Language", - ), - ), - ( - "default_lang", - models.CharField(max_length=2, verbose_name="Default Language"), - ), - ( - "text_langs", - models.JSONField( - blank=True, - default=dict, - null=True, - verbose_name="Text Languages", - ), - ), - ( - "processing_date", - models.CharField(max_length=32, verbose_name="Processing Date"), - ), - ( - "publication_date", - models.CharField(max_length=32, verbose_name="Publication Date"), - ), - ( - "publication_year", - models.CharField( - db_index=True, max_length=4, verbose_name="Publication Year" - ), - ), - ( - "collection", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="collection.collection", - verbose_name="Collection", - ), - ), - ( - "creator", - models.ForeignKey( - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_creator", - to=settings.AUTH_USER_MODEL, - verbose_name="Creator", - ), - ), - ( - "updated_by", - models.ForeignKey( - blank=True, - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_last_mod_user", - to=settings.AUTH_USER_MODEL, - verbose_name="Updater", - ), - ), - ], - options={ - "verbose_name": "Article", - "verbose_name_plural": "Articles", - "unique_together": {("collection", "scielo_issn", "pid_v2", "pid_v3")}, - }, - ), - ] diff --git a/article/migrations/0002_alter_article_unique_together_article_files_and_more.py b/article/migrations/0002_alter_article_unique_together_article_files_and_more.py deleted file mode 100644 index cee055c..0000000 --- a/article/migrations/0002_alter_article_unique_together_article_files_and_more.py +++ /dev/null @@ -1,42 +0,0 @@ -# Generated by Django 5.0.7 on 2025-04-01 01:09 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("article", "0001_initial"), - ("collection", "0001_initial"), - ] - - operations = [ - migrations.AddField( - model_name="article", - name="files", - field=models.JSONField( - blank=True, default=dict, null=True, verbose_name="Files" - ), - ), - migrations.AddField( - model_name="article", - 
name="pid_generic", - field=models.CharField( - blank=True, - db_index=True, - max_length=50, - null=True, - verbose_name="PID Generic", - ), - ), - migrations.RemoveField( - model_name="article", - name="pdfs", - ), - migrations.AlterUniqueTogether( - name="article", - unique_together={ - ("collection", "scielo_issn", "pid_v2", "pid_v3", "pid_generic") - }, - ), - - ] diff --git a/article/migrations/0003_article_collection_scielo_issn_idx.py b/article/migrations/0003_article_collection_scielo_issn_idx.py deleted file mode 100644 index 753ac98..0000000 --- a/article/migrations/0003_article_collection_scielo_issn_idx.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 5.0.7 on 2025-06-12 17:16 - -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("article", "0002_alter_article_unique_together_article_files_and_more"), - ("collection", "0001_initial"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.AddIndex( - model_name="article", - index=models.Index( - fields=["collection", "scielo_issn"], name="collection_scielo_issn_idx" - ), - ), - ] diff --git a/article/migrations/__init__.py b/article/migrations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/article/models.py b/article/models.py deleted file mode 100644 index 80d2a97..0000000 --- a/article/models.py +++ /dev/null @@ -1,143 +0,0 @@ -from django.db import models -from django.utils.translation import gettext_lazy as _ - -from core.models import CommonControlField -from collection.models import Collection - - -class Article(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.CASCADE, - blank=False, - null=False, - db_index=True, - ) - - scielo_issn = models.CharField( - verbose_name=_('SciELO ISSN'), - max_length=9, - blank=False, - null=False, - db_index=True, - ) - - pid_v2 = models.CharField( - verbose_name=_('PID V2'), - max_length=23, - blank=False, - null=False, - db_index=True, - ) - - pid_v3 = models.CharField( - verbose_name=_('PID V3'), - max_length=23, - blank=True, - null=True, - db_index=True, - ) - - pid_generic = models.CharField( - verbose_name=_('PID Generic'), - max_length=50, - blank=True, - null=True, - db_index=True, - ) - - files = models.JSONField( - verbose_name=_('Files'), - null=True, - blank=True, - default=dict, - ) - - default_lang = models.CharField( - verbose_name=_('Default Language'), - max_length=2, - blank=False, - null=False, - ) - - text_langs = models.JSONField( - verbose_name=_('Text Languages'), - null=True, - blank=True, - default=dict, - ) - - processing_date = models.CharField( - verbose_name=_('Processing Date'), - max_length=32, - null=False, - blank=False, - ) - - publication_date = models.CharField( - verbose_name=_('Publication Date'), - max_length=32, - null=False, - blank=False, - ) - - publication_year = models.CharField( - verbose_name=_('Publication Year'), - max_length=4, - null=False, - blank=False, - db_index=True, - ) - - def __str__(self): - return f'{self.collection.acron3} - {self.scielo_issn} - {self.pid_v2 or self.pid_v3 or self.pid_generic}' - - @classmethod - def metadata(cls, collection=None): - qs = cls.objects.select_related('collection').only( - 'collection__acron3', - 'default_lang', - 'files', - 'pid_v2', - 'pid_v3', - 'pid_generic', - 'processing_date', - 'publication_date', - 'publication_year', - 'scielo_issn', - 'text_langs', - ) 
- - if collection: - qs = qs.filter(collection=collection) - - for a in qs.iterator(): - yield { - 'collection': a.collection.acron3, - 'default_lang': a.default_lang, - 'files': a.files, - 'pid_v2': a.pid_v2, - 'pid_v3': a.pid_v3, - 'pid_generic': a.pid_generic, - 'processing_date': a.processing_date, - 'publication_date': a.publication_date, - 'publication_year': a.publication_year, - 'scielo_issn': a.scielo_issn, - 'text_langs': a.text_langs, - } - - class Meta: - verbose_name = _('Article') - verbose_name_plural = _('Articles') - unique_together = ( - 'collection', - 'scielo_issn', - 'pid_v2', - 'pid_v3', - 'pid_generic', - ) - indexes = [ - models.Index(fields=['collection', 'scielo_issn'], name='collection_scielo_issn_idx'), - ] - diff --git a/article/tasks.py b/article/tasks.py deleted file mode 100644 index 3514fca..0000000 --- a/article/tasks.py +++ /dev/null @@ -1,259 +0,0 @@ -import logging - -from django.contrib.auth import get_user_model -from django.db.models import Q -from django.db import DataError -from django.utils.translation import gettext as _ - -from collection.models import Collection -from config import celery_app -from core.utils import date_utils -from core.utils.utils import _get_user - -from journal.models import Journal - -from tracker.models import ArticleEvent -from tracker.choices import ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, ARTICLE_EVENT_TYPE_DATA_ERROR - -from . import models, utils - - -User = get_user_model() - -@celery_app.task(bind=True, name=_('Load article data from Article Meta'), timelimit=-1, queue='load') -def task_load_article_from_article_meta(self, from_date=None, until_date=None, days_to_go_back=None, collection=None, issn=None, force_update=True, user_id=None, username=None): - user = _get_user(self.request, username=username, user_id=user_id) - - from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back) - logging.info(f'Loading articles from Article Meta. 
From: {from_date}, Until: {until_date}, Collection: {collection}, ISSN: {issn}.') - - offset = 0 - limit = 1000 - while True: - logging.info(f'{from_date}, {until_date}, {offset}, {limit}, {collection}, {issn}') - response = utils.fetch_article_meta_dict(from_date, until_date, offset=offset, limit=limit, collection=collection, issn=issn) - objects = response.get('objects') - if not objects: - break - - for obj in objects: - codes = obj.get('code_title') - - for issn_code in codes: - jou = Journal.objects.filter( - Q(issns__electronic_issn=issn_code) | - Q(issns__scielo_issn=issn_code) | - Q(issns__print_issn=issn_code) - ).first() - if not jou: - continue - - if not jou: - logging.info(f'Journal not found for ISSNs: {codes}') - continue - - col_obj = Collection.objects.get(acron3=obj.get('collection')) - if not col_obj: - logging.info(f'Collection not found: {obj.get("collection")}') - continue - - try: - article, created = models.Article.objects.get_or_create(collection=col_obj, scielo_issn=jou.scielo_issn, pid_v2=obj.get('code')) - if created or force_update: - article.files = obj.get('pdfs') or {} - article.processing_date = obj.get('processing_date') or '' - article.publication_date = obj.get('publication_date') or '' - article.publication_year = obj.get('publication_year') or '' - article.default_lang = obj.get('default_language') or '' - article.text_langs = obj.get('text_langs') or '' - - article.save() - logging.info(f'Article {"created" if created else "updated"}: {article}') - except models.Article.MultipleObjectsReturned as e: - logging.error(f'Error getting Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, - message=f'Error getting Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}', - data=obj - ) - continue - except DataError as e: - logging.error(f'Error saving Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_DATA_ERROR, - message=f'Error saving Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}', - data=obj - ) - continue - - offset += limit - - return True - - -@celery_app.task(bind=True, name=_('Load article data from OPAC'), timelimit=-1, queue='load') -def task_load_article_from_opac(self, collection='scl', from_date=None, until_date=None, days_to_go_back=None, page=1, force_update=True, user_id=None, username=None): - user = _get_user(self.request, username=username, user_id=user_id) - - from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back) - logging.info(f'Loading articles from OPAC. 
From: {from_date}, Until: {until_date}') - - while True: - response = utils.fetch_opac_dict(from_date, until_date, page=page) - - documents = response.get('documents') - - for doc_id, doc in documents.items(): - col_obj = Collection.objects.get(acron3=collection) - if not col_obj: - logging.error(f'Collection not found: {collection}') - continue - - journal = Journal.objects.get(collection=col_obj, acronym=doc.get('journal_acronym')) - if not journal: - logging.error(f'Journal not found: {doc.get("journal_acronym")}') - continue - - try: - article, created = models.Article.objects.get_or_create(collection=col_obj, scielo_issn=journal.scielo_issn, pid_v2=doc.get('pid_v2')) - - if created or force_update: - article.pid_v3 = doc.get('pid_v3') or '' - if not created: - article.pid_v2 = doc.get('pid_v2') or '' - article.publication_date = doc.get('publication_date') or article.publication_date or '' - article.default_lang = doc.get('default_language') or article.default_lang or '' - - try: - article.publication_year = article.publication_date[:4] - except IndexError: - article.publication_year = '' - - article.save() - logging.info(f'Article {"created" if created else "updated"}: {article}') - - except models.Article.MultipleObjectsReturned as e: - logging.error(f'Error getting Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, - message=f'Error creating Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}', - data=doc - ) - continue - except DataError as e: - logging.error(f'Error saving Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_DATA_ERROR, - message=f'Error saving Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}', - data=doc - ) - continue - - page += 1 - if page > int(response.get('pages', 0)): - break - - return True - - -@celery_app.task(bind=True, name=_('Load preprint data from SciELO Preprints'), timelimit=-1, queue='load') -def task_load_preprints_from_preprints_api(self, from_date=None, until_date=None, days_to_go_back=None, force_update=True, user_id=None, username=None): - user = _get_user(self.request, username=username, user_id=user_id) - - from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back) - logging.info(f'Loading preprints from SciELO Preprints. 
From: {from_date}, Until: {until_date}') - - col_obj = Collection.objects.get(acron3='preprints') - if not col_obj: - logging.error(f'Collection not found: preprints') - return False - - for record in utils.fetch_preprint_oai_pmh(from_date, until_date): - data = utils.extract_preprint_data(record) - - if not data.get('pid_generic'): - logging.error(f'Preprint ID not found in record: {record}') - continue - - try: - article, created = models.Article.objects.get_or_create(collection=col_obj, pid_generic=data['pid_generic']) - if created or force_update: - article.text_langs = data.get('text_langs') - article.default_lang = data.get('default_language') - article.publication_date = data.get('publication_date') - article.publication_year = data.get('publication_year') - - # Preprints do not have a scielo_issn yet - article.scielo_issn = '0000-0000' - - article.save() - logging.debug(f'Article {"created" if created else "updated"}: {article}') - except models.Article.MultipleObjectsReturned as e: - logging.error(f'Error creating Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, - message=f'Error creating Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}', - data=data - ) - continue - except DataError as e: - logging.error(f'Error saving Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_DATA_ERROR, - message=f'Error saving Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}', - data=data - ) - continue - - -@celery_app.task(bind=True, name=_('Load dataset metadata from Dataverse'), timelimit=-1, queue='load') -def task_load_dataset_metadata_from_dataverse(self, from_date=None, until_date=None, days_to_go_back=None, force_update=True, user_id=None, username=None): - user = _get_user(self.request, username=username, user_id=user_id) - - from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back) - logging.info(f'Loading dataset metadata from SciELO Data. From: {from_date}, Until: {until_date}') - - col_obj = Collection.objects.get(acron3='data') - if not col_obj: - logging.error(f'Collection not found: data') - return False - - for record in utils.fetch_dataverse_metadata(from_date, until_date): - dataset_doi = record.get('dataset_doi') - if not dataset_doi: - logging.error(f'Dataset DOI not found in record: {record}') - continue - - try: - dataset, created = models.Article.objects.get_or_create(collection=col_obj, pid_generic=dataset_doi) - if created or force_update: - dataset.publication_date = record.get('dataset_published') - - file_persistent_id = record.get('file_persistent_id') - file_id = record.get('file_id') - file_name = record.get('file_name') - file_url = record.get('file_url') - - if file_id: - dataset.files[file_id] = {'name': file_name, 'url': file_url, 'file_persisent_id': file_persistent_id} - - dataset.save() - logging.debug(f'Dataset {"created" if created else "updated"}: {dataset}') - except models.Article.MultipleObjectsReturned as e: - logging.error(f'Error creating Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, - message=f'Error creating Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}', - data=record - ) - continue - except DataError as e: - logging.error(f'Error saving Dataset: {e}. 
Collection: {col_obj}, PID: {dataset_doi}') - ArticleEvent.create( - event_type=ARTICLE_EVENT_TYPE_DATA_ERROR, - message=f'Error saving Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}', - data=record - ) - continue - - return True diff --git a/article/tests.py b/article/tests.py deleted file mode 100644 index 7ce503c..0000000 --- a/article/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/article/utils.py b/article/utils.py deleted file mode 100644 index b9a094e..0000000 --- a/article/utils.py +++ /dev/null @@ -1,204 +0,0 @@ -import logging -import requests -import os - -from sickle import Sickle -from time import sleep - -from core.utils import standardizer - - -ARTICLEMETA_ENDPOINT = os.environ.get('ARTICLEMETA_COLLECT_URL', 'http://articlemeta.scielo.org/api/v1/article/counter_dict') -ARTICLEMETA_MAX_RETRIES = int(os.environ.get('ARTICLEMETA_MAX_RETRIES', 5)) -ARTICLEMETA_SLEEP_TIME = int(os.environ.get('ARTICLEMETA_SLEEP_TIME', 30)) - -OPAC_ENDPOINT = os.environ.get('OPAC_ENDPOINT', 'https://www.scielo.br/api/v1/counter_dict') -OPAC_MAX_RETRIES = int(os.environ.get('OPAC_MAX_RETRIES', 5)) -OPAC_SLEEP_TIME = int(os.environ.get('OPAC_SLEEP_TIME', 30)) - -OAI_PMH_PREPRINT_ENDPOINT = os.environ.get('OAI_PMH_PREPRINT_ENDPOINT', 'https://preprints.scielo.org/index.php/scielo/oai') -OAI_METADATA_PREFIX = os.environ.get('OAI_METADATA_PREFIX', 'oai_dc') -OAI_PMH_MAX_RETRIES = int(os.environ.get('OAI_PMH_MAX_RETRIES', 5)) - -DATAVERSE_ENDPOINT = os.environ.get('DATAVERSE_ENDPOINT', 'https://data.scielo.org/api') -DATAVERSE_ROOT_COLLECTION = os.environ.get('DATAVERSE_ROOT_COLLECTION', 'scielodata') -DATAVERSE_MAX_RETRIES = int(os.environ.get('DATAVERSE_MAX_RETRIES', 5)) -DATAVERSE_SLEEP_TIME = int(os.environ.get('DATAVERSE_SLEEP_TIME', 30)) - - -def fetch_article_meta_dict(from_date, until_date, offset=0, limit=1000, collection=None, issn=None): - for t in range(1, ARTICLEMETA_MAX_RETRIES + 1): - params = { - 'from': from_date, - 'until': until_date, - 'offset': offset, - 'limit': limit - } - - if collection: - params['collection'] = collection - - if issn: - params['issn'] = issn - - response = requests.get(ARTICLEMETA_ENDPOINT, params=params) - - try: - response.raise_for_status() - logging.info(response.url) - - except requests.exceptions.HTTPError: - logging.warning( - 'Failed to collect data from %s. Waiting %d seconds before retry %d of %d' % ( - response.url, - ARTICLEMETA_SLEEP_TIME, - t, - ARTICLEMETA_MAX_RETRIES - ) - ) - sleep(ARTICLEMETA_SLEEP_TIME) - - else: - return response.json() - - -def fetch_opac_dict(from_date, until_date, page=1): - for t in range(1, OPAC_MAX_RETRIES + 1): - params = { - 'begin_date': from_date, - 'end_date': until_date, - 'page': page - } - - response = requests.get(url=OPAC_ENDPOINT, params=params, verify=False) - - try: - response.raise_for_status() - logging.info(response.url) - - except requests.exceptions.HTTPError: - logging.warning('Não foi possível coletar dados de %s. 
Aguardando %d segundos para tentativa %d de %d' % (response.url, OPAC_SLEEP_TIME, t, OPAC_MAX_RETRIES)) - sleep(OPAC_SLEEP_TIME) - - else: - return response.json() - - -def fetch_preprint_oai_pmh(from_date, until_date): - oai_client = Sickle(endpoint=OAI_PMH_PREPRINT_ENDPOINT, max_retries=OAI_PMH_MAX_RETRIES, verify=False) - records = oai_client.ListRecords(**{ - 'metadataPrefix': OAI_METADATA_PREFIX, - 'from': from_date, - 'until': until_date, - }) - - for r in records: - yield r - - -def extract_preprint_data(record): - pid_generic = _extract_preprint_compatible_identifer(record.header.identifier) - text_langs = [standardizer.standardize_language_code(l) for l in record.metadata.get('language', [])] - publication_date = record.metadata.get('date', [''])[0] - default_language = text_langs[0] if text_langs else '' - publication_year = _extract_preprint_publication_year_from_date(publication_date) - - data = { - 'pid_generic': pid_generic, - 'text_langs': text_langs, - 'publication_date': publication_date, - 'default_language': default_language, - 'publication_year': publication_year - } - - return data - - -def _extract_preprint_compatible_identifer(pid_v2): - try: - # piv_v2 should be something like oai:ops.preprints.scielo.org:preprint/1195 - # we are using the last part of the string as the identifier - return pid_v2.split(':')[-1].split('/')[1] - except IndexError: - return '' - - -def _extract_preprint_publication_year_from_date(date_str): - try: - return date_str[:4] - except IndexError: - return '' - - -def fetch_dataverse_metadata(from_date=None, until_date=None): - def get_subdataverses(): - url = f"{DATAVERSE_ENDPOINT}/dataverses/{DATAVERSE_ROOT_COLLECTION}/contents" - try: - response = requests.get(url, timeout=DATAVERSE_SLEEP_TIME) - response.raise_for_status() - return response.json().get("data", []) - except requests.exceptions.RequestException as e: - logging.error(f"Error fetching subdataverses: {e}") - return [] - - def get_datasets(subdataverse_id): - url = f"{DATAVERSE_ENDPOINT}/dataverses/{subdataverse_id}/contents" - try: - response = requests.get(url, timeout=DATAVERSE_SLEEP_TIME) - response.raise_for_status() - return response.json().get("data", []) - except requests.exceptions.RequestException as e: - logging.error(f"Error fetching datasets for subdataverse {subdataverse_id}: {e}") - return [] - - def get_files(dataset_id): - url = f"{DATAVERSE_ENDPOINT}/datasets/{dataset_id}/versions/:latest/files" - try: - response = requests.get(url, timeout=DATAVERSE_SLEEP_TIME) - response.raise_for_status() - return response.json().get("data", []) - except requests.exceptions.RequestException as e: - logging.error(f"Error fetching files for dataset {dataset_id}: {e}") - return [] - - subdataverses = get_subdataverses() - - for subdataverse in subdataverses: - if subdataverse["type"] != "dataverse": - continue - - subdataverse_id = subdataverse["id"] - subdataverse_title = subdataverse["title"] - datasets = get_datasets(subdataverse_id) - - for dataset in datasets: - if dataset["type"] != "dataset": - continue - - dataset_id = dataset["id"] - doi = standardizer.standardize_doi(dataset.get("persistentUrl")) - if not doi: - logging.warning(f"Dataset {dataset_id} does not have a DOI.") - continue - - publication_date = dataset.get("publicationDate", None) - - if publication_date: - if (from_date and publication_date < from_date) or (until_date and publication_date > until_date): - continue - - files = get_files(dataset_id) - - for file in files: - file_persistent_id = 
file["dataFile"].get("persistentId", None) - file_persistent_id_stz = standardizer.standardize_pid_generic(file_persistent_id) if file_persistent_id else None - - yield { - "title": subdataverse_title, - "dataset_doi": doi, - "dataset_published": publication_date, - "file_id": file["dataFile"]["id"], - "file_name": file["label"], - "file_url": f"{DATAVERSE_ENDPOINT}/access/datafile/{file['dataFile']['id']}", - "file_persistent_id": file_persistent_id_stz, - } diff --git a/article/views.py b/article/views.py deleted file mode 100644 index 91ea44a..0000000 --- a/article/views.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.shortcuts import render - -# Create your views here. diff --git a/collection/models.py b/collection/models.py index f2d7ce7..87da123 100644 --- a/collection/models.py +++ b/collection/models.py @@ -9,7 +9,7 @@ from core.forms import CoreAdminModelForm from core.models import CommonControlField, Language, TextWithLang -from core.utils.utils import fetch_data +from core.utils.request_utils import fetch_data from . import choices diff --git a/collection/tasks.py b/collection/tasks.py index 02fd0e7..19372de 100644 --- a/collection/tasks.py +++ b/collection/tasks.py @@ -1,14 +1,14 @@ from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ -from core.utils.utils import _get_user +from core.utils.request_utils import _get_user from collection.models import Collection from config import celery_app User = get_user_model() -@celery_app.task(bind=True, name=_('Load collection data')) +@celery_app.task(bind=True, name=_('[Collection] Load Collection Data')) def task_load_collections(self, user_id=None, username=None): user = _get_user(self.request, username=username, user_id=user_id) Collection.load(user) diff --git a/collection/wagtail_hooks.py b/collection/wagtail_hooks.py index e7b7e97..52b31a8 100644 --- a/collection/wagtail_hooks.py +++ b/collection/wagtail_hooks.py @@ -1,8 +1,5 @@ from django.utils.translation import gettext as _ from wagtail.snippets.views.snippets import SnippetViewSet -from wagtail.snippets.models import register_snippet - -from config.menu import get_menu_order from .models import Collection @@ -10,10 +7,8 @@ class CollectionSnippetViewSet(SnippetViewSet): model = Collection icon = "folder-open-inverse" - menu_name = 'collection' menu_label = _("Collection") - menu_order = get_menu_order("collection") - add_to_admin_menu = True + menu_order = 100 list_display = ( "main_name", @@ -57,6 +52,3 @@ class CollectionSnippetViewSet(SnippetViewSet): "updated_by", ) export_filename = "collections" - - -register_snippet(CollectionSnippetViewSet) diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile index 4351d9e..aac7972 100644 --- a/compose/local/django/Dockerfile +++ b/compose/local/django/Dockerfile @@ -23,8 +23,7 @@ COPY ./requirements . RUN python -m pip install --upgrade pip # Create Python Dependency and Sub-Dependency Wheels. 
-RUN pip wheel --wheel-dir /usr/src/app/wheels \ - -r ${BUILD_ENVIRONMENT}.txt +RUN pip wheel --wheel-dir /usr/src/app/wheels -r ${BUILD_ENVIRONMENT}.txt # Python 'run' stage diff --git a/compose/local/django/celery/worker/start b/compose/local/django/celery/worker/start index 7db6f27..f0c7efc 100644 --- a/compose/local/django/celery/worker/start +++ b/compose/local/django/celery/worker/start @@ -21,4 +21,14 @@ watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concu # Worker para arg bol cub data ecu per preprints pry rve spa sss sza ury wid watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small -n worker.parse_small@%h & -wait \ No newline at end of file +# Workers seriais adicionais para backfill paralelo de colecoes pequenas +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_1 -n worker.parse_small_1@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_2 -n worker.parse_small_2@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_3 -n worker.parse_small_3@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_4 -n worker.parse_small_4@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_5 -n worker.parse_small_5@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_6 -n worker.parse_small_6@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_7 -n worker.parse_small_7@%h & +watchgod celery.__main__.main --args -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_8 -n worker.parse_small_8@%h & + +wait diff --git a/compose/production/django/celery/worker/start b/compose/production/django/celery/worker/start index 4fb112e..6269dd5 100644 --- a/compose/production/django/celery/worker/start +++ b/compose/production/django/celery/worker/start @@ -22,4 +22,14 @@ celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_medium -n wo # Worker para arg bol cub data ecu per preprints pry rve spa sss sza ury wid (coleções pequenas) celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small -n worker.parse_small@%h & -wait \ No newline at end of file +# Workers seriais adicionais para backfill paralelo de colecoes pequenas +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_1 -n worker.parse_small_1@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_2 -n worker.parse_small_2@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_3 -n worker.parse_small_3@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_4 -n worker.parse_small_4@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_5 -n worker.parse_small_5@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_6 -n worker.parse_small_6@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_7 -n worker.parse_small_7@%h & +celery -A config.celery_app worker -l INFO --concurrency=1 -Q parse_small_8 -n worker.parse_small_8@%h & + +wait diff --git a/compose/production/django/entrypoint b/compose/production/django/entrypoint index 599841e..02470cd 100644 --- 
a/compose/production/django/entrypoint +++ b/compose/production/django/entrypoint @@ -16,6 +16,20 @@ if [ -z "${POSTGRES_USER}" ]; then fi export DATABASE_URL="postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}" +if [ "${USE_LOCAL_SCIELO_LIBS:-0}" = "1" ]; then + for path in /app/scielo_log_validator /app/scielo_usage_counter; do + if [ ! -f "${path}/setup.py" ] && [ ! -f "${path}/pyproject.toml" ]; then + >&2 echo "Local lib path not ready: ${path}" + exit 1 + fi + done + + >&2 echo "Installing local SciELO libs from mounted repositories..." + pip install --root-user-action=ignore --no-cache-dir --no-build-isolation --no-deps \ + -e /app/scielo_log_validator \ + -e /app/scielo_usage_counter +fi + postgres_ready() { python << END import sys diff --git a/config/collections.py b/config/collections.py new file mode 100644 index 0000000..9aa3efe --- /dev/null +++ b/config/collections.py @@ -0,0 +1,63 @@ +COLLECTION_ACRON3_SIZE_MAP = { + "scl": "xlarge", + "chl": "large", + "col": "large", + "mex": "large", + "cri": "medium", + "esp": "medium", + "psi": "medium", + "prt": "medium", + "ven": "medium", + "arg": "small", + "bol": "small", + "books": "small", + "cub": "small", + "data": "small", + "dom": "small", + "ecu": "small", + "per": "small", + "preprints": "small", + "pry": "small", + "rve": "small", + "spa": "small", + "sss": "small", + "sza": "small", + "ury": "small", + "wid": "small", +} + +COLLECTION_SIZE_SAMPLE_MAP = { + "small": 1.0, + "medium": 0.5, + "large": 0.1, + "xlarge": 0.1, +} + +LOG_MANAGER_SEED_DATA = [ + {"acronym": "arg", "directory_name": "Site clássico", "path": "/app/logs/scielo.ar", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "bol", "directory_name": "Site clássico", "path": "/app/logs/scielo.bo", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "chl", "directory_name": "Site clássico", "path": "/app/logs/scielo.cl", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "col", "directory_name": "Site clássico", "path": "/app/logs/scielo.co", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "cri", "directory_name": "Site clássico", "path": "/app/logs/scielo.cr", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "cub", "directory_name": "Site clássico", "path": "/app/logs/scielo.cu", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "data", "directory_name": "Site clássico", "path": "/app/logs/dataverse", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "dataverse"}, + {"acronym": "dom", "directory_name": "Site novo", "path": "/app/logs/scielo.dom", "quantity": 1, "start_date": "2026-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "opac"}, + {"acronym": "ecu", "directory_name": "Site clássico", "path": "/app/logs/scielo.ec", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "esp", "directory_name": "Site clássico", "path": "/app/logs/scielo.es", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": 
"classic"}, + {"acronym": "mex", "directory_name": "Site clássico", "path": "/app/logs/scielo.mx", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "per", "directory_name": "Site clássico", "path": "/app/logs/scielo.pe", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "preprints", "directory_name": "Site clássico", "path": "/app/logs/submission-node01", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "preprints"}, + {"acronym": "prt", "directory_name": "Site clássico", "path": "/app/logs/scielo.pt", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "pry", "directory_name": "Site clássico", "path": "/app/logs/scielo.py", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "psi", "directory_name": "Site clássico", "path": "/app/logs/scielo.pepsic", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "rve", "directory_name": "Site clássico", "path": "/app/logs/scielo.revenf", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "rvt", "directory_name": "Site clássico", "path": "/app/logs/scielo.revtur", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "scl", "directory_name": "Site novo", "path": "/app/logs/scielo.br", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "opac"}, + {"acronym": "spa", "directory_name": "Site novo - versão prévia", "path": "/app/logs/scielo.sp", "quantity": 2, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "opac_alpha"}, + {"acronym": "sss", "directory_name": "Site clássico", "path": "/app/logs/scielo.ss", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "sza", "directory_name": "Site clássico", "path": "/app/logs/scielo.za", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "ury", "directory_name": "Site clássico", "path": "/app/logs/scielo.uy", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "ven", "directory_name": "Site clássico", "path": "/app/logs/scielo.ve", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "wid", "directory_name": "Site clássico", "path": "/app/logs/scielo.wi", "quantity": 2, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"}, + {"acronym": "books", "directory_name": "SciELO Books", "path": "/app/logs/books", "quantity": 1, "start_date": "2012-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "books"}, +] diff --git a/config/menu.py b/config/menu.py index 13371c6..844ce0c 100644 --- a/config/menu.py +++ b/config/menu.py @@ -1,13 +1,10 @@ WAGTAIL_MENU_APPS_ORDER = { - "collection": 100, - "article": 200, - "journal": 300, - "resources": 400, - "log_manager": 500, - "log_manager_config": 600, - "metrics": 700, - "tasks": 800, - "unexpected-error": 900, + 
"metadata": 100, + "resources": 200, + "log_manager": 300, + "tracker": 400, + "metrics": 500, + "tasks": 600, } def get_menu_order(app_name): diff --git a/config/settings/base.py b/config/settings/base.py index 4e96ed4..e4a99fa 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -5,7 +5,8 @@ from pathlib import Path import environ -from django.utils.translation import gettext_lazy as _ + +from config.collections import COLLECTION_ACRON3_SIZE_MAP # noqa: F401 ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent.parent # core/ @@ -114,14 +115,15 @@ "core.users", "core_settings", # Your stuff: custom apps go here - "article", "collection", "core", - "journal", + "document", "log_manager", "log_manager_config", "metrics", + "reports", "resources", + "source", "tracker", ] @@ -404,36 +406,54 @@ SEARCH_PAGINATION_ITEMS_PER_PAGE = 10 -# Elasticsearch +# OpenSearch # ------------------------------------------------------------------------------ -ES_URL = env("ES_URL", default="http://192.168.0.33:9200/") -ES_INDEX_NAME = env("ES_INDEX_NAME", default="usage") -ES_API_KEY = env("ES_API_KEY", default="") -ES_BASIC_AUTH = env("ES_BASIC_AUTH", default=("elastic", "iHktg66E")) -ES_VERIFY_CERTS = env.bool("ES_VERIFY_CERTS", default=False) +OPENSEARCH_URL = env("OPENSEARCH_URL", default="http://localhost:9200/") +OPENSEARCH_INDEX_NAME = env("OPENSEARCH_INDEX_NAME", default="usage") +OPENSEARCH_API_KEY = env("OPENSEARCH_API_KEY", default="") +OPENSEARCH_BASIC_AUTH = env( + "OPENSEARCH_BASIC_AUTH", + default=("admin", "admin"), +) +OPENSEARCH_VERIFY_CERTS = env.bool( + "OPENSEARCH_VERIFY_CERTS", + default=False, +) + +# Collectors configuration +# ------------------------------------------------------------------------------ +# ArticleMeta +ARTICLEMETA_COLLECT_URL = env( + "ARTICLEMETA_COLLECT_URL", + default="http://articlemeta.scielo.org/api/v1/article/counter_dict", +) +ARTICLEMETA_MAX_RETRIES = env.int("ARTICLEMETA_MAX_RETRIES", default=5) +ARTICLEMETA_SLEEP_TIME = env.int("ARTICLEMETA_SLEEP_TIME", default=30) + +# Dataverse +DATAVERSE_ENDPOINT = env("DATAVERSE_ENDPOINT", default="https://data.scielo.org/api") +DATAVERSE_ROOT_COLLECTION = env("DATAVERSE_ROOT_COLLECTION", default="scielodata") +DATAVERSE_SLEEP_TIME = env.int("DATAVERSE_SLEEP_TIME", default=30) + +# OPAC +OPAC_ENDPOINT = env("OPAC_ENDPOINT", default="https://www.scielo.br/api/v1/counter_dict") +OPAC_MAX_RETRIES = env.int("OPAC_MAX_RETRIES", default=5) +OPAC_SLEEP_TIME = env.int("OPAC_SLEEP_TIME", default=30) + +# Preprints +OAI_PMH_PREPRINT_ENDPOINT = env( + "OAI_PMH_PREPRINT_ENDPOINT", + default="https://preprints.scielo.org/index.php/scielo/oai", +) +OAI_METADATA_PREFIX = env("OAI_METADATA_PREFIX", default="oai_dc") +OAI_PMH_MAX_RETRIES = env.int("OAI_PMH_MAX_RETRIES", default=5) + +# SciELO Books +SCIELO_BOOKS_BASE_URL = env("SCIELO_BOOKS_BASE_URL", default="http://localhost:5984") +SCIELO_BOOKS_TIMEOUT = env.int("SCIELO_BOOKS_TIMEOUT", default=60) +SCIELO_BOOKS_DB_NAME = env("SCIELO_BOOKS_DB_NAME", default="scielobooks_1a") +SCIELO_BOOKS_LIMIT = env.int("SCIELO_BOOKS_LIMIT", default=1000) # Collection size categories # ------------------------------------------------------------------------------ -EXTRA_LARGE_COLLECTIONS = env.list("EXTRA_LARGE_COLLECTIONS", default=["scl"]) -LARGE_COLLECTIONS = env.list("LARGE_COLLECTIONS", default=["chl", "col", "mex"]) -MEDIUM_COLLECTIONS = env.list("MEDIUM_COLLECTIONS", default=["cri", "esp", "psi", "prt", "ven"]) -SMALL_COLLECTIONS = env.list("SMALL_COLLECTIONS", 
default=["arg", "bol", "cub", "data", "ecu", "per", "preprints", "pry", "rve", "spa", "sss", "sza", "ury", "wid"]) - -# Collection size mapping -def _build_collection_size_map(): - """Build mapping of collection acronyms to their size categories.""" - size_map = {} - size_categories = { - "xlarge": EXTRA_LARGE_COLLECTIONS, - "large": LARGE_COLLECTIONS, - "medium": MEDIUM_COLLECTIONS, - "small": SMALL_COLLECTIONS, - } - - for size, collections in size_categories.items(): - for acron3 in collections: - size_map[acron3] = size - - return size_map - -COLLECTION_ACRON3_SIZE_MAP = _build_collection_size_map() +SUPPORTED_LOGFILE_EXTENSIONS = env.list("SUPPORTED_LOGFILE_EXTENSIONS", default=[".log", ".gz", ".zip"]) diff --git a/core/collectors/__init__.py b/core/collectors/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/core/collectors/__init__.py @@ -0,0 +1 @@ + diff --git a/core/collectors/articlemeta.py b/core/collectors/articlemeta.py new file mode 100644 index 0000000..7f6ace0 --- /dev/null +++ b/core/collectors/articlemeta.py @@ -0,0 +1,60 @@ +import logging + +import requests +from django.conf import settings +from articlemeta.client import RestfulClient, ThriftClient +from time import sleep + + +def fetch_article_counter_dict( + from_date, + until_date, + offset=0, + limit=1000, + collection=None, + issn=None, +): + for attempt in range(1, settings.ARTICLEMETA_MAX_RETRIES + 1): + params = { + "from": from_date, + "until": until_date, + "offset": offset, + "limit": limit, + } + + if collection: + params["collection"] = collection + + if issn: + params["issn"] = issn + + response = requests.get(settings.ARTICLEMETA_COLLECT_URL, params=params) + + try: + response.raise_for_status() + logging.info(response.url) + except requests.exceptions.HTTPError: + logging.warning( + "Failed to collect data from %s. 
Waiting %d seconds before retry %d of %d", + response.url, + settings.ARTICLEMETA_SLEEP_TIME, + attempt, + settings.ARTICLEMETA_MAX_RETRIES, + ) + sleep(settings.ARTICLEMETA_SLEEP_TIME) + else: + return response.json() + + return {} + + +def iter_journals(collection="scl", mode="rest"): + if mode == "rest": + client = RestfulClient() + elif mode == "thrift": + client = ThriftClient() + else: + raise ValueError(f"Unsupported ArticleMeta mode: {mode}") + + for journal in client.journals(collection=collection): + yield journal diff --git a/core/collectors/dataverse.py b/core/collectors/dataverse.py new file mode 100644 index 0000000..ca51fd7 --- /dev/null +++ b/core/collectors/dataverse.py @@ -0,0 +1,75 @@ +import logging + +import requests +from django.conf import settings + +from core.utils import standardizer + + +def _request_json(url): + try: + response = requests.get(url, timeout=settings.DATAVERSE_SLEEP_TIME) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as exc: + logging.error("Error fetching %s: %s", url, exc) + return {} + + +def _get_subdataverses(): + url = f"{settings.DATAVERSE_ENDPOINT}/dataverses/{settings.DATAVERSE_ROOT_COLLECTION}/contents" + return _request_json(url).get("data", []) + + +def _get_datasets(subdataverse_id): + url = f"{settings.DATAVERSE_ENDPOINT}/dataverses/{subdataverse_id}/contents" + return _request_json(url).get("data", []) + + +def _get_files(dataset_id): + url = f"{settings.DATAVERSE_ENDPOINT}/datasets/{dataset_id}/versions/:latest/files" + return _request_json(url).get("data", []) + + +def iter_dataset_metadata(from_date=None, until_date=None): + for subdataverse in _get_subdataverses(): + if subdataverse.get("type") != "dataverse": + continue + + subdataverse_id = subdataverse["id"] + subdataverse_title = subdataverse["title"] + + for dataset in _get_datasets(subdataverse_id): + if dataset.get("type") != "dataset": + continue + + dataset_id = dataset["id"] + doi = standardizer.standardize_doi(dataset.get("persistentUrl")) + if not doi: + logging.warning("Dataset %s does not have a DOI.", dataset_id) + continue + + publication_date = dataset.get("publicationDate") + if publication_date: + if (from_date and publication_date < from_date) or ( + until_date and publication_date > until_date + ): + continue + + for file_data in _get_files(dataset_id): + file_persistent_id = file_data["dataFile"].get("persistentId") + standardized_persistent_id = ( + standardizer.standardize_pid_generic(file_persistent_id) + if file_persistent_id + else None + ) + + yield { + "title": subdataverse_title, + "dataset_doi": doi, + "dataset_published": publication_date, + "file_id": file_data["dataFile"]["id"], + "file_name": file_data["label"], + "file_url": f"{settings.DATAVERSE_ENDPOINT}/access/datafile/{file_data['dataFile']['id']}", + "file_persistent_id": standardized_persistent_id, + } diff --git a/core/collectors/opac.py b/core/collectors/opac.py new file mode 100644 index 0000000..94122b7 --- /dev/null +++ b/core/collectors/opac.py @@ -0,0 +1,33 @@ +import logging + +import requests +from django.conf import settings +from time import sleep + + +def fetch_counter_dict(from_date, until_date, page=1): + for attempt in range(1, settings.OPAC_MAX_RETRIES + 1): + params = { + "begin_date": from_date, + "end_date": until_date, + "page": page, + } + + response = requests.get(url=settings.OPAC_ENDPOINT, params=params, verify=False) + + try: + response.raise_for_status() + logging.info(response.url) + except 
requests.exceptions.HTTPError: + logging.warning( + "Could not collect data from %s. Waiting %d seconds for attempt %d of %d", + response.url, + settings.OPAC_SLEEP_TIME, + attempt, + settings.OPAC_MAX_RETRIES, + ) + sleep(settings.OPAC_SLEEP_TIME) + else: + return response.json() + + return {} diff --git a/core/collectors/preprints.py b/core/collectors/preprints.py new file mode 100644 index 0000000..bead72c --- /dev/null +++ b/core/collectors/preprints.py @@ -0,0 +1,55 @@ +from django.conf import settings +from sickle import Sickle + +from core.utils import standardizer + + +def iter_records(from_date, until_date): + oai_client = Sickle( + endpoint=settings.OAI_PMH_PREPRINT_ENDPOINT, + max_retries=settings.OAI_PMH_MAX_RETRIES, + verify=False, + ) + records = oai_client.ListRecords( + **{ + "metadataPrefix": settings.OAI_METADATA_PREFIX, + "from": from_date, + "until": until_date, + } + ) + + for record in records: + yield record + + +def extract_record_data(record): + pid_generic = _extract_compatible_identifier(record.header.identifier) + text_langs = [ + standardizer.standardize_language_code(language) + for language in record.metadata.get("language", []) + ] + publication_date = record.metadata.get("date", [""])[0] + default_language = text_langs[0] if text_langs else "" + publication_year = _extract_publication_year_from_date(publication_date) + + return { + "pid_generic": pid_generic, + "text_langs": text_langs, + "publication_date": publication_date, + "default_language": default_language, + "publication_year": publication_year, + } + + +def _extract_compatible_identifier(identifier): + try: + return identifier.split(":")[-1].split("/")[1] + except IndexError: + return "" + + +def _extract_publication_year_from_date(date_str): + try: + return date_str[:4] + except IndexError: + return "" diff --git a/core/collectors/scielo_books.py b/core/collectors/scielo_books.py new file mode 100644 index 0000000..b1f2dd8 --- /dev/null +++ b/core/collectors/scielo_books.py @@ -0,0 +1,182 @@ +import logging + +import requests +from django.conf import settings +from urllib.parse import urlencode + + + + +def build_url(base_url, params=None): + if not params: + return base_url + return f"{base_url}?{urlencode(params, doseq=True)}" + + +def sanitize_raw_data(payload): + if not isinstance(payload, dict): + return payload + + if "_id" not in payload: + return payload + + sanitized = dict(payload) + sanitized["id"] = sanitized.pop("_id") + return sanitized + + +def fetch_document(doc_id, base_url=None, db_name=None, headers=None): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + resolved_base_url = base_url or settings.SCIELO_BOOKS_BASE_URL + if not resolved_base_url: + logging.error("Sem base url definida para coleta de books") + raise ValueError("SCIELO_BOOKS_BASE_URL is not configured") + + url = f"{resolved_base_url}/{db_name}/{doc_id}" + response = requests.get(url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False) + response.raise_for_status() + payload = response.json() + return sanitize_raw_data(payload), url + + +def fetch_changes_page( + base_url=None, + db_name=None, + since=0, + limit=None, + include_docs=False, + headers=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + resolved_base_url = base_url or settings.SCIELO_BOOKS_BASE_URL + if not resolved_base_url: + logging.error("Sem base url definida para coleta de books") + raise ValueError("SCIELO_BOOKS_BASE_URL is not configured") + + params = { + 
"since": since, + "limit": limit, + } + if include_docs: + params["include_docs"] = "true" + + url = build_url(f"{resolved_base_url}/{db_name}/_changes", params) + response = requests.get(url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False) + response.raise_for_status() + payload = response.json() + return payload if isinstance(payload, dict) else {} + + +def extract_changes(payload): + if isinstance(payload, dict) and isinstance(payload.get("results"), list): + return payload.get("results") + return [] + + +def extract_last_seq(payload): + if isinstance(payload, dict): + return payload.get("last_seq") or payload.get("seq") + return None + + +def iter_changes( + base_url=None, + db_name=None, + since=0, + limit=None, + headers=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + current_since = since or 0 + + while True: + payload = fetch_changes_page( + base_url=base_url, + db_name=db_name, + since=current_since, + limit=limit, + include_docs=False, + headers=headers, + ) + changes = extract_changes(payload) + if not changes: + break + + for change in changes: + yield change + + last_seq = extract_last_seq(payload) + if last_seq is None or last_seq == current_since: + break + current_since = last_seq + + +def iter_change_documents( + base_url=None, + db_name=None, + since=0, + limit=None, + headers=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + current_since = since or 0 + + while True: + payload = fetch_changes_page( + base_url=base_url, + db_name=db_name, + since=current_since, + limit=limit, + include_docs=True, + headers=headers, + ) + changes = extract_changes(payload) + if not changes: + break + + for change in changes: + doc_id = change.get("id") + if not doc_id: + continue + + deleted = bool(change.get("deleted")) + raw_doc = change.get("doc") or {} + if deleted: + yield { + "change": change, + "deleted": True, + "payload": None, + "source_url": None, + } + continue + + if raw_doc: + sanitized = sanitize_raw_data(raw_doc) + yield { + "change": change, + "deleted": False, + "payload": sanitized, + "source_url": f"{(base_url or settings.SCIELO_BOOKS_BASE_URL)}/{db_name}/{doc_id}", + } + continue + + document_payload, source_url = fetch_document( + doc_id=doc_id, + base_url=base_url, + db_name=db_name, + headers=headers, + ) + yield { + "change": change, + "deleted": False, + "payload": document_payload, + "source_url": source_url, + } + + last_seq = extract_last_seq(payload) + if last_seq is None or last_seq == current_since: + break + current_since = last_seq diff --git a/core/models.py b/core/models.py index 1aeab73..2a4ecbf 100644 --- a/core/models.py +++ b/core/models.py @@ -11,7 +11,7 @@ from wagtailautocomplete.edit_handlers import AutocompletePanel from . 
import choices -from .utils.utils import language_iso +from .utils.standardizer import language_iso User = get_user_model() diff --git a/article/__init__.py b/core/tests/__init__.py similarity index 100% rename from article/__init__.py rename to core/tests/__init__.py diff --git a/core/tests/tests_collectors.py b/core/tests/tests_collectors.py new file mode 100644 index 0000000..6d13a7c --- /dev/null +++ b/core/tests/tests_collectors.py @@ -0,0 +1,55 @@ +import unittest +from unittest.mock import patch + +from core.collectors import scielo_books + + +class SciELOBooksCollectorTests(unittest.TestCase): + def test_build_url_appends_query_params(self): + url = scielo_books.build_url( + "https://books.example/_changes", + {"since": 10, "limit": 100}, + ) + + self.assertEqual(url, "https://books.example/_changes?since=10&limit=100") + + def test_sanitize_raw_data_renames__id(self): + payload = {"_id": "abc123", "TYPE": "Monograph"} + + sanitized = scielo_books.sanitize_raw_data(payload) + + self.assertEqual(sanitized["id"], "abc123") + self.assertNotIn("_id", sanitized) + self.assertEqual(sanitized["TYPE"], "Monograph") + + def test_extract_last_seq_accepts_both_couch_formats(self): + self.assertEqual(scielo_books.extract_last_seq({"last_seq": 123}), 123) + self.assertEqual(scielo_books.extract_last_seq({"seq": 456}), 456) + + @patch("core.collectors.scielo_books.fetch_document") + @patch("core.collectors.scielo_books.fetch_changes_page") + def test_iter_change_documents_uses_docs_from_changes_payload(self, mock_fetch_changes_page, mock_fetch_document): + mock_fetch_changes_page.side_effect = [ + { + "results": [ + { + "seq": 10, + "id": "book1", + "doc": {"_id": "book1", "TYPE": "Monograph", "title": "Book One"}, + } + ], + "last_seq": 10, + }, + {"results": [], "last_seq": 10}, + ] + + results = list(scielo_books.iter_change_documents(base_url="https://books.example", db_name="scielobooks_1a")) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["payload"]["id"], "book1") + self.assertEqual(results[0]["payload"]["TYPE"], "Monograph") + mock_fetch_document.assert_not_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/core/tests_date_utils.py b/core/tests/tests_date_utils.py similarity index 87% rename from core/tests_date_utils.py rename to core/tests/tests_date_utils.py index 9f2b657..8d4f9b6 100644 --- a/core/tests_date_utils.py +++ b/core/tests/tests_date_utils.py @@ -86,7 +86,15 @@ def test_extract_minute_second_key(self): key = extract_minute_second_key(dt) self.assertEqual(key, '30:45') + def test_extract_minute_second_key_returns_none_for_invalid_datetime(self): + self.assertIsNone(extract_minute_second_key(None)) + self.assertIsNone(extract_minute_second_key("invalid-date")) + def test_truncate_datetime_to_hour(self): dt = datetime(2023, 3, 15, 14, 30, 45) truncated = truncate_datetime_to_hour(dt) self.assertEqual(truncated, datetime(2023, 3, 15, 14, 0, 0)) + + def test_truncate_datetime_to_hour_returns_none_for_invalid_datetime(self): + self.assertIsNone(truncate_datetime_to_hour(None)) + self.assertIsNone(truncate_datetime_to_hour("invalid-date")) diff --git a/core/tests_standardizer.py b/core/tests_standardizer.py deleted file mode 100644 index a50ff87..0000000 --- a/core/tests_standardizer.py +++ /dev/null @@ -1,201 +0,0 @@ -from django.test import TestCase - -from core.utils import standardizer - - -class StandardizerStandardizeCodeAndNameTest(TestCase): - - def test_standardize_code_and_name_returns_both(self): - expected = [{"code": "CE", 
"name": "Ceará"}] - text = "Ceará / CE" - result = standardizer.standardize_code_and_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertDictEqual(expected[i], item) - - def test_standardize_code_and_name_returns_acronym(self): - expected = [{"code": "CE", }] - text = "CE" - result = standardizer.standardize_code_and_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertDictEqual(expected[i], item) - - def test_standardize_code_and_name_returns_name(self): - expected = [{"name": "Ceará"}] - text = "Ceará" - result = standardizer.standardize_code_and_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertDictEqual(expected[i], item) - - def test_standardize_code_and_name_returns_more_than_one_both(self): - expected = [{"code": "CE", "name": "Ceará"}, - {"code": "SP", "name": "São Paulo"}] - text = "Ceará / CE, São Paulo / SP" - result = standardizer.standardize_code_and_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertDictEqual(expected[i], item) - - def test_standardize_code_and_name_returns_more_than_one_acronym(self): - expected = [{"code": "CE", }, {"code": "SP", }] - text = "CE / SP" - result = standardizer.standardize_code_and_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertDictEqual(expected[i], item) - - def test_standardize_code_and_name_returns_more_than_one_name(self): - expected = [{"name": "Ceará"}, {"name": "São Paulo"}] - text = "Ceará - São Paulo" - result = standardizer.standardize_code_and_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertDictEqual(expected[i], item) - - -class StandardizerStandardizeNameTest(TestCase): - - def test_standardize_name(self): - expected = ["Txto 1", "Texto 2", "Texto 3"] - text = "Txto 1, Texto 2, Texto 3" - result = standardizer.standardize_name(text) - for i, item in enumerate(result): - with self.subTest(i): - self.assertEqual({"name": expected[i]}, item) - - -class StandardizerStandardizeLanguageCode(TestCase): - def test_standardize_language_code_en_us_is_valid(self): - language_code = 'en-US' - standardized = standardizer.standardize_language_code(language_code) - self.assertEqual(standardized, 'en') - - def test_standardize_language_code_esp_is_valid(self): - language_code = 'esp' - standardized = standardizer.standardize_language_code(language_code) - self.assertEqual(standardized, 'es') - - def test_standardize_language_code_pt_br_is_valid(self): - language_code = 'pt-BR' - standardized = standardizer.standardize_language_code(language_code) - self.assertEqual(standardized, 'pt') - - def test_standardize_language_code_es_is_valid(self): - language_code = 'spa' - standardized = standardizer.standardize_language_code(language_code) - self.assertEqual(standardized, 'es') - - def test_standardize_language_code_en_gb_is_valid(self): - language_code = 'en-GB' - standardized = standardizer.standardize_language_code(language_code) - self.assertEqual(standardized, 'en') - - -class StandardizerStandardizePIDV3(TestCase): - def test_standardize_pid_v3_is_valid(self): - pid_v3 = 'jGJccQ7bFdbz6wy3nfXGVdv' - standardized = standardizer.standardize_pid_v3(pid_v3) - self.assertEqual(standardized, 'jGJccQ7bFdbz6wy3nfXGVdv') - - -class StandardizerStandardizePIDV2(TestCase): - def test_standardize_pid_v2_is_valid(self): - pid_v2 = 'S0102-67202020000100001' - standardized = standardizer.standardize_pid_v2(pid_v2) - self.assertEqual(standardized, 
'S0102-67202020000100001') - - -class StandardizerStandardizeDOI(TestCase): - def test_standardize_doi_is_valid(self): - doi = '10.1590/S0102-67202020000100001' - standardized = standardizer.standardize_doi(doi) - self.assertEqual(standardized, '10.1590/S0102-67202020000100001') - - def test_standardize_doi_is_valid_with_doi_prefix(self): - doi = 'doi:10.1590/S0102-67202020000100001' - standardized = standardizer.standardize_doi(doi) - self.assertEqual(standardized, '10.1590/S0102-67202020000100001') - - def test_standardize_doi_is_valid_with_http_prefix(self): - doi = 'http://doi.org/10.1590/S0102-67202020000100001' - standardized = standardizer.standardize_doi(doi) - self.assertEqual(standardized, '10.1590/S0102-67202020000100001') - - def test_standardize_doi_is_valid_with_https_prefix(self): - doi = 'https://doi.org/10.1590/S0102-67202020000100001' - standardized = standardizer.standardize_doi(doi) - self.assertEqual(standardized, '10.1590/S0102-67202020000100001') - - def test_standardize_doi_is_valid_with_doi_prefix_and_http_prefix(self): - doi = 'doi:http://doi.org/10.1590/S0102-67202020000100001' - standardized = standardizer.standardize_doi(doi) - self.assertEqual(standardized, '10.1590/S0102-67202020000100001') - - def test_standardize_doi_is_valid_with_doi_prefix_and_https_prefix(self): - doi = 'doi:https://doi.org/10.1590/S0102-67202020000100001' - standardized = standardizer.standardize_doi(doi) - self.assertEqual(standardized, '10.1590/S0102-67202020000100001') - - -class TestStandardizeYearOfPublication(TestCase): - def test_standardize_year_of_publication_four_digit_year(self): - """Test that a four-digit year is returned as-is""" - year = "2023" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, "2023") - - def test_standardize_year_of_publication_integer_year(self): - """Test that an integer year is converted to string""" - year = 2023 - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, "2023") - - def test_standardize_year_of_publication_year_range(self): - """Test that a year range returns the first year""" - year = "2020-2023" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, "2020") - - def test_standardize_year_of_publication_year_with_slash(self): - """Test that a year with slash returns the first year""" - year = "2020/2023" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, "2020") - - def test_standardize_year_of_publication_year_with_extra_text(self): - """Test that year with extra text extracts the year""" - year = "Published in 2023" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, "") - - def test_standardize_year_of_publication_invalid_year(self): - """Test that invalid year returns None or empty string""" - year = "invalid" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, '') - - def test_standardize_year_of_publication_empty_string(self): - """Test that empty string returns None or empty string""" - year = "" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, '') - - def test_standardize_year_of_publication_none_input(self): - """Test that None input returns None""" - year = None - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, '') - - def test_standardize_year_of_publication_two_digit_year(self): - """Test that two-digit year is converted to 
four-digit year""" - year = "23" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, '') - - def test_standardize_year_of_publication_year_with_parentheses(self): - """Test that year in parentheses is extracted""" - year = "(2023)" - result = standardizer.standardize_year_of_publication(year) - self.assertEqual(result, '') diff --git a/metrics/utils/file_utils.py b/core/utils/csv_utils.py similarity index 100% rename from metrics/utils/file_utils.py rename to core/utils/csv_utils.py diff --git a/core/utils/date_utils.py b/core/utils/date_utils.py index 026d434..f20ffea 100644 --- a/core/utils/date_utils.py +++ b/core/utils/date_utils.py @@ -29,7 +29,7 @@ def get_date_obj(date_str: str, format: str = "%Y-%m-%d") -> datetime.date: try: return datetime.strptime(date_str, format).date() except (ValueError, TypeError): - ... + return None def get_date_range_str(from_date_str: str = None, until_date_str: str = None, days_to_go_back: int = None) -> tuple[str, str]: @@ -99,12 +99,9 @@ def truncate_datetime_to_hour(dt): Returns: datetime: The truncated datetime object. """ - if isinstance(dt, str): - try: - dt = datetime.strptime(dt, "%Y-%m-%d %H:%M:%S") - except ValueError: - logging.error("Invalid datetime string format. Expected '%Y-%m-%d %H:%M:%S'.") - return None + dt = _coerce_datetime(dt) + if dt is None: + return None return dt.replace(minute=0, second=0, microsecond=0) @@ -119,11 +116,23 @@ def extract_minute_second_key(dt): Returns: str: A string in the format "MM:SS" representing the minute and second. """ + dt = _coerce_datetime(dt) + if dt is None: + return None + + return f"{dt.minute:02}:{dt.second:02}" + + +def _coerce_datetime(dt): + if isinstance(dt, datetime): + return dt + if isinstance(dt, str): try: - dt = datetime.strptime(dt, "%Y-%m-%d %H:%M:%S") + return datetime.strptime(dt, "%Y-%m-%d %H:%M:%S") except ValueError: logging.error("Invalid datetime string format. Expected '%Y-%m-%d %H:%M:%S'.") return None - return f"{dt.minute:02}:{dt.second:02}" + logging.error("Invalid datetime value: %r.", dt) + return None diff --git a/core/utils/utils.py b/core/utils/request_utils.py similarity index 90% rename from core/utils/utils.py rename to core/utils/request_utils.py index 0397338..c4fbec6 100644 --- a/core/utils/utils.py +++ b/core/utils/request_utils.py @@ -1,8 +1,6 @@ import logging -import re import requests -from langcodes import standardize_tag, tag_is_valid from tenacity import ( retry, retry_if_exception_type, @@ -17,13 +15,6 @@ User = get_user_model() -def language_iso(code): - code = re.split(r"-|_", code)[0] if code else "" - if tag_is_valid(code): - return standardize_tag(code) - return "" - - class RetryableError(Exception): """Recoverable error without having to modify the data state on the client side, e.g. timeouts, errors from network partitioning, etc. 
@@ -92,4 +83,4 @@ def _get_user(request, username=None, user_id=None): if user_id: return User.objects.get(pk=user_id) if username: - return User.objects.get(username=username) \ No newline at end of file + return User.objects.get(username=username) diff --git a/core/utils/standardizer.py b/core/utils/standardizer.py index 27b5cba..c228bf5 100644 --- a/core/utils/standardizer.py +++ b/core/utils/standardizer.py @@ -1,247 +1,77 @@ -import langcodes import re - -ITEMS_SEP_FOR_LOCATION = [";", ", ", "|", "/"] -PARTS_SEP_FOR_LOCATION = [" - ", "- ", " -", ", ", "(", "/"] - -ITEMS_SEP_FOR_CITY = [",", "|"] -PARTS_SEP_FOR_CITY = [] - - -def remove_extra_spaces(text): - text = text and text.strip() - if not text: - return text - # padroniza a quantidade de espaços - return " ".join([item.strip() for item in text.split() if item.strip()]) - - -def standardize_code_and_name(original): - """ - Dado o texto original, identifica pares de code e nome. - Os separadores podem separar code e nome e/ou itens de lista. - Ex.: USP / Unicamp - São Paulo/SP, Rio de Janeiro/RJ - """ - text_ = original - text_ = text_ and text_.strip() - if not text_: - return [] - - text_ = remove_extra_spaces(text_) - if not text_: - yield {"name": None} - return - - items_separators = ITEMS_SEP_FOR_LOCATION - parts_separators = PARTS_SEP_FOR_LOCATION - - PARTBR = "~PARTBR~" - LINEBR = "~LINEBR~" - for sep in items_separators: - text_ = text_.replace(sep, PARTBR) - for sep in parts_separators: - text_ = text_.replace(sep, PARTBR) - - codes = [] - names = [] - for item in text_.split(PARTBR): - item = item.strip() - if not item: - continue - if len(item) == 2: - codes.append(item) - else: - names.append(item) - - if len(names) == len(codes): - for acron, name in zip(codes, names): - yield {"code": acron, "name": name} - elif len(names) == 0: - for acron in codes: - yield {"code": acron} - elif len(codes) == 0: - for name in names: - yield {"name": name} - else: - # como o texto está bem fora do padrão, - # pode-se evidenciar retornando o original - yield {"name": original} - - -def standardize_name(original): - original = original and original.strip() - if not original: - return - - items_separators = ITEMS_SEP_FOR_CITY - - LINEBR = "~LINEBR~" - - text_ = original - text_ = remove_extra_spaces(text_) - - for sep in items_separators: - text_ = text_.replace(sep, LINEBR) - - for row in text_.split(LINEBR): - row = row and row.strip() - if not row: - continue - yield {"name": row} +import langcodes def standardize_language_code(language_code: str, threshold=0.75): - """ - Standardizes a media language using langcodes library. - - Parameters: - media_language (str): The media language to be standardized. - threshold (float): The minimum score for a language to be considered valid. Default is 0.75. - - Returns: - str: The standardized media language or None if the input is not a valid language tag. 
- """ - if not language_code: - return 'un' - - if langcodes.tag_is_valid(language_code): - return langcodes.standardize_tag(language_code).split('-')[0] - - # Handle special cases - if language_code.lower() == 'esp': - return 'es' - - inferred_lang, score = langcodes.best_match(language_code, langcodes.LANGUAGE_ALPHA3.keys()) - - if score >= threshold: - return langcodes.standardize_tag(inferred_lang).split('-')[0] - - # Handle unknown languages - return 'un' + language_code = str(language_code).strip().strip("'\"") + lang = langcodes.get(language_code) + try: + parts = str(lang).split("-") + except Exception: + return "un" + return parts[0] def standardize_pid_v2(pid_v2): - """ - Standardizes a PID v2. - - Parameters: - pid_v2 (str): The PID v2 to be standardized. - - Returns: - str: The standardized PID v2 or an empty string if the input is not a valid PID v2. - """ - if not pid_v2 or not pid_v2.lower().startswith('s') or len(pid_v2) < 23: - return '' - + if not pid_v2 or not pid_v2.lower().startswith("s") or len(pid_v2) < 23: + return "" + if len(pid_v2) == 23: return pid_v2[0].upper() + pid_v2[1:] - + if len(pid_v2) > 23: return pid_v2[0].upper() + pid_v2[1:23] - - if len(pid_v2) < 23: - return '' + return "" -def standardize_pid_v3(pid_v3): - """ - Standardizes a PID v3 using langcodes library." - - Parameters: - pid_v3 (str): The PID v3 to be standardized. - Returns: - str: The standardized PID v3 or an empty string if the input is not a valid PID v3. - """ - - if not pid_v3: - return '' - - if len(pid_v3) == 23: - return pid_v3 - - if len(pid_v3) > 23: - return pid_v3[:23] - - if len(pid_v3) < 23: - return '' +def standardize_pid_v3(pid_v3): + return str(pid_v3 or "") def standardize_doi(text): - """" - Standardizes a DOI. - - Parameters: - text (str): The DOI to be standardized. - - Returns: - str: The standardized DOI - """ - PATTERNS_DOI = [re.compile(pd) for pd in [ - r'10.\d{4,9}/[-._;()/:A-Z0-9]+$', - r'10.1002/[^\s]+$', - r'10.\d{4}/\d+-\d+X?(\d+)\d+<[\d\w]+:[\d\w]*>\d+.\d+.\w+;\d$', - r'10.1207/[\w\d]+\&\d+_\d+$', - r'10.\d{4,9}/[-._;()/:a-zA-Z0-9]*'] + text = (text or "").strip() + if not text: + return "" + + doi_prefixes = [ + "https://doi.org/", + "http://doi.org/", + "https://dx.doi.org/", + "http://dx.doi.org/", + "doi.org/", + "dx.doi.org/", + "doi:", ] - matched_doi = False - - for pattern_doi in PATTERNS_DOI: - matched_doi = pattern_doi.search(text) - if matched_doi: + for prefix in doi_prefixes: + if text.lower().startswith(prefix): + text = text[len(prefix):] break - if not matched_doi: - return - - return matched_doi.group().upper() + if text.lower().startswith("10."): + return text + + return "" def standardize_pid_generic(pid_generic): - """ - Standardizes a PID." - - Parameters: - pid_generic (str): The PID to be standardized. - - Returns: - str: The standardized PID or an empty string if the input is not a valid PID. - """ - - if not pid_generic: - return '' - - pid_generic_based_on_doi = standardize_doi(pid_generic) - if pid_generic_based_on_doi: - return pid_generic_based_on_doi - - return pid_generic.strip().upper() + value = str(pid_generic or "").strip().upper() + value = re.sub(r"\s+", "", value) + value = value.rstrip(".,;:") + return value or "" def standardize_year_of_publication(year_of_publication): - """ - Standardizes a year of publication. - - Parameters: - year_of_publication (str): The year of publication to be standardized. - - Returns: - str: The standardized year of publication or an empty string if the input is not a valid year. 
- """ - if not year_of_publication: - return '' - - # Truncate to 4 characters if longer - if isinstance(year_of_publication, str) and len(year_of_publication) > 4: - year_of_publication = year_of_publication[:4] - - try: - year = int(year_of_publication) - if 1500 <= year <= 2100: - return str(year) - except ValueError: - pass - - return '' + value = str(year_of_publication or "").strip() + if not value: + return "" + match = re.match(r"(\d{4})", value) + return match.group(1) if match else "" + + +def language_iso(code): + code = re.split(r"-|_", code)[0] if code else "" + if langcodes.tag_is_valid(code): + return langcodes.standardize_tag(code) + return "" diff --git a/django_celery_beat/views.py b/django_celery_beat/views.py index 3a4ddb0..b5cff84 100644 --- a/django_celery_beat/views.py +++ b/django_celery_beat/views.py @@ -21,6 +21,13 @@ def task_run(request): task = current_app.tasks.get(p_task.task) + if task is None: + messages.error( + request, + _("Task '{0}' not found in the Celery registry.").format(p_task.task), + ) + return redirect(request.META.get("HTTP_REFERER")) + kwargs = json.loads(p_task.kwargs) kwargs["user_id"] = request.user.id diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 6957700..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = ./_build -APP = /app - -.PHONY: help livehtml apidocs Makefile - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -c . - -# Build, watch and serve docs with live reload -livehtml: - sphinx-autobuild -b html --host 0.0.0.0 --port 9000 --watch $(APP) -c . $(SOURCEDIR) $(BUILDDIR)/html - -# Outputs rst files from django application code -apidocs: - sphinx-apidoc -o $(SOURCEDIR)/api $(APP) - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -c . diff --git a/docs/__init__.py b/docs/__init__.py deleted file mode 100644 index 8772c82..0000000 --- a/docs/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Included so that Django's startproject comment runs against the docs directory diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 51cd921..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,64 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
- -import os -import sys - -import django - -if os.getenv("READTHEDOCS", default=False) == "True": - sys.path.insert(0, os.path.abspath("..")) - os.environ["DJANGO_READ_DOT_ENV_FILE"] = "True" - os.environ["USE_DOCKER"] = "no" -else: - sys.path.insert(0, os.path.abspath("/app")) -os.environ["DATABASE_URL"] = "sqlite:///readthedocs.db" -os.environ["CELERY_BROKER_URL"] = os.getenv("REDIS_URL", "redis://redis:6379") -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local") -django.setup() - -# -- Project information ----------------------------------------------------- - -project = "SciELO Core" -copyright = """2022, SciELO""" -author = "SciELO" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", -] - -# Add any paths that contain templates here, relative to this directory. -# templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "alabaster" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ["_static"] diff --git a/docs/howto.rst b/docs/howto.rst deleted file mode 100644 index 9fae300..0000000 --- a/docs/howto.rst +++ /dev/null @@ -1,38 +0,0 @@ -How To - Project Documentation -====================================================================== - -Get Started ----------------------------------------------------------------------- - -Documentation can be written as rst files in `core/docs`. - - -To build and serve docs, use the commands:: - - docker compose -f local.yml up docs - - - -Changes to files in `docs/_source` will be picked up and reloaded automatically. - -`Sphinx `_ is the tool used to build documentation. - -Docstrings to Documentation ----------------------------------------------------------------------- - -The sphinx extension `apidoc `_ is used to automatically document code using signatures and docstrings. - -Numpy or Google style docstrings will be picked up from project files and availble for documentation. See the `Napoleon `_ extension for details. - -For an in-use example, see the `page source <_sources/users.rst.txt>`_ for :ref:`users`. - -To compile all docstrings automatically into documentation source files, use the command: - :: - - make apidocs - - -This can be done in the docker container: - :: - - docker run --rm docs make apidocs diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index b6c6ded..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. SciELO Content Manager documentation master file, created by - sphinx-quickstart. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to SciELO Core's documentation! 
-====================================================================== - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - howto - users - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 4f70eed..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,46 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -c . -) -set SOURCEDIR=_source -set BUILDDIR=_build -set APP=..\core - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.Install sphinx-autobuild for live serving. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -b %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:livehtml -sphinx-autobuild -b html --open-browser -p 9000 --watch %APP% -c . %SOURCEDIR% %BUILDDIR%/html -GOTO :EOF - -:apidocs -sphinx-apidoc -o %SOURCEDIR%/api %APP% -GOTO :EOF - -:help -%SPHINXBUILD% -b help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/users.rst b/docs/users.rst deleted file mode 100644 index 21e08aa..0000000 --- a/docs/users.rst +++ /dev/null @@ -1,15 +0,0 @@ - .. _users: - -Users -====================================================================== - -Starting a new project, it’s highly recommended to set up a custom user model, -even if the default User model is sufficient for you. - -This model behaves identically to the default user model, -but you’ll be able to customize it in the future if the need arises. - -.. 
automodule:: core.users.models - :members: - :noindex: - diff --git a/document/__init__.py b/document/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/document/__init__.py @@ -0,0 +1 @@ + diff --git a/journal/apps.py b/document/apps.py similarity index 62% rename from journal/apps.py rename to document/apps.py index e10a171..eb482d2 100644 --- a/journal/apps.py +++ b/document/apps.py @@ -1,6 +1,6 @@ from django.apps import AppConfig -class JournalConfig(AppConfig): +class DocumentConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" - name = "journal" + name = "document" diff --git a/document/management/__init__.py b/document/management/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/document/management/__init__.py @@ -0,0 +1 @@ + diff --git a/document/management/commands/__init__.py b/document/management/commands/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/document/management/commands/__init__.py @@ -0,0 +1 @@ + diff --git a/document/management/commands/load_articles_by_year.py b/document/management/commands/load_articles_by_year.py new file mode 100644 index 0000000..a922456 --- /dev/null +++ b/document/management/commands/load_articles_by_year.py @@ -0,0 +1,80 @@ +from django.core.management.base import BaseCommand + +from document.tasks import task_load_documents_from_article_meta +from document.tasks import task_load_documents_from_opac + + +class Command(BaseCommand): + help = "Generate task requests for loading document data by year" + + def add_arguments(self, parser): + parser.add_argument( + "--start-year", + type=int, + default=1990, + help="Start year (default: 1990)", + ) + parser.add_argument( + "--end-year", + type=int, + default=2025, + help="End year (default: 2025)", + ) + parser.add_argument( + "--collection", + type=str, + default="scl", + help="Collection code (default: scl)", + ) + parser.add_argument( + "--task", + choices=["load_documents_from_opac", "load_documents_from_article_meta"], + default="load_documents_from_opac", + help="Task to execute (default: load_documents_from_opac)", + ) + + def handle(self, *args, **options): + start_year = options["start_year"] + end_year = options["end_year"] + collection = options["collection"] + + self.stdout.write( + self.style.SUCCESS( + f"Generating task requests from {start_year} to {end_year} for collection: {collection}" + ) + ) + + total_tasks = 0 + + for year in range(start_year, end_year + 1): + from_date = f"{year}-01-01" + until_date = f"{year}-12-31" + + self.stdout.write(f"Queuing task for year {year}...") + + if options["task"] == "load_documents_from_article_meta": + task_result = task_load_documents_from_article_meta.delay( + from_date=from_date, + until_date=until_date, + collection=collection, + ) + else: + task_result = task_load_documents_from_opac.delay( + from_date=from_date, + until_date=until_date, + collection=collection, + ) + + total_tasks += 1 + + self.stdout.write( + self.style.SUCCESS( + f"✓ Task queued for year {year}: {from_date} to {until_date} (Task ID: {task_result.id})" + ) + ) + + self.stdout.write( + self.style.SUCCESS( + f"\nCompleted! {total_tasks} tasks have been queued successfully." 
+ ) + ) diff --git a/document/migrations/0001_initial.py b/document/migrations/0001_initial.py new file mode 100644 index 0000000..bff11be --- /dev/null +++ b/document/migrations/0001_initial.py @@ -0,0 +1,279 @@ +# Generated by Django 5.0.7 on 2026-03-15 00:00 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("collection", "0001_initial"), + ("source", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="Document", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, + verbose_name="Creation date", + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, + verbose_name="Last update date", + ), + ), + ( + "document_type", + models.CharField( + choices=[ + ("article", "Article"), + ("preprint", "Preprint"), + ("dataset", "Dataset"), + ("book", "Book"), + ("chapter", "Chapter"), + ("other", "Other"), + ], + db_index=True, + max_length=32, + verbose_name="Document Type", + ), + ), + ( + "document_id", + models.CharField( + db_index=True, + max_length=255, + verbose_name="Document ID", + ), + ), + ( + "scielo_issn", + models.CharField( + blank=True, + db_index=True, + max_length=9, + null=True, + verbose_name="SciELO ISSN", + ), + ), + ( + "pid_v2", + models.CharField( + blank=True, + db_index=True, + max_length=23, + null=True, + verbose_name="PID V2", + ), + ), + ( + "pid_v3", + models.CharField( + blank=True, + db_index=True, + max_length=23, + null=True, + verbose_name="PID V3", + ), + ), + ( + "pid_generic", + models.CharField( + blank=True, + db_index=True, + max_length=255, + null=True, + verbose_name="PID Generic", + ), + ), + ( + "title", + models.CharField( + blank=True, + max_length=500, + null=True, + verbose_name="Document Title", + ), + ), + ( + "identifiers", + models.JSONField( + blank=True, + default=dict, + null=True, + verbose_name="Identifiers", + ), + ), + ( + "files", + models.JSONField( + blank=True, + default=dict, + null=True, + verbose_name="Files", + ), + ), + ( + "default_lang", + models.CharField( + blank=True, + max_length=8, + null=True, + verbose_name="Default Language", + ), + ), + ( + "text_langs", + models.JSONField( + blank=True, + default=list, + null=True, + verbose_name="Text Languages", + ), + ), + ( + "default_media_format", + models.CharField( + blank=True, + max_length=32, + null=True, + verbose_name="Default Media Format", + ), + ), + ( + "processing_date", + models.CharField( + blank=True, + max_length=32, + null=True, + verbose_name="Processing Date", + ), + ), + ( + "publication_date", + models.CharField( + blank=True, + max_length=32, + null=True, + verbose_name="Publication Date", + ), + ), + ( + "publication_year", + models.CharField( + blank=True, + db_index=True, + max_length=4, + null=True, + verbose_name="Publication Year", + ), + ), + ( + "extra_data", + models.JSONField( + blank=True, + default=dict, + null=True, + verbose_name="Extra Data", + ), + ), + ( + "collection", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="collection.collection", + verbose_name="Collection", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + 
to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "parent_document", + models.ForeignKey( + blank=True, + db_index=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="child_documents", + to="document.document", + verbose_name="Parent Document", + ), + ), + ( + "source", + models.ForeignKey( + blank=True, + db_index=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="documents", + to="source.source", + verbose_name="Source", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "verbose_name": "Document", + "verbose_name_plural": "Documents", + "unique_together": {("collection", "document_type", "document_id")}, + "indexes": [ + models.Index( + fields=["collection", "document_type"], + name="document_collection_type_idx", + ), + models.Index( + fields=["collection", "scielo_issn"], + name="document_collection_issn_idx", + ), + models.Index( + fields=["collection", "pid_v2"], + name="document_collection_pidv2_idx", + ), + models.Index( + fields=["collection", "pid_generic"], + name="doc_coll_pidgen_idx", + ), + ], + }, + ), + ] diff --git a/document/migrations/__init__.py b/document/migrations/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/document/migrations/__init__.py @@ -0,0 +1 @@ + diff --git a/document/models.py b/document/models.py new file mode 100644 index 0000000..5197692 --- /dev/null +++ b/document/models.py @@ -0,0 +1,258 @@ +from django.db import models +from django.utils.translation import gettext_lazy as _ + +from collection.models import Collection +from core.models import CommonControlField +from source.models import Source + + +class Document(CommonControlField): + DOCUMENT_TYPE_ARTICLE = "article" + DOCUMENT_TYPE_PREPRINT = "preprint" + DOCUMENT_TYPE_DATASET = "dataset" + DOCUMENT_TYPE_BOOK = "book" + DOCUMENT_TYPE_CHAPTER = "chapter" + DOCUMENT_TYPE_OTHER = "other" + DOCUMENT_TYPE_CHOICES = ( + (DOCUMENT_TYPE_ARTICLE, _("Article")), + (DOCUMENT_TYPE_PREPRINT, _("Preprint")), + (DOCUMENT_TYPE_DATASET, _("Dataset")), + (DOCUMENT_TYPE_BOOK, _("Book")), + (DOCUMENT_TYPE_CHAPTER, _("Chapter")), + (DOCUMENT_TYPE_OTHER, _("Other")), + ) + + collection = models.ForeignKey( + Collection, + verbose_name=_("Collection"), + on_delete=models.CASCADE, + blank=False, + null=False, + db_index=True, + ) + + source = models.ForeignKey( + Source, + verbose_name=_("Source"), + on_delete=models.CASCADE, + related_name="documents", + blank=True, + null=True, + db_index=True, + ) + + parent_document = models.ForeignKey( + "self", + verbose_name=_("Parent Document"), + on_delete=models.SET_NULL, + related_name="child_documents", + blank=True, + null=True, + db_index=True, + ) + + document_type = models.CharField( + verbose_name=_("Document Type"), + max_length=32, + choices=DOCUMENT_TYPE_CHOICES, + blank=False, + null=False, + db_index=True, + ) + + document_id = models.CharField( + verbose_name=_("Document ID"), + max_length=255, + blank=False, + null=False, + db_index=True, + ) + + scielo_issn = models.CharField( + verbose_name=_("SciELO ISSN"), + max_length=9, + blank=True, + null=True, + db_index=True, + ) + + pid_v2 = models.CharField( + verbose_name=_("PID V2"), + max_length=23, + blank=True, + null=True, + db_index=True, + ) + + pid_v3 = models.CharField( + 
verbose_name=_("PID V3"), + max_length=23, + blank=True, + null=True, + db_index=True, + ) + + pid_generic = models.CharField( + verbose_name=_("PID Generic"), + max_length=255, + blank=True, + null=True, + db_index=True, + ) + + title = models.CharField( + verbose_name=_("Document Title"), + max_length=500, + blank=True, + null=True, + ) + + identifiers = models.JSONField( + verbose_name=_("Identifiers"), + null=True, + blank=True, + default=dict, + ) + + files = models.JSONField( + verbose_name=_("Files"), + null=True, + blank=True, + default=dict, + ) + + default_lang = models.CharField( + verbose_name=_("Default Language"), + max_length=8, + blank=True, + null=True, + ) + + text_langs = models.JSONField( + verbose_name=_("Text Languages"), + null=True, + blank=True, + default=list, + ) + + default_media_format = models.CharField( + verbose_name=_("Default Media Format"), + max_length=32, + blank=True, + null=True, + ) + + processing_date = models.CharField( + verbose_name=_("Processing Date"), + max_length=32, + blank=True, + null=True, + ) + + publication_date = models.CharField( + verbose_name=_("Publication Date"), + max_length=32, + blank=True, + null=True, + ) + + publication_year = models.CharField( + verbose_name=_("Publication Year"), + max_length=4, + blank=True, + null=True, + db_index=True, + ) + + extra_data = models.JSONField( + verbose_name=_("Extra Data"), + null=True, + blank=True, + default=dict, + ) + + def __str__(self): + return f"{self.collection.acron3} - {self.document_type} - {self.document_id}" + + @classmethod + def metadata(cls, collection=None): + queryset = cls.objects.select_related("collection", "source").only( + "collection__acron3", + "default_lang", + "default_media_format", + "document_id", + "document_type", + "extra_data", + "files", + "identifiers", + "parent_document__document_id", + "pid_generic", + "pid_v2", + "pid_v3", + "processing_date", + "publication_date", + "publication_year", + "scielo_issn", + "source__scielo_issn", + "source__source_id", + "source__source_type", + "text_langs", + "title", + ) + + if collection: + queryset = queryset.filter(collection=collection) + + for document in queryset.iterator(): + source = document.source + yield { + "collection": document.collection.acron3, + "default_lang": document.default_lang, + "default_media_format": document.default_media_format, + "document_id": document.document_id, + "document_type": document.document_type, + "extra_data": document.extra_data or {}, + "files": document.files or {}, + "identifiers": document.identifiers or {}, + "parent_document_id": ( + document.parent_document.document_id if document.parent_document else None + ), + "pid_generic": document.pid_generic, + "pid_v2": document.pid_v2, + "pid_v3": document.pid_v3, + "processing_date": document.processing_date, + "publication_date": document.publication_date, + "publication_year": document.publication_year, + "scielo_issn": document.scielo_issn or (source.scielo_issn if source else None), + "source_id": source.source_id if source else None, + "source_type": source.source_type if source else None, + "text_langs": document.text_langs or [], + "title": document.title, + } + + class Meta: + verbose_name = _("Document") + verbose_name_plural = _("Documents") + unique_together = ( + "collection", + "document_type", + "document_id", + ) + indexes = [ + models.Index( + fields=["collection", "document_type"], + name="document_collection_type_idx", + ), + models.Index( + fields=["collection", "scielo_issn"], + 
name="document_collection_issn_idx", + ), + models.Index( + fields=["collection", "pid_v2"], + name="document_collection_pidv2_idx", + ), + models.Index( + fields=["collection", "pid_generic"], + name="doc_coll_pidgen_idx", + ), + ] diff --git a/document/services/__init__.py b/document/services/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/document/services/__init__.py @@ -0,0 +1 @@ + diff --git a/document/services/articles.py b/document/services/articles.py new file mode 100644 index 0000000..09244b3 --- /dev/null +++ b/document/services/articles.py @@ -0,0 +1,166 @@ +from document.models import Document + +from .common import build_document_id, compact_dict, get_existing_document, normalize_langs, normalize_year + + +def upsert_article_document_from_articlemeta( + payload, + collection, + source=None, + user=None, + force_update=True, +): + pid_v2 = payload.get("code") + document_id = build_document_id(pid_v2, payload.get("pid_v3"), payload.get("pid_generic")) + if not document_id: + return None + + document = get_existing_document( + collection, + Document.DOCUMENT_TYPE_ARTICLE, + document_id, + pid_v2, + ) + created = document is None + if created: + document = Document( + collection=collection, + document_type=Document.DOCUMENT_TYPE_ARTICLE, + document_id=document_id, + ) + if user: + document.creator = user + + if created or force_update: + document.source = source + document.parent_document = None + document.scielo_issn = source.scielo_issn if source else None + document.pid_v2 = pid_v2 or document.pid_v2 + document.pid_v3 = payload.get("pid_v3") or document.pid_v3 + document.pid_generic = payload.get("pid_generic") or document.pid_generic + document.title = payload.get("title") or document.title + document.identifiers = _merge_dicts( + document.identifiers, + _build_articlemeta_identifiers(payload, source), + ) + document.files = payload.get("pdfs") or document.files or {} + document.default_lang = payload.get("default_language") or document.default_lang + document.text_langs = normalize_langs(payload.get("text_langs")) + document.default_media_format = document.default_media_format + document.processing_date = payload.get("processing_date") or document.processing_date + document.publication_date = payload.get("publication_date") or document.publication_date + document.publication_year = normalize_year( + payload.get("publication_year"), + fallback_date=document.publication_date, + ) + document.extra_data = _merge_dicts( + document.extra_data, + compact_dict( + { + "provider": "articlemeta", + "issn_codes": payload.get("code_title"), + } + ), + ) + + if user: + document.updated_by = user + + document.save() + return document + + +def upsert_article_document_from_opac( + payload, + collection, + source=None, + user=None, + force_update=True, +): + pid_v2 = payload.get("pid_v2") + pid_v3 = payload.get("pid_v3") + document_id = build_document_id(pid_v2, pid_v3, payload.get("pid_generic")) + if not document_id: + return None + + document = get_existing_document( + collection, + Document.DOCUMENT_TYPE_ARTICLE, + document_id, + pid_v2, + pid_v3, + payload.get("pid_generic"), + ) + created = document is None + if created: + document = Document( + collection=collection, + document_type=Document.DOCUMENT_TYPE_ARTICLE, + document_id=document_id, + ) + if user: + document.creator = user + + if created or force_update: + document.source = source + document.parent_document = None + document.scielo_issn = source.scielo_issn if source else None + document.pid_v2 = 
pid_v2 or document.pid_v2 + document.pid_v3 = pid_v3 or document.pid_v3 + document.pid_generic = payload.get("pid_generic") or document.pid_generic + document.title = payload.get("title") or document.title + document.identifiers = _merge_dicts( + document.identifiers, + _build_opac_identifiers(payload, source), + ) + document.files = document.files or {} + document.default_lang = payload.get("default_language") or document.default_lang + document.text_langs = normalize_langs(payload.get("text_langs")) or document.text_langs or [] + document.default_media_format = document.default_media_format + document.processing_date = document.processing_date + document.publication_date = payload.get("publication_date") or document.publication_date + document.publication_year = normalize_year( + payload.get("publication_year"), + fallback_date=document.publication_date, + ) + document.extra_data = _merge_dicts( + document.extra_data, + compact_dict( + { + "provider": "opac", + "journal_acronym": payload.get("journal_acronym"), + } + ), + ) + + if user: + document.updated_by = user + + document.save() + return document + + +def _build_articlemeta_identifiers(payload, source): + return compact_dict( + { + "pid_v2": payload.get("code"), + "scielo_issn": source.scielo_issn if source else None, + } + ) + + +def _build_opac_identifiers(payload, source): + return compact_dict( + { + "pid_v2": payload.get("pid_v2"), + "pid_v3": payload.get("pid_v3"), + "scielo_issn": source.scielo_issn if source else None, + "journal_acronym": payload.get("journal_acronym"), + } + ) + + +def _merge_dicts(current, new_values): + merged = dict(current or {}) + merged.update(new_values or {}) + return merged diff --git a/document/services/books.py b/document/services/books.py new file mode 100644 index 0000000..96d92e1 --- /dev/null +++ b/document/services/books.py @@ -0,0 +1,256 @@ +from document.models import Document + + +def build_book_pid_generic(book_id): + if book_id in (None, ""): + return None + return f"book:{book_id}" + + +def build_chapter_pid_generic(book_id, chapter_id): + if book_id in (None, "") or chapter_id in (None, ""): + return None + return f"book:{book_id}/chapter:{chapter_id}" + + +def enrich_part_payload(payload, monograph_payload): + if not monograph_payload: + return payload + + enriched = dict(payload) + enriched["monograph_title"] = monograph_payload.get("title") + enriched["monograph_language"] = monograph_payload.get("language") + enriched["monograph_publication_date"] = monograph_payload.get("publication_date") + enriched["monograph_year"] = monograph_payload.get("year") + enriched["monograph_publisher"] = monograph_payload.get("publisher") + enriched["monograph_isbn"] = monograph_payload.get("isbn") + enriched["monograph_eisbn"] = monograph_payload.get("eisbn") + enriched["monograph_doi_number"] = monograph_payload.get("doi_number") + enriched["monograph_creators"] = monograph_payload.get("creators") + return enriched + + +def upsert_monograph_document( + payload, + collection, + source=None, + user=None, + force_update=True, + source_url=None, + last_seq=None, +): + if payload.get("TYPE") != "Monograph": + return None + + book_id = str(payload.get("id")) + pid_generic = build_book_pid_generic(book_id) + document, created = Document.objects.get_or_create( + collection=collection, + document_type=Document.DOCUMENT_TYPE_BOOK, + document_id=pid_generic, + ) + + if created and user: + document.creator = user + + if created or force_update: + document.source = source + document.parent_document = None + 
document.scielo_issn = None + document.pid_v2 = None + document.pid_v3 = None + document.pid_generic = pid_generic + document.title = payload.get("title") or book_id + document.identifiers = _build_monograph_identifiers(payload) + document.files = {} + document.default_lang = payload.get("language") or None + document.text_langs = _unique_list(payload.get("language")) + document.default_media_format = None + document.processing_date = None + document.publication_date = payload.get("publication_date") or None + document.publication_year = _normalize_year(payload.get("year")) + document.extra_data = _build_monograph_extra_data( + payload, + source_url=source_url, + last_seq=last_seq, + ) + + if user: + document.updated_by = user + + document.save() + return document + + +def upsert_part_document( + payload, + collection, + source=None, + parent_document=None, + user=None, + force_update=True, + source_url=None, + last_seq=None, +): + if payload.get("TYPE") != "Part": + return None + + book_id = payload.get("monograph") + chapter_id = payload.get("id") + pid_generic = build_chapter_pid_generic(book_id, chapter_id) + document, created = Document.objects.get_or_create( + collection=collection, + document_type=Document.DOCUMENT_TYPE_CHAPTER, + document_id=pid_generic, + ) + + if created and user: + document.creator = user + + if created or force_update: + document.source = source + document.parent_document = parent_document + document.scielo_issn = None + document.pid_v2 = None + document.pid_v3 = None + document.pid_generic = pid_generic + document.title = payload.get("title") or str(chapter_id) + document.identifiers = _build_part_identifiers(payload) + document.files = {} + document.default_lang = ( + payload.get("text_language") + or payload.get("monograph_language") + or None + ) + document.text_langs = _unique_list( + payload.get("text_language") or payload.get("monograph_language") + ) + document.default_media_format = None + document.processing_date = None + document.publication_date = payload.get("monograph_publication_date") or None + document.publication_year = _normalize_year(payload.get("monograph_year")) + document.extra_data = _build_part_extra_data( + payload, + source_url=source_url, + last_seq=last_seq, + ) + + if user: + document.updated_by = user + + document.save() + return document + + +def delete_book_document(collection, book_id): + return Document.objects.filter( + collection=collection, + document_type=Document.DOCUMENT_TYPE_BOOK, + document_id=build_book_pid_generic(book_id), + ).delete() + + +def delete_document_by_raw_id(collection, raw_id): + return Document.objects.filter( + collection=collection, + extra_data__raw_id=str(raw_id), + ).delete() + + +def has_monograph_document_for_raw_id(collection, raw_id): + return Document.objects.filter( + collection=collection, + document_type=Document.DOCUMENT_TYPE_BOOK, + extra_data__raw_id=str(raw_id), + ).exists() + + +def get_monograph_document(collection, book_id): + return Document.objects.filter( + collection=collection, + document_type=Document.DOCUMENT_TYPE_BOOK, + document_id=build_book_pid_generic(book_id), + ).first() + + +def _build_monograph_identifiers(payload): + identifiers = { + "book_id": str(payload.get("id")) if payload.get("id") is not None else None, + "isbn": payload.get("isbn"), + "eisbn": payload.get("eisbn"), + "doi": payload.get("doi_number"), + } + return _compact_dict(identifiers) + + +def _build_part_identifiers(payload): + identifiers = { + "book_id": str(payload.get("monograph")) if 
payload.get("monograph") is not None else None, + "chapter_id": str(payload.get("id")) if payload.get("id") is not None else None, + "isbn": payload.get("monograph_isbn"), + "eisbn": payload.get("monograph_eisbn"), + "doi": payload.get("doi_number"), + "book_doi": payload.get("monograph_doi_number"), + } + return _compact_dict(identifiers) + + +def _build_monograph_extra_data(payload, source_url=None, last_seq=None): + extra_data = { + "raw_id": str(payload.get("id")) if payload.get("id") is not None else None, + "raw_type": payload.get("TYPE"), + "source_url": source_url, + "last_seq": last_seq, + "visible": payload.get("visible"), + "city": payload.get("city"), + "country": payload.get("country"), + "pages": payload.get("pages"), + "publisher": payload.get("publisher"), + "creators": payload.get("creators"), + "translated_titles": payload.get("translated_titles"), + "translated_synopses": payload.get("translated_synopses"), + "synopsis": payload.get("synopsis"), + } + return _compact_dict(extra_data) + + +def _build_part_extra_data(payload, source_url=None, last_seq=None): + extra_data = { + "raw_id": str(payload.get("id")) if payload.get("id") is not None else None, + "raw_type": payload.get("TYPE"), + "source_url": source_url, + "last_seq": last_seq, + "visible": payload.get("visible"), + "order": payload.get("order"), + "pages": payload.get("pages"), + "creators": payload.get("creators"), + "translated_titles": payload.get("translated_titles"), + "monograph_id": str(payload.get("monograph")) if payload.get("monograph") is not None else None, + "monograph_title": payload.get("monograph_title"), + "monograph_language": payload.get("monograph_language"), + "monograph_publication_date": payload.get("monograph_publication_date"), + "monograph_year": payload.get("monograph_year"), + "monograph_publisher": payload.get("monograph_publisher"), + "monograph_creators": payload.get("monograph_creators"), + } + return _compact_dict(extra_data) + + +def _unique_list(value): + if not value: + return [] + return [value] + + +def _normalize_year(value): + if value in (None, ""): + return None + return str(value)[:4] + + +def _compact_dict(data): + return { + key: value + for key, value in data.items() + if value not in (None, "", [], {}, ()) + } diff --git a/document/services/common.py b/document/services/common.py new file mode 100644 index 0000000..91e103d --- /dev/null +++ b/document/services/common.py @@ -0,0 +1,58 @@ +from document.models import Document + + +def build_document_id(*values): + for value in values: + if value not in (None, ""): + return str(value) + return None + + +def get_existing_document(collection, document_type, *identifiers): + identifiers = [str(value) for value in identifiers if value not in (None, "")] + if not identifiers: + return None + + queryset = Document.objects.filter( + collection=collection, + document_type=document_type, + ) + + for field_name in ("document_id", "pid_v2", "pid_v3", "pid_generic"): + for identifier in identifiers: + document = queryset.filter(**{field_name: identifier}).first() + if document: + return document + + return None + + +def normalize_langs(value): + if not value: + return [] + + if isinstance(value, list): + return [item for item in value if item not in (None, "")] + + if isinstance(value, dict): + return [key for key, enabled in value.items() if enabled] + + return [value] + + +def normalize_year(value, fallback_date=None): + if value not in (None, ""): + return str(value)[:4] + + if fallback_date not in (None, ""): + return 
str(fallback_date)[:4] + + return None + + +def compact_dict(data): + return { + key: value + for key, value in data.items() + if value not in (None, "", [], {}, ()) + } diff --git a/document/services/datasets.py b/document/services/datasets.py new file mode 100644 index 0000000..2496b20 --- /dev/null +++ b/document/services/datasets.py @@ -0,0 +1,69 @@ +from document.models import Document + +from .common import compact_dict, normalize_year + + +def upsert_dataset_document( + payload, + collection, + user=None, + force_update=True, +): + dataset_doi = payload.get("dataset_doi") + if not dataset_doi: + return None + + document, created = Document.objects.get_or_create( + collection=collection, + document_type=Document.DOCUMENT_TYPE_DATASET, + document_id=dataset_doi, + ) + + if created and user: + document.creator = user + + if created or force_update: + files = dict(document.files or {}) + file_id = payload.get("file_id") + if file_id: + files[str(file_id)] = compact_dict( + { + "name": payload.get("file_name"), + "url": payload.get("file_url"), + "file_persistent_id": payload.get("file_persistent_id"), + } + ) + + document.source = None + document.parent_document = None + document.scielo_issn = None + document.pid_v2 = None + document.pid_v3 = None + document.pid_generic = dataset_doi + document.title = payload.get("title") or document.title + document.identifiers = compact_dict( + { + "dataset_doi": dataset_doi, + } + ) + document.files = files + document.default_lang = document.default_lang + document.text_langs = document.text_langs or [] + document.default_media_format = document.default_media_format + document.processing_date = document.processing_date + document.publication_date = payload.get("dataset_published") or document.publication_date + document.publication_year = normalize_year( + None, + fallback_date=document.publication_date, + ) + document.extra_data = compact_dict( + { + "provider": "dataverse", + } + ) + + if user: + document.updated_by = user + + document.save() + return document diff --git a/document/services/preprints.py b/document/services/preprints.py new file mode 100644 index 0000000..4be89f1 --- /dev/null +++ b/document/services/preprints.py @@ -0,0 +1,58 @@ +from document.models import Document + +from .common import compact_dict, normalize_langs, normalize_year + + +def upsert_preprint_document( + payload, + collection, + user=None, + force_update=True, +): + pid_generic = payload.get("pid_generic") + if not pid_generic: + return None + + document, created = Document.objects.get_or_create( + collection=collection, + document_type=Document.DOCUMENT_TYPE_PREPRINT, + document_id=pid_generic, + ) + + if created and user: + document.creator = user + + if created or force_update: + document.source = None + document.parent_document = None + document.scielo_issn = None + document.pid_v2 = None + document.pid_v3 = None + document.pid_generic = pid_generic + document.title = payload.get("title") or document.title + document.identifiers = compact_dict( + { + "pid_generic": pid_generic, + } + ) + document.files = document.files or {} + document.default_lang = payload.get("default_language") or document.default_lang + document.text_langs = normalize_langs(payload.get("text_langs")) + document.default_media_format = document.default_media_format + document.processing_date = document.processing_date + document.publication_date = payload.get("publication_date") or document.publication_date + document.publication_year = normalize_year( + payload.get("publication_year"), + 
fallback_date=document.publication_date, + ) + document.extra_data = compact_dict( + { + "provider": "preprints", + } + ) + + if user: + document.updated_by = user + + document.save() + return document diff --git a/document/tasks/__init__.py b/document/tasks/__init__.py new file mode 100644 index 0000000..95a0ba5 --- /dev/null +++ b/document/tasks/__init__.py @@ -0,0 +1,28 @@ +from .articlemeta import ( + load_documents_from_article_meta, + task_load_documents_from_article_meta, +) +from .common import ( + get_latest_scielo_books_last_seq, +) +from .dataverse import ( + load_dataset_metadata_from_dataverse, + task_load_dataset_metadata_into_documents, +) +from .opac import ( + load_documents_from_opac, + task_load_documents_from_opac, +) +from .pipeline import ( + task_daily_metadata_sync_pipeline, +) +from .preprints import ( + load_preprints_from_preprints_api, + task_load_preprints_into_documents, +) +from .scielo_books import ( + load_documents_from_scielo_books, + sync_documents_from_scielo_books, + task_load_documents_from_scielo_books, + task_sync_documents_from_scielo_books, +) diff --git a/document/tasks/articlemeta.py b/document/tasks/articlemeta.py new file mode 100644 index 0000000..75b2689 --- /dev/null +++ b/document/tasks/articlemeta.py @@ -0,0 +1,120 @@ +import logging + +from django.db import DataError +from django.utils.translation import gettext as _ + +from core.collectors import articlemeta as articlemeta_collector +from core.utils import date_utils +from core.utils.request_utils import _get_user +from document.services import articles as article_service +from source.services import journals as journal_service + +from config import celery_app + +from .common import _get_collection + + +def load_documents_from_article_meta( + from_date=None, + until_date=None, + days_to_go_back=None, + collection=None, + issn=None, + force_update=True, + user=None, +): + from_date, until_date = date_utils.get_date_range_str( + from_date, + until_date, + days_to_go_back, + ) + logging.info( + "Loading documents from Article Meta. From: %s, Until: %s, Collection: %s, ISSN: %s", + from_date, + until_date, + collection, + issn, + ) + + offset = 0 + limit = 1000 + while True: + response = articlemeta_collector.fetch_article_counter_dict( + from_date, + until_date, + offset=offset, + limit=limit, + collection=collection, + issn=issn, + ) + objects = response.get("objects") or [] + if not objects: + break + + for payload in objects: + collection_obj = _get_collection(payload.get("collection") or collection) + if not collection_obj: + logging.info( + "Collection not found for payload %s", + payload.get("code"), + ) + continue + + source = journal_service.find_journal_source_by_issns( + collection_obj, + payload.get("code_title"), + ) + if not source: + logging.info( + "Source not found for collection %s and ISSNs %s", + collection_obj.acron3, + payload.get("code_title"), + ) + continue + + try: + article_service.upsert_article_document_from_articlemeta( + payload, + collection=collection_obj, + source=source, + user=user, + force_update=force_update, + ) + except DataError as exc: + logging.error( + "Error saving Document from Article Meta. " + "Collection: %s, Source: %s, PIDv2: %s. 
Error: %s", + collection_obj, + source.source_id, + payload.get('code'), + exc + ) + continue + + offset += limit + + return True + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Article Meta)"), timelimit=-1, queue="load") +def task_load_documents_from_article_meta( + self, + from_date=None, + until_date=None, + days_to_go_back=None, + collection=None, + issn=None, + force_update=True, + user_id=None, + username=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + return load_documents_from_article_meta( + from_date=from_date, + until_date=until_date, + days_to_go_back=days_to_go_back, + collection=collection, + issn=issn, + force_update=force_update, + user=user, + ) diff --git a/document/tasks/common.py b/document/tasks/common.py new file mode 100644 index 0000000..1645918 --- /dev/null +++ b/document/tasks/common.py @@ -0,0 +1,43 @@ +import logging + +from collection.models import Collection +from document.models import Document +from source.models import Source + + +def _get_collection(acronym): + if not acronym: + return None + return Collection.objects.filter(acron3=acronym).first() + + +def get_latest_scielo_books_last_seq(collection="books"): + document_last_seq = _get_latest_last_seq_from_queryset( + Document.objects.filter(collection__acron3=collection).only("extra_data") + ) + source_last_seq = _get_latest_last_seq_from_queryset( + Source.objects.filter( + collection__acron3=collection, + source_type=Source.SOURCE_TYPE_BOOK, + ).only("extra_data") + ) + return max(document_last_seq, source_last_seq) + + +def _get_latest_last_seq_from_queryset(queryset): + latest = 0 + for item in queryset.iterator(): + value = _coerce_last_seq((item.extra_data or {}).get("last_seq")) + if value is not None and value > latest: + latest = value + return latest + + +def _coerce_last_seq(value): + if value in (None, ""): + return None + try: + return int(value) + except (TypeError, ValueError): + logging.warning("Ignoring invalid SciELO Books last_seq value: %r", value) + return None diff --git a/document/tasks/dataverse.py b/document/tasks/dataverse.py new file mode 100644 index 0000000..15618a5 --- /dev/null +++ b/document/tasks/dataverse.py @@ -0,0 +1,80 @@ +import logging + +from django.db import DataError +from django.utils.translation import gettext as _ + +from core.collectors import dataverse as dataverse_collector +from core.utils import date_utils +from core.utils.request_utils import _get_user +from document.services import datasets as dataset_service + +from config import celery_app + +from .common import _get_collection + + +def load_dataset_metadata_from_dataverse( + from_date=None, + until_date=None, + days_to_go_back=None, + force_update=True, + user=None, +): + from_date, until_date = date_utils.get_date_range_str( + from_date, + until_date, + days_to_go_back, + ) + logging.info( + "Loading dataset metadata into documents. From: %s, Until: %s", + from_date, + until_date, + ) + + collection_obj = _get_collection("data") + if not collection_obj: + logging.error("Collection not found: data") + return False + + for payload in dataverse_collector.iter_dataset_metadata(from_date, until_date): + if not payload.get("dataset_doi"): + logging.error("Dataset DOI not found in record: %s", payload) + continue + + try: + dataset_service.upsert_dataset_document( + payload, + collection=collection_obj, + user=user, + force_update=force_update, + ) + except DataError as exc: + logging.error( + "Error saving Dataset Document. Collection: %s, PID: %s. 
Error: %s", + collection_obj, + payload.get('dataset_doi'), + exc + ) + continue + + return True + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Dataverse)"), timelimit=-1, queue="load") +def task_load_dataset_metadata_into_documents( + self, + from_date=None, + until_date=None, + days_to_go_back=None, + force_update=True, + user_id=None, + username=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + return load_dataset_metadata_from_dataverse( + from_date=from_date, + until_date=until_date, + days_to_go_back=days_to_go_back, + force_update=force_update, + user=user, + ) diff --git a/document/tasks/opac.py b/document/tasks/opac.py new file mode 100644 index 0000000..5e1c81e --- /dev/null +++ b/document/tasks/opac.py @@ -0,0 +1,107 @@ +import logging + +from django.db import DataError +from django.utils.translation import gettext as _ + +from core.collectors import opac as opac_collector +from core.utils import date_utils +from core.utils.request_utils import _get_user +from document.services import articles as article_service +from source.services import journals as journal_service + +from config import celery_app + +from .common import _get_collection + + +def load_documents_from_opac( + collection="scl", + from_date=None, + until_date=None, + days_to_go_back=None, + page=1, + force_update=True, + user=None, +): + from_date, until_date = date_utils.get_date_range_str( + from_date, + until_date, + days_to_go_back, + ) + logging.info( + "Loading documents from OPAC. From: %s, Until: %s, Collection: %s", + from_date, + until_date, + collection, + ) + + collection_obj = _get_collection(collection) + if not collection_obj: + logging.error("Collection not found: %s", collection) + return False + + while True: + response = opac_collector.fetch_counter_dict(from_date, until_date, page=page) + documents = response.get("documents") or {} + + for payload in documents.values(): + source = journal_service.find_journal_source_by_acronym( + collection_obj, + payload.get("journal_acronym"), + ) + if not source: + logging.info( + "Source not found for collection %s and acronym %s", + collection_obj.acron3, + payload.get("journal_acronym"), + ) + continue + + try: + article_service.upsert_article_document_from_opac( + payload, + collection=collection_obj, + source=source, + user=user, + force_update=force_update, + ) + except DataError as exc: + logging.error( + "Error saving Document from OPAC. " + "Collection: %s, Source: %s, PIDv2: %s. 
Error: %s", + collection_obj, + source.source_id, + payload.get('pid_v2'), + exc + ) + continue + + page += 1 + if page > int(response.get("pages", 0)): + break + + return True + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (OPAC)"), timelimit=-1, queue="load") +def task_load_documents_from_opac( + self, + collection="scl", + from_date=None, + until_date=None, + days_to_go_back=None, + page=1, + force_update=True, + user_id=None, + username=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + return load_documents_from_opac( + collection=collection, + from_date=from_date, + until_date=until_date, + days_to_go_back=days_to_go_back, + page=page, + force_update=force_update, + user=user, + ) diff --git a/document/tasks/pipeline.py b/document/tasks/pipeline.py new file mode 100644 index 0000000..97bef7c --- /dev/null +++ b/document/tasks/pipeline.py @@ -0,0 +1,24 @@ +import logging + +from celery import group +from django.utils.translation import gettext as _ + +from config import celery_app + +from .articlemeta import task_load_documents_from_article_meta +from .dataverse import task_load_dataset_metadata_into_documents +from .opac import task_load_documents_from_opac +from .preprints import task_load_preprints_into_documents +from .scielo_books import task_sync_documents_from_scielo_books + + +@celery_app.task(bind=True, name=_("[Metadata] Daily Sync Routine (Auto)"), queue="load") +def task_daily_metadata_sync_pipeline(self): + logging.info("Starting Daily Metadata Sync Pipeline") + group([ + task_load_documents_from_article_meta.s(), + task_load_documents_from_opac.s(), + task_load_preprints_into_documents.s(), + task_load_dataset_metadata_into_documents.s(), + task_sync_documents_from_scielo_books.s(), + ]).apply_async() diff --git a/document/tasks/preprints.py b/document/tasks/preprints.py new file mode 100644 index 0000000..ee63211 --- /dev/null +++ b/document/tasks/preprints.py @@ -0,0 +1,82 @@ +import logging + +from django.db import DataError +from django.utils.translation import gettext as _ + +from core.collectors import preprints as preprints_collector +from core.utils import date_utils +from core.utils.request_utils import _get_user +from document.services import preprints as preprint_service + +from config import celery_app + +from .common import _get_collection + + +def load_preprints_from_preprints_api( + from_date=None, + until_date=None, + days_to_go_back=None, + force_update=True, + user=None, +): + from_date, until_date = date_utils.get_date_range_str( + from_date, + until_date, + days_to_go_back, + ) + logging.info( + "Loading preprints into documents. From: %s, Until: %s", + from_date, + until_date, + ) + + collection_obj = _get_collection("preprints") + if not collection_obj: + logging.error("Collection not found: preprints") + return False + + for record in preprints_collector.iter_records(from_date, until_date): + payload = preprints_collector.extract_record_data(record) + + if not payload.get("pid_generic"): + logging.error("Preprint ID not found in record: %s", record) + continue + + try: + preprint_service.upsert_preprint_document( + payload, + collection=collection_obj, + user=user, + force_update=force_update, + ) + except DataError as exc: + logging.error( + "Error saving Preprint Document. Collection: %s, PID: %s. 
Error: %s", + collection_obj, + payload.get('pid_generic'), + exc + ) + continue + + return True + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Preprints)"), timelimit=-1, queue="load") +def task_load_preprints_into_documents( + self, + from_date=None, + until_date=None, + days_to_go_back=None, + force_update=True, + user_id=None, + username=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + return load_preprints_from_preprints_api( + from_date=from_date, + until_date=until_date, + days_to_go_back=days_to_go_back, + force_update=force_update, + user=user, + ) diff --git a/document/tasks/scielo_books.py b/document/tasks/scielo_books.py new file mode 100644 index 0000000..ddbd462 --- /dev/null +++ b/document/tasks/scielo_books.py @@ -0,0 +1,247 @@ +import logging + +from django.conf import settings +from django.utils.translation import gettext as _ + +from core.collectors import scielo_books as scielo_books_collector +from core.utils.request_utils import _get_user +from document.services import books as document_books_service +from source.services import books as source_books_service + +from config import celery_app + +from .common import get_latest_scielo_books_last_seq + + +def load_documents_from_scielo_books( + collection="books", + db_name=None, + since=0, + limit=None, + force_update=True, + headers=None, + base_url=None, + user=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + collection_obj = source_books_service.get_books_collection(collection) + monograph_cache = {} + + logging.info( + "Loading documents from SciELO Books. Collection: %s, DB: %s, Since: %s, Limit: %s", + collection, + db_name, + since, + limit, + ) + + for item in scielo_books_collector.iter_change_documents( + base_url=base_url, + db_name=db_name, + since=since, + limit=limit, + headers=headers, + ): + change = item["change"] + raw_id = change.get("id") + + if item["deleted"]: + delete_source = document_books_service.has_monograph_document_for_raw_id( + collection_obj, + raw_id, + ) + document_books_service.delete_document_by_raw_id(collection_obj, raw_id) + if delete_source: + source_books_service.delete_book_source(collection_obj, raw_id) + continue + + payload = item["payload"] or {} + source_url = item.get("source_url") + last_seq = change.get("seq") + + if payload.get("TYPE") == "Monograph": + source = source_books_service.upsert_monograph_source( + payload, + collection=collection_obj, + user=user, + force_update=force_update, + source_url=source_url, + last_seq=last_seq, + ) + document_books_service.upsert_monograph_document( + payload, + collection=collection_obj, + source=source, + user=user, + force_update=force_update, + source_url=source_url, + last_seq=last_seq, + ) + monograph_cache[str(payload.get("id"))] = payload + continue + + if payload.get("TYPE") != "Part": + continue + + monograph_payload = _get_monograph_payload( + payload, + monograph_cache=monograph_cache, + base_url=base_url, + db_name=db_name, + headers=headers, + ) + if not monograph_payload: + logging.warning( + "Skipping part %s because monograph %s could not be loaded.", + payload.get("id"), + payload.get("monograph"), + ) + continue + + source = source_books_service.upsert_monograph_source( + monograph_payload, + collection=collection_obj, + user=user, + force_update=force_update, + source_url=None, + last_seq=last_seq, + ) + parent_document = document_books_service.upsert_monograph_document( + monograph_payload, + 
collection=collection_obj, + source=source, + user=user, + force_update=force_update, + source_url=None, + last_seq=last_seq, + ) + enriched_payload = document_books_service.enrich_part_payload( + payload, + monograph_payload, + ) + document_books_service.upsert_part_document( + enriched_payload, + collection=collection_obj, + source=source, + parent_document=parent_document, + user=user, + force_update=force_update, + source_url=source_url, + last_seq=last_seq, + ) + + return True + + +def sync_documents_from_scielo_books( + collection="books", + db_name=None, + limit=None, + force_update=True, + headers=None, + base_url=None, + user=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + since = get_latest_scielo_books_last_seq(collection=collection) + logging.info( + "Syncing documents from SciELO Books incrementally. Collection: %s, Since: %s, Limit: %s", + collection, + since, + limit, + ) + return load_documents_from_scielo_books( + collection=collection, + db_name=db_name, + since=since, + limit=limit, + force_update=force_update, + headers=headers, + base_url=base_url, + user=user, + ) + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Manual)"), queue="load") +def task_load_documents_from_scielo_books( + self, + collection="books", + db_name=None, + since=0, + limit=None, + force_update=True, + headers=None, + base_url=None, + user_id=None, + username=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + user = _get_user(self.request, username=username, user_id=user_id) + return load_documents_from_scielo_books( + collection=collection, + db_name=db_name, + since=since, + limit=limit, + force_update=force_update, + headers=headers, + base_url=base_url, + user=user, + ) + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Incremental)"), queue="load") +def task_sync_documents_from_scielo_books( + self, + collection="books", + db_name=None, + limit=None, + force_update=True, + headers=None, + base_url=None, + user_id=None, + username=None, +): + db_name = db_name or settings.SCIELO_BOOKS_DB_NAME + limit = limit or settings.SCIELO_BOOKS_LIMIT + user = _get_user(self.request, username=username, user_id=user_id) + return sync_documents_from_scielo_books( + collection=collection, + db_name=db_name, + limit=limit, + force_update=force_update, + headers=headers, + base_url=base_url, + user=user, + ) + + +def _get_monograph_payload(payload, monograph_cache, base_url=None, db_name=None, headers=None): + monograph_id = payload.get("monograph") + if not monograph_id: + return None + + monograph_key = str(monograph_id) + if monograph_key in monograph_cache: + return monograph_cache[monograph_key] + + try: + monograph_payload, _ = scielo_books_collector.fetch_document( + doc_id=monograph_id, + base_url=base_url, + db_name=db_name or settings.SCIELO_BOOKS_DB_NAME, + headers=headers, + ) + except Exception as exc: + logging.warning( + "Failed to fetch monograph %s for part %s: %s", + monograph_id, + payload.get("id"), + exc, + ) + return None + + monograph_cache[monograph_key] = monograph_payload + return monograph_payload diff --git a/document/tests.py b/document/tests.py new file mode 100644 index 0000000..14d9bcd --- /dev/null +++ b/document/tests.py @@ -0,0 +1,255 @@ +from django.test import TestCase +from unittest.mock import patch + +from collection.models import Collection +from document import tasks as 
document_tasks +from source.services import books as source_books_service +from source.models import Source + +from .models import Document +from .services import articles as article_service +from .services import books as books_service +from .services import datasets as dataset_service +from .services import preprints as preprint_service + + +class DocumentMetadataTests(TestCase): + def test_metadata_includes_source_context_and_legacy_identifiers(self): + collection = Collection.objects.create(acron3="scl", acron2="sc") + source = Source.objects.create( + collection=collection, + source_type=Source.SOURCE_TYPE_JOURNAL, + source_id="1234-5678", + scielo_issn="1234-5678", + title="Test Journal", + identifiers={"scielo_issn": "1234-5678"}, + ) + Document.objects.create( + collection=collection, + source=source, + document_type=Document.DOCUMENT_TYPE_ARTICLE, + document_id="S123456782024000100001", + scielo_issn="1234-5678", + pid_v2="S123456782024000100001", + pid_v3="abc123", + title="Test Article", + identifiers={"doi": "10.1590/example"}, + files={"pt": {"path": "/pdf/test.pdf"}}, + default_lang="en", + text_langs=["en", "pt"], + publication_date="2024-01-15", + publication_year="2024", + ) + + metadata = list(Document.metadata(collection=collection)) + + self.assertEqual(len(metadata), 1) + self.assertEqual(metadata[0]["document_type"], Document.DOCUMENT_TYPE_ARTICLE) + self.assertEqual(metadata[0]["document_id"], "S123456782024000100001") + self.assertEqual(metadata[0]["source_type"], Source.SOURCE_TYPE_JOURNAL) + self.assertEqual(metadata[0]["source_id"], "1234-5678") + self.assertEqual(metadata[0]["scielo_issn"], "1234-5678") + + def test_upsert_monograph_and_part_documents_from_books_payload(self): + collection = Collection.objects.create(acron3="books", acron2="bk") + monograph_payload = { + "TYPE": "Monograph", + "id": "abcd1", + "title": "Sample Book", + "isbn": "9788578791889", + "eisbn": "9788578791880", + "doi_number": "10.1234/book", + "language": "pt", + "publication_date": "2024-05-20", + "year": "2024", + "publisher": "SciELO Books", + } + part_payload = { + "TYPE": "Part", + "id": "18", + "monograph": "abcd1", + "title": "Chapter 18", + "text_language": "es", + "order": "18", + } + + source = source_books_service.upsert_monograph_source( + monograph_payload, + collection=collection, + ) + parent_document = books_service.upsert_monograph_document( + monograph_payload, + collection=collection, + source=source, + ) + chapter = books_service.upsert_part_document( + books_service.enrich_part_payload(part_payload, monograph_payload), + collection=collection, + source=source, + parent_document=parent_document, + ) + + self.assertEqual(parent_document.document_type, Document.DOCUMENT_TYPE_BOOK) + self.assertEqual(parent_document.document_id, "book:abcd1") + self.assertEqual(parent_document.pid_generic, "book:abcd1") + self.assertEqual(chapter.document_type, Document.DOCUMENT_TYPE_CHAPTER) + self.assertEqual(chapter.document_id, "book:abcd1/chapter:18") + self.assertEqual(chapter.parent_document, parent_document) + self.assertEqual(chapter.identifiers["book_id"], "abcd1") + self.assertEqual(chapter.default_lang, "es") + + def test_articlemeta_and_opac_upsert_same_document(self): + collection = Collection.objects.create(acron3="scl", acron2="sc") + source = Source.objects.create( + collection=collection, + source_type=Source.SOURCE_TYPE_JOURNAL, + source_id="1234-5678", + scielo_issn="1234-5678", + acronym="testjou", + title="Test Journal", + identifiers={"scielo_issn": 
"1234-5678"}, + ) + + first = article_service.upsert_article_document_from_articlemeta( + { + "code": "S123456782024000100001", + "title": "Article Title", + "pdfs": {"en": {"url": "/pdf/en.pdf"}}, + "processing_date": "2024-02-10", + "publication_date": "2024-01-15", + "publication_year": "2024", + "default_language": "en", + "text_langs": ["en", "pt"], + "code_title": ["1234-5678"], + }, + collection=collection, + source=source, + ) + second = article_service.upsert_article_document_from_opac( + { + "pid_v2": "S123456782024000100001", + "pid_v3": "S1234-56782024000100001", + "title": "Article Title", + "journal_acronym": "testjou", + "publication_date": "2024-01-15", + "default_language": "en", + "text_langs": ["en", "pt"], + }, + collection=collection, + source=source, + ) + + self.assertEqual(first.pk, second.pk) + self.assertEqual(Document.objects.count(), 1) + second.refresh_from_db() + self.assertEqual(second.pid_v3, "S1234-56782024000100001") + self.assertEqual(second.identifiers["journal_acronym"], "testjou") + + def test_upsert_preprint_document_maps_metadata(self): + collection = Collection.objects.create(acron3="preprints", acron2="pp") + + document = preprint_service.upsert_preprint_document( + { + "pid_generic": "preprint/123", + "title": "Preprint Title", + "text_langs": ["en", "pt"], + "default_language": "en", + "publication_date": "2024-01-20", + "publication_year": "2024", + }, + collection=collection, + ) + + self.assertEqual(document.document_type, Document.DOCUMENT_TYPE_PREPRINT) + self.assertEqual(document.document_id, "preprint/123") + self.assertEqual(document.pid_generic, "preprint/123") + self.assertEqual(document.default_lang, "en") + + def test_upsert_dataset_document_accumulates_files(self): + collection = Collection.objects.create(acron3="data", acron2="dt") + + dataset_service.upsert_dataset_document( + { + "title": "Dataset Title", + "dataset_doi": "10.1234/dataset", + "dataset_published": "2024-03-15", + "file_id": "1", + "file_name": "first.csv", + "file_url": "https://example.org/first.csv", + "file_persistent_id": "pid:first", + }, + collection=collection, + ) + document = dataset_service.upsert_dataset_document( + { + "title": "Dataset Title", + "dataset_doi": "10.1234/dataset", + "dataset_published": "2024-03-15", + "file_id": "2", + "file_name": "second.csv", + "file_url": "https://example.org/second.csv", + "file_persistent_id": "pid:second", + }, + collection=collection, + ) + + self.assertEqual(document.document_type, Document.DOCUMENT_TYPE_DATASET) + self.assertEqual(document.document_id, "10.1234/dataset") + self.assertEqual(set(document.files.keys()), {"1", "2"}) + + +class DocumentBooksSyncTests(TestCase): + def test_get_latest_scielo_books_last_seq_uses_documents_and_sources(self): + collection = Collection.objects.create(acron3="books", acron2="bk") + source = Source.objects.create( + collection=collection, + source_type=Source.SOURCE_TYPE_BOOK, + source_id="book-1", + title="Book 1", + extra_data={"last_seq": 120}, + ) + Document.objects.create( + collection=collection, + source=source, + document_type=Document.DOCUMENT_TYPE_BOOK, + document_id="book:book-1", + extra_data={"last_seq": "135"}, + ) + + self.assertEqual(document_tasks.get_latest_scielo_books_last_seq("books"), 135) + + def test_sync_documents_from_scielo_books_uses_computed_since(self): + collection = Collection.objects.create(acron3="books", acron2="bk") + source = Source.objects.create( + collection=collection, + source_type=Source.SOURCE_TYPE_BOOK, + source_id="book-1", + 
title="Book 1", + extra_data={"last_seq": 120}, + ) + Document.objects.create( + collection=collection, + source=source, + document_type=Document.DOCUMENT_TYPE_BOOK, + document_id="book:book-1", + extra_data={"last_seq": 135}, + ) + + with patch("document.tasks.scielo_books.load_documents_from_scielo_books", return_value=True) as mocked: + result = document_tasks.sync_documents_from_scielo_books( + collection="books", + db_name="scielobooks_1a", + limit=500, + ) + + self.assertTrue(result) + mocked.assert_called_once_with( + collection="books", + db_name="scielobooks_1a", + since=135, + limit=500, + force_update=True, + headers=None, + base_url=None, + user=None, + ) diff --git a/article/wagtail_hooks.py b/document/wagtail_hooks.py similarity index 50% rename from article/wagtail_hooks.py rename to document/wagtail_hooks.py index 4cf55bd..de291c9 100644 --- a/article/wagtail_hooks.py +++ b/document/wagtail_hooks.py @@ -1,39 +1,35 @@ from django.utils.translation import gettext_lazy as _ from wagtail.snippets.views.snippets import SnippetViewSet -from wagtail.snippets.models import register_snippet -from config.menu import get_menu_order +from .models import Document -from .models import Article - -class ArticleSnippetViewSet(SnippetViewSet): - model = Article +class DocumentSnippetViewSet(SnippetViewSet): + model = Document icon = "folder-open-inverse" - menu_name = "article" - menu_label = _("Article") - menu_order = get_menu_order("article") - add_to_admin_menu = True + menu_label = _("Document") + menu_order = 300 list_display = ( "collection", - "scielo_issn", + "document_type", + "document_id", + "source", + "title", "pid_v2", "pid_v3", "pid_generic", - "files", "publication_year", ) list_filter = ( "collection", - "scielo_issn", + "document_type", "publication_year", ) search_fields = ( - "scielo_issn", + "document_id", + "title", "pid_v2", "pid_v3", "pid_generic", ) - -register_snippet(ArticleSnippetViewSet) diff --git a/journal/__init__.py b/journal/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/journal/admin.py b/journal/admin.py deleted file mode 100644 index 8c38f3f..0000000 --- a/journal/admin.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.contrib import admin - -# Register your models here. 
diff --git a/journal/migrations/0001_initial.py b/journal/migrations/0001_initial.py deleted file mode 100644 index 7164bbc..0000000 --- a/journal/migrations/0001_initial.py +++ /dev/null @@ -1,122 +0,0 @@ -# Generated by Django 5.0.7 on 2025-02-07 17:50 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - initial = True - - dependencies = [ - ("collection", "0001_initial"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name="Journal", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), - ), - ( - "updated", - models.DateTimeField( - auto_now=True, verbose_name="Last update date" - ), - ), - ( - "scielo_issn", - models.CharField(max_length=9, verbose_name="SciELO ISSN"), - ), - ( - "issns", - models.JSONField( - blank=True, default=dict, null=True, verbose_name="ISSNs" - ), - ), - ( - "acronym", - models.CharField( - blank=True, - default="", - max_length=32, - null=True, - verbose_name="Journal Acronym", - ), - ), - ( - "title", - models.CharField(max_length=255, verbose_name="Journal Title"), - ), - ( - "publisher_name", - models.JSONField( - blank=True, - default=list, - null=True, - verbose_name="Publisher Name", - ), - ), - ( - "subject_areas", - models.JSONField( - default=list, verbose_name="Subject Areas (CAPES)" - ), - ), - ( - "wos_subject_areas", - models.JSONField(default=list, verbose_name="Subject Areas (WoS)"), - ), - ( - "collection", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="collection.collection", - verbose_name="Collection", - ), - ), - ( - "creator", - models.ForeignKey( - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_creator", - to=settings.AUTH_USER_MODEL, - verbose_name="Creator", - ), - ), - ( - "updated_by", - models.ForeignKey( - blank=True, - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_last_mod_user", - to=settings.AUTH_USER_MODEL, - verbose_name="Updater", - ), - ), - ], - options={ - "verbose_name": "Journal", - "verbose_name_plural": "Journals", - "unique_together": {("collection", "scielo_issn", "acronym")}, - }, - ), - ] diff --git a/journal/migrations/0002_alter_journal_scielo_issn.py b/journal/migrations/0002_alter_journal_scielo_issn.py deleted file mode 100644 index 07cf94f..0000000 --- a/journal/migrations/0002_alter_journal_scielo_issn.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 5.0.7 on 2025-06-12 17:16 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("journal", "0001_initial"), - ] - - operations = [ - migrations.AlterField( - model_name="journal", - name="scielo_issn", - field=models.CharField( - db_index=True, max_length=9, verbose_name="SciELO ISSN" - ), - ), - ] diff --git a/journal/migrations/__init__.py b/journal/migrations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/journal/models.py b/journal/models.py deleted file mode 100644 index 0d830e9..0000000 --- a/journal/models.py +++ /dev/null @@ -1,100 +0,0 @@ -from django.db import models -from django.utils.translation import gettext_lazy as _ - -from core.models import CommonControlField -from 
collection.models import Collection - - -class Journal(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.CASCADE, - blank=False, - null=False, - db_index=True, - ) - - scielo_issn = models.CharField( - verbose_name=_('SciELO ISSN'), - max_length=9, - blank=False, - null=False, - db_index=True, - ) - - issns = models.JSONField( - verbose_name=_('ISSNs'), - null=True, - blank=True, - default=dict, - ) - - acronym = models.CharField( - verbose_name=_('Journal Acronym'), - max_length=32, - blank=True, - null=True, - default='', - ) - - title = models.CharField( - verbose_name=_('Journal Title'), - max_length=255, - blank=False, - null=False, - ) - - publisher_name = models.JSONField( - verbose_name=_('Publisher Name'), - blank=True, - null=True, - default=list, - ) - - subject_areas = models.JSONField( - verbose_name=_('Subject Areas (CAPES)'), - null=False, - blank=False, - default=list, - ) - - wos_subject_areas = models.JSONField( - verbose_name=_('Subject Areas (WoS)'), - null=False, - blank=False, - default=list, - ) - - def __str__(self): - return f'{self.collection.acron2} - {self.scielo_issn} - {self.acronym}' - - @classmethod - def metadata(cls, collection=None): - queryset = cls.objects.all() - if collection: - queryset = queryset.filter(collection=collection) - - for journal in queryset.only( - 'acronym', 'collection__acron3', 'issns', 'publisher_name', - 'scielo_issn', 'subject_areas', 'title', 'wos_subject_areas' - ): - yield { - 'acronym': journal.acronym, - 'collection': journal.collection.acron3, - 'issns': set([v for v in journal.issns.values() if v]), - 'publisher_name': journal.publisher_name, - 'scielo_issn': journal.scielo_issn, - 'subject_areas': journal.subject_areas, - 'title': journal.title, - 'wos_subject_areas': journal.wos_subject_areas, - } - - class Meta: - verbose_name = _('Journal') - verbose_name_plural = _('Journals') - unique_together = ( - 'collection', - 'scielo_issn', - 'acronym', - ) diff --git a/journal/tasks.py b/journal/tasks.py deleted file mode 100644 index 71681cb..0000000 --- a/journal/tasks.py +++ /dev/null @@ -1,56 +0,0 @@ -import logging - -from django.contrib.auth import get_user_model -from django.db import IntegrityError -from django.utils import timezone -from django.utils.translation import gettext as _ - -from collection.models import Collection -from config import celery_app -from core.utils.utils import _get_user - -from . 
import models, utils - - -User = get_user_model() - - -@celery_app.task(bind=True, name=_('Load journal data from Article Meta'), queue='load') -def task_load_journal_data_from_article_meta(self, collections=[], force_update=True, user_id=None, username=None, mode='thrift'): - user = _get_user(user_id, username) - - for col in collections or Collection.acron3_list(): - for j in utils.fetch_article_meta_journals(collection=col, mode=mode): - collection = Collection.objects.get(acron3=j.collection_acronym) - if not collection: - logging.error(f'Collection {j.collection_acronym} does not exist') - continue - - try: - journal, created = models.Journal.objects.get_or_create(collection=collection, scielo_issn=j.scielo_issn) - except IntegrityError as e: - logging.error(f'Journal {j} has not been created due to error: {e}') - continue - - if created: - journal.creator = user - journal.created = timezone.now() - - if created or force_update: - journal.updated_by = user - journal.updated = timezone.now() - journal.issns = { - 'electronic_issn': j.electronic_issn or '', - 'print_issn': j.print_issn or '', - 'scielo_issn': j.scielo_issn - } - journal.acronym = j.acronym - journal.title = j.title - journal.publisher_name = j.publisher_name or '' - journal.subject_areas = j.subject_areas or [] - journal.wos_subject_areas = j.wos_subject_areas or [] - logging.info(f'Journal {"created" if created else "updated"}: {journal}') - - journal.save() - - return True diff --git a/journal/tests.py b/journal/tests.py deleted file mode 100644 index 7ce503c..0000000 --- a/journal/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/journal/utils.py b/journal/utils.py deleted file mode 100644 index 8a80521..0000000 --- a/journal/utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from articlemeta.client import ThriftClient, RestfulClient - - -def fetch_article_meta_journals(collection='scl', mode='rest'): - """ - Fetches article metadata from journals. - - Returns - ------- - list - A list of article metadata. - """ - if mode == 'rest': - am = RestfulClient() - elif mode == 'thrift': - am = ThriftClient() - - for j in am.journals(collection=collection): - yield j diff --git a/journal/views.py b/journal/views.py deleted file mode 100644 index 91ea44a..0000000 --- a/journal/views.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.shortcuts import render - -# Create your views here. 
diff --git a/journal/wagtail_hooks.py b/journal/wagtail_hooks.py deleted file mode 100644 index 725b370..0000000 --- a/journal/wagtail_hooks.py +++ /dev/null @@ -1,40 +0,0 @@ -from django.utils.translation import gettext_lazy as _ -from wagtail.snippets.views.snippets import SnippetViewSet -from wagtail.snippets.models import register_snippet - -from config.menu import get_menu_order - -from .models import Journal - - -class JournalSnippetViewSet(SnippetViewSet): - model = Journal - icon = "folder-open-inverse" - menu_name = "journal" - menu_label = _("Journal") - menu_order = get_menu_order('journal') - add_to_admin_menu = True - - list_display = ( - "collection", - "scielo_issn", - "acronym", - "title", - "issns", - "publisher_name", - "subject_areas", - "wos_subject_areas", - ) - list_filter = ( - "collection", - ) - search_fields = ( - "issns", - "acronym", - "publisher_name", - "subject_areas", - "wos_subject_areas", - ) - - -register_snippet(JournalSnippetViewSet) diff --git a/local.yml b/local.yml index 3c25357..9b3a047 100644 --- a/local.yml +++ b/local.yml @@ -11,10 +11,15 @@ services: - mailhog volumes: - .:/app:z - - ../scms_data/scielo_usage/data/logs:/app/logs + - /mnt/pidata2/pi/scl/logs:/app/logs + # Uncomment to use local SciELO lib repos for development: + # - ../scielo_log_validator:/app/scielo_log_validator:z + # - ../scielo_usage_counter:/app/scielo_usage_counter:z env_file: - ./.envs/.local/.django - ./.envs/.local/.postgres + environment: + - USE_LOCAL_SCIELO_LIBS=0 ports: - "8009:8000" command: /start @@ -40,7 +45,7 @@ services: - "8029:8025" redis: - image: redis:6 + image: redis:8 container_name: scielo_usage_local_redis ports: - "6399:6379" diff --git a/log_manager/choices.py b/log_manager/choices.py index e98c8f2..c6e461a 100644 --- a/log_manager/choices.py +++ b/log_manager/choices.py @@ -19,13 +19,3 @@ (LOG_FILE_STATUS_IGNORED, _("Ignored")), ] - -COLLECTION_LOG_FILE_DATE_COUNT_OK = 'OK' -COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES = 'MIS' -COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES = 'EXT' - -COLLECTION_LOG_FILE_DATE_COUNT = [ - (COLLECTION_LOG_FILE_DATE_COUNT_OK, _("OK")), - (COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES, _("Missing Files")), - (COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES, _("Extra files")), -] diff --git a/log_manager/management/__init__.py b/log_manager/management/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/log_manager/management/__init__.py @@ -0,0 +1 @@ + diff --git a/log_manager/management/commands/__init__.py b/log_manager/management/commands/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/log_manager/management/commands/__init__.py @@ -0,0 +1 @@ + diff --git a/log_manager/management/commands/reset_log_catalog.py b/log_manager/management/commands/reset_log_catalog.py new file mode 100644 index 0000000..5ded576 --- /dev/null +++ b/log_manager/management/commands/reset_log_catalog.py @@ -0,0 +1,94 @@ +from django.core.management.base import BaseCommand +from django.db import transaction + +from log_manager.models import LogFile +from metrics.models import DailyMetricJob +from metrics.services import daily_payloads +from reports.models import MonthlyLogReport, WeeklyLogReport, YearlyLogReport +from tracker.models import LogFileDiscardedLine + + +class Command(BaseCommand): + help = ( + "Clear the log catalog stored in the database, including derived parsing " + "records, daily metric payloads, and optionally reports, " + "while preserving the source log files on disk." 
+ ) + + def add_arguments(self, parser): + parser.add_argument( + "--collection", + action="append", + dest="collections", + help="Collection acronym to limit cleanup. Repeat the option for multiple collections.", + ) + parser.add_argument( + "--reports", + action="store_true", + default=False, + help="Also clear Weekly/Monthly/Yearly log reports for the selected collections.", + ) + + def handle(self, *args, **options): + collections = options.get("collections") or [] + clear_reports = options.get("reports") + + log_files = LogFile.objects.all() + if collections: + log_files = log_files.filter(collection__acron3__in=collections) + + log_file_ids = list(log_files.values_list("id", flat=True)) + if not log_file_ids: + self.stdout.write(self.style.WARNING("No log catalog rows found for cleanup.")) + return + + daily_jobs = DailyMetricJob.objects.all() + if collections: + daily_jobs = daily_jobs.filter(collection__acron3__in=collections) + payload_paths = list(daily_jobs.exclude(storage_path="").values_list("storage_path", flat=True)) + + summary = { + "log_files": len(log_file_ids), + "discarded_lines": LogFileDiscardedLine.objects.filter( + log_file_id__in=log_file_ids + ).count(), + "daily_metric_jobs": daily_jobs.count(), + } + + for storage_path in payload_paths: + daily_payloads.delete_payload(storage_path) + + with transaction.atomic(): + LogFileDiscardedLine.objects.filter(log_file_id__in=log_file_ids).delete() + daily_jobs.delete() + LogFile.objects.filter(id__in=log_file_ids).delete() + + if clear_reports: + report_qs = WeeklyLogReport.objects.all() + m_qs = MonthlyLogReport.objects.all() + y_qs = YearlyLogReport.objects.all() + if collections: + report_qs = report_qs.filter(collection__acron3__in=collections) + m_qs = m_qs.filter(collection__acron3__in=collections) + y_qs = y_qs.filter(collection__acron3__in=collections) + summary["weekly_reports"] = report_qs.count() + summary["monthly_reports"] = m_qs.count() + summary["yearly_reports"] = y_qs.count() + report_qs.delete() + m_qs.delete() + y_qs.delete() + + msg = ( + f"Cleared log catalog: " + f"{summary['log_files']} log files, " + f"{summary['discarded_lines']} discarded lines, " + f"{summary['daily_metric_jobs']} daily metric jobs." + ) + if clear_reports: + msg += ( + f" Also cleared reports: " + f"{summary['weekly_reports']} weekly, " + f"{summary['monthly_reports']} monthly, " + f"{summary['yearly_reports']} yearly." 
+ ) + self.stdout.write(self.style.SUCCESS(msg)) diff --git a/log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py b/log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py new file mode 100644 index 0000000..d30cdf4 --- /dev/null +++ b/log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py @@ -0,0 +1,52 @@ +# Generated by Django 5.2.12 on 2026-05-01 22:23 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("log_manager", "0009_collectionlogfiledatecount_exported_files_count"), + ] + + operations = [ + migrations.RemoveField( + model_name="logfiledate", + name="creator", + ), + migrations.RemoveField( + model_name="logfiledate", + name="log_file", + ), + migrations.RemoveField( + model_name="logfiledate", + name="updated_by", + ), + migrations.RemoveField( + model_name="logfile", + name="creator", + ), + migrations.RemoveField( + model_name="logfile", + name="updated_by", + ), + migrations.AddField( + model_name="logfile", + name="date", + field=models.DateField( + blank=True, db_index=True, null=True, verbose_name="Date" + ), + ), + migrations.AddField( + model_name="logfile", + name="parse_heartbeat_at", + field=models.DateTimeField( + blank=True, null=True, verbose_name="Parse Heartbeat At" + ), + ), + migrations.DeleteModel( + name="CollectionLogFileDateCount", + ), + migrations.DeleteModel( + name="LogFileDate", + ), + ] diff --git a/log_manager/models.py b/log_manager/models.py index fc3a8b6..6bf04d8 100644 --- a/log_manager/models.py +++ b/log_manager/models.py @@ -1,209 +1,20 @@ import logging -from django.db import models -from django.db.models import Q +from django.db import IntegrityError, models from django.utils import timezone from django.utils.translation import gettext_lazy as _ from wagtail.admin.panels import FieldPanel from wagtailautocomplete.edit_handlers import AutocompletePanel from collection.models import Collection -from core.forms import CoreAdminModelForm -from core.models import CommonControlField from . 
import choices -class LogFileDate(CommonControlField): - date = models.DateField( - verbose_name=_("Date"), - null=False, - blank=False, - db_index=True, - ) - - log_file = models.ForeignKey( - 'LogFile', - verbose_name=_('Log File'), - blank=True, - on_delete=models.DO_NOTHING, - db_index=True, - ) - - base_form_class = CoreAdminModelForm - - panel = [ - FieldPanel('date'), - AutocompletePanel('log_file') - ] - - class Meta: - ordering = ['-date'] - verbose_name = _("Log File Date") - verbose_name_plural = _("Log File Dates") - unique_together = ( - 'date', - 'log_file', - ) - indexes = [ - models.Index(fields=['date', 'log_file']), - ] - - @classmethod - def create_or_update(cls, user, log_file, date): - obj, created = cls.objects.get_or_create( - log_file=log_file, - date=date, - ) - - if not created: - obj.updated_by = user - obj.updated = timezone.now() - else: - obj.creator = user - obj.created = timezone.now() - - return obj - - @classmethod - def filter_by_collection_and_date(cls, collection, date): - return cls.objects.filter( - ~Q(log_file__status__in=[ - choices.LOG_FILE_STATUS_CREATED, - choices.LOG_FILE_STATUS_INVALIDATED - ]), - log_file__collection__acron3=collection, - date=date, - ) - - @classmethod - def get_number_of_found_files_for_date(cls, collection, date): - return cls.objects.filter( - ~Q(log_file__status__in=[ - choices.LOG_FILE_STATUS_CREATED, - choices.LOG_FILE_STATUS_INVALIDATED - ]), - log_file__collection__acron3=collection, - date=date, - ).count() - - def __str__(self): - return f'{self.log_file.path}-{self.date}' - - -class CollectionLogFileDateCount(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.DO_NOTHING, - null=False, - blank=False, - ) - - date = models.DateField( - _('Date'), - null=False, - blank=False, - ) - - year = models.IntegerField( - _('Year'), - null=False, - blank=False, - ) - - month = models.IntegerField( - _('Month'), - null=False, - blank=False, - ) - - found_log_files = models.IntegerField( - verbose_name=_('Number of Found Valid Log Files'), - default=0, - ) - - expected_log_files = models.IntegerField( - verbose_name=_('Number of Expected Valid Log Files'), - blank=True, - null=True, - ) - - is_usage_metric_computed = models.BooleanField( - verbose_name=_('Is Usage Metric Computed'), - default=False, - ) - - exported_files_count = models.SmallIntegerField( - verbose_name=_('Exported Files Count'), - default=0, - ) - - status = models.CharField( - verbose_name=_('Status'), - choices=choices.COLLECTION_LOG_FILE_DATE_COUNT, - max_length=3, - ) - - def set_status(self): - if self.found_log_files < self.expected_log_files: - self.status = choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES - elif self.found_log_files > self.expected_log_files: - self.status = choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES - else: - self.status = choices.COLLECTION_LOG_FILE_DATE_COUNT_OK - - def set_is_usage_metric_computed(self): - if self.exported_files_count == self.found_log_files: - self.is_usage_metric_computed = True - - @classmethod - def create_or_update(cls, user, collection, date, expected_log_files, found_log_files): - obj, created = cls.objects.get_or_create( - collection=collection, - date=date, - month=date.month, - year=date.year, - ) - - if not created: - obj.updated_by = user - obj.updated = timezone.now() - else: - obj.creator = user - obj.created = timezone.now() - - obj.expected_log_files = expected_log_files - obj.found_log_files = found_log_files - 
obj.set_status() - - obj.save() - return obj - - class Meta: - ordering = ['-date'] - verbose_name = _("Collection Log File Date Count") - unique_together = ( - 'collection', - 'date', - ) - - panels = [ - AutocompletePanel('collection'), - FieldPanel('date'), - FieldPanel('year'), - FieldPanel('month'), - FieldPanel('found_log_files'), - FieldPanel('expected_log_files'), - FieldPanel('status'), - FieldPanel('is_usage_metric_computed'), - ] - - def __str__(self): - return f'{self.collection.acron3}-{self.date}' - - -class LogFile(CommonControlField): +class LogFile(models.Model): + created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True) + updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True) + date = models.DateField(verbose_name=_("Date"), null=True, blank=True, db_index=True) hash = models.CharField(_("Hash MD5"), max_length=32, null=True, blank=True, unique=True) path = models.CharField(_("Name"), max_length=255, null=False, blank=False) @@ -246,19 +57,25 @@ class LogFile(CommonControlField): default=0, ) + parse_heartbeat_at = models.DateTimeField( + _("Parse Heartbeat At"), + null=True, + blank=True, + ) + panels = [ FieldPanel('hash'), + FieldPanel('date'), FieldPanel('path'), FieldPanel('stat_result'), FieldPanel('status'), FieldPanel('validation'), FieldPanel('summary'), FieldPanel('last_processed_line'), + FieldPanel('parse_heartbeat_at'), AutocompletePanel('collection'), ] - base_form_class = CoreAdminModelForm - class Meta: verbose_name = _("Log File") verbose_name_plural = _("Log Files") @@ -268,25 +85,28 @@ def get(cls, hash): return cls.objects.get(hash=hash) @classmethod - def create_or_update(cls, user, collection, path, stat_result, hash, status=None): + def create_or_update(cls, collection, path, stat_result, hash, status=None): try: + obj, created = cls.objects.get_or_create( + hash=hash, + defaults={ + "collection": collection, + "path": path, + "stat_result": stat_result, + "status": status or choices.LOG_FILE_STATUS_CREATED, + }, + ) + except IntegrityError: obj = cls.get(hash=hash) - obj.updated_by = user + created = False + + if created: + logging.info(f'File {path} added to the database.') + else: obj.updated = timezone.now() + obj.save(update_fields=["updated"]) logging.info(f'File {path} already exists in the database.') - except cls.DoesNotExist: - obj = cls() - obj.creator = user - obj.created = timezone.now() - obj.collection = collection - obj.path = path - obj.stat_result = stat_result - obj.hash = hash - obj.status = status or choices.LOG_FILE_STATUS_CREATED - logging.info(f'File {path} added to the database.') - - obj.save() return obj def __str__(self): diff --git a/log_manager/tasks.py b/log_manager/tasks.py index e14fe92..10148b1 100644 --- a/log_manager/tasks.py +++ b/log_manager/tasks.py @@ -1,20 +1,17 @@ import logging -import json import os from django.conf import settings -from django.core.mail import send_mail from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ from core.utils import date_utils -from core.utils.utils import _get_user +from core.utils.request_utils import _get_user from config import celery_app from collection.models import Collection -from log_manager_config import exceptions as lmc_exceptions, models as lmc_models +from log_manager_config import models as lmc_models from . 
import ( - exceptions, choices, models, utils, @@ -26,8 +23,8 @@ User = get_user_model() -@celery_app.task(bind=True, name=_('Search for log files'), queue='load') -def task_search_log_files(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None): +@celery_app.task(bind=True, name=_('[Log Pipeline] 1. Search Logs (Manual)'), queue='load') +def task_search_log_files(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, trigger_validation=False): """ Task to search for log files in the directories defined in the CollectionLogDirectory model. @@ -44,11 +41,11 @@ def task_search_log_files(self, collections=[], from_date=None, until_date=None, for col in collections or Collection.acron3_list(): collection = Collection.objects.get(acron3=col) - col_configs_dirs = lmc_models.CollectionLogDirectory.objects.filter(collection__acron3=col, active=True) + col_configs_dirs = lmc_models.CollectionLogDirectory.objects.filter(config__collection__acron3=col, active=True) if len(col_configs_dirs) == 0: logging.error(f'No CollectionLogDirectory found for collection {col}.') - supported_logfile_extensions = lmc_models.SupportedLogFile.objects.values_list('file_extension', flat=True) + supported_logfile_extensions = settings.SUPPORTED_LOGFILE_EXTENSIONS if len(supported_logfile_extensions) == 0: logging.error('No SupportedLogFile found. Please, add a SupportedLogFile for each of the supported log file formats.') @@ -62,7 +59,18 @@ def task_search_log_files(self, collections=[], from_date=None, until_date=None, visible_dates = _get_visible_dates(from_date, until_date, days_to_go_back) logging.debug(f'Visible dates: {visible_dates}') - _add_log_file(user, collection, root, name, visible_dates) + _add_log_file(collection, root, name, visible_dates) + + if trigger_validation: + task_validate_log_files.apply_async(kwargs={ + "collections": collections, + "from_date": from_date, + "until_date": until_date, + "days_to_go_back": days_to_go_back, + "user_id": user_id, + "username": username, + "trigger_parse": True + }) def _get_visible_dates(from_date, until_date, days_to_go_back): @@ -70,14 +78,13 @@ def _get_visible_dates(from_date, until_date, days_to_go_back): return date_utils.get_date_objs_from_date_range(from_date_str, until_date_str) -def _add_log_file(user, collection, root, name, visible_dates): +def _add_log_file(collection, root, name, visible_dates): file_path = os.path.join(root, name) file_ctime = date_utils.get_date_obj_from_timestamp(os.stat(file_path).st_ctime) logging.debug(f'Checking file {file_path} with ctime {file_ctime}.') if file_ctime in visible_dates: models.LogFile.create_or_update( - user=user, collection=collection, path=file_path, stat_result=os.stat(file_path), @@ -85,8 +92,8 @@ def _add_log_file(user, collection, root, name, visible_dates): ) -@celery_app.task(bind=True, name=_('Validate log files'), timelimit=-1, queue='load') -def task_validate_log_files(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, ignore_date=False): +@celery_app.task(bind=True, name=_('[Log Pipeline] 2. Validate Logs (Manual)'), timelimit=-1, queue='load') +def task_validate_log_files(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, ignore_date=False, trigger_parse=False, revalidate=False, status_list=None): """ Task to validate log files in the database. 
@@ -98,22 +105,58 @@ def task_validate_log_files(self, collections=[], from_date=None, until_date=Non user_id (int, optional): The ID of the user initiating the task. Defaults to None. username (str, optional): The username of the user initiating the task. Defaults to None. ignore_date (bool, optional): If True, ignore the date of the log file. Defaults to False. + revalidate (bool, optional): If True, also revalidate files in statuses from status_list. Defaults to False. + status_list (list, optional): List of status codes to revalidate when revalidate=True. Defaults to [QUE, INV, ERR]. """ cols = collections or Collection.acron3_list() logging.info(f'Validating log files for collections: {cols}.') visible_dates = _get_visible_dates(from_date, until_date, days_to_go_back) if not ignore_date: + if not visible_dates: + logging.warning("No visible dates found for log validation.") + return logging.info(f'Interval: {visible_dates[0]} to {visible_dates[-1]}.') + status_filter = [choices.LOG_FILE_STATUS_CREATED] + if revalidate: + status_filter += status_list or [choices.LOG_FILE_STATUS_QUEUED, choices.LOG_FILE_STATUS_INVALIDATED, choices.LOG_FILE_STATUS_ERROR] + + tasks = [] for col in cols: - for log_file in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_CREATED, collection__acron3=col): + for log_file in models.LogFile.objects.filter(status__in=status_filter, collection__acron3=col): file_ctime = date_utils.get_date_obj_from_timestamp(log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX]) if file_ctime in visible_dates or ignore_date: - task_validate_log_file.apply_async(args=(log_file.hash, user_id, username)) - - -@celery_app.task(bind=True, name=_('Validate log file'), timelimit=-1, queue='load') + tasks.append(task_validate_log_file.s(log_file.hash, user_id, username)) + + if tasks: + if trigger_parse: + from celery import chord + from metrics.tasks import task_parse_logs + chord(tasks)(task_parse_logs.si( + collections=collections, + from_date=from_date, + until_date=until_date, + days_to_go_back=days_to_go_back, + user_id=user_id, + username=username, + )) + else: + for task in tasks: + task.apply_async() + elif trigger_parse: + from metrics.tasks import task_parse_logs + task_parse_logs.apply_async(kwargs={ + "collections": collections, + "from_date": from_date, + "until_date": until_date, + "days_to_go_back": days_to_go_back, + "user_id": user_id, + "username": username, + }) + + +@celery_app.task(bind=True, name=_('[Log Pipeline] Validate Single Log File (Auto)'), timelimit=-1, queue='load') def task_validate_log_file(self, log_file_hash, user_id=None, username=None): """ Task to validate a specific log file. 
@@ -135,21 +178,21 @@ def task_validate_log_file(self, log_file_hash, user_id=None, username=None): del val_result['content']['summary']['datetimes'] if 'probably_date' in val_result: - val_result['probably_date'] = date_utils.get_date_str(val_result['probably_date']) - - try: - log_file.validation = val_result - log_file.validation.update({'buffer_size': buffer_size, 'sample_size': sample_size}) - except json.JSONDecodeError as e: - logging.error(f'Error serializing validation result: {e}') - log_file.validation = {} + if isinstance(val_result['probably_date'], dict): + logging.error(f"Error determining probably_date: {val_result['probably_date'].get('error')}") + val_result['probably_date'] = None + else: + try: + val_result['probably_date'] = date_utils.get_date_str(val_result['probably_date']) + except (ValueError, AttributeError) as e: + logging.error(f'Error serializing probably_date: {e}') + val_result['probably_date'] = None + + log_file.validation = val_result + log_file.validation.update({'buffer_size': buffer_size, 'sample_size': sample_size}) if val_result.get('is_valid', {}).get('all', False): - models.LogFileDate.create_or_update( - user=user, - log_file=log_file, - date=val_result.get('probably_date', ''), - ) + log_file.date = val_result.get('probably_date') or None log_file.status = choices.LOG_FILE_STATUS_QUEUED else: @@ -160,116 +203,19 @@ def task_validate_log_file(self, log_file_hash, user_id=None, username=None): def _fetch_validation_parameters(collection, default_buffer_size=0.1, default_sample_size=2048): - col_configs_params = lmc_models.CollectionValidationParameters.objects.filter(collection__acron3=collection).first() - if not col_configs_params: - logging.warning(f'No CollectionValidationParameters found for collection {collection}. Using default values.') + col_configs = lmc_models.LogManagerCollectionConfig.objects.filter(collection__acron3=collection).first() + if not col_configs: + logging.warning(f'No LogManagerCollectionConfig found for collection {collection}. Using default values.') return default_buffer_size, default_sample_size - return col_configs_params.buffer_size, col_configs_params.sample_size + return col_configs.buffer_size, col_configs.sample_size -@celery_app.task(bind=True, name=_('Check missing log files')) -def task_check_missing_logs_for_date_range(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None): +@celery_app.task(bind=True, name=_('[Log Pipeline] Daily Routine (Auto)'), queue='load') +def task_daily_log_ingestion_pipeline(self): """ - Task to check for missing log files in the defined date range. - - Parameters: - collections (list, optional): List of collection acronyms. Defaults to []. - from_date (str, optional): The start date for log discovery in YYYY-MM-DD format. Defaults to None. - until_date (str, optional): The end date for log discovery in YYYY-MM-DD format. Defaults to None. - days_to_go_back (int, optional): The number of days to go back from today for log discovery. Defaults to None. - user_id (int, optional): The ID of the user initiating the task. Defaults to None. - username (str, optional): The username of the user initiating the task. Defaults to None. - - Raises: - exceptions.UndefinedCollectionFilesPerDayError: Raised when there are no expected log files for the collection. - exceptions.MultipleFilesPerDayForTheSameDateError: Raised when there are multiple expected log files for the same date. + Facade task for the daily log ingestion pipeline. 
+ It initiates the Search -> Validate -> Parse chain using default parameters. + No arguments are required, making it easy to schedule periodically. """ - user = _get_user(self.request, username=username, user_id=user_id) - - from_date_str, until_date_str = date_utils.get_date_range_str(from_date, until_date, days_to_go_back) - - for col in collections or Collection.acron3_list(): - collection = Collection.objects.get(acron3=col) - for date in date_utils.get_date_objs_from_date_range(from_date_str, until_date_str): - logging.info(f'Couting logs for collection {col} and date {date}') - count_logs_for_date(user, collection, date) - - -def count_logs_for_date(user, collection, date): - try: - n_expected_files = lmc_models.CollectionLogFilesPerDay.get_number_of_expected_files_by_day(collection=collection.acron3, date=date) - except lmc_exceptions.UndefinedCollectionFilesPerDayError: - return - except lmc_exceptions.MultipleFilesPerDayForTheSameDateError: - return - - n_found_logs = models.LogFileDate.get_number_of_found_files_for_date(collection=collection.acron3, date=date) - - obj = models.CollectionLogFileDateCount.create_or_update( - user=user, - collection=collection, - date=date, - expected_log_files=n_expected_files, - found_log_files=n_found_logs, - ) - logging.info(f'Created CollectionLogFileDateCount object {obj}.') - - -@celery_app.task(bind=True, name=_('Generate log files count report')) -def task_log_files_count_status_report(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None): - from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back) - - from_date_obj = date_utils.get_date_obj(from_date) - until_date_obj = date_utils.get_date_obj(until_date) - - for collection in collections or Collection.acron3_list(): - col = models.Collection.objects.get(acron3=collection) - subject = _(f'Usage Log Validation Results ({from_date} to {until_date})') - message = _(f'This message provides the results of the Usage Log Validation for the period {from_date} to {until_date}:\n\n') - - missing = models.CollectionLogFileDateCount.objects.filter( - collection__acron3=collection, - status=choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES, - date__gte=from_date_obj, - date__lte=until_date_obj, - ) - extra = models.CollectionLogFileDateCount.objects.filter( - collection__acron3=collection, - status=choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES, - date__gte=from_date_obj, - date__lte=until_date_obj, - ) - ok = models.CollectionLogFileDateCount.objects.filter( - collection__acron3=collection, - status=choices.COLLECTION_LOG_FILE_DATE_COUNT_OK, - date__gte=from_date_obj, - date__lte=until_date_obj, - ) - - if missing.count() > 0: - message += _(f'- There are {missing.count()} missing log files.\n') - if extra.count() > 0: - message += _(f'- There are {extra.count()} extra log files.\n') - if ok.count() > 0: - message += _(f'- There are {ok.count()} dates with correct log files.\n') - - if missing.count() > 0 or extra.count() > 0: - message += _(f'\nPlease review the script responsible for sharing the log files.\n') - - message += _(f'\nYou can view the full report at {settings.WAGTAILADMIN_BASE_URL}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={col.pk}>.') - - logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. 
Message: {message}') - _send_message(subject, message, collection) - - -def _send_message(subject, message, collection): - collection_emails = lmc_models.CollectionEmail.objects.filter(collection__acron3=collection, active=True).values_list('email', flat=True) - if len(collection_emails) == 0: - raise exceptions.UndefinedCollectionConfigError(_("Error. Please, add an E-mail Configuration for the collection.")) - - send_mail( - subject=subject, - message=message, - from_email=settings.EMAIL_HOST_USER, - recipient_list=collection_emails - ) + logging.info("Starting Daily Log Ingestion Pipeline") + task_search_log_files.apply_async(kwargs={"trigger_validation": True}) diff --git a/log_manager/tests.py b/log_manager/tests.py index 7ce503c..51c1402 100644 --- a/log_manager/tests.py +++ b/log_manager/tests.py @@ -1,3 +1,58 @@ +from unittest.mock import patch + +from django.db import IntegrityError from django.test import TestCase -# Create your tests here. +from collection.models import Collection + +from . import choices, tasks +from .models import LogFile + + +class LogFileTests(TestCase): + def setUp(self): + self.collection = Collection.objects.create(acron3="books", acron2="bk") + + def test_create_or_update_creates_log_file(self): + log_file = LogFile.create_or_update( + collection=self.collection, + path="/tmp/new.log.gz", + stat_result={"size": 10}, + hash="1" * 32, + ) + + self.assertEqual(log_file.collection, self.collection) + self.assertEqual(log_file.path, "/tmp/new.log.gz") + self.assertEqual(log_file.status, choices.LOG_FILE_STATUS_CREATED) + + def test_create_or_update_refetches_existing_log_after_integrity_error(self): + existing = LogFile.objects.create( + collection=self.collection, + path="/tmp/existing.log.gz", + stat_result={"size": 10}, + hash="1" * 32, + status=choices.LOG_FILE_STATUS_CREATED, + ) + + with patch.object(LogFile.objects, "get_or_create", side_effect=IntegrityError): + log_file = LogFile.create_or_update( + collection=self.collection, + path="/tmp/existing.log.gz", + stat_result={"size": 10}, + hash=existing.hash, + ) + + self.assertEqual(log_file.pk, existing.pk) + + +class ValidateLogFilesTaskTests(TestCase): + def test_validate_log_files_returns_for_empty_visible_date_range(self): + with patch("log_manager.tasks.task_validate_log_file.s") as mocked_signature: + result = tasks.task_validate_log_files.run( + collections=["books"], + from_date="2024-02-02", + until_date="2024-02-01", + ) + + self.assertIsNone(result) + mocked_signature.assert_not_called() diff --git a/log_manager/utils.py b/log_manager/utils.py index 4a2b00b..c7dd2db 100644 --- a/log_manager/utils.py +++ b/log_manager/utils.py @@ -1,42 +1,47 @@ +import gzip import hashlib +from collections import deque from scielo_log_validator import validator -def hash_file(path, num_lines=25): +def hash_file(path, num_lines=500): """ - Calculates the MD5 hash of a file using a combination of its first and last `num_lines` lines, - as well as its size. - + Calculates the MD5 hash of a file using a combination of its first and last + `num_lines` lines. + + For gzip-compressed files, the content is decompressed before hashing, + so that different compressions of the same data produce the same hash. + File size is intentionally NOT included because it varies between + compressions and between growing log files, causing false duplicates. + Args: path (str): The path to the file. - num_lines (int): The number of lines to consider from the beginning and end of the file. Default is 25. 
+ num_lines (int): The number of lines to consider from the beginning + and end of the file. Default is 500. Returns: The MD5 hash digest as a hexadecimal string. """ md5_hash = hashlib.md5() - with open(path, 'rb') as file: - # Read the first `num_lines` lines of the file + opener = gzip.open if _is_gzip(path) else open + + with opener(path, 'rb') as file: first_lines = b''.join([file.readline() for _ in range(num_lines)]) md5_hash.update(first_lines) - # Move the file pointer to the end of the file - file.seek(0, 2) + tail = deque(maxlen=num_lines) + for line in file: + tail.append(line) + md5_hash.update(b''.join(tail)) - # Get the size of the file - size = file.tell() - md5_hash.update(str(size).encode()) - - # Move the file pointer to the start of the file - file.seek(-size, 2) + return md5_hash.hexdigest() - # Read the last `num_lines` lines of the file - last_lines = file.readlines()[-num_lines:] - md5_hash.update(b''.join(last_lines)) - return md5_hash.hexdigest() +def _is_gzip(path): + with open(path, 'rb') as f: + return f.read(2) == b'\x1f\x8b' def validate_file(path, sample_size=0.1, buffer_size=2048, days_delta=5, apply_path_validation=True, apply_content_validation=True): return validator.pipeline_validate( diff --git a/log_manager/wagtail_hooks.py b/log_manager/wagtail_hooks.py index aeb6908..1548ad3 100644 --- a/log_manager/wagtail_hooks.py +++ b/log_manager/wagtail_hooks.py @@ -3,54 +3,10 @@ from wagtail.snippets.models import register_snippet from config.menu import get_menu_order +from log_manager_config.wagtail_hooks import LogManagerCollectionConfigSnippetViewSet +from metrics.wagtail_hooks import DailyMetricJobSnippetViewSet -from log_manager.models import ( - CollectionLogFileDateCount, - LogFile, - LogFileDate, -) - - -class LogFileDateViewSet(SnippetViewSet): - model = LogFileDate - menu_label = _("Log Files per Day") - icon = "folder" - menu_order = 300 - - list_display = ( - "date", - "log_file", - ) - list_filter = ( - "date", - "log_file__collection", - ) - search_fields = () - - -class CollectionLogFileDateCountViewSet(SnippetViewSet): - model = CollectionLogFileDateCount - menu_label = _("Expected and Found Log Files") - icon = "folder" - menu_order = 400 - - list_display = ( - "collection", - "date", - "found_log_files", - "expected_log_files", - "status", - "exported_files_count", - "is_usage_metric_computed", - ) - list_filter = ( - "collection", - "status", - "exported_files_count", - "is_usage_metric_computed", - "year", - "month" - ) +from log_manager.models import LogFile class LogFileSnippetViewSet(SnippetViewSet): @@ -60,16 +16,17 @@ class LogFileSnippetViewSet(SnippetViewSet): menu_order = 500 list_display = ( "path", - "stat_result", "collection", "status", + "date", "validation", "summary", "last_processed_line", + "parse_heartbeat_at", "hash" ) - list_filter = ("status", "collection") - search_fields = ("file",) + list_filter = ("status", "collection", "date") + search_fields = ("path", "hash", "collection__acron3", "collection__main_name") class LogSnippetViewSetGroup(SnippetViewSetGroup): @@ -78,9 +35,9 @@ class LogSnippetViewSetGroup(SnippetViewSetGroup): menu_icon = "folder-open-inverse" menu_order = get_menu_order("log_manager") items = ( - LogFileDateViewSet, - CollectionLogFileDateCountViewSet, + LogManagerCollectionConfigSnippetViewSet, LogFileSnippetViewSet, + DailyMetricJobSnippetViewSet, ) diff --git a/log_manager_config/exceptions.py b/log_manager_config/exceptions.py index ad7581a..0a6a6a9 100644 --- 
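With the rewritten `hash_file`, the digest depends only on the first and last `num_lines` of the (decompressed) content, so the same log data should hash identically whether or not it is gzip-compressed. A small demonstration of that property (file paths and contents below are invented):

```python
import gzip
from pathlib import Path

from log_manager.utils import hash_file

content = b"".join(f"line {i}\n".encode() for i in range(1000))

plain = Path("/tmp/sample.log")
plain.write_bytes(content)

compressed = Path("/tmp/sample.log.gz")
with gzip.open(compressed, "wb") as fh:
    fh.write(content)

# Same digest for both files: size and compression no longer influence the hash.
assert hash_file(str(plain)) == hash_file(str(compressed))
```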
a/log_manager_config/exceptions.py +++ b/log_manager_config/exceptions.py @@ -4,11 +4,5 @@ class UndefinedCollectionLogDirectoryError(Exception): class UndefinedCollectionEmailError(Exception): ... -class UndefinedCollectionFilesPerDayError(Exception): - ... - class UndefinedSupportedLogFile(Exception): ... - -class MultipleFilesPerDayForTheSameDateError(Exception): - ... diff --git a/log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py b/log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py new file mode 100644 index 0000000..5b6351c --- /dev/null +++ b/log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py @@ -0,0 +1,223 @@ +# Generated by Django 5.2.12 on 2026-05-01 22:27 + +import django.db.models.deletion +import modelcluster.fields +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("collection", "0001_initial"), + ("log_manager_config", "0003_alter_collectionemail_options_and_more"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="LogManagerCollectionConfig", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "sample_size", + models.FloatField(default=0.1, verbose_name="Sample Size"), + ), + ( + "buffer_size", + models.IntegerField(default=2048, verbose_name="Buffer Size"), + ), + ( + "expected_logs_per_day", + models.IntegerField( + default=1, verbose_name="Expected Logs Per Day" + ), + ), + ], + options={ + "verbose_name": "Log Manager Collection Config", + "verbose_name_plural": "Log Manager Collection Configs", + }, + ), + migrations.RemoveField( + model_name="collectionlogfilesperday", + name="collection", + ), + migrations.RemoveField( + model_name="collectionlogfilesperday", + name="creator", + ), + migrations.RemoveField( + model_name="collectionlogfilesperday", + name="updated_by", + ), + migrations.RemoveField( + model_name="collectionurltranslatorclass", + name="collection", + ), + migrations.RemoveField( + model_name="collectionurltranslatorclass", + name="creator", + ), + migrations.RemoveField( + model_name="collectionurltranslatorclass", + name="directory", + ), + migrations.RemoveField( + model_name="collectionurltranslatorclass", + name="updated_by", + ), + migrations.RemoveField( + model_name="collectionvalidationparameters", + name="collection", + ), + migrations.RemoveField( + model_name="collectionvalidationparameters", + name="creator", + ), + migrations.RemoveField( + model_name="collectionvalidationparameters", + name="updated_by", + ), + migrations.RemoveField( + model_name="supportedlogfile", + name="creator", + ), + migrations.RemoveField( + model_name="supportedlogfile", + name="updated_by", + ), + migrations.RemoveConstraint( + model_name="collectionemail", + name="unique_collection_email", + ), + migrations.RemoveConstraint( + model_name="collectionlogdirectory", + name="unique_collection_path", + ), + migrations.RemoveField( + model_name="collectionemail", + name="collection", + ), + migrations.RemoveField( + model_name="collectionlogdirectory", + name="collection", + ), + migrations.AddField( + model_name="collectionemail", + name="sort_order", + 
field=models.IntegerField(blank=True, editable=False, null=True), + ), + migrations.AddField( + model_name="collectionlogdirectory", + name="sort_order", + field=models.IntegerField(blank=True, editable=False, null=True), + ), + migrations.AddField( + model_name="collectionlogdirectory", + name="translator_class", + field=models.CharField( + default="URLTranslatorClassicSite", verbose_name="URL Translator Class" + ), + ), + migrations.AddField( + model_name="logmanagercollectionconfig", + name="collection", + field=models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="log_manager_config", + to="collection.collection", + verbose_name="Collection", + ), + ), + migrations.AddField( + model_name="logmanagercollectionconfig", + name="creator", + field=models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + migrations.AddField( + model_name="logmanagercollectionconfig", + name="updated_by", + field=models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + migrations.AddField( + model_name="collectionemail", + name="config", + field=modelcluster.fields.ParentalKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="emails", + to="log_manager_config.logmanagercollectionconfig", + ), + ), + migrations.AddField( + model_name="collectionlogdirectory", + name="config", + field=modelcluster.fields.ParentalKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="directories", + to="log_manager_config.logmanagercollectionconfig", + ), + ), + migrations.AddConstraint( + model_name="collectionemail", + constraint=models.UniqueConstraint( + fields=("config", "email"), name="unique_config_email" + ), + ), + migrations.AddConstraint( + model_name="collectionlogdirectory", + constraint=models.UniqueConstraint( + fields=("config", "path"), name="unique_config_path" + ), + ), + migrations.DeleteModel( + name="CollectionLogFilesPerDay", + ), + migrations.DeleteModel( + name="CollectionURLTranslatorClass", + ), + migrations.DeleteModel( + name="CollectionValidationParameters", + ), + migrations.DeleteModel( + name="SupportedLogFile", + ), + ] diff --git a/log_manager_config/models.py b/log_manager_config/models.py index 384368e..8cf3e34 100644 --- a/log_manager_config/models.py +++ b/log_manager_config/models.py @@ -4,38 +4,57 @@ from django.utils import timezone from django.utils.translation import gettext_lazy as _ +from modelcluster.models import ClusterableModel +from modelcluster.fields import ParentalKey +from wagtail.models import Orderable +from wagtail.admin.panels import FieldPanel, InlinePanel +from wagtailautocomplete.edit_handlers import AutocompletePanel + from collection.models import Collection from core.models import CommonControlField -from .exceptions import MultipleFilesPerDayForTheSameDateError, UndefinedCollectionFilesPerDayError -class CollectionLogDirectory(CommonControlField): - collection = models.ForeignKey( +class LogManagerCollectionConfig(ClusterableModel, CommonControlField): + collection = models.OneToOneField( Collection, verbose_name=_('Collection'), - on_delete=models.DO_NOTHING, + on_delete=models.CASCADE, + related_name="log_manager_config" ) - path = 
models.CharField( - verbose_name=_('Path'), - max_length=255, - blank=False, + sample_size = models.FloatField( + verbose_name=_('Sample Size'), + blank=False, null=False, + default=0.1, ) - directory_name = models.CharField( - verbose_name=_('Directory Name'), - max_length=255, - blank=True, - null=True, + buffer_size = models.IntegerField( + verbose_name=_('Buffer Size'), + blank=False, + null=False, + default=2048, ) - active = models.BooleanField( - verbose_name=_('Active'), - default=True, + expected_logs_per_day = models.IntegerField( + verbose_name=_('Expected Logs Per Day'), + default=1, ) + panels = [ + AutocompletePanel("collection"), + FieldPanel("sample_size"), + FieldPanel("buffer_size"), + FieldPanel("expected_logs_per_day"), + InlinePanel("directories", label=_("Directories")), + InlinePanel("emails", label=_("Emails")), + ] + def __str__(self): - return f'{self.collection} - {self.path} - {self.directory_name}' - + return f'{self.collection.acron3} Config' + + class Meta: + verbose_name = _('Log Manager Collection Config') + verbose_name_plural = _('Log Manager Collection Configs') + @classmethod def load(cls, data, user): for item in data: @@ -45,13 +64,12 @@ def load(cls, data, user): logging.warning(f'Collection {item.get("acronym")} not found.') continue - logging.info(item) cls.create_or_update( user=user, collection=collection, - directory_name=item.get('directory_name'), - path=item.get('path'), - active=item.get('active', True), + sample_size=item.get('sample_size', 0.1), + buffer_size=item.get('buffer_size', 2048), + expected_logs_per_day=item.get('quantity', 1), ) @classmethod @@ -59,81 +77,66 @@ def create_or_update( cls, user, collection, - directory_name, - path, - active, + sample_size, + buffer_size, + expected_logs_per_day, ): - try: - obj = cls.objects.get(collection=collection, path=path) - except cls.DoesNotExist: - obj = cls() + obj, created = cls.objects.get_or_create(collection=collection) + if created: obj.creator = user obj.created = timezone.now() - obj.collection = collection obj.updated_by = user obj.updated = timezone.now() - obj.directory_name = directory_name - obj.path = path - obj.active = active - + obj.sample_size = sample_size + obj.buffer_size = buffer_size + obj.expected_logs_per_day = expected_logs_per_day obj.save() - logging.info(f'{collection.acron3} - {directory_name} - {path}') + logging.info(f'Config for {collection.acron3} updated.') return obj - class Meta: - verbose_name = _('Collection Log Directory') - verbose_name_plural = _('Collection Log Directories') - constraints = [ - models.UniqueConstraint(fields=['collection', 'path'], name='unique_collection_path') - ] -class CollectionLogFilesPerDay(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.DO_NOTHING, +class CollectionLogDirectory(Orderable, CommonControlField): + config = ParentalKey( + 'LogManagerCollectionConfig', + related_name='directories', + on_delete=models.CASCADE, + null=True, + blank=True, ) - start_date = models.DateField( - verbose_name=_('Start Date'), - blank=False, + path = models.CharField( + verbose_name=_('Path'), + max_length=255, + blank=False, null=False, ) - end_date = models.DateField( - verbose_name=_('End Date'), + directory_name = models.CharField( + verbose_name=_('Directory Name'), + max_length=255, blank=True, null=True, ) - quantity = models.IntegerField( - verbose_name=_('Quantity'), - default=1, + active = models.BooleanField( + verbose_name=_('Active'), + default=True, 
+ ) + translator_class = models.CharField( + verbose_name=_('URL Translator Class'), + blank=False, + null=False, + default='URLTranslatorClassicSite', ) def __str__(self): - return f'{self.start_date} - {self.quantity}' + return f'{self.config.collection} - {self.path} - {self.directory_name}' - @classmethod - def get_number_of_expected_files_by_day(cls, collection, date): - files_by_day = cls.objects.filter( - models.Q(collection__acron3=collection) & - models.Q(start_date__lte=date) & - (models.Q(end_date__gte=date) | models.Q(end_date__isnull=True)) - ) - - if files_by_day.count() > 1: - raise MultipleFilesPerDayForTheSameDateError(_("ERROR. Please, set the field end_date for the collection {collection}.")) - - if files_by_day.count() == 0: - raise UndefinedCollectionFilesPerDayError(_("ERROR. Please, set the number of files per day for the collection {collection}.")) - - return int(files_by_day.get().quantity) - @classmethod def load(cls, data, user): for item in data: try: collection = Collection.objects.get(acron3=item.get('acronym')) + config, _ = LogManagerCollectionConfig.objects.get_or_create(collection=collection) except Collection.DoesNotExist: logging.warning(f'Collection {item.get("acronym")} not found.') continue @@ -141,52 +144,55 @@ def load(cls, data, user): logging.info(item) cls.create_or_update( user=user, - collection=collection, - start_date=item.get('start_date'), - quantity=item.get('quantity'), - end_date=item.get('end_date'), + config=config, + directory_name=item.get('directory_name'), + path=item.get('path'), + active=item.get('active', True), ) @classmethod def create_or_update( cls, user, - collection, - start_date, - quantity, - end_date, + config, + directory_name, + path, + active, ): try: - obj = cls.objects.get(collection=collection, start_date=start_date) + obj = cls.objects.get(config=config, path=path) except cls.DoesNotExist: obj = cls() obj.creator = user obj.created = timezone.now() - obj.collection = collection - + obj.config = config + obj.updated_by = user obj.updated = timezone.now() - obj.start_date = start_date - obj.quantity = quantity - obj.end_date = end_date - + obj.directory_name = directory_name + obj.path = path + obj.active = active + obj.save() - logging.info(f'{collection.acron3} - {start_date} - {quantity}') + logging.info(f'{config.collection.acron3} - {directory_name} - {path}') return obj class Meta: - verbose_name = _('Collection Log Files Per Day') - verbose_name_plural = _('Collection Log Files Per Day') + verbose_name = _('Collection Log Directory') + verbose_name_plural = _('Collection Log Directories') constraints = [ - models.UniqueConstraint(fields=['collection', 'start_date'], name='unique_collection_start_date') + models.UniqueConstraint(fields=['config', 'path'], name='unique_config_path') ] -class CollectionEmail(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.DO_NOTHING, + +class CollectionEmail(Orderable, CommonControlField): + config = ParentalKey( + 'LogManagerCollectionConfig', + related_name='emails', + on_delete=models.CASCADE, + null=True, + blank=True, ) name = models.CharField( verbose_name=_('Name'), @@ -218,6 +224,7 @@ def load(cls, data, user): for item in data: try: collection = Collection.objects.get(acron3=item.get('acronym')) + config, _ = LogManagerCollectionConfig.objects.get_or_create(collection=collection) except Collection.DoesNotExist: logging.warning(f'Collection {item.get("acronym")} not found.') continue @@ -225,7 
+232,7 @@ def load(cls, data, user): logging.info(item) cls.create_or_update( user=user, - collection=collection, + config=config, email=item.get('e-mail'), name=item.get('name'), position=item.get('position'), @@ -236,19 +243,19 @@ def load(cls, data, user): def create_or_update( cls, user, - collection, + config, email, name, position, active, ): try: - obj = cls.objects.get(collection=collection, email=email) + obj = cls.objects.get(config=config, email=email) except cls.DoesNotExist: obj = cls() obj.creator = user obj.created = timezone.now() - obj.collection = collection + obj.config = config obj.email = email obj.updated_by = user @@ -258,213 +265,14 @@ def create_or_update( obj.active = active obj.save() - logging.info(f'{collection.acron3} - {name} - {position} - {email}') + logging.info(f'{config.collection.acron3} - {name} - {position} - {email}') return obj class Meta: verbose_name = _('Collection Email') verbose_name_plural = _('Collection Emails') constraints = [ - models.UniqueConstraint(fields=['collection', 'email'], name='unique_collection_email') - ] - - -class CollectionValidationParameters(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.DO_NOTHING, - primary_key=True, - ) - sample_size = models.FloatField( - verbose_name=_('Sample Size'), - blank=False, - null=False, - default=0.1, - ) - buffer_size = models.IntegerField( - verbose_name=_('Buffer Size'), - blank=False, - null=False, - default=2048, - ) - - def __str__(self): - return f'{self.collection.acron3} - {self.sample_size} - {self.buffer_size}' - - @classmethod - def load(cls, data, user): - for item in data: - try: - collection = Collection.objects.get(acron3=item.get('acronym')) - except Collection.DoesNotExist: - logging.warning(f'Collection {item.get("acronym")} not found.') - continue - - logging.info(item) - cls.create_or_update( - user=user, - collection=collection, - sample_size=item.get('sample_size'), - buffer_size=item.get('buffer_size'), - ) - - @classmethod - def create_or_update( - cls, - user, - collection, - sample_size, - buffer_size, - ): - try: - obj = cls.objects.get(collection=collection) - except cls.DoesNotExist: - obj = cls() - obj.creator = user - obj.created = timezone.now() - obj.collection = collection - - obj.updated_by = user - obj.updated = timezone.now() - obj.sample_size = sample_size - obj.buffer_size = buffer_size - - obj.save() - logging.info(f'{collection.acron3} - {sample_size} - {buffer_size}') - return obj - - class Meta: - verbose_name = _('Collection Validation Parameters') - verbose_name_plural = _('Collection Validation Parameters') - - -class CollectionURLTranslatorClass(CommonControlField): - collection = models.ForeignKey( - Collection, - verbose_name=_('Collection'), - on_delete=models.DO_NOTHING, - ) - directory = models.ForeignKey( - CollectionLogDirectory, - verbose_name=_('Directory'), - on_delete=models.DO_NOTHING, - ) - translator_class = models.CharField( - verbose_name=_('URL Translator Class'), - blank=False, - null=False, - default='URLTranslatorClassicSite', - ) - - def __str__(self): - return f'{self.collection.acron3} - {self.directory} - {self.translator_class}' - - class Meta: - verbose_name = _('Collection URL Translator Class') - verbose_name_plural = _('Collection URL Translator Classes') - constraints = [ - models.UniqueConstraint(fields=['collection', 'directory'], name='unique_collection_directory') + models.UniqueConstraint(fields=['config', 'email'], 
name='unique_config_email') ] - @classmethod - def load(cls, data, user): - for item in data: - try: - collection = Collection.objects.get(acron3=item.get('acronym')) - except Collection.DoesNotExist: - logging.warning(f'Collection {item.get("acronym")} not found.') - continue - - try: - directory = CollectionLogDirectory.objects.get(collection=collection, path=item.get('path')) - logging.info(item) - cls.create_or_update( - user=user, - collection=collection, - directory=directory, - translator_class=item.get('translator_class'), - ) - except CollectionLogDirectory.DoesNotExist: - logging.warning(f'Directory {item.get("path")} not found.') - continue - @classmethod - def create_or_update( - cls, - user, - collection, - directory, - translator_class, - ): - try: - obj = cls.objects.get(collection=collection) - except cls.DoesNotExist: - obj = cls() - obj.creator = user - obj.created = timezone.now() - obj.collection = collection - obj.directory = directory - - obj.updated_by = user - obj.updated = timezone.now() - obj.translator_class = translator_class - - obj.save() - logging.info(f'{collection.acron3} - {directory.path} - {translator_class}') - return obj - - -class SupportedLogFile(CommonControlField): - file_extension = models.CharField( - verbose_name=_('File Extension'), - max_length=255, - unique=True, - blank=False, - null=False, - ) - description = models.TextField( - verbose_name=_('Description'), - blank=True, - null=True, - ) - - def __str__(self): - return f'{self.file_extension}' - - @classmethod - def load(cls, data, user): - for item in data: - logging.info(item) - cls.create_or_update( - user=user, - file_extension=item.get('file_extension'), - description=item.get('description'), - ) - - @classmethod - def create_or_update( - cls, - user, - file_extension, - description, - ): - try: - obj = cls.objects.get(file_extension=file_extension) - except cls.DoesNotExist: - obj = cls() - obj.creator = user - obj.created = timezone.now() - - obj.updated_by = user - obj.updated = timezone.now() - obj.file_extension = file_extension - obj.description = description - - obj.save() - logging.info(f'{file_extension}') - return obj - - class Meta: - verbose_name = _('Supported Log File') - verbose_name_plural = _('Supported Log Files') diff --git a/log_manager_config/tasks.py b/log_manager_config/tasks.py index f15262b..c4ff399 100644 --- a/log_manager_config/tasks.py +++ b/log_manager_config/tasks.py @@ -1,59 +1,25 @@ -from django.contrib.auth import get_user_model +from django.conf import settings from django.utils.translation import gettext as _ -from core.utils.utils import _get_user from config import celery_app +from config.collections import COLLECTION_SIZE_SAMPLE_MAP, LOG_MANAGER_SEED_DATA +from core.utils.request_utils import _get_user from . 
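After this refactor there is one `LogManagerCollectionConfig` per collection, and directories and e-mails hang off it as `ParentalKey` children. A sketch of wiring one up programmatically, assuming a Django shell with migrations applied and an existing user and collection (the acronym, path and e-mail come from the old seed list further down in this diff; the rest are placeholders):

```python
from django.contrib.auth import get_user_model

from collection.models import Collection
from log_manager_config.models import (
    CollectionEmail,
    CollectionLogDirectory,
    LogManagerCollectionConfig,
)

user = get_user_model().objects.first()            # any existing user
collection = Collection.objects.get(acron3="scl")  # assumes the collection exists

config = LogManagerCollectionConfig.create_or_update(
    user=user,
    collection=collection,
    sample_size=0.1,
    buffer_size=2048,
    expected_logs_per_day=2,
)
CollectionLogDirectory.create_or_update(
    user=user,
    config=config,
    directory_name="Site novo",
    path="/app/logs/bkp-ratchet/scielo.nbr",
    active=True,
)
CollectionEmail.create_or_update(
    user=user,
    config=config,
    email="tecnologia@scielo.org",
    name="SciELO Tecnologia",
    position="",
    active=True,
)
```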
import models -User = get_user_model() - - -@celery_app.task(bind=True, name=_('Load log manager collection settings')) -def task_load_log_manager_collection_settings(self, data={}, user_id=None, username=None): +@celery_app.task(bind=True, name=_('[Log Pipeline] Load Log Manager Settings (Seed)')) +def task_load_log_manager_collection_settings(self, data=None, user_id=None, username=None): user = _get_user(self.request, username=username, user_id=user_id) if not data: - data = [ - {'acronym': 'arg', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.ar', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'bol', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.bo', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'chl', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.cl', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'col', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.co', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'cri', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.cr', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'cub', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.cu', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'data', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-dataverse', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'dataverse'}, - {'acronym': 'ecu', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.ec', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'esp', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.es', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'mex', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.mx', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'per', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.pe', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'preprints', 'directory_name': _('Site clássico') , 'path': '/app/logs/submission-node01', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'preprints'}, - {'acronym': 'prt', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.pt', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'pry', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.py', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'psi', 'directory_name': _('Site clássico') , 'path': 
'/app/logs/bkp-ratchet/scielo.pepsic', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'rve', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.revenf', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'rvt', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.revtur', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'scl', 'directory_name': _('Site novo') , 'path': '/app/logs/bkp-ratchet/scielo.nbr', 'quantity': 2, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'opac'}, - {'acronym': 'spa', 'directory_name': _('Site novo - versão prévia') , 'path': '/app/logs/bkp-ratchet/scielo.sp', 'quantity': 2, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'opac_alpha'}, - {'acronym': 'sss', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.ss', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'sza', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.za', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'ury', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.uy', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'ven', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.ve', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - {'acronym': 'wid', 'directory_name': _('Site clássico') , 'path': '/app/logs/bkp-ratchet/scielo.wi', 'quantity': 1, 'start_date': '2020-01-01', 'e-mail': 'tecnologia@scielo.org', 'translator_class': 'classic'}, - ] + data = LOG_MANAGER_SEED_DATA for i in data: - i['sample_size'] = 0.1 if i['acronym'] not in ['data', 'preprints', 'wid'] else 1.0 + size = getattr(settings, 'COLLECTION_ACRON3_SIZE_MAP', {}).get(i['acronym'], 'small') + i['sample_size'] = COLLECTION_SIZE_SAMPLE_MAP.get(size, 1.0) i['buffer_size'] = 2048 - data_extensions = [ - {'file_extension': '.log', 'description': ''}, - {'file_extension': '.gz', 'description': ''} - ] - + models.LogManagerCollectionConfig.load(data, user) models.CollectionLogDirectory.load(data, user) models.CollectionEmail.load(data, user) - models.CollectionLogFilesPerDay.load(data, user) - models.CollectionValidationParameters.load(data, user) - models.CollectionURLTranslatorClass.load(data, user) - models.SupportedLogFile.load(data_extensions, user) \ No newline at end of file diff --git a/log_manager_config/wagtail_hooks.py b/log_manager_config/wagtail_hooks.py index 2ecf908..f91c0b1 100644 --- a/log_manager_config/wagtail_hooks.py +++ b/log_manager_config/wagtail_hooks.py @@ -1,134 +1,24 @@ from django.utils.translation import gettext_lazy as _ -from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup -from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet -from config.menu import get_menu_order +from log_manager_config.models import LogManagerCollectionConfig -from log_manager_config.models import ( - CollectionLogDirectory, - 
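The hard-coded per-acronym sample sizes above are replaced by a two-step lookup: `COLLECTION_ACRON3_SIZE_MAP` (read from settings) buckets each collection by size, and `COLLECTION_SIZE_SAMPLE_MAP` (from `config.collections`) maps the bucket to a sampling ratio. Neither map is part of this diff, so the values below are assumptions; something along these lines would reproduce the old behaviour of fully sampling only `data`, `preprints` and `wid`:

```python
# Illustrative only: the real maps live in config/collections.py and settings,
# which are not shown in this diff.
COLLECTION_SIZE_SAMPLE_MAP = {
    "small": 1.0,   # assumed: small collections are fully sampled
    "large": 0.1,   # assumed: large collections keep the old 10% sample
}

COLLECTION_ACRON3_SIZE_MAP = {
    "data": "small",
    "preprints": "small",
    "wid": "small",
    # remaining acronyms would map to "large" in this sketch
}


def resolve_sample_size(acronym: str) -> float:
    # Mirrors the lookup in task_load_log_manager_collection_settings.
    size = COLLECTION_ACRON3_SIZE_MAP.get(acronym, "small")
    return COLLECTION_SIZE_SAMPLE_MAP.get(size, 1.0)
```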
CollectionLogFilesPerDay, - CollectionEmail, - CollectionValidationParameters, - CollectionURLTranslatorClass, - SupportedLogFile, -) - - -class CollectionLogDirectorySnippetViewSet(SnippetViewSet): - model = CollectionLogDirectory - menu_label = _("Collection Log Directory") - icon = "folder" +class LogManagerCollectionConfigSnippetViewSet(SnippetViewSet): + model = LogManagerCollectionConfig + menu_label = _("Log Manager Configurations") + icon = "cogs" menu_order = 300 - list_display = ( - "collection", - "directory_name", - "path", - "active", - ) - list_filter = ( - "collection", - "active", - ) - search_fields = ( - "path", - ) - - -class CollectionLogFilesPerDaySnippetViewSet(SnippetViewSet): - model = CollectionLogFilesPerDay - menu_label = _("Collection Log Files Per Day") - icon = "folder" - menu_order = 400 - - list_display = ( - "collection", - "start_date", - "end_date", - "quantity", - ) - list_filter = ( - "collection", - ) - - -class CollectionEmailSnippetViewSet(SnippetViewSet): - model = CollectionEmail - menu_label = _("Collection Email") - icon = "folder" - menu_order = 500 - - list_display = ( - "collection", - "name", - "position", - "email", - "active", - ) - list_filter = ( - "collection", - "active", - ) - search_fields = ( - "name", - "email" - ) - -class CollectionValidationParametersSnippetViewSet(SnippetViewSet): - model = CollectionValidationParameters - menu_label = _("Collection Validation Parameters") - icon = "folder" - menu_order = 550 - list_display = ( "collection", "sample_size", "buffer_size", + "expected_logs_per_day", + "updated", ) list_filter = ( "collection", ) - -class CollectionURLTranslatorClassSnippetViewSet(SnippetViewSet): - model = CollectionURLTranslatorClass - menu_label = _("Collection URL Translator Class") - icon = "folder" - menu_order = 600 - - list_display = ( - "collection", - "directory", - "translator_class", - ) - list_filter = ( - "collection", - ) - -class SupportedLogFileSnippetViewSet(SnippetViewSet): - model = SupportedLogFile - menu_label = _("Supported Log File Formats") - icon = "folder" - menu_order = 600 - - list_display = ( - "file_extension", - "description", - ) - - -class LogManagerConfigSnippetViewSetGroup(SnippetViewSetGroup): - menu_name = 'log_manager_config' - menu_label = _("Log Manager Config") - menu_icon = "folder-open-inverse" - menu_order = get_menu_order("log_manager_config") - items = ( - CollectionLogDirectorySnippetViewSet, - CollectionLogFilesPerDaySnippetViewSet, - CollectionEmailSnippetViewSet, - CollectionValidationParametersSnippetViewSet, - CollectionURLTranslatorClassSnippetViewSet, - SupportedLogFileSnippetViewSet, + search_fields = ( + "collection__acron3", ) - - -register_snippet(LogManagerConfigSnippetViewSetGroup) diff --git a/merge_production_dotenvs_in_dotenv.py b/merge_production_dotenvs_in_dotenv.py deleted file mode 100644 index d1170ef..0000000 --- a/merge_production_dotenvs_in_dotenv.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -from pathlib import Path -from typing import Sequence - -import pytest - -ROOT_DIR_PATH = Path(__file__).parent.resolve() -PRODUCTION_DOTENVS_DIR_PATH = ROOT_DIR_PATH / ".envs" / ".production" -PRODUCTION_DOTENV_FILE_PATHS = [ - PRODUCTION_DOTENVS_DIR_PATH / ".django", - PRODUCTION_DOTENVS_DIR_PATH / ".postgres", -] -DOTENV_FILE_PATH = ROOT_DIR_PATH / ".env" - - -def merge( - output_file_path: str, merged_file_paths: Sequence[str], append_linesep: bool = True -) -> None: - with open(output_file_path, "w") as output_file: - for merged_file_path in 
merged_file_paths: - with open(merged_file_path, "r") as merged_file: - merged_file_content = merged_file.read() - output_file.write(merged_file_content) - if append_linesep: - output_file.write(os.linesep) - - -def main(): - merge(DOTENV_FILE_PATH, PRODUCTION_DOTENV_FILE_PATHS) - - -@pytest.mark.parametrize("merged_file_count", range(3)) -@pytest.mark.parametrize("append_linesep", [True, False]) -def test_merge(tmpdir_factory, merged_file_count: int, append_linesep: bool): - tmp_dir_path = Path(str(tmpdir_factory.getbasetemp())) - - output_file_path = tmp_dir_path / ".env" - - expected_output_file_content = "" - merged_file_paths = [] - for i in range(merged_file_count): - merged_file_ord = i + 1 - - merged_filename = ".service{}".format(merged_file_ord) - merged_file_path = tmp_dir_path / merged_filename - - merged_file_content = merged_filename * merged_file_ord - - with open(merged_file_path, "w+") as file: - file.write(merged_file_content) - - expected_output_file_content += merged_file_content - if append_linesep: - expected_output_file_content += os.linesep - - merged_file_paths.append(merged_file_path) - - merge(output_file_path, merged_file_paths, append_linesep) - - with open(output_file_path, "r") as output_file: - actual_output_file_content = output_file.read() - - assert actual_output_file_content == expected_output_file_content - - -if __name__ == "__main__": - main() diff --git a/metrics/counter/__init__.py b/metrics/counter/__init__.py new file mode 100644 index 0000000..c9afd92 --- /dev/null +++ b/metrics/counter/__init__.py @@ -0,0 +1,22 @@ +from .access import ( + extract_item_access_data, + is_valid_item_access_data, + update_results_with_item_access_data, +) +from .documents import convert_raw_results_to_index_documents +from .identifiers import ( + generate_item_access_id, + generate_month_document_id, + generate_user_session_id, + generate_year_document_id, +) +from .parser import ( + extract_date_from_validation_dict, + translator_class_name_to_obj, +) +from metrics.opensearch.names import ( + extract_access_month, + extract_access_year, + generate_month_index_name, + generate_year_index_name, +) diff --git a/metrics/counter/access.py b/metrics/counter/access.py new file mode 100644 index 0000000..12c0cc5 --- /dev/null +++ b/metrics/counter/access.py @@ -0,0 +1,438 @@ +import re +from urllib.parse import unquote, urlparse + +from scielo_usage_counter.values import ( + CONTENT_TYPE_UNDEFINED, + DEFAULT_SCIELO_ISSN, + MEDIA_LANGUAGE_UNDEFINED, + MEDIA_FORMAT_UNDEFINED, +) + +from core.utils.standardizer import ( + standardize_language_code, + standardize_pid_generic, + standardize_pid_v2, + standardize_pid_v3, + standardize_year_of_publication, +) +from core.utils.date_utils import extract_minute_second_key, truncate_datetime_to_hour +from metrics.counter.identifiers import generate_item_access_id, generate_user_session_id + + +def extract_item_access_data(collection_acron3: str, translated_url: dict): + if not translated_url or not isinstance(translated_url, dict): + return {} + + source_type = _extract_source_type(collection_acron3, translated_url) + source_id = _extract_source_id(collection_acron3, translated_url, source_type) + scielo_issn = _extract_scielo_issn(translated_url, source_type, source_id) + document_type = _extract_document_type(collection_acron3, translated_url, source_type) + publication_year = _safe_standardize( + standardize_year_of_publication, + translated_url.get("year_of_publication"), + ) + source_access_type = 
translated_url.get("source_access_type") + + return { + "collection": collection_acron3, + "source_type": source_type, + "source_id": source_id, + "scielo_issn": scielo_issn, + "document_type": document_type, + "pid_v2": _safe_standardize(standardize_pid_v2, translated_url.get("pid_v2")), + "pid_v3": _safe_standardize(standardize_pid_v3, translated_url.get("pid_v3")), + "pid_generic": _safe_standardize( + standardize_pid_generic, + translated_url.get("pid_generic"), + ), + "title_pid_generic": _safe_standardize( + standardize_pid_generic, + translated_url.get("title_pid_generic"), + ), + "segment_pid_generics": _standardize_pid_generic_list( + translated_url.get("segment_pid_generics"), + ), + "media_language": _safe_standardize( + standardize_language_code, + translated_url.get("media_language"), + default="un", + ), + "media_format": translated_url.get("media_format"), + "content_type": translated_url.get("content_type"), + "access_url": translated_url.get("access_url") or translated_url.get("normalized_url"), + "publication_year": publication_year, + "counter_access_type": _counter_access_type(source_access_type), + "access_method": "Regular", + "source_main_title": _extract_source_title(translated_url), + "source_subject_area_capes": translated_url.get("source_subject_area_capes") + or translated_url.get("journal_subject_area_capes"), + "source_subject_area_wos": translated_url.get("source_subject_area_wos") + or translated_url.get("journal_subject_area_wos"), + "source_acronym": translated_url.get("source_acronym") + or translated_url.get("journal_acronym"), + "source_publisher_name": translated_url.get("source_publisher_name") + or translated_url.get("journal_publisher_name"), + "source_access_type": source_access_type, + "source_identifiers": _extract_source_identifiers(translated_url, source_id, source_type), + "source_city": translated_url.get("source_city"), + "source_country": translated_url.get("source_country"), + } + + +def is_valid_item_access_data(data: dict, utm=None, ignore_utm_validation=False): + if not isinstance(data, dict): + return False, {"message": "Invalid data format. 
Expected a dictionary.", "code": "invalid_format"} + + scielo_issn = data.get("scielo_issn") + source_id = data.get("source_id") + source_type = data.get("source_type") + document_type = data.get("document_type") or "article" + media_format = data.get("media_format") + media_language = data.get("media_language") + content_type = data.get("content_type") + pid_v2 = data.get("pid_v2") + pid_v3 = data.get("pid_v3") + pid_generic = data.get("pid_generic") + has_source_identity = bool(source_id) or bool( + scielo_issn and scielo_issn != DEFAULT_SCIELO_ISSN + ) + has_media_language = bool(media_language and media_language != MEDIA_LANGUAGE_UNDEFINED) + has_pid = bool(pid_v2 or pid_v3 or pid_generic) + + if not all([media_format and media_format != MEDIA_FORMAT_UNDEFINED, content_type and content_type != CONTENT_TYPE_UNDEFINED, has_pid]): + return False, {"message": "Missing required fields in item access data.", "code": "missing_fields"} + + if document_type in {"article", "book", "chapter"} and not has_media_language: + return False, {"message": "Missing media language in item access data.", "code": "missing_fields"} + + if document_type == "article" and not has_source_identity: + return False, {"message": "Missing article source identity.", "code": "missing_fields"} + + if document_type in {"book", "chapter"} and not source_id: + return False, {"message": "Missing book source identity.", "code": "missing_fields"} + + if document_type in {"preprint", "dataset"} and not pid_generic: + return False, {"message": "Missing generic PID in item access data.", "code": "missing_fields"} + + if utm and not ignore_utm_validation: + if ( + source_type == "journal" + and scielo_issn + and scielo_issn != DEFAULT_SCIELO_ISSN + and not utm.is_valid_code(scielo_issn, utm.sources_metadata["issn_set"]) + ): + return False, {"message": f"Invalid scielo_issn: {scielo_issn}", "code": "invalid_scielo_issn"} + + if ( + source_type + and source_type != "journal" + and source_id + and source_id not in utm.sources_metadata.get("source_id_to_type", {}) + ): + return False, {"message": f"Invalid source_id: {source_id}", "code": "invalid_source_id"} + + if pid_v2 and not utm.is_valid_code(pid_v2, utm.documents_metadata["pid_set"]): + return False, {"message": f"Invalid pid_v2: {pid_v2}", "code": "invalid_pid_v2"} + + if pid_v3 and not utm.is_valid_code(pid_v3, utm.documents_metadata["pid_set"]): + return False, {"message": f"Invalid pid_v3: {pid_v3}", "code": "invalid_pid_v3"} + + if pid_generic and not utm.is_valid_code(pid_generic, utm.documents_metadata["pid_set"]): + return False, {"message": f"Invalid pid_generic: {pid_generic}", "code": "invalid_pid_generic"} + + return True, {"message": "Item access data is valid.", "code": "valid"} + + +def update_results_with_item_access_data(results: dict, item_access_data: dict, line: dict): + col_acron3 = item_access_data.get("collection") + source_key = ( + item_access_data.get("source_id") + or item_access_data.get("scielo_issn") + or item_access_data.get("source_type") + or col_acron3 + ) + pid_v2 = item_access_data.get("pid_v2") + pid_v3 = item_access_data.get("pid_v3") + media_format = item_access_data.get("media_format") + content_language = item_access_data.get("media_language") + content_type = item_access_data.get("content_type") + access_url = item_access_data.get("access_url") or _normalize_access_url(line.get("url")) + + client_name = line.get("client_name") + client_version = line.get("client_version") + local_datetime = line.get("local_datetime") + 
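`is_valid_item_access_data` enforces different minimums per `document_type`: every record needs a defined `media_format`, `content_type` and at least one PID; articles additionally need a media language and a source identity; books and chapters need a `source_id`; preprints and datasets need a `pid_generic`. A quick illustration with invented field values, skipping the UTM code-list checks by passing no `utm` object:

```python
from metrics.counter.access import is_valid_item_access_data

ok, detail = is_valid_item_access_data({
    "document_type": "article",
    "source_id": "0001-3765",          # placeholder journal identifier
    "scielo_issn": "0001-3765",
    "pid_v3": "ABCDEFGHIJKLMNOPQRSTUVW",
    "media_format": "html",
    "media_language": "pt",
    "content_type": "fulltext",
})
print(ok, detail["code"])   # expected: True valid

ok, detail = is_valid_item_access_data({
    "document_type": "article",
    "pid_v3": "ABCDEFGHIJKLMNOPQRSTUVW",
    "media_language": "pt",
    "content_type": "fulltext",
    # no media_format, so the record is rejected
})
print(ok, detail["code"])   # expected: False missing_fields
```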
access_country_code = line.get("country_code") + ip_address = line.get("ip_address") + + truncated_datetime = truncate_datetime_to_hour(local_datetime) + ms_key = extract_minute_second_key(local_datetime) + if truncated_datetime is None or ms_key is None: + raise ValueError("Invalid local_datetime in parsed log line.") + + access_date = truncated_datetime.strftime("%Y-%m-%d") + access_year = access_date[:4] + access_month = access_date[:7].replace("-", "") + + user_session_id = generate_user_session_id( + client_name, + client_version, + ip_address, + truncated_datetime, + ) + + for access_target in _iter_access_targets(item_access_data): + item_access_id = generate_item_access_id( + user_session_id=user_session_id, + col_acron3=col_acron3, + source_key=source_key, + pid_v2=pid_v2, + pid_v3=pid_v3, + pid_generic=access_target.get("pid_generic"), + content_language=content_language, + access_country_code=access_country_code, + media_format=media_format, + content_type=content_type, + ) + + if item_access_id not in results: + results[item_access_id] = { + "collection": col_acron3, + "source_key": source_key, + "document_type": access_target.get("document_type"), + "pid_v2": pid_v2, + "pid_v3": pid_v3, + "pid_generic": access_target.get("pid_generic"), + "title_pid_generic": ( + item_access_data.get("title_pid_generic") + or access_target.get("pid_generic") + ), + "user_session_id": user_session_id, + "click_timestamps": {ms_key: 0}, + "click_timestamps_by_url": {}, + "access_url": access_url, + "media_format": media_format, + "content_language": content_language, + "content_type": content_type, + "access_country_code": access_country_code, + "access_date": access_date, + "access_year": access_year, + "access_month": access_month, + "publication_year": item_access_data.get("publication_year"), + "counter_access_type": item_access_data.get("counter_access_type") or "Open", + "access_method": item_access_data.get("access_method") or "Regular", + "source": { + "source_type": item_access_data.get("source_type"), + "source_id": item_access_data.get("source_id"), + "scielo_issn": item_access_data.get("scielo_issn"), + "main_title": item_access_data.get("source_main_title"), + "identifiers": item_access_data.get("source_identifiers"), + "access_type": item_access_data.get("source_access_type"), + "city": item_access_data.get("source_city"), + "country": item_access_data.get("source_country"), + "subject_area_capes": item_access_data.get("source_subject_area_capes"), + "subject_area_wos": item_access_data.get("source_subject_area_wos"), + "acronym": item_access_data.get("source_acronym"), + "publisher_name": item_access_data.get("source_publisher_name"), + }, + } + + if ms_key not in results[item_access_id]["click_timestamps"]: + results[item_access_id]["click_timestamps"][ms_key] = 0 + + results[item_access_id]["click_timestamps"][ms_key] += 1 + + access_url_key = access_url or _fallback_access_url_key( + access_target.get("pid_generic"), + media_format, + content_type, + ) + timestamps_by_url = results[item_access_id].setdefault("click_timestamps_by_url", {}) + url_timestamps = timestamps_by_url.setdefault(access_url_key, {}) + if ms_key not in url_timestamps: + url_timestamps[ms_key] = 0 + url_timestamps[ms_key] += 1 + + +def _extract_source_type(collection_acron3, translated_url): + source_type = translated_url.get("source_type") + if source_type: + return source_type + + if collection_acron3 == "preprints": + return "preprint_server" + + if collection_acron3 == "data": + return "data_repository" 
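`update_results_with_item_access_data` folds each parsed log line into an accumulator keyed by item access id, so repeated clicks from the same session on the same item merge into a single entry whose `click_timestamps` count hits per minute-second bucket (and per URL). Roughly, one accumulated entry looks like this (abridged, with placeholder values; the minute-second key format comes from `core.utils.date_utils`, which is not shown here):

```python
results = {
    "<item access id>": {
        "collection": "scl",
        "source_key": "0001-3765",
        "pid_v3": "ABCDEFGHIJKLMNOPQRSTUVW",
        "user_session_id": "<derived from client, IP and truncated hour>",
        "access_date": "2024-02-01",
        "access_year": "2024",
        "access_month": "202402",
        "click_timestamps": {"<minute-second>": 2},
        "click_timestamps_by_url": {
            "/scielo.php": {"<minute-second>": 2},
        },
        "content_type": "fulltext",
        "counter_access_type": "Open",
        "access_method": "Regular",
        "source": {
            "source_type": "journal",
            "scielo_issn": "0001-3765",
            "main_title": "<journal title>",
        },
    },
}
```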
+ + if collection_acron3 == "books": + return "book" + + if translated_url.get("book_id"): + return "book" + + if ( + translated_url.get("scielo_issn") + and translated_url.get("scielo_issn") != DEFAULT_SCIELO_ISSN + ): + return "journal" + + if translated_url.get("journal_acronym") or translated_url.get("journal_main_title"): + return "journal" + + return "other" + + +def _extract_source_id(collection_acron3, translated_url, source_type): + source_id = translated_url.get("source_id") + if source_id: + return source_id + + if source_type == "preprint_server": + return translated_url.get("preprint_server_id") or "scielo-preprints" + + if source_type == "data_repository": + return translated_url.get("repository_id") or "scielo-data" + + if source_type == "book": + return ( + translated_url.get("book_id") + or _extract_book_id_from_pid(translated_url.get("title_pid_generic")) + or _extract_book_id_from_pid(translated_url.get("pid_generic")) + ) + + if source_type == "journal": + return translated_url.get("scielo_issn") + + return None + + +def _extract_scielo_issn(translated_url, source_type, source_id): + scielo_issn = translated_url.get("scielo_issn") + if scielo_issn: + return scielo_issn + + if source_type == "journal" and source_id: + return source_id + + if source_type in {"book", "other"} or translated_url.get("book_id"): + return DEFAULT_SCIELO_ISSN + + return None + + +def _extract_source_title(translated_url): + return ( + translated_url.get("source_main_title") + or translated_url.get("journal_main_title") + or translated_url.get("book_title") + ) + + +def _extract_document_type(collection_acron3, translated_url, source_type): + document_type = translated_url.get("document_type") + if document_type: + return document_type + + if collection_acron3 == "preprints": + return "preprint" + + if collection_acron3 == "data": + return "dataset" + + if collection_acron3 == "books" or source_type == "book": + pid_generic = translated_url.get("pid_generic") or "" + if translated_url.get("chapter_id") or "/CHAPTER:" in pid_generic.upper(): + return "chapter" + if translated_url.get("book_id"): + return "book" + return "book" + + if source_type == "journal": + return "article" + + return "article" + + +def _extract_source_identifiers(translated_url, source_id, source_type): + identifiers = translated_url.get("source_identifiers") + if isinstance(identifiers, dict): + compact = {key: value for key, value in identifiers.items() if value not in (None, "", [], {}, ())} + if compact: + return compact + + if source_type != "book": + return None + + compact = { + "book_id": source_id or translated_url.get("book_id"), + "isbn": translated_url.get("isbn"), + "eisbn": translated_url.get("eisbn"), + "doi": translated_url.get("doi"), + } + compact = {key: value for key, value in compact.items() if value not in (None, "", [], {}, ())} + return compact or None + + +def _extract_book_id_from_pid(value): + if not value: + return None + normalized = str(value).upper() + if not normalized.startswith("BOOK:"): + return None + return normalized.split("BOOK:", 1)[1].split("/", 1)[0] or None + + +def _counter_access_type(source_access_type): + normalized = str(source_access_type or "").strip().lower() + if normalized == "commercial": + return "Controlled" + if normalized in {"free_to_read", "free-to-read", "free"}: + return "Free_To_Read" + return "Open" + + +def _safe_standardize(func, value, default=""): + try: + return func(value) + except Exception: + return default + + +def _standardize_pid_generic_list(values): 
+ if not isinstance(values, (list, tuple, set)): + return [] + items = [] + for value in values: + item = _safe_standardize(standardize_pid_generic, value) + if item and item not in items: + items.append(item) + return items + + +def _iter_access_targets(item_access_data): + return [ + { + "pid_generic": item_access_data.get("pid_generic"), + "document_type": item_access_data.get("document_type"), + } + ] + + +def _normalize_access_url(url): + if not url: + return None + parsed_url = urlparse(str(url).strip()) + path = parsed_url.path if parsed_url.scheme or parsed_url.netloc else str(url).strip() + path = unquote(path or "") + path = path.split("?", 1)[0].split("#", 1)[0].split()[0] + path = re.sub(r"/+", "/", path) + path = path.rstrip(".,;:") + return path or None + + +def _fallback_access_url_key(pid_generic, media_format, content_type): + return "|".join([ + str(pid_generic or ""), + str(media_format or ""), + str(content_type or ""), + ]) diff --git a/metrics/counter/aggregation.py b/metrics/counter/aggregation.py new file mode 100644 index 0000000..d047e7a --- /dev/null +++ b/metrics/counter/aggregation.py @@ -0,0 +1,124 @@ +from scielo_usage_counter.counter import get_valid_clicks, is_request + + +def apply_unique_metrics( + document, + unique_state, + scope, + document_id, + user_session_id, + is_request_event, +): + if not user_session_id: + return + + inv_bucket = unique_state[f"{scope}_investigations"] + inv_key = (document_id, user_session_id) + add_investigation = inv_key not in inv_bucket + if add_investigation: + inv_bucket.add(inv_key) + + add_request = False + if is_request_event: + req_bucket = unique_state[f"{scope}_requests"] + req_key = (document_id, user_session_id) + add_request = req_key not in req_bucket + if add_request: + req_bucket.add(req_key) + + increment_document_uniques( + document=document, + add_investigation=add_investigation, + add_request=add_request, + ) + + +def increment_document_totals(document, click_timestamps, content_type, click_timestamps_by_url=None): + number_of_clicks = _count_valid_clicks( + click_timestamps=click_timestamps, + click_timestamps_by_url=click_timestamps_by_url, + ) + + document["total_investigations"] += number_of_clicks + if is_request(content_type): + document["total_requests"] += number_of_clicks + + if "daily_metrics" in document: + day_key = list(document["daily_metrics"].keys())[0] + document["daily_metrics"][day_key]["total_investigations"] += number_of_clicks + if is_request(content_type): + document["daily_metrics"][day_key]["total_requests"] += number_of_clicks + + +def _count_valid_clicks(click_timestamps, click_timestamps_by_url=None): + if isinstance(click_timestamps_by_url, dict) and click_timestamps_by_url: + return sum( + get_valid_clicks(timestamps or {}) + for timestamps in click_timestamps_by_url.values() + ) + return get_valid_clicks(click_timestamps or {}) + + +def increment_document_uniques(document, add_investigation=False, add_request=False): + if add_investigation: + document["unique_investigations"] += 1 + if add_request: + document["unique_requests"] += 1 + + if "daily_metrics" in document: + day_key = list(document["daily_metrics"].keys())[0] + if add_investigation: + document["daily_metrics"][day_key]["unique_investigations"] += 1 + if add_request: + document["daily_metrics"][day_key]["unique_requests"] += 1 + + +def counter_data_type(document_type): + if document_type == "dataset": + return "Dataset" + if document_type in {"article", "preprint"}: + return "Article" + if document_type == "book": 
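`_normalize_access_url` reduces a raw request URL to a cleaned path: the query string and fragment are dropped, percent-encoding is decoded, repeated slashes collapse, and trailing punctuation is stripped. Two examples against the helper as written (the URLs are invented):

```python
from metrics.counter.access import _normalize_access_url

print(_normalize_access_url(
    "https://www.scielo.br//scielo.php?pid=S0001-37652024000100101&script=sci_arttext"
))
# -> /scielo.php

print(_normalize_access_url("/article//10.1590%2Fabc#top"))
# -> /article/10.1590/abc
```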
+ return "Book" + if document_type == "chapter": + return "Book_Segment" + return "Other" + + +def parent_data_type(document_type, source_type=None): + if document_type == "chapter": + return "Book" + if document_type == "article" and source_type == "journal": + return "Journal" + return None + + +def article_version(document_type): + if document_type == "preprint": + return "Preprint" + return None + + +def should_create_book_item_document(value): + if not value.get("pid_generic"): + return False + if value.get("document_type") == "book" and not is_request(value.get("content_type")): + return False + return True + + +def extract_title_pid_generic(value, fallback=None): + title_pid_generic = value.get("title_pid_generic") + if title_pid_generic: + return title_pid_generic + + pid_generic = value.get("pid_generic") + if "/CHAPTER:" in (pid_generic or "").upper(): + return pid_generic.upper().split("/CHAPTER:")[0] + + source = value.get("source") or {} + source_id = source.get("source_id") + if source_id: + return f"BOOK:{str(source_id).upper()}" + + return fallback diff --git a/metrics/counter/documents.py b/metrics/counter/documents.py new file mode 100644 index 0000000..63730ae --- /dev/null +++ b/metrics/counter/documents.py @@ -0,0 +1,322 @@ +from scielo_usage_counter.counter import is_request + +from metrics.counter.aggregation import ( + apply_unique_metrics, + article_version, + counter_data_type, + extract_title_pid_generic, + increment_document_totals, + parent_data_type, + should_create_book_item_document, +) +from metrics.counter.identifiers import generate_month_document_id, generate_year_document_id + + +def convert_to_month_index_documents(data: dict): + if not isinstance(data, dict): + return {} + + metrics_data = {} + unique_state = _initialize_unique_state() + + for value in data.values(): + _accumulate_documents( + data=metrics_data, + unique_state=unique_state, + value=value, + granularity="month", + ) + + return metrics_data + + +def convert_to_year_index_documents(data: dict): + if not isinstance(data, dict): + return {} + + metrics_data = {} + unique_state = _initialize_unique_state() + + for value in data.values(): + _accumulate_documents( + data=metrics_data, + unique_state=unique_state, + value=value, + granularity="year", + ) + + return metrics_data + + +def convert_raw_results_to_index_documents(data: dict): + return { + "month": convert_to_month_index_documents(data), + "year": convert_to_year_index_documents(data), + } + + +def _initialize_unique_state(): + return { + "item_investigations": set(), + "item_requests": set(), + "title_investigations": set(), + "title_requests": set(), + } + + +def _accumulate_documents(data, unique_state, value, granularity): + if not isinstance(value, dict): + return + + if value.get("collection") == "books": + _accumulate_books_documents(data, unique_state, value, granularity) + return + + _accumulate_standard_documents(data, unique_state, value, granularity) + + +def _accumulate_standard_documents(data, unique_state, value, granularity): + document_id = _generate_document_id(value, granularity) + document = data.setdefault( + document_id, + _build_base_document(value=value, granularity=granularity), + ) + + increment_document_totals( + document=document, + click_timestamps=value.get("click_timestamps"), + click_timestamps_by_url=value.get("click_timestamps_by_url"), + content_type=value.get("content_type"), + ) + apply_unique_metrics( + document=document, + unique_state=unique_state, + scope="item", + document_id=document_id, + 
user_session_id=value.get("user_session_id"), + is_request_event=is_request(value.get("content_type")), + ) + + +def _accumulate_books_documents(data, unique_state, value, granularity): + if should_create_book_item_document(value): + item_document_id = _generate_document_id( + value, + granularity, + metric_scope="item", + ) + item_document = data.setdefault( + item_document_id, + _build_base_document( + value=value, + granularity=granularity, + metric_scope="item", + ), + ) + increment_document_totals( + document=item_document, + click_timestamps=value.get("click_timestamps"), + click_timestamps_by_url=value.get("click_timestamps_by_url"), + content_type=value.get("content_type"), + ) + apply_unique_metrics( + document=item_document, + unique_state=unique_state, + scope="item", + document_id=item_document_id, + user_session_id=value.get("user_session_id"), + is_request_event=is_request(value.get("content_type")), + ) + + title_pid_generic = extract_title_pid_generic(value) + if not title_pid_generic: + return + + title_document_id = _generate_document_id( + value, + granularity, + metric_scope="title", + pid_generic=title_pid_generic, + ) + title_document = data.setdefault( + title_document_id, + _build_base_document( + value=value, + granularity=granularity, + metric_scope="title", + pid_generic=title_pid_generic, + document_type="book", + ), + ) + increment_document_totals( + document=title_document, + click_timestamps=value.get("click_timestamps"), + click_timestamps_by_url=value.get("click_timestamps_by_url"), + content_type=value.get("content_type"), + ) + apply_unique_metrics( + document=title_document, + unique_state=unique_state, + scope="title", + document_id=title_document_id, + user_session_id=value.get("user_session_id"), + is_request_event=is_request(value.get("content_type")), + ) + + +def _generate_document_id(value, granularity, metric_scope=None, pid_generic=None): + pid_generic = pid_generic or value.get("pid_generic") + publication_year = str(value.get("publication_year") or "0001") + if granularity == "month": + access_month = value.get("access_date", "")[:7] if value.get("access_date") else "" + return generate_month_document_id( + collection=value.get("collection"), + source_key=value.get("source_key"), + pid_v2=value.get("pid_v2"), + pid_v3=value.get("pid_v3"), + pid_generic=pid_generic, + access_month=access_month, + counter_access_type=value.get("counter_access_type") or "Open", + access_method=value.get("access_method") or "Regular", + publication_year=publication_year, + metric_scope="title" if metric_scope == "title" else None, + ) + + return generate_year_document_id( + collection=value.get("collection"), + source_key=value.get("source_key"), + pid_v2=value.get("pid_v2"), + pid_v3=value.get("pid_v3"), + pid_generic=pid_generic, + content_language=value.get("content_language"), + access_country_code=value.get("access_country_code"), + access_year=value.get("access_year"), + counter_access_type=value.get("counter_access_type") or "Open", + access_method=value.get("access_method") or "Regular", + publication_year=publication_year, + metric_scope="title" if metric_scope == "title" else None, + ) + + +def _build_base_document(value, granularity, metric_scope=None, pid_generic=None, document_type=None): + collection = value.get("collection") + if collection == "books": + normalized_pid_generic = pid_generic or value.get("pid_generic") + title_pid_generic = extract_title_pid_generic(value, fallback=normalized_pid_generic) + base_document = { + "collection": 
collection, + "source": _build_books_source(value.get("source")), + "document_type": document_type or value.get("document_type"), + "scielo_document_type": document_type or value.get("document_type"), + "metric_scope": metric_scope or "item", + "counter_data_type": "Book" if metric_scope == "title" else "Book_Segment", + "parent_data_type": "Book" if metric_scope != "title" else None, + "title_pid_generic": title_pid_generic, + "pid": normalized_pid_generic, + "pid_generic": normalized_pid_generic, + "publication_year": value.get("publication_year"), + "counter_access_type": value.get("counter_access_type") or "Open", + "access_method": value.get("access_method") or "Regular", + "total_requests": 0, + "total_investigations": 0, + "unique_requests": 0, + "unique_investigations": 0, + } + _apply_access_fields(base_document, value, granularity) + if granularity == "year": + base_document["content_language"] = value.get("content_language") + base_document["access_country_code"] = value.get("access_country_code") + return base_document + + base_document = { + "collection": collection, + "source": _build_standard_source(value.get("source")), + "document_type": value.get("document_type"), + "scielo_document_type": value.get("document_type"), + "metric_scope": "item", + "counter_data_type": counter_data_type(value.get("document_type")), + "parent_data_type": parent_data_type( + value.get("document_type"), + (value.get("source") or {}).get("source_type"), + ), + "article_version": article_version(value.get("document_type")), + "pid": value.get("pid_v3") or value.get("pid_v2") or value.get("pid_generic"), + "pid_v2": value.get("pid_v2"), + "pid_v3": value.get("pid_v3"), + "pid_generic": value.get("pid_generic"), + "publication_year": value.get("publication_year"), + "counter_access_type": value.get("counter_access_type") or "Open", + "access_method": value.get("access_method") or "Regular", + "total_requests": 0, + "total_investigations": 0, + "unique_requests": 0, + "unique_investigations": 0, + } + _apply_access_fields(base_document, value, granularity) + if granularity == "year": + base_document["content_language"] = value.get("content_language") + base_document["access_country_code"] = value.get("access_country_code") + return base_document + + +def _apply_access_fields(base_document, value, granularity): + if granularity == "month": + base_document["access_month"] = value.get("access_date", "")[:7] if value.get("access_date") else "" + day = value.get("access_date", "")[-2:] if value.get("access_date") else "01" + base_document["daily_metrics"] = { + day: { + "total_requests": 0, + "total_investigations": 0, + "unique_requests": 0, + "unique_investigations": 0, + } + } + return + + base_document["access_year"] = value.get("access_year") + + +def _build_books_source(source): + source = source or {} + identifiers = source.get("identifiers") or {} + compact_identifiers = { + key: value + for key, value in identifiers.items() + if key in {"book_id", "isbn", "eisbn", "doi"} and value not in (None, "", [], {}, ()) + } + + return { + "source_type": source.get("source_type"), + "source_id": source.get("source_id"), + "main_title": source.get("main_title"), + "access_type": source.get("access_type"), + "publisher": source.get("publisher_name"), + "city": source.get("city"), + "country": source.get("country"), + "identifiers": compact_identifiers, + } + + +def _build_standard_source(source): + source = source or {} + identifiers = source.get("identifiers") or {} + compact_identifiers = { + key: value + 
for key, value in identifiers.items() + if value not in (None, "", [], {}, ()) + } + + return { + "source_type": source.get("source_type"), + "source_id": source.get("source_id"), + "scielo_issn": source.get("scielo_issn"), + "main_title": source.get("main_title"), + "acronym": source.get("acronym"), + "publisher_name": source.get("publisher_name"), + "subject_area_capes": source.get("subject_area_capes"), + "subject_area_wos": source.get("subject_area_wos"), + "access_type": source.get("access_type"), + "city": source.get("city"), + "country": source.get("country"), + "identifiers": compact_identifiers, + } diff --git a/metrics/counter/identifiers.py b/metrics/counter/identifiers.py new file mode 100644 index 0000000..bef7b8d --- /dev/null +++ b/metrics/counter/identifiers.py @@ -0,0 +1,110 @@ +def generate_user_session_id(client_name, client_version, ip_address, datetime, sep="|"): + dt_year_month_day = datetime.strftime("%Y-%m-%d") + dt_hour = datetime.strftime("%H") + + return sep.join( + [ + str(client_name), + str(client_version), + str(ip_address), + str(dt_year_month_day), + str(dt_hour), + ] + ) + + +def generate_item_access_id( + col_acron3, + source_key, + pid_v2, + pid_v3, + pid_generic, + user_session_id, + access_country_code, + content_language, + media_format, + content_type, + sep="|", +): + return sep.join( + [ + col_acron3, + str(source_key or ""), + pid_v2 or "", + pid_v3 or "", + pid_generic or "", + str(user_session_id or ""), + str(access_country_code or ""), + str(content_language or ""), + str(media_format or ""), + str(content_type or ""), + ] + ) + + +def generate_month_document_id( + collection: str, + source_key: str, + pid_v2: str, + pid_v3: str, + pid_generic: str, + access_month: str, + counter_access_type: str, + access_method: str, + publication_year: str, + metric_scope: str = None, +) -> str: + parts = [] + if metric_scope: + parts.append(metric_scope) + + parts.extend( + [ + str(collection or ""), + str(source_key or ""), + pid_v2 or "", + pid_v3 or "", + pid_generic or "", + str(access_month or ""), + str(counter_access_type or ""), + str(access_method or ""), + str(publication_year or ""), + ] + ) + return "|".join(parts) + + +def generate_year_document_id( + collection: str, + source_key: str, + pid_v2: str, + pid_v3: str, + pid_generic: str, + content_language: str, + access_country_code: str, + access_year: str, + counter_access_type: str, + access_method: str, + publication_year: str, + metric_scope: str = None, +) -> str: + parts = [] + if metric_scope: + parts.append(metric_scope) + + parts.extend( + [ + str(collection or ""), + str(source_key or ""), + pid_v2 or "", + pid_v3 or "", + pid_generic or "", + content_language or "", + access_country_code or "", + str(access_year or ""), + str(counter_access_type or ""), + str(access_method or ""), + str(publication_year or ""), + ] + ) + return "|".join(parts) diff --git a/metrics/utils/parser_utils.py b/metrics/counter/parser.py similarity index 92% rename from metrics/utils/parser_utils.py rename to metrics/counter/parser.py index ef142e6..2081e5d 100644 --- a/metrics/utils/parser_utils.py +++ b/metrics/counter/parser.py @@ -1,6 +1,7 @@ import logging from scielo_usage_counter.translator.classic import URLTranslatorClassicSite +from scielo_usage_counter.translator.books import URLTranslatorBooksSite from scielo_usage_counter.translator.dataverse import URLTranslatorDataverseSite from scielo_usage_counter.translator.opac import URLTranslatorOPACSite from scielo_usage_counter.translator.opac_alpha 
import URLTranslatorOPACAlphaSite @@ -38,6 +39,7 @@ def translator_class_name_to_obj(name: str): return None translator_classes = { + 'books': URLTranslatorBooksSite, 'classic': URLTranslatorClassicSite, 'dataverse': URLTranslatorDataverseSite, 'opac': URLTranslatorOPACSite, diff --git a/metrics/es.py b/metrics/es.py deleted file mode 100644 index 25ad701..0000000 --- a/metrics/es.py +++ /dev/null @@ -1,385 +0,0 @@ -import logging - -from elasticsearch import Elasticsearch, helpers, NotFoundError -from django.conf import settings - -from .utils import index_utils - - -DEFAULT_ES_INDEX_USAGE_MAPPINGS = { - "properties": { - "collection": { - "type": "keyword" - }, - "journal": { - "properties": { - "scielo_issn": { - "type": "keyword" - }, - "main_title": { - "type": "keyword" - }, - "subject_area_capes": { - "type": "keyword" - }, - "subject_area_wos": { - "type": "keyword" - }, - "acronym": { - "type": "keyword" - }, - "publisher": { - "type": "keyword" - } - } - }, - "pid": { - "type": "keyword" - }, - "pid_v2": { - "type": "keyword" - }, - "pid_v3": { - "type": "keyword" - }, - "pid_generic": { - "type": "keyword" - }, - "year_of_publication": { - "type": "integer" - }, - "media_language": { - "type": "keyword" - }, - "country_code": { - "type": "keyword" - }, - "date": { - "type": "date", - "format": "yyyy-MM-dd" - }, - "total_requests": { - "type": "integer" - }, - "total_investigations": { - "type": "integer" - }, - "unique_requests": { - "type": "integer" - }, - "unique_investigations": { - "type": "integer" - } - } -} - - -class ElasticSearchUsageWrapper: - """ - Wrapper for Elasticsearch usage metrics operations. - This class provides methods to interact with Elasticsearch for indexing, - deleting, and managing usage metrics data. - """ - - def __init__(self, url=None, basic_auth=None, api_key=None, verify_certs=False): - self.client = self.get_elasticsearch_client(url, basic_auth, api_key, verify_certs) - - - def get_elasticsearch_client(self, url=None, basic_auth=None, api_key=None, verify_certs=False): - """ - Create an Elasticsearch client instance using Django settings. - - :param url: Elasticsearch URL. If None, it will be taken from Django settings. - :param basic_auth: Basic authentication credentials. If None, it will be taken from Django settings. - :param api_key: API key. If None, it will be taken from Django settings. - :param verify_certs: Whether to verify SSL certificates. If None, it will be taken from Django settings. - """ - if not url: - url = getattr(settings, "ES_URL", None) - - if not basic_auth: - basic_auth = getattr(settings, "ES_BASIC_AUTH", None) - - if not api_key: - api_key = getattr(settings, "ES_API_KEY", None) - - if not verify_certs: - verify_certs = getattr(settings, "ES_VERIFY_CERTS", False) - - if basic_auth: - client = Elasticsearch(url, basic_auth=basic_auth, verify_certs=verify_certs) - elif api_key: - client = Elasticsearch(url, api_key=api_key, verify_certs=verify_certs) - else: - client = Elasticsearch(url, verify_certs=verify_certs) - - return client - - - def ping(self): - """ - Check if the Elasticsearch client is available. - Returns True if the client is available, False otherwise. - """ - try: - return self.client.ping() - except Exception as e: - logging.error(f"Error pinging Elasticsearch client: {e}") - return False - - - def create_index(self, index_name, mappings=None, ping_client=False): - """ - Create an Elasticsearch index. - - :param index_name: Name of the index to create. - :param mappings: Mappings for the index. 
If None, default mappings will be used. - :param ping_client: If True, checks if the Elasticsearch client is available before creating the index. - """ - if ping_client and not self.ping(): - return - - if not mappings: - mappings = DEFAULT_ES_INDEX_USAGE_MAPPINGS - - resp = self.client.indices.create( - index=index_name, - mappings=mappings, - ) - logging.info(f"Index {index_name} created: {resp}") - - - def create_index_if_not_exists(self, index_name, mappings=None, ping_client=False): - """ - Create an Elasticsearch index if it does not already exist. - - :param index_name: Name of the index to create. - :param mappings: Mappings for the index. If None, default mappings will be used. - :param ping_client: If True, checks if the Elasticsearch client is available before creating the index. - """ - if ping_client and not self.ping(): - return - - if not self.client.indices.exists(index=index_name): - self.create_index(index_name, mappings, ping_client) - else: - logging.info(f"Index {index_name} already exists. Skipping creation.") - - - def delete_index(self, index_name, ping_client=False): - """ - Delete an Elasticsearch index. - - :param index_name: Name of the index to delete. - :param ping_client: If True, checks if the Elasticsearch client is available before deleting the index. - """ - if ping_client and not self.ping(): - return - - self.client.indices.delete(index=index_name) - - - def index_document(self, index_name, doc_id, document, ping_client=False): - """ - Index a document in Elasticsearch. - - :param index_name: Name of the index. - :param doc_id: ID of the document. - :param document: Document to index. - :param ping_client: If True, checks if the Elasticsearch client is available before indexing the document. - """ - if ping_client and not self.ping(): - return - - self.client.index(index=index_name, id=doc_id, document=document) - - - def index_documents(self, index_name, documents, ping_client=False): - """ - Index multiple documents in Elasticsearch. - - :param index_name: Name of the index. - :param documents: Dictionary of documents to index, where keys are document IDs and values are the documents. - :param ping_client: If True, checks if the Elasticsearch client is available before indexing the documents. - """ - if ping_client and not self.ping(): - return - - helpers.bulk( - self.client, - ( - { - "_index": index_name, - "_id": doc_id, - "_source": document, - } - for doc_id, document in documents.items() - ), - ) - - - def delete_document(self, index_name, doc_id, ping_client=False): - """ - Delete a document from Elasticsearch. - - :param index_name: Name of the index. - :param doc_id: ID of the document to delete. - :param ping_client: If True, checks if the Elasticsearch client is available before deleting the document. - """ - if ping_client and not self.ping(): - return - - try: - self.client.delete(index=index_name, id=doc_id) - except NotFoundError as e: - logging.error(f"Failed to delete document {doc_id} from Elasticsearch: {e}") - - - def delete_documents(self, index_name, doc_ids, ping_client=False): - """ - Delete multiple documents from Elasticsearch using bulk. - :param index_name: Name of the index. - :param doc_ids: List of document IDs to delete. - :param ping_client: If True, checks if the Elasticsearch client is available before deleting the documents. 
- """ - if ping_client and not self.ping(): - return - - actions = ( - { - "_op_type": "delete", - "_index": index_name, - "_id": doc_id, - } - for doc_id in doc_ids - ) - - try: - helpers.bulk(self.client, actions) - except helpers.BulkIndexError as e: - logging.error(f"BulkIndexError occurred: {e.errors}") - - - def delete_documents_by_key(self, index_name, data, ping_client=False): - """ - Delete multiple documents from Elasticsearch based on specific key-value pairs. - - :param index_name: Name of the index. - :param data: Dictionary where keys are field names and values are single values or lists of values. - :param ping_client: If True, checks if the Elasticsearch client is available before deleting the documents. - """ - if ping_client and not self.ping(): - return - - query = { - "query": { - "bool": { - "must": [ - { - "terms": { - key: values if isinstance(values, list) else [values] - } - } - for key, values in data.items() - ] - } - } - } - - try: - self.client.delete_by_query(index=index_name, body=query) - return True - except Exception as e: - logging.error(f"Failed to delete documents: {e}") - - return False - - - def fetch_and_update_documents_locally(self, index_name, documents, batch_size=5000, ping_client=False): - """ - Fetch existing documents from Elasticsearch and update local documents with accumulated metrics. - This function retrieves documents from Elasticsearch in batches and merges their metric fields - with the provided local documents. The merge operation adds values for specific metric fields - or sets them if they don't exist in the local documents. - - Args: - index_name (str): Name of the Elasticsearch index to fetch documents from. - documents (dict): Dictionary of documents to be updated, where keys are document IDs and values - are dictionaries containing metric data. - batch_size (int, optional): Number of documents to fetch in each batch from Elasticsearch. - Defaults to 5000. - ping_client (bool, optional): If True, checks if the Elasticsearch client is available before - fetching documents. Defaults to False. - - Returns: - None: The function modifies the input documents dictionary in-place. - """ - if ping_client and not self.ping(): - return - - existing_docs = {} - ids = list(documents.keys()) - - for i in range(0, len(ids), batch_size): - batch_ids = ids[i:i+batch_size] - resp = self.client.mget(index=index_name, ids=batch_ids) - for doc in resp.get('docs', []): - if doc.get('found'): - existing_docs[doc['_id']] = doc['_source'] - logging.info(f'Found {len(existing_docs)} existing documents in Elasticsearch for update.') - - for doc_id, existing in existing_docs.items(): - current = documents[doc_id] - for field in [ - "total_requests", - "unique_requests", - "total_investigations", - "unique_investigations", - ]: - if field in existing and field in current: - current[field] += existing[field] - elif field in existing: - current[field] = existing[field] - - - def export_to_index(self, index_name, data, batch_size=5000, ping_client=False): - """ - Export data to Elasticsearch index in bulk operations. - This function converts input data to index documents, processes them locally, - and then indexes them to Elasticsearch in batches to optimize performance. - - Args: - index_name (str): Name of the Elasticsearch index to export data to. - data: The data to be exported to the Elasticsearch index - batch_size (int, optional): Number of documents to process in each bulk operation. - Defaults to 5000. 
- ping_client (bool, optional): If True, checks if the Elasticsearch client is available - - Returns: - None: Function performs side effects by indexing data to Elasticsearch - """ - if ping_client and not self.ping(): - return - - bulk_data = [] - documents = index_utils.convert_to_index_documents(data) - self.fetch_and_update_documents_locally(index_name=index_name, documents=documents) - - for key, metric_data in documents.items(): - metric_data['pid'] = metric_data.get('pid_v3') or metric_data.get('pid_v2') or metric_data.get('pid_generic', '') - bulk_data.append({ - "_id": key, - "_source": metric_data, - }) - - if len(bulk_data) >= batch_size: - self.index_documents( - index_name=index_name, - documents={doc["_id"]: doc["_source"] for doc in bulk_data}, - ) - bulk_data = [] - - self.index_documents( - index_name=index_name, - documents={doc["_id"]: doc["_source"] for doc in bulk_data}, - ) diff --git a/metrics/fixtures/top100articles.csv b/metrics/fixtures/top100articles.csv deleted file mode 100755 index 9d979f3..0000000 --- a/metrics/fixtures/top100articles.csv +++ /dev/null @@ -1,97 +0,0 @@ -print_issn online_issn pid_issn collection pid yop year_month_day total_item_requests total_item_investigations unique_item_requests unique_item_investigations -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300005 2005 2024-05-26 13 16 13 16 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000100020 2009 2024-05-26 9 10 8 9 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200012 2009 2024-05-26 8 9 8 9 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200018 2009 2024-05-26 8 8 8 8 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300004 2005 2024-05-26 8 11 8 11 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200011 2009 2024-05-26 8 9 8 9 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200001 2009 2024-05-26 7 7 7 7 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200010 2009 2024-05-26 7 9 7 9 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300007 2005 2024-05-26 7 10 7 10 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200003 2009 2024-05-26 7 9 7 9 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000400010 2008 2024-05-26 7 7 7 7 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300008 2005 2024-05-26 7 9 7 9 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000400008 2009 2024-05-26 7 7 7 7 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000400009 2006 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000200009 2006 2024-05-26 6 7 6 7 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000100007 2010 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000300003 2007 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000100022 2009 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000100006 2010 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200002 2009 2024-05-26 6 7 6 7 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000100002 2010 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000200014 2007 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000100021 2009 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000400010 2010 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000200001 2010 2024-05-26 6 6 6 6 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000200002 2010 2024-05-26 6 7 6 7 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200014 2009 2024-05-26 5 6 5 6 -0002-7014 1851-8044 
0002-7014 arg S0002-70142009000100014 2009 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000200009 2005 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200004 2009 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000100016 2006 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000200015 2006 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000300005 2007 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000300009 2009 2024-05-26 5 5 5 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000200010 2010 2024-05-26 4 4 4 4 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000100015 2008 2024-05-26 3 4 3 4 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300002 2005 2024-05-26 2 5 2 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200015 2009 2024-05-26 2 3 2 3 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300001 2005 2024-05-26 2 5 2 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300009 2005 2024-05-26 2 4 2 4 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200005 2009 2024-05-26 2 4 2 4 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200008 2009 2024-05-26 2 3 2 3 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300006 2005 2024-05-26 2 5 2 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300010 2005 2024-05-26 2 2 2 2 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000300003 2005 2024-05-26 2 5 2 5 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000300001 2006 2024-05-26 2 2 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000100005 2009 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200016 2009 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000400004 2005 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000100009 2008 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000100014 2005 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200009 2009 2024-05-26 1 2 1 2 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000100019 2006 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200013 2009 2024-05-26 1 3 1 3 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000400007 2008 2024-05-26 1 2 1 2 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000300010 2008 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200006 2009 2024-05-26 1 3 1 3 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000200018 2006 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000400002 2008 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142010000300005 2010 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000200007 2006 2024-05-26 1 3 1 3 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000400004 2006 2024-05-26 1 2 1 2 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000100004 2007 2024-05-26 1 3 1 3 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000200021 2007 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000100002 2007 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000100004 2009 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000400004 2009 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000400006 2008 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000400005 2006 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000300006 2008 2024-05-26 1 1 1 1 
-0002-7014 1851-8044 0002-7014 arg S0002-70142008000400011 2008 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000300001 2007 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000100020 2007 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000400002 2006 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000100005 2005 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200017 2009 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000100005 2008 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000200007 2009 2024-05-26 1 4 1 4 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000100023 2009 2024-05-26 1 1 1 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000100008 2008 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000400008 2006 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000400005 2005 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000200006 2006 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000400007 2005 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000200013 2008 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000400003 2006 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142009000400006 2009 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000300008 2007 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000200008 2005 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000200006 2008 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000400004 2008 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142005000400006 2005 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142007000300006 2007 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000300003 2006 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142008000100007 2008 2024-05-26 0 1 0 1 -0002-7014 1851-8044 0002-7014 arg S0002-70142006000100009 2006 2024-05-26 0 1 0 1 diff --git a/metrics/fixtures/top100articles.tar.gz b/metrics/fixtures/top100articles.tar.gz deleted file mode 100644 index cd49556..0000000 Binary files a/metrics/fixtures/top100articles.tar.gz and /dev/null differ diff --git a/metrics/management/__init__.py b/metrics/management/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/metrics/management/__init__.py @@ -0,0 +1 @@ + diff --git a/metrics/management/commands/__init__.py b/metrics/management/commands/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/metrics/management/commands/__init__.py @@ -0,0 +1 @@ + diff --git a/metrics/management/commands/export_book_r51_monthly_metrics.py b/metrics/management/commands/export_book_r51_monthly_metrics.py new file mode 100644 index 0000000..9889387 --- /dev/null +++ b/metrics/management/commands/export_book_r51_monthly_metrics.py @@ -0,0 +1,431 @@ +import csv +import json +from collections import defaultdict +from pathlib import Path + +from device_detector import DeviceDetector +from django.core.management.base import BaseCommand, CommandError + +from collection.models import Collection +from document.models import Document +from metrics.counter import access, documents as index_docs +from resources.models import MMDB, RobotUserAgent +from scielo_usage_counter import log_handler, url_translator +from 
scielo_usage_counter.translator.books import URLTranslatorBooksSite +from source.models import Source + + +class Command(BaseCommand): + help = ( + "Generate COUNTER R5.1 monthly book metrics from one or more log files, " + "writing item and title CSV outputs." + ) + + def add_arguments(self, parser): + parser.add_argument( + "--input", + dest="inputs", + action="append", + required=True, + help="Input log file path. Repeat --input for multiple files.", + ) + parser.add_argument( + "--item-output", + required=True, + help="Output CSV path for item-level monthly metrics.", + ) + parser.add_argument( + "--title-output", + required=True, + help="Output CSV path for title-level monthly metrics.", + ) + parser.add_argument( + "--summary-output", + help="Optional JSON path with parse and totals summary.", + ) + parser.add_argument( + "--collection", + default="books", + help="Collection acronym (default: books).", + ) + parser.add_argument( + "--robots-source", + choices=sorted(RobotUserAgent.SOURCE_CHOICES), + default=RobotUserAgent.SOURCE_ALL, + help="Which active robot list to use: all, counter, or scielo.", + ) + + def handle(self, *args, **options): + input_paths = [Path(value).expanduser() for value in options["inputs"]] + item_output = Path(options["item_output"]).expanduser() + title_output = Path(options["title_output"]).expanduser() + summary_output = ( + Path(options["summary_output"]).expanduser() + if options.get("summary_output") + else None + ) + + for path in input_paths: + if not path.exists(): + raise CommandError(f"Input file not found: {path}") + + collection = Collection.objects.filter(acron3=options["collection"]).first() + if not collection: + raise CommandError(f"Collection not found: {options['collection']}") + + robots_source = options["robots_source"] + robots_list = RobotUserAgent.get_patterns(source=robots_source) + if not robots_list: + raise CommandError( + f"No robot user agents found in database for source {robots_source}." 
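+                # Without robot user-agent patterns the parser cannot filter bot traffic, so the command aborts here.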
+ ) + + mmdb = MMDB.objects.order_by("-created").first() + if not mmdb: + raise CommandError("No MMDB found in database.") + + parser = log_handler.LogParser( + mmdb_data=mmdb.data, + robots_list=robots_list, + output_mode="dict", + ) + utm = url_translator.URLTranslationManager( + documents_metadata=Document.metadata(collection=collection), + sources_metadata=Source.metadata(collection=collection), + translator=URLTranslatorBooksSite, + ) + + results = {} + parse_summaries = [] + ua_cache = {} + + for path in input_paths: + self.stdout.write(f"Processing {path}...") + parse_summaries.append( + self._parse_file( + path=path, + parser=parser, + utm=utm, + collection=collection, + ua_cache=ua_cache, + results=results, + ) + ) + + monthly_documents = self._build_monthly_documents(results) + + self._write_item_csv(item_output, monthly_documents["item"]) + self._write_title_csv(title_output, monthly_documents["title"]) + + summary = { + "robots_source": robots_source, + "raw_result_count": len(results), + "parse_summaries": parse_summaries, + "totals": { + "total_item_requests": sum( + doc.get("total_requests", 0) for doc in monthly_documents["item"] + ), + "total_item_investigations": sum( + doc.get("total_investigations", 0) + for doc in monthly_documents["item"] + ), + "unique_item_requests": sum( + doc.get("unique_requests", 0) for doc in monthly_documents["item"] + ), + "unique_item_investigations": sum( + doc.get("unique_investigations", 0) + for doc in monthly_documents["item"] + ), + "title_total_item_requests": sum( + doc.get("total_requests", 0) for doc in monthly_documents["title"] + ), + "title_total_item_investigations": sum( + doc.get("total_investigations", 0) + for doc in monthly_documents["title"] + ), + "unique_title_requests": sum( + doc.get("unique_requests", 0) for doc in monthly_documents["title"] + ), + "unique_title_investigations": sum( + doc.get("unique_investigations", 0) + for doc in monthly_documents["title"] + ), + }, + } + + if summary_output: + summary_output.parent.mkdir(parents=True, exist_ok=True) + summary_output.write_text(json.dumps(summary, indent=2, sort_keys=True)) + + self.stdout.write(self.style.SUCCESS(f"Item CSV written to {item_output}")) + self.stdout.write(self.style.SUCCESS(f"Title CSV written to {title_output}")) + if summary_output: + self.stdout.write(self.style.SUCCESS(f"Summary JSON written to {summary_output}")) + + def _parse_file(self, path, parser, utm, collection, ua_cache, results): + stats = defaultdict(int) + imported = 0 + + with path.open("rb") as fh: + for raw_line in fh: + stats["lines_parsed"] += 1 + + try: + line = raw_line.decode().strip() + except UnicodeDecodeError: + line = raw_line.decode("utf-8", errors="ignore").strip() + + match, ip_value = parser.match_with_best_pattern(line) + if not match: + stats["total_ignored_lines"] += 1 + continue + + data = match.groupdict() + is_bunny = "unix_ts" in data + method = "GET" if is_bunny else data.get("method") + status = data.get("status") + user_agent = parser.format_user_agent(data.get("user_agent")) + url = data.get("path") + ip_address = ip_value + + if not parser.has_valid_method(method): + stats["ignored_lines_invalid_method"] += 1 + stats["total_ignored_lines"] += 1 + continue + + if not parser.has_valid_status(status): + if parser.status_is_redirect(status): + stats["ignored_lines_http_redirects"] += 1 + elif parser.status_is_error(status): + stats["ignored_lines_http_errors"] += 1 + stats["total_ignored_lines"] += 1 + continue + + if parser.user_agent_is_bot(user_agent): 
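+                    # Lines from known robot user agents are tallied as ignored and skipped.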
+ stats["ignored_lines_bot"] += 1 + stats["total_ignored_lines"] += 1 + continue + + if not parser.has_supported_url(url): + stats["ignored_lines_static_resources"] += 1 + stats["total_ignored_lines"] += 1 + continue + + if is_bunny: + local_datetime = parser.format_date(data.get("unix_ts"), None) + country_code = data.get("country") or parser.geoip.ip_to_country_code( + ip_address + ) + else: + local_datetime = parser.format_date(data.get("date"), data.get("timezone")) + country_code = parser.geoip.ip_to_country_code(ip_address) + + if not local_datetime: + stats["ignored_lines_invalid_local_datetime"] += 1 + stats["total_ignored_lines"] += 1 + continue + + if not country_code: + stats["ignored_lines_invalid_country_code"] += 1 + stats["total_ignored_lines"] += 1 + continue + + device = ua_cache.get(user_agent) + if device is None: + try: + device = DeviceDetector(user_agent).parse() + except ZeroDivisionError: + stats["ignored_lines_invalid_user_agent"] += 1 + stats["total_ignored_lines"] += 1 + ua_cache[user_agent] = False + continue + ua_cache[user_agent] = device + elif device is False: + stats["ignored_lines_invalid_user_agent"] += 1 + stats["total_ignored_lines"] += 1 + continue + + client_name = parser.format_client_name(device) + client_version = parser.format_client_version(device) + + if not client_name: + stats["ignored_lines_invalid_client_name"] += 1 + stats["total_ignored_lines"] += 1 + continue + + if not client_version: + stats["ignored_lines_invalid_client_version"] += 1 + stats["total_ignored_lines"] += 1 + continue + + translated = utm.translate(url) + item_access_data = access.extract_item_access_data( + collection.acron3, + translated, + ) + is_valid, _ = access.is_valid_item_access_data( + item_access_data, + utm, + ignore_utm_validation=True, + ) + if not is_valid: + stats["total_ignored_lines"] += 1 + continue + + access.update_results_with_item_access_data( + results, + item_access_data, + { + "client_name": client_name, + "client_version": client_version, + "ip_address": ip_address, + "country_code": country_code, + "local_datetime": local_datetime, + "url": url, + }, + ) + imported += 1 + stats["total_imported_lines"] += 1 + + return {"path": str(path), "valid_lines_used": imported, **stats} + + def _build_monthly_documents(self, results): + documents = index_docs.convert_raw_results_to_index_documents(results) + item_documents = {} + title_documents = {} + + for doc in documents["month"].values(): + year_month = doc.get("access_month", "") + scope = doc.get("metric_scope", "item") + if scope == "title": + key = ( + year_month, + doc.get("title_pid_generic") or doc.get("pid_generic"), + doc.get("document_type"), + ) + if key not in title_documents: + title_documents[key] = { + "year_month": year_month, + "title_pid_generic": doc.get("title_pid_generic") + or doc.get("pid_generic"), + "document_type": doc.get("document_type"), + "total_requests": 0, + "total_investigations": 0, + "unique_requests": 0, + "unique_investigations": 0, + } + title_documents[key]["total_requests"] += doc.get("total_requests", 0) + title_documents[key]["total_investigations"] += doc.get( + "total_investigations", 0 + ) + title_documents[key]["unique_requests"] += doc.get("unique_requests", 0) + title_documents[key]["unique_investigations"] += doc.get( + "unique_investigations", 0 + ) + continue + + key = ( + year_month, + doc.get("title_pid_generic"), + doc.get("pid_generic"), + doc.get("document_type"), + ) + if key not in item_documents: + item_documents[key] = { + "year_month": 
year_month, + "title_pid_generic": doc.get("title_pid_generic"), + "segment_pid_generic": doc.get("pid_generic"), + "document_type": doc.get("document_type"), + "total_requests": 0, + "total_investigations": 0, + "unique_requests": 0, + "unique_investigations": 0, + } + item_documents[key]["total_requests"] += doc.get("total_requests", 0) + item_documents[key]["total_investigations"] += doc.get( + "total_investigations", 0 + ) + item_documents[key]["unique_requests"] += doc.get("unique_requests", 0) + item_documents[key]["unique_investigations"] += doc.get( + "unique_investigations", 0 + ) + + return { + "item": list(item_documents.values()), + "title": list(title_documents.values()), + } + + @staticmethod + def _write_item_csv(path, item_documents): + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", newline="") as fh: + writer = csv.DictWriter( + fh, + fieldnames=[ + "year_month", + "title_pid_generic", + "segment_pid_generic", + "document_type", + "total_item_requests", + "total_item_investigations", + "unique_item_requests", + "unique_item_investigations", + ], + ) + writer.writeheader() + for doc in sorted( + item_documents, + key=lambda item: ( + item.get("year_month", ""), + item.get("title_pid_generic") or "", + item.get("segment_pid_generic") or "", + ), + ): + writer.writerow( + { + "year_month": doc.get("year_month", ""), + "title_pid_generic": doc.get("title_pid_generic"), + "segment_pid_generic": doc.get("segment_pid_generic"), + "document_type": doc.get("document_type"), + "total_item_requests": doc.get("total_requests", 0), + "total_item_investigations": doc.get("total_investigations", 0), + "unique_item_requests": doc.get("unique_requests", 0), + "unique_item_investigations": doc.get("unique_investigations", 0), + } + ) + + @staticmethod + def _write_title_csv(path, title_documents): + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", newline="") as fh: + writer = csv.DictWriter( + fh, + fieldnames=[ + "year_month", + "title_pid_generic", + "document_type", + "total_item_requests", + "total_item_investigations", + "unique_title_requests", + "unique_title_investigations", + ], + ) + writer.writeheader() + for doc in sorted( + title_documents, + key=lambda item: ( + item.get("year_month", ""), + item.get("title_pid_generic") or "", + ), + ): + writer.writerow( + { + "year_month": doc.get("year_month", ""), + "title_pid_generic": doc.get("title_pid_generic"), + "document_type": doc.get("document_type"), + "total_item_requests": doc.get("total_requests", 0), + "total_item_investigations": doc.get("total_investigations", 0), + "unique_title_requests": doc.get("unique_requests", 0), + "unique_title_investigations": doc.get("unique_investigations", 0), + } + ) diff --git a/metrics/management/commands/schedule_cleanup_daily_payloads.py b/metrics/management/commands/schedule_cleanup_daily_payloads.py new file mode 100644 index 0000000..285a23f --- /dev/null +++ b/metrics/management/commands/schedule_cleanup_daily_payloads.py @@ -0,0 +1,68 @@ +from django.core.management.base import BaseCommand + +from core.utils.scheduler import schedule_task +from metrics.tasks import task_cleanup_daily_payloads + + +class Command(BaseCommand): + help = ( + "Schedule the periodic cleanup of exported daily metric payload files. " + "Runs weekly on Sunday at 03:00 UTC by default, deleting payload files " + "for jobs that were exported more than 7 days ago." 
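+        # Matches the argument defaults below: day_of_week=0 (Sunday), hour=3, minute=0.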
+ ) + + def add_arguments(self, parser): + parser.add_argument( + "--day-of-week", + default="0", + help="Crontab day of week (0=Sunday, 6=Saturday). Default: 0", + ) + parser.add_argument( + "--hour", + default="3", + help="Crontab hour (0-23). Default: 3", + ) + parser.add_argument( + "--minute", + default="0", + help="Crontab minute (0-59). Default: 0", + ) + parser.add_argument( + "--older-than-days", + type=int, + default=7, + help="Only delete payloads exported more than N days ago. Default: 7", + ) + parser.add_argument( + "--collection", + action="append", + dest="collections", + help="Limit cleanup to a specific collection acronym. Repeat for multiple.", + ) + + def handle(self, *args, **options): + celery_task_name = task_cleanup_daily_payloads.name + + kwargs = { + "older_than_days": options["older_than_days"], + "collections": options.get("collections") or [], + } + + schedule_task( + task=celery_task_name, + name=celery_task_name, + kwargs=kwargs, + description="Weekly cleanup of exported daily payload files from disk.", + day_of_week=options["day_of_week"], + hour=options["hour"], + minute=options["minute"], + ) + + self.stdout.write( + self.style.SUCCESS( + f"Scheduled periodic task '{celery_task_name}' " + f"(day_of_week={options['day_of_week']}, hour={options['hour']}, " + f"minute={options['minute']}, older_than_days={kwargs['older_than_days']}, " + f"collections={kwargs['collections'] or 'all'})." + ) + ) diff --git a/metrics/migrations/0001_initial.py b/metrics/migrations/0001_initial.py index 30ccc96..9746d5f 100644 --- a/metrics/migrations/0001_initial.py +++ b/metrics/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.7 on 2024-08-30 00:52 +# Generated by Codex on 2026-04-27 import django.db.models.deletion from django.conf import settings @@ -9,13 +9,13 @@ class Migration(migrations.Migration): initial = True dependencies = [ - ("wagtaildocs", "0013_delete_uploadeddocument"), + ("collection", "0001_initial"), migrations.swappable_dependency(settings.AUTH_USER_MODEL), ] operations = [ migrations.CreateModel( - name="Top100ArticlesFile", + name="DailyMetricJob", fields=[ ( "id", @@ -28,133 +28,85 @@ class Migration(migrations.Migration): ), ( "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), + models.DateTimeField(auto_now_add=True, verbose_name="Creation date"), ), ( "updated", - models.DateTimeField( - auto_now=True, verbose_name="Last update date" - ), + models.DateTimeField(auto_now=True, verbose_name="Last update date"), + ), + ( + "access_date", + models.DateField(db_index=True, verbose_name="Access Date"), ), ( "status", models.CharField( choices=[ - ("QUE", "Queued"), - ("PAR", "Parsing"), - ("PRO", "Processed"), - ("INV", "Invalidated"), + ("PEN", "Pending"), + ("EXP", "Exporting"), + ("SUC", "Exported"), + ("ERR", "Error"), ], - default="QUE", - max_length=5, + db_index=True, + default="PEN", + max_length=3, + verbose_name="Status", ), ), ( - "attachment", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="+", - to="wagtaildocs.document", - verbose_name="Attachment", - ), + "input_log_hashes", + models.JSONField(default=list, verbose_name="Input Log Hashes"), ), ( - "creator", - models.ForeignKey( - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_creator", - to=settings.AUTH_USER_MODEL, - verbose_name="Creator", + "storage_path", + models.CharField( + blank=True, + default="", + max_length=500, + 
verbose_name="Storage Path", ), ), ( - "updated_by", - models.ForeignKey( + "payload_hash", + models.CharField( blank=True, - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_last_mod_user", - to=settings.AUTH_USER_MODEL, - verbose_name="Updater", + default="", + max_length=64, + verbose_name="Payload Hash", ), ), - ], - options={ - "verbose_name": "Top 100 Articles File", - "verbose_name_plural": "Top 100 Articles Files", - }, - ), - migrations.CreateModel( - name="Top100Articles", - fields=[ ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), + "summary", + models.JSONField(blank=True, default=dict, verbose_name="Summary"), ), ( - "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), + "attempts", + models.PositiveIntegerField(default=0, verbose_name="Attempts"), ), ( - "updated", - models.DateTimeField( - auto_now=True, verbose_name="Last update date" - ), + "error_message", + models.TextField(blank=True, default="", verbose_name="Error Message"), ), - ("pid_issn", models.CharField(max_length=9, verbose_name="PID ISSN")), - ("year_month_day", models.DateField(verbose_name="Date of access")), ( - "print_issn", - models.CharField( - blank=True, max_length=9, null=True, verbose_name="Print ISSN" + "export_started_at", + models.DateTimeField( + blank=True, + null=True, + verbose_name="Export Started At", ), ), ( - "online_issn", - models.CharField( - blank=True, max_length=9, null=True, verbose_name="Online ISSN" - ), + "exported_at", + models.DateTimeField(blank=True, null=True, verbose_name="Exported At"), ), ( "collection", - models.CharField(max_length=3, verbose_name="Collection Acronym 3"), - ), - ("pid", models.CharField(verbose_name="Publication ID")), - ( - "yop", - models.PositiveSmallIntegerField( - verbose_name="Year of Publication" + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="collection.collection", + verbose_name="Collection", ), ), - ( - "total_item_requests", - models.IntegerField(verbose_name="Total Item Requests"), - ), - ( - "total_item_investigations", - models.IntegerField(verbose_name="Total Item Investigations"), - ), - ( - "unique_item_requests", - models.IntegerField(verbose_name="Unique Item Requests"), - ), - ( - "unique_item_investigations", - models.IntegerField(verbose_name="Unique Item Investigations"), - ), ( "creator", models.ForeignKey( @@ -180,18 +132,23 @@ class Migration(migrations.Migration): ), ], options={ - "verbose_name_plural": "Top 100 Articles", - "indexes": [ - models.Index( - fields=["pid_issn"], name="metrics_top_pid_iss_c1fba9_idx" - ), - models.Index( - fields=["year_month_day"], name="metrics_top_year_mo_8cda7b_idx" - ), - ], - "unique_together": { - ("collection", "pid_issn", "pid", "year_month_day") - }, + "verbose_name": "Daily Metric Job", + "verbose_name_plural": "Daily Metric Jobs", + "unique_together": {("collection", "access_date")}, }, ), + migrations.AddIndex( + model_name="dailymetricjob", + index=models.Index( + fields=["collection", "access_date"], + name="metrics_daily_coll_date_idx", + ), + ), + migrations.AddIndex( + model_name="dailymetricjob", + index=models.Index( + fields=["status", "export_started_at"], + name="metrics_daily_status_exp_idx", + ), + ), ] diff --git a/metrics/migrations/0002_alter_top100articlesfile_status.py b/metrics/migrations/0002_alter_top100articlesfile_status.py deleted file mode 100644 index b2b98c5..0000000 --- 
a/metrics/migrations/0002_alter_top100articlesfile_status.py +++ /dev/null @@ -1,27 +0,0 @@ -# Generated by Django 5.0.7 on 2024-08-30 21:27 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0001_initial"), - ] - - operations = [ - migrations.AlterField( - model_name="top100articlesfile", - name="status", - field=models.CharField( - choices=[ - ("QUE", "Queued"), - ("PAR", "Parsing"), - ("PRO", "Processed"), - ("ERR", "Error"), - ("INV", "Invalidated"), - ], - default="QUE", - max_length=5, - ), - ), - ] diff --git a/metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py b/metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py deleted file mode 100644 index 8b01d80..0000000 --- a/metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py +++ /dev/null @@ -1,187 +0,0 @@ -# Generated by Django 5.0.7 on 2025-03-07 16:55 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("article", "0001_initial"), - ("collection", "0001_initial"), - ("journal", "0001_initial"), - ("metrics", "0002_alter_top100articlesfile_status"), - ] - - operations = [ - migrations.RemoveField( - model_name="top100articlesfile", - name="attachment", - ), - migrations.RemoveField( - model_name="top100articlesfile", - name="creator", - ), - migrations.RemoveField( - model_name="top100articlesfile", - name="updated_by", - ), - migrations.CreateModel( - name="Item", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "article", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="article.article", - verbose_name="Article", - ), - ), - ( - "collection", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="collection.collection", - verbose_name="Collection", - ), - ), - ( - "journal", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="journal.journal", - verbose_name="Journal", - ), - ), - ], - options={ - "verbose_name": "Item", - "verbose_name_plural": "Items", - }, - ), - migrations.CreateModel( - name="UserAgent", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "name", - models.CharField( - db_index=True, max_length=255, verbose_name="Name" - ), - ), - ( - "version", - models.CharField( - db_index=True, max_length=255, verbose_name="Version" - ), - ), - ], - options={ - "verbose_name": "User Agent", - "verbose_name_plural": "User Agents", - "unique_together": {("name", "version")}, - }, - ), - migrations.CreateModel( - name="UserSession", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("datetime", models.DateTimeField(verbose_name="Datetime")), - ( - "user_ip", - models.CharField( - db_index=True, max_length=255, verbose_name="User IP" - ), - ), - ( - "user_agent", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="metrics.useragent", - verbose_name="User Agent", - ), - ), - ], - options={ - "verbose_name": "User Session", - "verbose_name_plural": "User Sessions", - }, - ), - migrations.CreateModel( - name="ItemAccess", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - 
( - "country_code", - models.CharField( - db_index=True, max_length=2, verbose_name="Country" - ), - ), - ( - "media_language", - models.CharField( - db_index=True, max_length=2, verbose_name="Media Language" - ), - ), - ( - "media_format", - models.CharField(max_length=10, verbose_name="Media Format"), - ), - ( - "item", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="metrics.item", - verbose_name="Item", - ), - ), - ( - "user_session", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="metrics.usersession", - verbose_name="User Session", - ), - ), - ], - options={ - "verbose_name": "Item Access", - "verbose_name_plural": "Items Access", - }, - ), - migrations.DeleteModel( - name="Top100Articles", - ), - ] diff --git a/metrics/migrations/0004_delete_top100articlesfile_and_more.py b/metrics/migrations/0004_delete_top100articlesfile_and_more.py deleted file mode 100644 index b10c41b..0000000 --- a/metrics/migrations/0004_delete_top100articlesfile_and_more.py +++ /dev/null @@ -1,49 +0,0 @@ -# Generated by Django 5.0.7 on 2025-03-07 16:55 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0003_remove_top100articlesfile_attachment_and_more"), - ("tracker", "0003_logfilediscardedline_delete_top100articlesfileevent"), - ] - - operations = [ - migrations.DeleteModel( - name="Top100ArticlesFile", - ), - migrations.AddIndex( - model_name="item", - index=models.Index( - fields=["collection", "journal", "article"], - name="metrics_ite_collect_6971a5_idx", - ), - ), - migrations.AddIndex( - model_name="item", - index=models.Index( - fields=["collection", "journal"], name="metrics_ite_collect_b5f79b_idx" - ), - ), - migrations.AlterUniqueTogether( - name="item", - unique_together={("collection", "journal", "article")}, - ), - migrations.AlterUniqueTogether( - name="usersession", - unique_together={("datetime", "user_agent", "user_ip")}, - ), - migrations.AlterUniqueTogether( - name="itemaccess", - unique_together={ - ( - "item", - "user_session", - "country_code", - "media_format", - "media_language", - ) - }, - ), - ] diff --git a/metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py b/metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py deleted file mode 100644 index 7bfafff..0000000 --- a/metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py +++ /dev/null @@ -1,49 +0,0 @@ -# Generated by Django 5.0.7 on 2025-03-27 20:40 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0004_delete_top100articlesfile_and_more"), - ] - - operations = [ - migrations.AlterUniqueTogether( - name="itemaccess", - unique_together=set(), - ), - migrations.AddField( - model_name="itemaccess", - name="click_timestamps", - field=models.JSONField(default=dict, verbose_name="Click Timestamps"), - ), - migrations.AddField( - model_name="itemaccess", - name="content_type", - field=models.CharField( - default="undefined", max_length=16, verbose_name="Content Type" - ), - preserve_default=False, - ), - migrations.AlterField( - model_name="itemaccess", - name="media_format", - field=models.CharField( - db_index=True, max_length=10, verbose_name="Media Format" - ), - ), - migrations.AlterUniqueTogether( - name="itemaccess", - unique_together={ - ( - "item", - "user_session", - "country_code", - "media_format", - "media_language", - "content_type", - ) - }, - ), - ] diff --git 
a/metrics/migrations/0006_alter_itemaccess_content_type.py b/metrics/migrations/0006_alter_itemaccess_content_type.py deleted file mode 100644 index 0e81287..0000000 --- a/metrics/migrations/0006_alter_itemaccess_content_type.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 5.0.7 on 2025-03-31 21:07 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0005_alter_itemaccess_unique_together_and_more"), - ] - - operations = [ - migrations.AlterField( - model_name="itemaccess", - name="content_type", - field=models.CharField(max_length=32, verbose_name="Content Type"), - ), - ] diff --git a/metrics/migrations/0007_alter_usersession_datetime_and_more.py b/metrics/migrations/0007_alter_usersession_datetime_and_more.py deleted file mode 100644 index e45036e..0000000 --- a/metrics/migrations/0007_alter_usersession_datetime_and_more.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 5.0.7 on 2025-06-12 17:16 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0006_alter_itemaccess_content_type"), - ] - - operations = [ - migrations.AlterField( - model_name="usersession", - name="datetime", - field=models.DateTimeField(db_index=True, verbose_name="Datetime"), - ), - migrations.AddIndex( - model_name="itemaccess", - index=models.Index( - fields=["item", "user_session"], name="metrics_ite_item_id_8799c9_idx" - ), - ), - ] diff --git a/metrics/migrations/0008_remove_a_few_models.py b/metrics/migrations/0008_remove_a_few_models.py deleted file mode 100644 index dfd14ec..0000000 --- a/metrics/migrations/0008_remove_a_few_models.py +++ /dev/null @@ -1,48 +0,0 @@ -# Generated by Django 5.0.7 on 2025-06-22 17:45 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0007_alter_usersession_datetime_and_more"), - ] - - operations = [ - migrations.AlterUniqueTogether( - name="itemaccess", - unique_together=None, - ), - migrations.AlterUniqueTogether( - name="useragent", - unique_together=None, - ), - migrations.AlterUniqueTogether( - name="usersession", - unique_together=None, - ), - migrations.RemoveField( - model_name="itemaccess", - name="user_session", - ), - migrations.RemoveField( - model_name="usersession", - name="user_agent", - ), - migrations.RemoveField( - model_name="itemaccess", - name="item", - ), - migrations.DeleteModel( - name="Item", - ), - migrations.DeleteModel( - name="ItemAccess", - ), - migrations.DeleteModel( - name="UserAgent", - ), - migrations.DeleteModel( - name="UserSession", - ), - ] diff --git a/metrics/models.py b/metrics/models.py index e69de29..aa789b5 100644 --- a/metrics/models.py +++ b/metrics/models.py @@ -0,0 +1,108 @@ +from django.db import models +from django.utils.translation import gettext_lazy as _ + +from collection.models import Collection +from core.models import CommonControlField + + +class DailyMetricJob(CommonControlField): + STATUS_PENDING = "PEN" + STATUS_EXPORTING = "EXP" + STATUS_EXPORTED = "SUC" + STATUS_ERROR = "ERR" + STATUS_CHOICES = ( + (STATUS_PENDING, _("Pending")), + (STATUS_EXPORTING, _("Exporting")), + (STATUS_EXPORTED, _("Exported")), + (STATUS_ERROR, _("Error")), + ) + + collection = models.ForeignKey( + Collection, + verbose_name=_("Collection"), + on_delete=models.CASCADE, + db_index=True, + ) + + access_date = models.DateField( + verbose_name=_("Access Date"), + db_index=True, + ) + + status = models.CharField( + 
verbose_name=_("Status"), + max_length=3, + choices=STATUS_CHOICES, + default=STATUS_PENDING, + db_index=True, + ) + + input_log_hashes = models.JSONField( + verbose_name=_("Input Log Hashes"), + default=list, + ) + + storage_path = models.CharField( + verbose_name=_("Storage Path"), + max_length=500, + blank=True, + default="", + ) + + payload_hash = models.CharField( + verbose_name=_("Payload Hash"), + max_length=64, + blank=True, + default="", + ) + + summary = models.JSONField( + verbose_name=_("Summary"), + default=dict, + blank=True, + ) + + attempts = models.PositiveIntegerField( + verbose_name=_("Attempts"), + default=0, + ) + + error_message = models.TextField( + verbose_name=_("Error Message"), + blank=True, + default="", + ) + + export_started_at = models.DateTimeField( + verbose_name=_("Export Started At"), + null=True, + blank=True, + ) + + exported_at = models.DateTimeField( + verbose_name=_("Exported At"), + null=True, + blank=True, + ) + + @property + def input_log_count(self): + return len(self.input_log_hashes or []) + + @property + def job_id(self): + if not self.payload_hash: + return "" + return f"{self.collection.acron3}|{self.access_date.isoformat()}|{self.payload_hash}" + + class Meta: + verbose_name = _("Daily Metric Job") + verbose_name_plural = _("Daily Metric Jobs") + unique_together = (("collection", "access_date"),) + indexes = [ + models.Index(fields=["collection", "access_date"], name="metrics_daily_coll_date_idx"), + models.Index(fields=["status", "export_started_at"], name="metrics_daily_status_exp_idx"), + ] + + def __str__(self): + return f"{self.collection.acron3}-{self.access_date}" diff --git a/metrics/opensearch/__init__.py b/metrics/opensearch/__init__.py new file mode 100644 index 0000000..fb9df20 --- /dev/null +++ b/metrics/opensearch/__init__.py @@ -0,0 +1,8 @@ +from .client import OpenSearchUsageClient +from .mappings import ( + BOOKS_MONTH_INDEX_MAPPINGS, + BOOKS_YEAR_INDEX_MAPPINGS, + MONTH_INDEX_MAPPINGS, + YEAR_INDEX_MAPPINGS, + get_index_mappings, +) diff --git a/metrics/opensearch/client.py b/metrics/opensearch/client.py new file mode 100644 index 0000000..ce0de5c --- /dev/null +++ b/metrics/opensearch/client.py @@ -0,0 +1,225 @@ +import logging + +from django.conf import settings +from opensearchpy import NotFoundError, OpenSearch, helpers + +from metrics.opensearch.names import generate_month_index_name, generate_year_index_name + +from .mappings import get_index_mappings +from .scripts import ( + IDEMPOTENT_JOB_INCREMENT_SCRIPT, + METRIC_FIELDS, + build_idempotent_job_increment_action, + merge_metric_document, +) + + +class OpenSearchUsageClient: + def __init__(self, url=None, basic_auth=None, api_key=None, verify_certs=None): + self.client = self.get_opensearch_client(url, basic_auth, api_key, verify_certs) + + def get_opensearch_client(self, url=None, basic_auth=None, api_key=None, verify_certs=None): + url = url or getattr(settings, "OPENSEARCH_URL", None) + basic_auth = basic_auth or getattr(settings, "OPENSEARCH_BASIC_AUTH", None) + api_key = api_key or getattr(settings, "OPENSEARCH_API_KEY", None) + if verify_certs is None: + verify_certs = getattr(settings, "OPENSEARCH_VERIFY_CERTS", False) + + if basic_auth: + return OpenSearch(url, http_auth=tuple(basic_auth), verify_certs=verify_certs) + if api_key: + return OpenSearch(url, api_key=api_key, verify_certs=verify_certs) + return OpenSearch(url, verify_certs=verify_certs) + + def ping(self): + try: + return self.client.ping() + except Exception as exc: + logging.error("Error 
pinging OpenSearch client: %s", exc) + return False + + def create_index(self, index_name, mappings, ping_client=False): + if ping_client and not self.ping(): + return + + response = self.client.indices.create( + index=index_name, + body={ + "settings": {"index": {"number_of_replicas": 0}}, + "mappings": mappings, + }, + ) + logging.info("Index %s created: %s", index_name, response) + + def create_index_if_not_exists(self, index_name, mappings, ping_client=False): + if ping_client and not self.ping(): + return + + if not self.client.indices.exists(index=index_name): + self.create_index(index_name=index_name, mappings=mappings, ping_client=False) + + def ensure_usage_indexes(self, collection, access_date, index_prefix=None): + index_prefix = index_prefix or getattr(settings, "OPENSEARCH_INDEX_NAME", "usage") + year_index = generate_year_index_name(index_prefix, collection, access_date) + month_index = generate_month_index_name(index_prefix, collection, access_date) + + self.create_index_if_not_exists(year_index, get_index_mappings(collection, "year")) + self.create_index_if_not_exists(month_index, get_index_mappings(collection, "month")) + + return {"year": year_index, "month": month_index} + + def delete_index(self, index_name, ping_client=False): + if ping_client and not self.ping(): + return + self.client.indices.delete(index=index_name) + + def index_documents(self, index_name, documents, ping_client=False): + if ping_client and not self.ping(): + return + + if not documents: + return + + helpers.bulk( + self.client, + ( + {"_index": index_name, "_id": doc_id, "_source": document} + for doc_id, document in documents.items() + ), + ) + + def increment_documents_for_daily_job( + self, + index_name, + documents, + job_id, + ping_client=False, + ): + if ping_client and not self.ping(): + return + + if not documents: + return + + helpers.bulk( + self.client, + ( + build_idempotent_job_increment_action( + index_name=index_name, + doc_id=doc_id, + document=document, + job_id=job_id, + ) + for doc_id, document in documents.items() + ), + ) + + def delete_documents(self, index_name, doc_ids, ping_client=False): + if ping_client and not self.ping(): + return + + if not doc_ids: + return + + helpers.bulk( + self.client, + ( + {"_op_type": "delete", "_index": index_name, "_id": doc_id} + for doc_id in doc_ids + ), + ) + + def delete_documents_by_key(self, index_name, data, ping_client=False): + if ping_client and not self.ping(): + return False + + query = { + "query": { + "bool": { + "must": [ + { + "terms": { + key: values if isinstance(values, list) else [values], + } + } + for key, values in data.items() + ] + } + } + } + + try: + self.client.delete_by_query(index=index_name, body=query) + return True + except Exception as exc: + logging.error("Failed to delete documents from %s: %s", index_name, exc) + return False + + def fetch_documents_by_ids(self, index_name, doc_ids, ping_client=False): + if ping_client and not self.ping(): + return {} + + if not doc_ids: + return {} + + try: + response = self.client.mget(index=index_name, body={"ids": doc_ids}) + except NotFoundError: + return {} + + documents = {} + for document in response.get("docs", []): + if document.get("found"): + documents[document["_id"]] = document["_source"] + return documents + + def fetch_documents_by_key(self, index_name, data, ping_client=False): + if ping_client and not self.ping(): + return {} + + query = { + "query": { + "bool": { + "must": [ + { + "terms": { + key: values if isinstance(values, list) else [values], + } 
+ } + for key, values in data.items() + ] + } + } + } + + try: + return { + hit["_id"]: hit["_source"] + for hit in helpers.scan(self.client, index=index_name, query=query) + } + except NotFoundError: + return {} + + def sync_documents(self, index_name, documents, operation="add", ping_client=False): + if ping_client and not self.ping(): + return + + if not documents: + return + + existing_documents = self.fetch_documents_by_ids(index_name=index_name, doc_ids=list(documents.keys())) + upserts = {} + deletes = [] + + for doc_id, document in documents.items(): + merged = merge_metric_document(existing_documents.get(doc_id), document, operation=operation) + if merged is None: + if doc_id in existing_documents: + deletes.append(doc_id) + continue + upserts[doc_id] = merged + + if upserts: + self.index_documents(index_name=index_name, documents=upserts) + if deletes: + self.delete_documents(index_name=index_name, doc_ids=deletes) diff --git a/metrics/opensearch/mappings.py b/metrics/opensearch/mappings.py new file mode 100644 index 0000000..5825c1b --- /dev/null +++ b/metrics/opensearch/mappings.py @@ -0,0 +1,177 @@ +YEAR_INDEX_MAPPINGS = { + "properties": { + "collection": {"type": "keyword"}, + "source": { + "properties": { + "source_type": {"type": "keyword"}, + "source_id": {"type": "keyword"}, + "scielo_issn": {"type": "keyword"}, + "main_title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 512 + } + } + }, + "subject_area_capes": {"type": "keyword"}, + "subject_area_wos": {"type": "keyword"}, + "acronym": {"type": "keyword"}, + "publisher_name": {"type": "keyword"}, + "access_type": {"type": "keyword"}, + "city": {"type": "keyword"}, + "country": {"type": "keyword"}, + "identifiers": {"type": "object"}, + } + }, + "document_type": {"type": "keyword"}, + "scielo_document_type": {"type": "keyword"}, + "metric_scope": {"type": "keyword"}, + "counter_data_type": {"type": "keyword"}, + "parent_data_type": {"type": "keyword"}, + "article_version": {"type": "keyword"}, + "pid": {"type": "keyword"}, + "pid_v2": {"type": "keyword"}, + "pid_v3": {"type": "keyword"}, + "pid_generic": {"type": "keyword"}, + "publication_year": {"type": "integer"}, + "counter_access_type": {"type": "keyword"}, + "access_method": {"type": "keyword"}, + "access_year": {"type": "date", "format": "yyyy"}, + "access_country_code": {"type": "keyword"}, + "content_language": {"type": "keyword"}, + "applied_jobs": {"type": "keyword", "index": False}, + "total_requests": {"type": "integer"}, + "total_investigations": {"type": "integer"}, + "unique_requests": {"type": "integer"}, + "unique_investigations": {"type": "integer"}, + } +} + + +MONTH_INDEX_MAPPINGS = { + "properties": { + "collection": {"type": "keyword"}, + "source": YEAR_INDEX_MAPPINGS["properties"]["source"], + "document_type": {"type": "keyword"}, + "scielo_document_type": {"type": "keyword"}, + "metric_scope": {"type": "keyword"}, + "counter_data_type": {"type": "keyword"}, + "parent_data_type": {"type": "keyword"}, + "article_version": {"type": "keyword"}, + "pid": {"type": "keyword"}, + "pid_v2": {"type": "keyword"}, + "pid_v3": {"type": "keyword"}, + "pid_generic": {"type": "keyword"}, + "publication_year": {"type": "integer"}, + "counter_access_type": {"type": "keyword"}, + "access_method": {"type": "keyword"}, + "access_month": {"type": "date", "format": "yyyy-MM"}, + "applied_jobs": {"type": "keyword", "index": False}, + "daily_metrics": {"type": "object", "dynamic": True}, + "total_requests": {"type": "integer"}, 
+ "total_investigations": {"type": "integer"}, + "unique_requests": {"type": "integer"}, + "unique_investigations": {"type": "integer"}, + } +} + + +BOOKS_YEAR_INDEX_MAPPINGS = { + "properties": { + "collection": {"type": "keyword"}, + "source": { + "properties": { + "source_type": {"type": "keyword"}, + "source_id": {"type": "keyword"}, + "main_title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 512 + } + } + }, + "access_type": {"type": "keyword"}, + "publisher": {"type": "keyword"}, + "city": {"type": "keyword"}, + "country": {"type": "keyword"}, + "identifiers": { + "properties": { + "book_id": {"type": "keyword"}, + "isbn": {"type": "keyword"}, + "eisbn": {"type": "keyword"}, + "doi": {"type": "keyword"}, + } + }, + } + }, + "document_type": {"type": "keyword"}, + "scielo_document_type": {"type": "keyword"}, + "metric_scope": {"type": "keyword"}, + "counter_data_type": {"type": "keyword"}, + "parent_data_type": {"type": "keyword"}, + "article_version": {"type": "keyword"}, + "pid": {"type": "keyword"}, + "pid_generic": {"type": "keyword"}, + "title_pid_generic": {"type": "keyword"}, + "publication_year": {"type": "integer"}, + "counter_access_type": {"type": "keyword"}, + "access_method": {"type": "keyword"}, + "access_year": {"type": "date", "format": "yyyy"}, + "access_country_code": {"type": "keyword"}, + "content_language": {"type": "keyword"}, + "applied_jobs": {"type": "keyword", "index": False}, + "total_requests": {"type": "integer"}, + "total_investigations": {"type": "integer"}, + "unique_requests": {"type": "integer"}, + "unique_investigations": {"type": "integer"}, + } +} + + +BOOKS_MONTH_INDEX_MAPPINGS = { + "properties": { + "collection": {"type": "keyword"}, + "source": BOOKS_YEAR_INDEX_MAPPINGS["properties"]["source"], + "document_type": {"type": "keyword"}, + "scielo_document_type": {"type": "keyword"}, + "metric_scope": {"type": "keyword"}, + "counter_data_type": {"type": "keyword"}, + "parent_data_type": {"type": "keyword"}, + "article_version": {"type": "keyword"}, + "pid": {"type": "keyword"}, + "pid_generic": {"type": "keyword"}, + "title_pid_generic": {"type": "keyword"}, + "publication_year": {"type": "integer"}, + "counter_access_type": {"type": "keyword"}, + "access_method": {"type": "keyword"}, + "access_month": {"type": "date", "format": "yyyy-MM"}, + "applied_jobs": {"type": "keyword", "index": False}, + "daily_metrics": {"type": "object", "dynamic": True}, + "total_requests": {"type": "integer"}, + "total_investigations": {"type": "integer"}, + "unique_requests": {"type": "integer"}, + "unique_investigations": {"type": "integer"}, + } +} + + +METRIC_FIELDS = ( + "total_requests", + "total_investigations", + "unique_requests", + "unique_investigations", +) + + +def get_index_mappings(collection, granularity): + if granularity not in {"month", "year"}: + raise ValueError("Granularity must be 'month' or 'year'.") + + if collection == "books": + return BOOKS_MONTH_INDEX_MAPPINGS if granularity == "month" else BOOKS_YEAR_INDEX_MAPPINGS + + return MONTH_INDEX_MAPPINGS if granularity == "month" else YEAR_INDEX_MAPPINGS diff --git a/metrics/opensearch/names.py b/metrics/opensearch/names.py new file mode 100644 index 0000000..1ecd493 --- /dev/null +++ b/metrics/opensearch/names.py @@ -0,0 +1,41 @@ +from django.conf import settings + + +def _validate_index_inputs(index_prefix: str, collection: str, date: str): + if not date or not isinstance(date, str): + raise ValueError("Date must be a non-empty string in 'YYYY-MM-DD' 
format.") + if not collection or not isinstance(collection, str): + raise ValueError("Collection must be a non-empty string.") + if not index_prefix or not isinstance(index_prefix, str): + raise ValueError("Index prefix must be a non-empty string.") + + +def _get_collection_size(collection: str) -> str: + return getattr(settings, "COLLECTION_ACRON3_SIZE_MAP", {}).get(collection, "small") + + +def extract_access_year(date: str) -> str: + _validate_index_inputs("usage", "tmp", date) + return date.split("-")[0] + + +def extract_access_month(date: str) -> str: + _validate_index_inputs("usage", "tmp", date) + year, month, _ = date.split("-") + return f"{year}{month}" + + +def generate_month_index_name(index_prefix: str, collection: str, date: str) -> str: + _validate_index_inputs(index_prefix, collection, date) + size = _get_collection_size(collection) + if size in ("xlarge", "large"): + return f"{index_prefix}_monthly_{collection}_{extract_access_year(date)}" + return f"{index_prefix}_monthly_{collection}" + + +def generate_year_index_name(index_prefix: str, collection: str, date: str) -> str: + _validate_index_inputs(index_prefix, collection, date) + size = _get_collection_size(collection) + if size in ("xlarge", "large"): + return f"{index_prefix}_yearly_{collection}_{extract_access_year(date)}" + return f"{index_prefix}_yearly_{collection}" diff --git a/metrics/opensearch/scripts.py b/metrics/opensearch/scripts.py new file mode 100644 index 0000000..a6a5e1c --- /dev/null +++ b/metrics/opensearch/scripts.py @@ -0,0 +1,102 @@ +METRIC_FIELDS = ( + "total_requests", + "total_investigations", + "unique_requests", + "unique_investigations", +) + +IDEMPOTENT_JOB_INCREMENT_SCRIPT = """ +if (ctx._source.applied_jobs == null) { + ctx._source.applied_jobs = []; +} +if (ctx._source.applied_jobs.contains(params.job_id)) { + ctx.op = 'none'; + return; +} +for (entry in params.document.entrySet()) { + if (!params.metric_fields.contains(entry.getKey()) && !'applied_jobs'.equals(entry.getKey()) && !'daily_metrics'.equals(entry.getKey())) { + if (!ctx._source.containsKey(entry.getKey()) || ctx._source[entry.getKey()] != entry.getValue()) { + ctx._source[entry.getKey()] = entry.getValue(); + } + } +} +for (field in params.metric_fields) { + def currentValue = ctx._source.containsKey(field) ? ctx._source[field] : 0; + def increment = params.document.containsKey(field) ? params.document[field] : 0; + ctx._source[field] = currentValue + increment; +} +if (params.document.containsKey('daily_metrics')) { + if (!ctx._source.containsKey('daily_metrics') || ctx._source.daily_metrics == null) { + ctx._source.daily_metrics = new HashMap(); + } + for (dayEntry in params.document.daily_metrics.entrySet()) { + def day = dayEntry.getKey(); + def dayMetrics = dayEntry.getValue(); + if (!ctx._source.daily_metrics.containsKey(day) || ctx._source.daily_metrics[day] == null) { + ctx._source.daily_metrics[day] = new HashMap(); + } + for (metric in params.metric_fields) { + def currentValue = ctx._source.daily_metrics[day].containsKey(metric) ? ctx._source.daily_metrics[day][metric] : 0; + def increment = dayMetrics.containsKey(metric) ? 
dayMetrics[metric] : 0; + ctx._source.daily_metrics[day][metric] = currentValue + increment; + } + } +} +ctx._source.applied_jobs.add(params.job_id); +""" + + +def build_idempotent_job_increment_action(index_name, doc_id, document, job_id): + return { + "_op_type": "update", + "_index": index_name, + "_id": doc_id, + "retry_on_conflict": 5, + "scripted_upsert": True, + "script": { + "lang": "painless", + "source": IDEMPOTENT_JOB_INCREMENT_SCRIPT, + "params": { + "document": document, + "job_id": job_id, + "metric_fields": list(METRIC_FIELDS), + }, + }, + "upsert": { + "applied_jobs": [], + }, + } + + +def merge_metric_document(existing, current, operation="add"): + if existing is None: + if operation == "subtract": + return None + return current + + merged = dict(existing) + merged.update( + { + key: value + for key, value in current.items() + if key not in METRIC_FIELDS and key != "daily_metrics" + } + ) + + signal = -1 if operation == "subtract" else 1 + for field in METRIC_FIELDS: + merged[field] = existing.get(field, 0) + signal * current.get(field, 0) + + if "daily_metrics" in current: + merged_daily = dict(existing.get("daily_metrics") or {}) + for day, metrics in current["daily_metrics"].items(): + day_merged = dict(merged_daily.get(day) or {}) + for field in METRIC_FIELDS: + day_merged[field] = day_merged.get(field, 0) + signal * metrics.get(field, 0) + merged_daily[day] = day_merged + merged["daily_metrics"] = merged_daily + + if all(merged.get(field, 0) <= 0 for field in METRIC_FIELDS): + return None + + return merged diff --git a/metrics/services/__init__.py b/metrics/services/__init__.py new file mode 100644 index 0000000..b305681 --- /dev/null +++ b/metrics/services/__init__.py @@ -0,0 +1,26 @@ +from .jobs import ( + acquire_daily_metric_job, + create_or_update_daily_metric_job, + mark_daily_metric_job_exported, + mark_daily_metric_job_failed, + release_stale_daily_metric_jobs, +) +from .resources import ( + build_search_client, + extract_celery_queue_name, + fetch_required_resources, + get_log_files_for_collection_date, +) +from .parser import ( + is_stale_parsing_log, + process_daily_metric_job, + process_line, + requeue_stale_parsing_log, + setup_parsing_environment, + touch_parse_heartbeat, +) +from .export import ( + export_daily_metric_payload, + export_documents, + load_daily_metric_payload, +) diff --git a/metrics/services/daily_payloads.py b/metrics/services/daily_payloads.py new file mode 100644 index 0000000..0e06af9 --- /dev/null +++ b/metrics/services/daily_payloads.py @@ -0,0 +1,127 @@ +import hashlib +import json +import logging +import os +from datetime import timedelta +from pathlib import Path + +from django.conf import settings +from django.utils import timezone + + +def get_daily_payload_root(): + return Path(settings.MEDIA_ROOT) / "metrics" / "daily_payloads" + + +def build_daily_storage_path(collection, access_date): + return ( + Path(collection.acron3) + / access_date.strftime("%Y") + / access_date.strftime("%m") + / f"{access_date.isoformat()}.json" + ) + + +def resolve_storage_path(storage_path): + return get_daily_payload_root() / storage_path + + +def serialize_payload(payload): + return json.dumps( + payload, + ensure_ascii=True, + sort_keys=True, + separators=(",", ":"), + ) + + +def write_payload(storage_path, payload): + resolved_path = resolve_storage_path(storage_path) + resolved_path.parent.mkdir(parents=True, exist_ok=True) + + payload_json = serialize_payload(payload) + payload_hash = hashlib.sha256(payload_json.encode("utf-8")).hexdigest() 
+ + tmp_path = resolved_path.with_suffix(f"{resolved_path.suffix}.tmp") + tmp_path.write_text(payload_json, encoding="utf-8") + tmp_path.replace(resolved_path) + + return payload_hash + + +def read_payload(storage_path): + resolved_path = resolve_storage_path(storage_path) + return json.loads(resolved_path.read_text(encoding="utf-8")) + + +def delete_payload(storage_path): + resolved_path = resolve_storage_path(storage_path) + if resolved_path.exists(): + resolved_path.unlink() + + +def cleanup_exported_payloads(collections=None, older_than_days=7): + from metrics.models import DailyMetricJob + + root = get_daily_payload_root() + if not root.exists(): + return 0 + + cutoff = timezone.now() - timedelta(days=older_than_days) if older_than_days and older_than_days > 0 else None + + storage_path_to_job = {} + db_queryset = DailyMetricJob.objects.exclude(storage_path="") + if collections: + db_queryset = db_queryset.filter(collection__acron3__in=collections) + for job in db_queryset.iterator(chunk_size=500): + storage_path_to_job[job.storage_path] = job + + json_files = root.rglob("*.json") + if collections: + json_files = [p for p in json_files if p.relative_to(root).parts[0] in collections] + + deleted_count = 0 + for file_path in json_files: + if cutoff and _file_is_recent(file_path, cutoff): + continue + + storage_path = file_path.relative_to(root).as_posix() + job = storage_path_to_job.get(storage_path) + + if job is not None and job.status != DailyMetricJob.STATUS_EXPORTED: + continue + + try: + file_path.unlink() + except FileNotFoundError: + pass + deleted_count += 1 + + if job is not None: + job.storage_path = "" + job.payload_hash = "" + job.save(update_fields=["storage_path", "payload_hash", "updated"]) + + _cleanup_empty_dirs(root) + + logging.info( + "Cleaned up %s daily payload files (collections=%s, older_than_days=%s).", + deleted_count, + collections or "all", + older_than_days, + ) + return deleted_count + + +def _file_is_recent(file_path, cutoff): + return file_path.stat().st_mtime >= cutoff.timestamp() + + +def _cleanup_empty_dirs(root): + for dirpath, dirnames, filenames in os.walk(root, topdown=False): + if dirpath == str(root): + continue + try: + os.rmdir(dirpath) + except OSError: + pass diff --git a/metrics/services/export.py b/metrics/services/export.py new file mode 100644 index 0000000..03efbc6 --- /dev/null +++ b/metrics/services/export.py @@ -0,0 +1,94 @@ +import logging + +from django.conf import settings + +from metrics import opensearch +from metrics.opensearch.names import generate_month_index_name, generate_year_index_name + +from . 
import daily_payloads + + +def load_daily_metric_payload(job): + if not job.storage_path: + return None + try: + return daily_payloads.read_payload(job.storage_path) + except FileNotFoundError: + logging.warning("Daily metric payload not found for job %s.", job.pk) + return None + + +def export_daily_metric_payload(search_client, job, payload): + if not job.job_id: + raise RuntimeError("Daily metric job has no payload hash.") + + export_documents( + search_client=search_client, + documents=payload.get("documents") or {}, + collection=payload.get("collection") or job.collection.acron3, + job_id=job.job_id, + ) + + +def export_documents(search_client, documents, collection, job_id): + if not documents: + return + + _sync_documents_group( + search_client=search_client, + collection=collection, + documents=documents.get("month", {}), + granularity="month", + job_id=job_id, + ) + _sync_documents_group( + search_client=search_client, + collection=collection, + documents=documents.get("year", {}), + granularity="year", + job_id=job_id, + ) + + +def _sync_documents_group( + search_client, + collection, + documents, + granularity, + job_id, +): + if not documents: + return + + grouped_documents = {} + index_prefix = settings.OPENSEARCH_INDEX_NAME + + for doc_id, document in documents.items(): + if granularity == "month": + index_name = generate_month_index_name( + index_prefix=index_prefix, + collection=collection, + date=f"{document.get('access_month')}-01", + ) + mappings = opensearch.get_index_mappings(collection, "month") + else: + index_name = generate_year_index_name( + index_prefix=index_prefix, + collection=collection, + date=f"{document.get('access_year')}-01-01", + ) + mappings = opensearch.get_index_mappings(collection, "year") + + grouped_documents.setdefault(index_name, {"mappings": mappings, "documents": {}}) + grouped_documents[index_name]["documents"][doc_id] = document + + for index_name, payload in grouped_documents.items(): + search_client.create_index_if_not_exists( + index_name=index_name, + mappings=payload["mappings"], + ) + search_client.increment_documents_for_daily_job( + index_name=index_name, + documents=payload["documents"], + job_id=job_id, + ) diff --git a/metrics/services/jobs.py b/metrics/services/jobs.py new file mode 100644 index 0000000..78f5100 --- /dev/null +++ b/metrics/services/jobs.py @@ -0,0 +1,153 @@ +import logging +from datetime import timedelta + +from django.db import transaction +from django.utils import timezone + +from log_manager import choices +from log_manager.models import LogFile + +from metrics.models import DailyMetricJob + + +def create_or_update_daily_metric_job(collection, access_date, log_files): + input_log_hashes = sorted(log_file.hash for log_file in log_files if log_file.hash) + with transaction.atomic(): + job, _ = DailyMetricJob.objects.select_for_update().get_or_create( + collection=collection, + access_date=access_date, + ) + + if job.status == DailyMetricJob.STATUS_EXPORTED: + if job.input_log_hashes != input_log_hashes: + raise RuntimeError( + f"Daily metric job already exported for {collection.acron3} {access_date}. " + "Recompute requires deleting/recreating the affected day or period first." 
+ ) + LogFile.objects.filter(hash__in=input_log_hashes).update( + status=choices.LOG_FILE_STATUS_PROCESSED, + parse_heartbeat_at=None, + updated=timezone.now(), + ) + return job + + keep_payload = ( + job.status == DailyMetricJob.STATUS_ERROR + and job.input_log_hashes == input_log_hashes + and job.storage_path + and job.payload_hash + ) + + job.input_log_hashes = input_log_hashes + job.status = DailyMetricJob.STATUS_PENDING + job.error_message = "" + job.export_started_at = None + job.exported_at = None + if not keep_payload: + job.storage_path = "" + job.payload_hash = "" + job.summary = {} + job.save( + update_fields=[ + "input_log_hashes", + "status", + "error_message", + "export_started_at", + "exported_at", + "storage_path", + "payload_hash", + "summary", + "updated", + ] + ) + return job + + +def acquire_daily_metric_job(job_id): + with transaction.atomic(): + job = ( + DailyMetricJob.objects.select_for_update() + .select_related("collection") + .get(pk=job_id) + ) + if job.status in { + DailyMetricJob.STATUS_EXPORTING, + DailyMetricJob.STATUS_EXPORTED, + }: + logging.info("Daily metric job %s is already in final/active state.", job_id) + return None + + job.status = DailyMetricJob.STATUS_EXPORTING + job.attempts += 1 + job.error_message = "" + job.export_started_at = timezone.now() + job.save( + update_fields=[ + "status", + "attempts", + "error_message", + "export_started_at", + "updated", + ] + ) + return job + + +def mark_daily_metric_job_failed(job, error_message): + DailyMetricJob.objects.filter(pk=job.pk).update( + status=DailyMetricJob.STATUS_ERROR, + error_message=str(error_message), + updated=timezone.now(), + ) + LogFile.objects.filter(hash__in=job.input_log_hashes).update( + status=choices.LOG_FILE_STATUS_ERROR, + parse_heartbeat_at=None, + updated=timezone.now(), + ) + + +def mark_daily_metric_job_exported(job, user=None): + DailyMetricJob.objects.filter(pk=job.pk).update( + status=DailyMetricJob.STATUS_EXPORTED, + error_message="", + exported_at=timezone.now(), + updated=timezone.now(), + ) + LogFile.objects.filter(hash__in=job.input_log_hashes).update( + status=choices.LOG_FILE_STATUS_PROCESSED, + parse_heartbeat_at=None, + updated=timezone.now(), + ) + + +def release_stale_daily_metric_jobs(collections=None, from_date=None, until_date=None, stale_after_minutes=60): + cutoff = timezone.now() - timedelta(minutes=stale_after_minutes) + queryset = DailyMetricJob.objects.filter( + status=DailyMetricJob.STATUS_EXPORTING, + export_started_at__lt=cutoff, + ) + if collections: + queryset = queryset.filter(collection__acron3__in=collections) + if from_date: + queryset = queryset.filter(access_date__gte=from_date) + if until_date: + queryset = queryset.filter(access_date__lte=until_date) + + stale_jobs = list(queryset.only("pk", "input_log_hashes")) + released = queryset.update( + status=DailyMetricJob.STATUS_ERROR, + error_message="Job marked for retry after stale exporting state.", + updated=timezone.now(), + ) + stale_hashes = { + log_hash + for job in stale_jobs + for log_hash in (job.input_log_hashes or []) + } + if stale_hashes: + LogFile.objects.filter(hash__in=stale_hashes).update( + status=choices.LOG_FILE_STATUS_ERROR, + parse_heartbeat_at=None, + updated=timezone.now(), + ) + return released diff --git a/metrics/services/parser.py b/metrics/services/parser.py new file mode 100644 index 0000000..5eb3dbf --- /dev/null +++ b/metrics/services/parser.py @@ -0,0 +1,249 @@ +import logging +from datetime import timedelta +from time import monotonic + +from django.conf import 
settings +from django.utils import timezone + +from scielo_usage_counter import log_handler, url_translator + +from log_manager import choices +from log_manager.models import LogFile +from log_manager_config.models import CollectionLogDirectory +from source.models import Source +from document.models import Document +from tracker.choices import ( + LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT, + LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE, +) +from tracker.models import LogFileDiscardedLine + +from metrics.counter import access, documents as index_docs +from metrics.counter import parser + +from .resources import get_log_files_for_collection_date +from . import daily_payloads + + +def process_daily_metric_job(job, robots_list, mmdb, track_errors=False): + log_files = get_log_files_for_collection_date( + collection=job.collection, + access_date=job.access_date, + ) + if not log_files: + raise RuntimeError(f"No log files found for {job.collection.acron3} {job.access_date}.") + + results = {} + summary = { + "log_files": len(log_files), + "input_log_hashes": sorted(log_file.hash for log_file in log_files if log_file.hash), + "lines_parsed": 0, + "valid_lines": 0, + "discarded_lines": 0, + } + + LogFile.objects.filter(pk__in=[log_file.pk for log_file in log_files]).update( + status=choices.LOG_FILE_STATUS_PARSING, + summary={}, + last_processed_line=0, + parse_heartbeat_at=timezone.now(), + updated=timezone.now(), + ) + LogFileDiscardedLine.objects.filter(log_file_id__in=[log_file.pk for log_file in log_files]).delete() + + heartbeat_interval_seconds = getattr(settings, "METRICS_PARSE_HEARTBEAT_INTERVAL_SECONDS", 30) + + for log_file in log_files: + log_parser, url_translator_manager = setup_parsing_environment( + log_file=log_file, + robots_list=robots_list, + mmdb=mmdb, + ) + line_count = 0 + valid_count = 0 + errors = [] + last_heartbeat_monotonic = monotonic() + + for line in log_parser.parse(): + line_count += 1 + if monotonic() - last_heartbeat_monotonic >= heartbeat_interval_seconds: + touch_parse_heartbeat(log_file, log_parser.stats.lines_parsed) + last_heartbeat_monotonic = monotonic() + + is_valid_line, error_obj = process_line( + results=results, + line=line, + utm=url_translator_manager, + log_file=log_file, + track_errors=track_errors, + ) + if is_valid_line: + valid_count += 1 + else: + summary["discarded_lines"] += 1 + if error_obj: + errors.append(error_obj) + + if errors: + LogFileDiscardedLine.objects.bulk_create(errors) + + summary["lines_parsed"] += line_count + summary["valid_lines"] += valid_count + log_file.summary = { + "parsing_completed": True, + "lines_parsed": line_count, + "valid_lines": valid_count, + } + log_file.last_processed_line = log_parser.stats.lines_parsed + log_file.parse_heartbeat_at = timezone.now() + log_file.save( + update_fields=[ + "summary", + "last_processed_line", + "parse_heartbeat_at", + "updated", + ] + ) + + documents = index_docs.convert_raw_results_to_index_documents(results) + storage_path = daily_payloads.build_daily_storage_path(job.collection, job.access_date) + payload = { + "collection": job.collection.acron3, + "access_date": job.access_date.isoformat(), + "input_log_hashes": summary["input_log_hashes"], + "documents": documents, + "summary": summary, + } + payload_hash = daily_payloads.write_payload(storage_path, payload) + + job.input_log_hashes = summary["input_log_hashes"] + job.storage_path = storage_path.as_posix() + job.payload_hash = payload_hash + job.summary = { + **summary, + "month_document_count": 
len(documents.get("month", {})), + "year_document_count": len(documents.get("year", {})), + } + job.save( + update_fields=[ + "input_log_hashes", + "storage_path", + "payload_hash", + "summary", + "updated", + ] + ) + + return payload + + +def setup_parsing_environment(log_file, robots_list, mmdb): + lp = log_handler.LogParser(mmdb_data=mmdb.data, robots_list=robots_list, output_mode="dict") + lp.logfile = log_file.path + + translator_class = None + for cld in CollectionLogDirectory.objects.filter(config__collection=log_file.collection): + if cld.path in log_file.path: + if cld.translator_class: + translator_class = parser.translator_class_name_to_obj(cld.translator_class) + break + + if not translator_class: + raise Exception(f"No URL translator class found for collection {log_file.collection}.") + + utm = url_translator.URLTranslationManager( + documents_metadata=Document.metadata(collection=log_file.collection), + sources_metadata=Source.metadata(collection=log_file.collection), + translator=translator_class, + ) + return lp, utm + + +def process_line(results, line, utm, log_file, track_errors=False): + try: + translated_url = utm.translate(line.get("url")) + except Exception as exc: + logging.error("Error translating URL %s: %s", line.get("url"), exc) + return False, None + + try: + item_access_data = access.extract_item_access_data(log_file.collection.acron3, translated_url) + except Exception as exc: + logging.error("Error extracting item access data from URL %s: %s", line.get("url"), exc) + return False, None + + ignore_utm_validation = not track_errors + is_valid, check_result = access.is_valid_item_access_data( + item_access_data, + utm, + ignore_utm_validation, + ) + + if not is_valid: + if track_errors: + error_code = check_result.get("code") + if error_code in { + "invalid_scielo_issn", + "invalid_source_id", + "invalid_pid_v3", + "invalid_pid_v2", + "invalid_pid_generic", + }: + tracker_error_type = ( + LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT + if "pid" in error_code + else LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE + ) + + return False, LogFileDiscardedLine.create( + log_file=log_file, + error_type=tracker_error_type, + message=check_result.get("message"), + data={"line": line, "item_access_data": item_access_data}, + save=False, + ) + + return False, None + + try: + access.update_results_with_item_access_data(results, item_access_data, line) + except Exception as exc: + logging.error("Error updating metrics results for URL %s: %s", line.get("url"), exc) + return False, None + + return True, None + + +def touch_parse_heartbeat(log_file, last_processed_line=None): + heartbeat_at = timezone.now() + update_kwargs = { + "parse_heartbeat_at": heartbeat_at, + "updated": heartbeat_at, + } + if last_processed_line is not None: + update_kwargs["last_processed_line"] = last_processed_line or 0 + log_file.last_processed_line = last_processed_line or 0 + LogFile.objects.filter(pk=log_file.pk).update(**update_kwargs) + log_file.parse_heartbeat_at = heartbeat_at + + +def is_stale_parsing_log(log_file, stale_after_minutes=60): + if log_file.status != choices.LOG_FILE_STATUS_PARSING: + return False + + if not log_file.parse_heartbeat_at: + return True + + cutoff = timezone.now() - timedelta(minutes=stale_after_minutes) + return log_file.parse_heartbeat_at < cutoff + + +def requeue_stale_parsing_log(log_file): + now = timezone.now() + LogFile.objects.filter(pk=log_file.pk).update( + status=choices.LOG_FILE_STATUS_ERROR, + parse_heartbeat_at=None, + updated=now, + ) + 
log_file.status = choices.LOG_FILE_STATUS_ERROR + log_file.parse_heartbeat_at = None diff --git a/metrics/services/resources.py b/metrics/services/resources.py new file mode 100644 index 0000000..dc31400 --- /dev/null +++ b/metrics/services/resources.py @@ -0,0 +1,54 @@ +import logging + +from django.conf import settings + +from log_manager.models import LogFile +from resources.models import MMDB, RobotUserAgent + +from metrics import opensearch + + +def extract_celery_queue_name(collection_acronym): + return f"parse_{settings.COLLECTION_ACRON3_SIZE_MAP.get(collection_acronym, 'small')}" + + +def fetch_required_resources(robot_source=None): + robots_list = RobotUserAgent.get_patterns(source=robot_source) + if not robots_list: + logging.error( + "There are no robots available in the database for source %s.", + RobotUserAgent.normalize_source(robot_source), + ) + return None, None + + try: + mmdb = MMDB.objects.latest("created") + except MMDB.DoesNotExist: + logging.error("There are no MMDB files available in the database.") + return None, None + + return robots_list, mmdb + + +def build_search_client(): + return opensearch.OpenSearchUsageClient( + settings.OPENSEARCH_URL, + settings.OPENSEARCH_BASIC_AUTH, + settings.OPENSEARCH_API_KEY, + settings.OPENSEARCH_VERIFY_CERTS, + ) + + +def get_log_files_for_collection_date(collection, access_date, status_filters=None): + queryset = ( + LogFile.objects.filter( + collection=collection, + date=access_date, + ) + .select_related("collection") + .order_by("path", "hash") + ) + if status_filters: + queryset = queryset.filter(status__in=status_filters) + + return list(queryset) diff --git a/metrics/tasks.py b/metrics/tasks.py deleted file mode 100644 index 026bfb5..0000000 --- a/metrics/tasks.py +++ /dev/null @@ -1,508 +0,0 @@ -import logging - -from django.conf import settings -from django.contrib.auth import get_user_model -from django.utils.translation import gettext as _ - -from scielo_usage_counter import log_handler -from scielo_usage_counter import url_translator - -from config import celery_app -from core.utils.utils import _get_user -from core.utils.date_utils import get_date_obj, get_date_range_str -from article.models import Article -from collection.models import Collection -from journal.models import Journal -from log_manager import choices -from log_manager_config.models import CollectionURLTranslatorClass, CollectionLogFilesPerDay, CollectionLogDirectory -from log_manager.models import LogFile, CollectionLogFileDateCount, LogFileDate -from resources.models import MMDB, RobotUserAgent -from tracker.models import LogFileDiscardedLine -from tracker.choices import LOG_FILE_DISCARDED_LINE_REASON_MISSING_ARTICLE, LOG_FILE_DISCARDED_LINE_REASON_MISSING_JOURNAL - -from . import es -from .utils import parser_utils, index_utils - - -User = get_user_model() - - -def extract_celery_queue_name(collection_acronym): - return f"parse_{settings.COLLECTION_ACRON3_SIZE_MAP.get(collection_acronym, 'small')}" - - -@celery_app.task(bind=True, name=_('Parse logs'), timelimit=-1) -def task_parse_logs(self, collections=[], include_logs_with_error=True, batch_size=5000, replace=False, track_errors=False, from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None): - """ - Parses log files associated with a given collection. - - Args: - collections (list, optional): List of collection acronyms to parse logs for. Defaults to all collections. - include_logs_with_error (bool, optional): Whether to include logs with errors. Defaults to True. 
- batch_size (int, optional): Number of records to process in a single batch. Defaults to 5000. - replace (bool, optional): Whether to replace existing records. Defaults to False. - track_errors (bool, optional): Whether to track errors in log parsing. Defaults to False. - from_date (str, optional): Start date for log parsing in 'YYYY-MM-DD' format. Defaults to None. - until_date (str, optional): End date for log parsing in 'YYYY-MM-DD' format. Defaults to None. - days_to_go_back (int, optional): Number of days to go back from the current date to parse logs. Defaults to None. - user_id - username - - Returns: - None. - """ - from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back) - - from_date_obj = get_date_obj(from_date) - until_date_obj = get_date_obj(until_date) - - # Set status filters based on the include_logs_with_error and replace flags - status_filters = [choices.LOG_FILE_STATUS_QUEUED] - if include_logs_with_error: - status_filters.append(choices.LOG_FILE_STATUS_ERROR) - if replace: - status_filters.append(choices.LOG_FILE_STATUS_PROCESSED) - - for collection in collections or Collection.acron3_list(): - for lf in LogFile.objects.filter(status__in=status_filters, collection__acron3=collection): - probably_date = parser_utils.extract_date_from_validation_dict(lf.validation) - if not probably_date: - logging.debug(f'Log file {lf.path} does not have a valid probably date.') - continue - - if probably_date < from_date_obj or probably_date > until_date_obj: - continue - - queue_name = extract_celery_queue_name(collection) - - logging.info(f'PARSING file {lf.path}') - task_parse_log.apply_async( - args=(lf.hash, batch_size, replace, track_errors, user_id, username), - queue=queue_name, - ) - - -@celery_app.task(bind=True, name=_('Parse one log'), timelimit=-1) -def task_parse_log(self, log_file_hash, batch_size=5000, replace=False, track_errors=False, user_id=None, username=None): - """ - Parses a log file, extracts relevant information, and creates processed log records in the database. - - Args: - log_file_hash (str): Hash representing the log file to be parsed. - batch_size (int, optional): Number of records to process in a single batch. Defaults to 5000. - replace (bool, optional): Whether to replace existing records. Defaults to False. - track_errors (bool, optional): Whether to track errors in log parsing. Defaults to False. - user_id - username - - Returns: - None. 
- """ - user = _get_user(self.request, username=username, user_id=user_id) - robots_list, mmdb = _fetch_required_resources() - if not robots_list or not mmdb: - return - - log_file = _initialize_log_file(log_file_hash) - if not log_file: - return - - clfdc = create_or_update_collection_log_file_date_count( - user=user, - collection=log_file.collection, - date=get_date_obj(log_file.validation.get('probably_date')) - ) - - if not replace and clfdc.is_usage_metric_computed: - logging.info(f'Usage metric already computed for {log_file.validation.get("probably_date")}') - return - - if replace: - clfdc.exported_files_count = 0 - clfdc.is_usage_metric_computed = False - clfdc.save() - - log_parser, url_translator_manager = _setup_parsing_environment(log_file, robots_list, mmdb) - success = _process_lines(lp=log_parser, utm=url_translator_manager, log_file=log_file, batch_size=batch_size, replace=replace, track_errors=track_errors) - - if not success: - logging.error(f'Failed to parse log file {log_file.path}.') - log_file.status = choices.LOG_FILE_STATUS_ERROR - log_file.save() - return - - log_file.status = choices.LOG_FILE_STATUS_PROCESSED - log_file.save() - - _update_exported_files_count(clfdc) - - logging.info(f'Log file {log_file.path} has been successfully parsed.') - - -def create_or_update_collection_log_file_date_count(user, collection, date): - n_expected_files = CollectionLogFilesPerDay.get_number_of_expected_files_by_day(collection=collection.acron3, date=date) - n_found_logs = LogFileDate.get_number_of_found_files_for_date(collection=collection.acron3, date=date) - - return CollectionLogFileDateCount.create_or_update( - user=user, - collection=collection, - date=date, - expected_log_files=n_expected_files, - found_log_files=n_found_logs, - ) - - -def _initialize_log_file(log_file_hash): - """ - Initializes the log file for parsing by setting its status to 'parsing'. - - Args: - log_file_hash (str): The hash of the log file to be initialized. - - Returns: - LogFile: The initialized log file object, or None if it does not exist. - """ - try: - log_file = LogFile.get(hash=log_file_hash) - log_file.status = choices.LOG_FILE_STATUS_PARSING - log_file.save() - return log_file - except LogFile.DoesNotExist: - logging.error(f'Log file with hash {log_file_hash} does not exist.') - return None - - -def _fetch_required_resources(): - """ - Fetches the necessary resources for parsing logs, including robot user agents and MMDB files. - - Returns: - tuple: A tuple containing the list of robot user agents and the latest MMDB object. - """ - robots_list = RobotUserAgent.get_all_patterns() - if not robots_list: - logging.error('There are no robots available in the database.') - return None, None - - mmdb = MMDB.objects.latest('created') - if not mmdb: - logging.error('There are no MMDB files available in the database.') - return None, None - - return robots_list, mmdb - - -def _setup_parsing_environment(log_file, robots_list, mmdb): - """ - Sets up the environment for parsing the log file, including initializing the log parser and URL translator manager. - - Args: - log_file (LogFile): The log file to be parsed. - robots_list (list): List of robot user agents. - mmdb (MMDB): The MMDB object containing geolocation data. - - Returns: - tuple: A tuple containing the LogParser instance and URLTranslationManager instance. 
- """ - lp = log_handler.LogParser(mmdb_data=mmdb.data, robots_list=robots_list, output_mode='dict') - lp.logfile = log_file.path - - translator_class = None - for cld in CollectionLogDirectory.objects.filter(collection=log_file.collection): - if cld.path in log_file.path: - try: - translator_class_name = CollectionURLTranslatorClass.objects.get(collection=log_file.collection, directory=cld).translator_class - translator_class = parser_utils.translator_class_name_to_obj(translator_class_name) - break - except CollectionURLTranslatorClass.DoesNotExist: - continue - - if not translator_class: - raise Exception(f'No URL translator class found for collection {log_file.collection}.') - - logging.info(f'Creating URL translation manager for {log_file.collection}') - utm = url_translator.URLTranslationManager( - articles_metadata=Article.metadata(collection=log_file.collection), - journals_metadata=Journal.metadata(collection=log_file.collection), - translator=translator_class, - ) - return lp, utm - - -def _process_lines(lp, utm, log_file, batch_size=5000, replace=False, track_errors=False): - """ - Processes each line of the log file, translating URLs and registering item accesses. - - Args: - lp (LogParser): The log parser instance. - utm (URLTranslationManager): The URL translation manager instance. - log_file (LogFile): The log file being processed. - batch_size (int, optional): Number of records to process in a single batch. Defaults to 5000. - replace (bool, optional): Whether to replace existing records. Defaults to False. - track_errors (bool, optional): Whether to track errors in log parsing. Defaults to False. - - Returns: - None. - """ - logging.info(f'Processing {lp.logfile}') - results = {} - errors = [] - - jump = log_file.last_processed_line if not replace else 0 - - es_manager = es.ElasticSearchUsageWrapper( - settings.ES_URL, - settings.ES_BASIC_AUTH, - settings.ES_API_KEY, - settings.ES_VERIFY_CERTS - ) - - if not es_manager.ping(): - logging.error('Elasticsearch client is not available.') - return False - - index_name = index_utils.generate_index_name( - index_prefix=settings.ES_INDEX_NAME, - collection=log_file.collection.acron3, - date=log_file.validation.get('probably_date') - ) - - es_manager.create_index_if_not_exists(index_name=index_name) - - if replace: - logging.info(f'Removing existing documents for collection {log_file.collection.acron3} and date {log_file.validation.get("probably_date")}') - delete_success = es_manager.delete_documents_by_key( - index_name=index_name, - data={'collection': log_file.collection.acron3, 'date': log_file.validation.get('probably_date')}, - ) - if not delete_success: - logging.error(f'Failed to delete existing documents for collection {log_file.collection.acron3} and date {log_file.validation.get("probably_date")}') - return False - - for line in lp.parse(): - if lp.stats.lines_parsed < jump: - continue - - if lp.stats.lines_parsed % batch_size == 0: - logging.info(f'Processing line {lp.stats.lines_parsed} of {lp.logfile}') - - is_valid_line, error_obj = _process_line(results, line, utm, log_file, track_errors) - if not is_valid_line: - if error_obj: - errors.append(error_obj) - - if len(errors) >= batch_size: - LogFileDiscardedLine.objects.bulk_create(errors) - errors = [] - continue - - if len(results) >= batch_size: - logging.info(f'Indexing data for log file {log_file.path}') - es_manager.export_to_index( - index_name=index_name, - data=results, - batch_size=batch_size - ) - results = {} - - _update_log_file_summary(log_file, 
lp.stats.get_stats()) - - logging.info(f'Indexing data for log file {log_file.path}') - es_manager.export_to_index( - index_name=index_name, - data=results, - batch_size=batch_size - ) - results = {} - - LogFileDiscardedLine.objects.bulk_create(errors) if errors else None - errors = [] - - _update_log_file_summary(log_file, lp.stats.get_stats()) - - return True - - -def _update_log_file_summary(log_file, stats): - if not stats: - logging.warning(f'No stats available for log file {log_file.path}. Skipping summary update.') - return - - summary_k, summary_v = stats - log_file.summary = dict(zip(summary_k, summary_v)) - log_file.last_processed_line = log_file.summary.get('lines_parsed', 0) - log_file.save() - - -def _update_exported_files_count(collection_log_file_date: CollectionLogFileDateCount): - collection_log_file_date.exported_files_count += 1 - collection_log_file_date.set_is_usage_metric_computed() - collection_log_file_date.save() - - -def _process_line(results, line, utm, log_file, track_errors=False): - """ - Process a single log line to extract and validate item access data. - This function translates a URL from the log line, extracts item access data, - validates the data, and updates the results if the data is valid. - - Args: - results: Dictionary or data structure to store processed results - line (dict): Log line containing URL and other access information - utm: URL translation manager for converting URLs - log_file: Log file object containing collection information (must have collection.acron3) - track_errors (bool): Whether to track errors in log parsing. - - Returns: - tuple: A tuple containing a boolean indicating success or failure, and an optional LogFileDiscardedLine object. - - Raises: - Logs errors for URL translation failures and item access data extraction failures. - Logs debug messages for invalid item access data. - """ - try: - translated_url = utm.translate(line.get('url')) - except Exception as e: - logging.error(f'Error translating URL {line.get("url")}: {e}') - return False, None - - try: - item_access_data = index_utils.extract_item_access_data(log_file.collection.acron3, translated_url) - except Exception as e: - logging.error(f'Error extracting item access data from URL {line.get("url")}: {e}') - return False, None - - ignore_utm_validation = not track_errors - is_valid, check_result = index_utils.is_valid_item_access_data(item_access_data, utm, ignore_utm_validation) - - if not is_valid: - if track_errors: - error_code = check_result.get('code') - - if error_code in { - 'invalid_scielo_issn', - 'invalid_pid_v3', - 'invalid_pid_v2', - 'invalid_pid_generic' - }: - if 'pid' in error_code: - tracker_error_type = LOG_FILE_DISCARDED_LINE_REASON_MISSING_ARTICLE - else: - tracker_error_type = LOG_FILE_DISCARDED_LINE_REASON_MISSING_JOURNAL - - lfdl = LogFileDiscardedLine.create( - log_file=log_file, - error_type=tracker_error_type, - message=check_result.get('message'), - data={'line': line, 'item_access_data': item_access_data}, - save=False, - ) - logging.debug(f'Invalid item access data: {check_result.get("message")}. Line: {line}. 
Item Access Data: {item_access_data}') - return False, lfdl - - return False, None - - index_utils.update_results_with_item_access_data( - results, - item_access_data, - line - ) - - return True, None - - -@celery_app.task(bind=True, name=_('Create index'), timelimit=-1) -def task_create_index(self, index_name, mappings=None, user_id=None, username=None): - """ - Creates an Elasticsearch index with the specified settings and mappings. - - Args: - index_name (str): The name of the index to be created. - mappings (dict, optional): The mappings for the index. Defaults to None. - user_id (int, optional): The ID of the user initiating the task. Defaults to None. - username (str, optional): The username of the user initiating the task. Defaults to None. - - Returns: - None. - """ - user = _get_user(self.request, username=username, user_id=user_id) - es_manager = es.ElasticSearchUsageWrapper( - settings.ES_URL, - settings.ES_BASIC_AUTH, - settings.ES_API_KEY, - settings.ES_VERIFY_CERTS - ) - - try: - if es_manager.client.indices.exists(index=index_name): - logging.info(f"Index {index_name} already exists.") - return - - es_manager.create_index(index_name=index_name, mappings=mappings) - logging.info(f"Index {index_name} created successfully.") - except Exception as e: - logging.error(f"Failed to create index {index_name}: {e}") - - -@celery_app.task(bind=True, name=_('Delete index'), timelimit=-1) -def task_delete_index(self, index_name, user_id=None, username=None): - """ - Deletes an Elasticsearch index. - - Args: - index_name (str): The name of the index to be deleted. - user_id (int, optional): The ID of the user initiating the task. Defaults to None. - username (str, optional): The username of the user initiating the task. Defaults to None. - - Returns: - None. - """ - user = _get_user(self.request, username=username, user_id=user_id) - es_manager = es.ElasticSearchUsageWrapper( - settings.ES_URL, - settings.ES_BASIC_AUTH, - settings.ES_API_KEY, - settings.ES_VERIFY_CERTS - ) - - try: - if not es_manager.client.indices.exists(index=index_name): - logging.info(f"Index {index_name} does not exist.") - return - - es_manager.client.indices.delete(index=index_name) - logging.info(f"Index {index_name} deleted successfully.") - except Exception as e: - logging.error(f"Failed to delete index {index_name}: {e}") - - -@celery_app.task(bind=True, name=_('Delete documents by key'), timelimit=-1) -def task_delete_documents_by_key(self, index_name, data, user_id=None, username=None): - """ - Deletes documents from Elasticsearch based on the provided keys and values. - - Args: - index_name (str): The name of the Elasticsearch index. Defaults to settings.ES_INDEX_NAME. - data (dict): A dictionary where keys are field names and values are the corresponding values to match for deletion. - user_id (int, optional): The ID of the user initiating the task. Defaults to None. - username (str, optional): The username of the user initiating the task. Defaults to None. - - Returns: - None. 
- """ - user = _get_user(self.request, username=username, user_id=user_id) - es_manager = es.ElasticSearchUsageWrapper( - settings.ES_URL, - settings.ES_BASIC_AUTH, - settings.ES_API_KEY, - settings.ES_VERIFY_CERTS - ) - - try: - es_manager.delete_documents_by_key( - index_name=index_name, - data=data, - ) - logging.info(f"Successfully deleted documents with data: {data} from index {index_name}.") - except Exception as e: - logging.error(f"Failed to delete documents with data {data} from index {index_name}: {e}") diff --git a/metrics/tasks/__init__.py b/metrics/tasks/__init__.py new file mode 100644 index 0000000..f0c2d6a --- /dev/null +++ b/metrics/tasks/__init__.py @@ -0,0 +1,19 @@ +from .parse import ( + task_parse_logs, + task_wait_parse_logs_wave, +) +from .process import ( + task_process_daily_metric_job, +) +from .resume import ( + task_resume_log_exports, + task_resume_stale_parsing_logs, +) +from .index import ( + task_create_index, + task_delete_index, + task_delete_documents_by_key, +) +from .cleanup import ( + task_cleanup_daily_payloads, +) diff --git a/metrics/tasks/cleanup.py b/metrics/tasks/cleanup.py new file mode 100644 index 0000000..9b3c8e0 --- /dev/null +++ b/metrics/tasks/cleanup.py @@ -0,0 +1,31 @@ +import logging + +from django.utils.translation import gettext as _ + +from config import celery_app +from core.utils.request_utils import _get_user +from metrics.services import daily_payloads + + +@celery_app.task(bind=True, name=_("[Metrics] Cleanup Daily Payloads"), timelimit=-1) +def task_cleanup_daily_payloads( + self, + collections=None, + older_than_days=7, + user_id=None, + username=None, +): + _get_user(self.request, username=username, user_id=user_id) + + deleted_count = daily_payloads.cleanup_exported_payloads( + collections=collections or [], + older_than_days=older_than_days, + ) + + logging.info( + "Cleanup task completed: %s payload file(s) deleted (collections=%s, older_than_days=%s).", + deleted_count, + collections or "all", + older_than_days, + ) + return {"deleted_payloads": deleted_count} diff --git a/metrics/tasks/index.py b/metrics/tasks/index.py new file mode 100644 index 0000000..2635377 --- /dev/null +++ b/metrics/tasks/index.py @@ -0,0 +1,61 @@ +import logging + +from django.utils.translation import gettext as _ + +from config import celery_app +from core.utils.request_utils import _get_user + +from metrics.services.resources import build_search_client + + +@celery_app.task(bind=True, name=_("[Metrics] Create Index"), timelimit=-1) +def task_create_index(self, index_name, mappings=None, user_id=None, username=None): + _get_user(self.request, username=username, user_id=user_id) + search_client = build_search_client() + + try: + if search_client.client.indices.exists(index=index_name): + logging.info("Index %s already exists.", index_name) + return + + search_client.create_index(index_name=index_name, mappings=mappings or {}) + logging.info("Index %s created successfully.", index_name) + except Exception as exc: + logging.error("Failed to create index %s: %s", index_name, exc) + + +@celery_app.task(bind=True, name=_("[Metrics] Delete Index"), timelimit=-1) +def task_delete_index(self, index_name, user_id=None, username=None): + _get_user(self.request, username=username, user_id=user_id) + search_client = build_search_client() + + try: + if not search_client.client.indices.exists(index=index_name): + logging.info("Index %s does not exist.", index_name) + return + + search_client.delete_index(index_name=index_name) + logging.info("Index %s deleted 
successfully.", index_name) + except Exception as exc: + logging.error("Failed to delete index %s: %s", index_name, exc) + + +@celery_app.task(bind=True, name=_("[Metrics] Delete Documents by Key"), timelimit=-1) +def task_delete_documents_by_key(self, index_name, data, user_id=None, username=None): + _get_user(self.request, username=username, user_id=user_id) + search_client = build_search_client() + + try: + search_client.delete_documents_by_key(index_name=index_name, data=data) + logging.info( + "Successfully deleted documents with data: %s from index %s.", + data, + index_name, + ) + except Exception as exc: + logging.error( + "Failed to delete documents with data %s from index %s: %s", + data, + index_name, + exc, + ) diff --git a/metrics/tasks/parse.py b/metrics/tasks/parse.py new file mode 100644 index 0000000..7748922 --- /dev/null +++ b/metrics/tasks/parse.py @@ -0,0 +1,286 @@ +import logging + +from django.utils.translation import gettext as _ + +from config import celery_app +from core.utils.date_utils import get_date_obj, get_date_range_str +from core.utils.request_utils import _get_user +from collection.models import Collection +from log_manager import choices +from log_manager.models import LogFile +from metrics.models import DailyMetricJob + +from metrics.services.resources import extract_celery_queue_name, get_log_files_for_collection_date +from metrics.services.jobs import create_or_update_daily_metric_job +from metrics.tasks.process import task_process_daily_metric_job + +AUTO_REEXECUTE_POLL_INTERVAL_SECONDS = 30 + + +@celery_app.task(bind=True, name=_("[Log Pipeline] 3. Parse Logs (Manual)"), timelimit=-1) +def task_parse_logs( + self, + collections=None, + include_logs_with_error=True, + batch_size=5000, + max_log_files=None, + auto_reexecute=False, + replace=False, + track_errors=False, + from_date=None, + until_date=None, + days_to_go_back=None, + queue_name=None, + user_id=None, + username=None, + skip_log_hashes=None, + robots_source=None, +): + if replace: + raise ValueError( + "replace=True is not supported. Recompute requires deleting/recreating " + "the affected day or period first." 
+ ) + + from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back) + from_date_obj = get_date_obj(from_date) + until_date_obj = get_date_obj(until_date) + enqueued_jobs = 0 + reached_max_log_files = False + enqueued_wave_job_ids = [] + claimed_status_filters = list(_build_parse_status_filters(include_logs_with_error)) + skip_log_hashes = set(skip_log_hashes or []) + + for collection in collections or Collection.acron3_list(): + collection_obj = Collection.objects.filter(acron3=collection).first() + if not collection_obj: + continue + + access_dates = _find_access_dates( + collection=collection_obj, + from_date=from_date, + until_date=until_date, + from_date_obj=from_date_obj, + until_date_obj=until_date_obj, + status_filters=claimed_status_filters, + skip_log_hashes=skip_log_hashes, + ) + + for access_date in access_dates: + log_files = get_log_files_for_collection_date( + collection=collection_obj, + access_date=access_date, + status_filters=claimed_status_filters, + ) + log_files = [log_file for log_file in log_files if log_file.hash not in skip_log_hashes] + if not log_files: + continue + + job = create_or_update_daily_metric_job( + collection=collection_obj, + access_date=access_date, + log_files=log_files, + ) + if job.status == DailyMetricJob.STATUS_EXPORTED: + continue + + task_process_daily_metric_job.apply_async( + args=(job.pk, track_errors, user_id, username, robots_source), + queue=queue_name or extract_celery_queue_name(collection), + ) + enqueued_wave_job_ids.append(job.pk) + enqueued_jobs += 1 + if max_log_files and enqueued_jobs >= max_log_files: + reached_max_log_files = True + break + + if reached_max_log_files: + break + + auto_reexecution_enqueued = _schedule_parse_logs_reexecution( + should_reexecute=auto_reexecute and reached_max_log_files and bool(enqueued_wave_job_ids), + wave_job_ids=enqueued_wave_job_ids, + collections=collections, + include_logs_with_error=include_logs_with_error, + batch_size=batch_size, + max_log_files=max_log_files, + auto_reexecute=auto_reexecute, + replace=replace, + track_errors=track_errors, + from_date=from_date, + until_date=until_date, + days_to_go_back=days_to_go_back, + queue_name=queue_name, + user_id=user_id, + username=username, + skip_log_hashes=sorted(skip_log_hashes), + robots_source=robots_source, + ) + + return { + "enqueued_logs": enqueued_jobs, + "enqueued_jobs": enqueued_jobs, + "reached_max_log_files": reached_max_log_files, + "auto_reexecution_enqueued": auto_reexecution_enqueued, + } + + +def _build_parse_status_filters(include_logs_with_error): + status_filters = [choices.LOG_FILE_STATUS_QUEUED] + if include_logs_with_error: + status_filters.append(choices.LOG_FILE_STATUS_ERROR) + return tuple(status_filters) + + +def _find_access_dates( + collection, + from_date, + until_date, + from_date_obj, + until_date_obj, + status_filters, + skip_log_hashes, +): + date_queryset = ( + LogFile.objects.filter( + status__in=status_filters, + collection=collection, + date__gte=from_date_obj, + date__lte=until_date_obj, + ) + .exclude(hash__in=skip_log_hashes) + .values_list("date", flat=True) + .distinct() + .order_by("date") + ) + + access_dates = set() + for value in list(date_queryset): + access_date = value if hasattr(value, "isoformat") else get_date_obj(value) + if access_date and from_date_obj <= access_date <= until_date_obj: + access_dates.add(access_date) + return sorted(access_dates) + + +def _schedule_parse_logs_reexecution( + should_reexecute, + wave_job_ids, + collections, + 
include_logs_with_error, + batch_size, + max_log_files, + auto_reexecute, + replace, + track_errors, + from_date, + until_date, + days_to_go_back, + queue_name, + user_id, + username, + skip_log_hashes, + robots_source=None, +): + if not should_reexecute: + return False + + kwargs = { + "wave_job_ids": wave_job_ids, + "collections": collections, + "include_logs_with_error": include_logs_with_error, + "batch_size": batch_size, + "max_log_files": max_log_files, + "auto_reexecute": auto_reexecute, + "replace": replace, + "track_errors": track_errors, + "from_date": from_date, + "until_date": until_date, + "days_to_go_back": days_to_go_back, + "queue_name": queue_name, + "user_id": user_id, + "username": username, + "skip_log_hashes": skip_log_hashes, + "poll_interval_seconds": AUTO_REEXECUTE_POLL_INTERVAL_SECONDS, + } + if robots_source is not None: + kwargs["robots_source"] = robots_source + + task_wait_parse_logs_wave.apply_async(kwargs=kwargs) + return True + + +@celery_app.task(bind=True, name=_("[Metrics] Wait Parse Logs Wave"), timelimit=-1) +def task_wait_parse_logs_wave( + self, + wave_job_ids=None, + collections=None, + include_logs_with_error=True, + batch_size=5000, + max_log_files=None, + auto_reexecute=False, + replace=False, + track_errors=False, + from_date=None, + until_date=None, + days_to_go_back=None, + queue_name=None, + user_id=None, + username=None, + skip_log_hashes=None, + poll_interval_seconds=AUTO_REEXECUTE_POLL_INTERVAL_SECONDS, + robots_source=None, + wave_log_hashes=None, +): + wave_job_ids = wave_job_ids or wave_log_hashes or [] + if DailyMetricJob.objects.filter( + pk__in=wave_job_ids, + status__in=[DailyMetricJob.STATUS_PENDING, DailyMetricJob.STATUS_EXPORTING], + ).exists(): + kwargs = { + "wave_job_ids": wave_job_ids, + "collections": collections, + "include_logs_with_error": include_logs_with_error, + "batch_size": batch_size, + "max_log_files": max_log_files, + "auto_reexecute": auto_reexecute, + "replace": replace, + "track_errors": track_errors, + "from_date": from_date, + "until_date": until_date, + "days_to_go_back": days_to_go_back, + "queue_name": queue_name, + "user_id": user_id, + "username": username, + "skip_log_hashes": skip_log_hashes, + "poll_interval_seconds": poll_interval_seconds, + } + if robots_source is not None: + kwargs["robots_source"] = robots_source + + task_wait_parse_logs_wave.apply_async( + kwargs=kwargs, + countdown=poll_interval_seconds, + ) + return {"wave_completed": False, "reexecution_enqueued": False} + + kwargs = { + "collections": collections, + "include_logs_with_error": include_logs_with_error, + "batch_size": batch_size, + "max_log_files": max_log_files, + "auto_reexecute": auto_reexecute, + "replace": replace, + "track_errors": track_errors, + "from_date": from_date, + "until_date": until_date, + "days_to_go_back": days_to_go_back, + "queue_name": queue_name, + "user_id": user_id, + "username": username, + "skip_log_hashes": skip_log_hashes, + } + if robots_source is not None: + kwargs["robots_source"] = robots_source + + task_parse_logs.apply_async(kwargs=kwargs) + return {"wave_completed": True, "reexecution_enqueued": True} diff --git a/metrics/tasks/process.py b/metrics/tasks/process.py new file mode 100644 index 0000000..ecdc7a5 --- /dev/null +++ b/metrics/tasks/process.py @@ -0,0 +1,63 @@ +import logging + +from django.utils.translation import gettext as _ + +from config import celery_app +from core.utils.request_utils import _get_user +from metrics.models import DailyMetricJob + +from metrics.services.jobs 
import acquire_daily_metric_job, mark_daily_metric_job_exported, mark_daily_metric_job_failed +from metrics.services.export import export_daily_metric_payload, load_daily_metric_payload +from metrics.services.resources import build_search_client, fetch_required_resources +from metrics.services.parser import process_daily_metric_job + + +@celery_app.task(bind=True, name=_("[Metrics] Process Daily Job"), timelimit=-1) +def task_process_daily_metric_job( + self, + job_id, + track_errors=False, + user_id=None, + username=None, + robots_source=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + + try: + job = acquire_daily_metric_job(job_id) + except DailyMetricJob.DoesNotExist: + logging.error("Daily metric job %s does not exist.", job_id) + return + + if not job: + return + + try: + payload = load_daily_metric_payload(job) + if payload is None or not job.payload_hash: + robots_list, mmdb = fetch_required_resources(robot_source=robots_source) + if not robots_list or not mmdb: + raise RuntimeError("Required parsing resources are not available.") + payload = process_daily_metric_job( + job=job, + robots_list=robots_list, + mmdb=mmdb, + track_errors=track_errors, + ) + job.refresh_from_db() + + search_client = build_search_client() + if not search_client.ping(): + raise RuntimeError("OpenSearch client is not available.") + + export_daily_metric_payload( + search_client=search_client, + job=job, + payload=payload, + ) + except Exception as exc: + logging.error("Failed to process daily metric job %s: %s", job_id, exc) + mark_daily_metric_job_failed(job, exc) + return + + mark_daily_metric_job_exported(job, user=user) diff --git a/metrics/tasks/resume.py b/metrics/tasks/resume.py new file mode 100644 index 0000000..c0fe705 --- /dev/null +++ b/metrics/tasks/resume.py @@ -0,0 +1,166 @@ +import logging + +from django.utils import timezone +from django.utils.translation import gettext as _ + +from config import celery_app +from core.utils.date_utils import get_date_obj, get_date_range_str +from core.utils.request_utils import _get_user +from log_manager import choices +from log_manager.models import LogFile +from metrics.models import DailyMetricJob + +from metrics.services.jobs import create_or_update_daily_metric_job, release_stale_daily_metric_jobs +from metrics.services.resources import extract_celery_queue_name, get_log_files_for_collection_date +from metrics.services.parser import is_stale_parsing_log, requeue_stale_parsing_log +from metrics.counter import parser + +from .parse import task_parse_logs +from .process import task_process_daily_metric_job + + +@celery_app.task(bind=True, name=_("[Metrics] Resume Log Exports"), timelimit=-1) +def task_resume_log_exports( + self, + collections=None, + from_date=None, + until_date=None, + days_to_go_back=None, + stale_after_minutes=60, + queue_name=None, + user_id=None, + username=None, + robots_source=None, +): + _get_user(self.request, username=username, user_id=user_id) + + from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back) + from_date_obj = get_date_obj(from_date) + until_date_obj = get_date_obj(until_date) + + released_stale_jobs = release_stale_daily_metric_jobs( + collections=collections, + from_date=from_date_obj, + until_date=until_date_obj, + stale_after_minutes=stale_after_minutes, + ) + queryset = DailyMetricJob.objects.filter( + status__in=[DailyMetricJob.STATUS_PENDING, DailyMetricJob.STATUS_ERROR], + access_date__gte=from_date_obj, + access_date__lte=until_date_obj, + 
).select_related("collection").order_by("access_date", "collection__acron3") + if collections: + queryset = queryset.filter(collection__acron3__in=collections) + + resumed_jobs = 0 + for job in queryset: + log_files = get_log_files_for_collection_date( + collection=job.collection, + access_date=job.access_date, + status_filters=[ + choices.LOG_FILE_STATUS_QUEUED, + choices.LOG_FILE_STATUS_ERROR, + ], + ) + if log_files: + job = create_or_update_daily_metric_job( + collection=job.collection, + access_date=job.access_date, + log_files=log_files, + ) + elif not (job.storage_path and job.payload_hash): + logging.warning( + "Skipping daily metric job %s: no queued/error logs or stored payload.", + job.pk, + ) + continue + + if job.status == DailyMetricJob.STATUS_EXPORTED: + continue + + task_process_daily_metric_job.apply_async( + args=(job.pk, False, user_id, username, robots_source), + queue=queue_name or extract_celery_queue_name(job.collection.acron3), + ) + resumed_jobs += 1 + + logging.info( + "Resumed daily metric jobs for %s day(s); released %s stale job(s) at %s.", + resumed_jobs, + released_stale_jobs, + timezone.now(), + ) + return { + "resumed_logs": resumed_jobs, + "resumed_jobs": resumed_jobs, + "released_stale_batches": released_stale_jobs, + "released_stale_jobs": released_stale_jobs, + } + + +@celery_app.task(bind=True, name=_("[Metrics] Resume Stale Parsing Logs"), timelimit=-1) +def task_resume_stale_parsing_logs( + self, + collections=None, + batch_size=5000, + track_errors=False, + from_date=None, + until_date=None, + days_to_go_back=None, + stale_after_minutes=60, + max_log_files=None, + queue_name=None, + user_id=None, + username=None, + robots_source=None, +): + from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back) + from_date_obj = get_date_obj(from_date) + until_date_obj = get_date_obj(until_date) + + queryset = ( + LogFile.objects.filter(status=choices.LOG_FILE_STATUS_PARSING) + .select_related("collection") + .order_by("validation__probably_date", "path", "hash") + ) + if collections: + queryset = queryset.filter(collection__acron3__in=collections) + + resumed_logs = 0 + for log_file in queryset: + probably_date = parser.extract_date_from_validation_dict(log_file.validation) + if not probably_date or probably_date < from_date_obj or probably_date > until_date_obj: + continue + if not is_stale_parsing_log(log_file, stale_after_minutes=stale_after_minutes): + continue + + requeue_stale_parsing_log(log_file) + resumed_logs += 1 + if max_log_files and resumed_logs >= max_log_files: + break + + apply_kwargs = { + "kwargs": { + "collections": collections, + "include_logs_with_error": True, + "batch_size": batch_size, + "max_log_files": max_log_files, + "auto_reexecute": False, + "replace": False, + "track_errors": track_errors, + "from_date": from_date, + "until_date": until_date, + "days_to_go_back": None, + "queue_name": queue_name, + "user_id": user_id, + "username": username, + "robots_source": robots_source, + } + } + if queue_name: + apply_kwargs["queue"] = queue_name + task_parse_logs.apply_async(**apply_kwargs) + return { + "stale_logs_marked_for_retry": resumed_logs, + "parse_logs_enqueued": True, + } diff --git a/metrics/templates/search/indexes/metrics/top100articles_text.txt b/metrics/templates/search/indexes/metrics/top100articles_text.txt deleted file mode 100644 index ccf5e94..0000000 --- a/metrics/templates/search/indexes/metrics/top100articles_text.txt +++ /dev/null @@ -1,10 +0,0 @@ -{{ object.collection }} -{{ object.key_issn 
}} -{{ object.pid }} -{{ object.yop }} -{{ object.language }} -{{ object.country }} -{{ object.total_item_requests }} -{{ object.total_item_investigations }} -{{ object.unique_item_requests }} -{{ object.unique_item_investigations }} \ No newline at end of file diff --git a/metrics/tests/test_cleanup.py b/metrics/tests/test_cleanup.py new file mode 100644 index 0000000..e08fa9c --- /dev/null +++ b/metrics/tests/test_cleanup.py @@ -0,0 +1,283 @@ +import json +import os +import shutil +import tempfile +import time +from datetime import date +from pathlib import Path +from unittest.mock import patch + +from django.test import TestCase + +from collection.models import Collection +from metrics.models import DailyMetricJob +from metrics.services import daily_payloads + + +class CleanupExportedPayloadsTests(TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls._tmpdir = tempfile.TemporaryDirectory() + cls._patched_root = patch.object( + daily_payloads, + "get_daily_payload_root", + return_value=Path(cls._tmpdir.name), + ) + cls._patched_root.start() + + @classmethod + def tearDownClass(cls): + cls._patched_root.stop() + cls._tmpdir.cleanup() + super().tearDownClass() + + def setUp(self): + self.collection = Collection.objects.create(acron3="books", acron2="bk") + self.other_collection = Collection.objects.create(acron3="scl", acron2="sc") + + self.payload_root = daily_payloads.get_daily_payload_root() + self._clean_temp_dir() + + def _clean_temp_dir(self): + root = self.payload_root + if root.exists(): + for item in root.iterdir(): + if item.is_dir(): + shutil.rmtree(item) + else: + item.unlink() + + def _create_job(self, collection, access_date, status, storage_path, payload_hash): + return DailyMetricJob.objects.create( + collection=collection, + access_date=access_date, + status=status, + storage_path=storage_path, + payload_hash=payload_hash, + ) + + def _write_payload_file(self, storage_path): + resolved = daily_payloads.resolve_storage_path(storage_path) + resolved.parent.mkdir(parents=True, exist_ok=True) + resolved.write_text(json.dumps({"test": True}), encoding="utf-8") + return resolved + + def _set_file_age(self, file_path, days_old): + old_time = time.time() - days_old * 86400 + os.utime(file_path, (old_time, old_time)) + + def test_cleanup_deletes_old_exported_payloads(self): + path = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + resolved = self._write_payload_file(path) + self._set_file_age(resolved, 30) + + self._create_job( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTED, + storage_path=path.as_posix(), + payload_hash="abc", + ) + + result = daily_payloads.cleanup_exported_payloads(older_than_days=7) + + self.assertEqual(result, 1) + self.assertFalse(resolved.exists()) + + def test_cleanup_skips_recent_files(self): + path = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + resolved = self._write_payload_file(path) + + self._create_job( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTED, + storage_path=path.as_posix(), + payload_hash="abc", + ) + + result = daily_payloads.cleanup_exported_payloads(older_than_days=7) + + self.assertEqual(result, 0) + self.assertTrue(resolved.exists()) + + def test_cleanup_skips_non_exported_jobs(self): + statuses = [ + DailyMetricJob.STATUS_PENDING, + DailyMetricJob.STATUS_ERROR, + DailyMetricJob.STATUS_EXPORTING, + ] + paths = [] + for i, status in 
enumerate(statuses): + access_date = date(2012, 3, 10 + i) + path = daily_payloads.build_daily_storage_path( + self.collection, access_date + ) + resolved = self._write_payload_file(path) + self._set_file_age(resolved, 30) + paths.append(resolved) + + self._create_job( + collection=self.collection, + access_date=access_date, + status=status, + storage_path=path.as_posix(), + payload_hash="abc", + ) + + result = daily_payloads.cleanup_exported_payloads(older_than_days=7) + + self.assertEqual(result, 0) + for p in paths: + self.assertTrue(p.exists()) + + def test_cleanup_filters_by_collection(self): + path_books = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + path_scl = daily_payloads.build_daily_storage_path( + self.other_collection, date(2012, 3, 10) + ) + resolved_books = self._write_payload_file(path_books) + resolved_scl = self._write_payload_file(path_scl) + self._set_file_age(resolved_books, 30) + self._set_file_age(resolved_scl, 30) + + self._create_job( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTED, + storage_path=path_books.as_posix(), + payload_hash="abc", + ) + self._create_job( + collection=self.other_collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTED, + storage_path=path_scl.as_posix(), + payload_hash="def", + ) + + result = daily_payloads.cleanup_exported_payloads( + collections=["books"], + older_than_days=7, + ) + + self.assertEqual(result, 1) + self.assertFalse(resolved_books.exists()) + self.assertTrue(resolved_scl.exists()) + + def test_cleanup_deletes_orphan_files(self): + path = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + resolved = self._write_payload_file(path) + self._set_file_age(resolved, 30) + + result = daily_payloads.cleanup_exported_payloads(older_than_days=7) + + self.assertEqual(result, 1) + self.assertFalse(resolved.exists()) + + def test_cleanup_skips_orphan_file_with_old_db_job_not_exported(self): + path = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + resolved = self._write_payload_file(path) + self._set_file_age(resolved, 30) + + self._create_job( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_PENDING, + storage_path=path.as_posix(), + payload_hash="abc", + ) + + result = daily_payloads.cleanup_exported_payloads(older_than_days=7) + + self.assertEqual(result, 0) + self.assertTrue(resolved.exists()) + + def test_cleanup_clears_db_fields_for_exported_jobs(self): + path = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + resolved = self._write_payload_file(path) + self._set_file_age(resolved, 30) + + job = self._create_job( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTED, + storage_path=path.as_posix(), + payload_hash="abc", + ) + + daily_payloads.cleanup_exported_payloads(older_than_days=7) + + job.refresh_from_db() + self.assertEqual(job.storage_path, "") + self.assertEqual(job.payload_hash, "") + + def test_cleanup_with_no_matching_files(self): + result = daily_payloads.cleanup_exported_payloads(older_than_days=7) + self.assertEqual(result, 0) + + def test_cleanup_without_older_than_days_deletes_all(self): + path = daily_payloads.build_daily_storage_path( + self.collection, date(2012, 3, 10) + ) + resolved = self._write_payload_file(path) + + self._create_job( + collection=self.collection, + access_date=date(2012, 3, 
10), + status=DailyMetricJob.STATUS_EXPORTED, + storage_path=path.as_posix(), + payload_hash="abc", + ) + + result = daily_payloads.cleanup_exported_payloads(older_than_days=0) + + self.assertEqual(result, 1) + self.assertFalse(resolved.exists()) + + +class CleanupTaskTests(TestCase): + def setUp(self): + self.collection = Collection.objects.create(acron3="books", acron2="bk") + + def test_task_cleanup_daily_payloads_calls_service(self): + with patch("metrics.services.daily_payloads.cleanup_exported_payloads") as mock_cleanup: + mock_cleanup.return_value = 5 + from metrics.tasks import task_cleanup_daily_payloads + + result = task_cleanup_daily_payloads.run( + collections=["books"], + older_than_days=7, + ) + + mock_cleanup.assert_called_once_with( + collections=["books"], + older_than_days=7, + ) + self.assertEqual(result, {"deleted_payloads": 5}) + + def test_task_cleanup_with_defaults(self): + with patch("metrics.services.daily_payloads.cleanup_exported_payloads") as mock_cleanup: + mock_cleanup.return_value = 0 + from metrics.tasks import task_cleanup_daily_payloads + + result = task_cleanup_daily_payloads.run() + + mock_cleanup.assert_called_once_with( + collections=[], + older_than_days=7, + ) + self.assertEqual(result, {"deleted_payloads": 0}) diff --git a/metrics/tests/test_daily_jobs.py b/metrics/tests/test_daily_jobs.py new file mode 100644 index 0000000..f31b410 --- /dev/null +++ b/metrics/tests/test_daily_jobs.py @@ -0,0 +1,162 @@ +from datetime import date, timedelta + +from django.contrib.auth import get_user_model +from django.test import TestCase +from django.utils import timezone +from scielo_usage_counter.values import CONTENT_TYPE_FULL_TEXT, MEDIA_FORMAT_HTML + +from collection.models import Collection +from log_manager import choices +from log_manager.models import LogFile +from metrics.models import DailyMetricJob +from metrics import services + + +class DailyMetricJobServiceTests(TestCase): + def setUp(self): + self.collection = Collection.objects.create(acron3="books", acron2="bk") + + def _log_file(self, hash_value, status=choices.LOG_FILE_STATUS_QUEUED): + return LogFile.objects.create( + hash=hash_value, + path=f"/tmp/{hash_value}.log.gz", + stat_result={}, + status=status, + collection=self.collection, + validation={"probably_date": "2012-03-10"}, + ) + + def test_create_or_update_blocks_implicit_recompute_after_export(self): + first = self._log_file("1" * 32) + second = self._log_file("2" * 32) + DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTED, + input_log_hashes=[first.hash], + storage_path="books/2012/03/2012-03-10.json", + payload_hash="abc", + ) + + with self.assertRaises(RuntimeError): + services.create_or_update_daily_metric_job( + collection=self.collection, + access_date=date(2012, 3, 10), + log_files=[first, second], + ) + + def test_create_or_update_keeps_payload_for_export_retry(self): + log_file = self._log_file("1" * 32, status=choices.LOG_FILE_STATUS_ERROR) + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + input_log_hashes=[log_file.hash], + storage_path="books/2012/03/2012-03-10.json", + payload_hash="abc", + summary={"month_document_count": 1}, + ) + + services.create_or_update_daily_metric_job( + collection=self.collection, + access_date=date(2012, 3, 10), + log_files=[log_file], + ) + + job.refresh_from_db() + self.assertEqual(job.status, DailyMetricJob.STATUS_PENDING) 
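+ # A retried export keeps the previously computed payload reference, hash, and summary, so the day does not need re-parsing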
+ self.assertEqual(job.storage_path, "books/2012/03/2012-03-10.json") + self.assertEqual(job.payload_hash, "abc") + self.assertEqual(job.summary, {"month_document_count": 1}) + + def test_create_or_update_clears_stale_payload_when_inputs_change_before_success(self): + first = self._log_file("1" * 32) + second = self._log_file("2" * 32) + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + input_log_hashes=[first.hash], + storage_path="books/2012/03/2012-03-10.json", + payload_hash="abc", + summary={"month_document_count": 1}, + ) + + services.create_or_update_daily_metric_job( + collection=self.collection, + access_date=date(2012, 3, 10), + log_files=[first, second], + ) + + job.refresh_from_db() + self.assertEqual(job.input_log_hashes, sorted([first.hash, second.hash])) + self.assertEqual(job.storage_path, "") + self.assertEqual(job.payload_hash, "") + self.assertEqual(job.summary, {}) + + def test_release_stale_daily_metric_jobs_marks_logs_for_retry(self): + log_file = self._log_file("1" * 32, status=choices.LOG_FILE_STATUS_PARSING) + DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTING, + input_log_hashes=[log_file.hash], + export_started_at=timezone.now() - timedelta(minutes=120), + ) + + released = services.release_stale_daily_metric_jobs(stale_after_minutes=60) + + log_file.refresh_from_db() + self.assertEqual(released, 1) + self.assertEqual(log_file.status, choices.LOG_FILE_STATUS_ERROR) + self.assertIsNone(log_file.parse_heartbeat_at) + + def test_process_line_discards_invalid_local_datetime_without_raising(self): + class FakeUtm: + def translate(self, url): + return { + "book_id": "q7gtd", + "pid_generic": "book:q7gtd", + "media_language": "en", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + } + + log_file = self._log_file("1" * 32) + results = {} + + is_valid, error = services.process_line( + results=results, + line={ + "url": "/id/q7gtd", + "client_name": "browser", + "client_version": "1.0", + "ip_address": "127.0.0.1", + "country_code": "BR", + "local_datetime": None, + }, + utm=FakeUtm(), + log_file=log_file, + ) + + self.assertFalse(is_valid) + self.assertIsNone(error) + self.assertEqual(results, {}) + + def test_mark_daily_metric_job_exported_records_updated_by(self): + user = get_user_model().objects.create_user( + username="tester", + email="tester@example.org", + password="secret", + ) + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTING, + ) + + services.mark_daily_metric_job_exported(job, user=user) + + job.refresh_from_db() + self.assertEqual(job.status, DailyMetricJob.STATUS_EXPORTED) + self.assertIsNotNone(job.exported_at) diff --git a/metrics/tests/test_index_utils.py b/metrics/tests/test_index_utils.py index 47f1a0e..562fc42 100644 --- a/metrics/tests/test_index_utils.py +++ b/metrics/tests/test_index_utils.py @@ -1,104 +1,894 @@ +import csv import unittest +from datetime import datetime +from pathlib import Path +from tempfile import TemporaryDirectory from scielo_usage_counter.values import ( - MEDIA_FORMAT_UNDEFINED, - MEDIA_FORMAT_PDF, - MEDIA_FORMAT_HTML, - CONTENT_TYPE_UNDEFINED, - CONTENT_TYPE_FULL_TEXT, CONTENT_TYPE_ABSTRACT, + CONTENT_TYPE_FULL_TEXT, + CONTENT_TYPE_UNDEFINED, DEFAULT_SCIELO_ISSN, + MEDIA_FORMAT_HTML, + MEDIA_FORMAT_PDF, + MEDIA_FORMAT_UNDEFINED, ) 
-from metrics.utils import index_utils +from metrics.counter import access, documents as index_docs +from metrics.opensearch.names import generate_month_index_name, generate_year_index_name -class TestIndexUtils(unittest.TestCase): +class TestIndexUtils(unittest.TestCase): def test_is_valid_item_access_data_valid(self): data = { - 'scielo_issn': '1234-5678', - 'pid_v2': 'S0102-67202020000100001', - 'pid_v3': 'jGJccQ7bFdbz6wy3nfXGVdv', - 'media_format': MEDIA_FORMAT_PDF, - 'content_type': CONTENT_TYPE_FULL_TEXT, + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "en", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertTrue(result) def test_is_valid_item_access_data_missing_scielo_issn(self): data = { - 'scielo_issn': '', - 'pid_v2': 'S0102-67202020000100001', - 'pid_v3': 'jGJccQ7bFdbz6wy3nfXGVdv', - 'media_format': MEDIA_FORMAT_PDF, - 'content_type': CONTENT_TYPE_FULL_TEXT, + "scielo_issn": "", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "en", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertFalse(result) + def test_is_valid_item_access_data_valid_book_source(self): + data = { + "source_type": "book", + "source_id": "q7gtd", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_generic": "BOOK:Q7GTD", + "media_language": "en", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + } + result, _ = access.is_valid_item_access_data(data) + self.assertTrue(result) + def test_is_valid_item_access_data_undefined_media_format(self): data = { - 'scielo_issn': '1234-5678', - 'pid_v2': 'S0102-67202020000100001', - 'pid_v3': 'jGJccQ7bFdbz6wy3nfXGVdv', - 'media_format': MEDIA_FORMAT_UNDEFINED, - 'content_type': CONTENT_TYPE_FULL_TEXT, + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "en", + "media_format": MEDIA_FORMAT_UNDEFINED, + "content_type": CONTENT_TYPE_FULL_TEXT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertFalse(result) def test_is_valid_item_access_data_undefined_content_type(self): data = { - 'scielo_issn': '1234-5678', - 'pid_v2': 'S0102-67202020000100001', - 'pid_v3': 'jGJccQ7bFdbz6wy3nfXGVdv', - 'media_format': MEDIA_FORMAT_PDF, - 'content_type': CONTENT_TYPE_UNDEFINED, + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "en", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_UNDEFINED, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertFalse(result) def test_is_valid_item_access_data_missing_pid_v2_and_pid_v3(self): data = { - 'scielo_issn': '1234-5678', - 'pid_v2': '', - 'pid_v3': '', - 'media_format': MEDIA_FORMAT_PDF, - 'content_type': CONTENT_TYPE_FULL_TEXT, + "scielo_issn": "1234-5678", + "pid_v2": "", + "pid_v3": "", + "media_language": "en", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = 
access.is_valid_item_access_data(data) self.assertFalse(result) def test_is_valid_item_access_data_media_format_html(self): data = { - 'scielo_issn': '1234-5678', - 'pid_v2': 'S0102-67202020000100001', - 'pid_v3': 'jGJccQ7bFdbz6wy3nfXGVdv', - 'media_format': MEDIA_FORMAT_HTML, - 'content_type': CONTENT_TYPE_FULL_TEXT, + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "en", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertTrue(result) def test_is_valid_item_access_data_content_type_abstract(self): data = { - 'scielo_issn': '1234-5678', - 'pid_v2': 'S0102-67202020000100001', - 'pid_v3': 'jGJccQ7bFdbz6wy3nfXGVdv', - 'media_format': MEDIA_FORMAT_PDF, - 'content_type': CONTENT_TYPE_ABSTRACT + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "en", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_ABSTRACT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertTrue(result) - def test_is_valid_item_acess_data_dataverse(self): + def test_is_valid_item_access_data_dataset_without_source_or_language_is_valid(self): data = { - 'scielo_issn': DEFAULT_SCIELO_ISSN, - 'pid_v2': None, - 'pid_v3': None, - 'pid_generic': 'DOI:10.48331/SCIELODATA.JLMAIY', - 'media_format': MEDIA_FORMAT_HTML, - 'content_type': CONTENT_TYPE_ABSTRACT, + "document_type": "dataset", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_v2": None, + "pid_v3": None, + "pid_generic": "DOI:10.48331/SCIELODATA.JLMAIY", + "media_language": "un", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_ABSTRACT, } - result, _ = index_utils.is_valid_item_access_data(data) + result, _ = access.is_valid_item_access_data(data) self.assertTrue(result) + + def test_is_valid_item_access_data_missing_media_language_is_invalid(self): + data = { + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_language": "", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, + } + result, _ = access.is_valid_item_access_data(data) + self.assertFalse(result) + + def test_extract_item_access_data_normalizes_source_fields_for_journal(self): + data = access.extract_item_access_data( + "scl", + { + "scielo_issn": "1234-5678", + "pid_v2": "S0102-67202020000100001", + "media_language": "en", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, + "publication_year": "2024", + "journal_main_title": "Journal Title", + "journal_subject_area_capes": ["Health Sciences"], + "journal_subject_area_wos": ["Medicine"], + "journal_acronym": "testjou", + "journal_publisher_name": ["SciELO"], + }, + ) + + self.assertEqual(data["source_type"], "journal") + self.assertEqual(data["source_id"], "1234-5678") + self.assertEqual(data["source_main_title"], "Journal Title") + self.assertEqual(data["source_acronym"], "testjou") + + def test_extract_item_access_data_normalizes_source_fields_for_books(self): + data = access.extract_item_access_data( + "books", + { + "book_id": "q7gtd", + "book_title": "Book Title", + "title_pid_generic": "book:q7gtd", + "pid_generic": "book:q7gtd/chapter:03", + "media_language": "en", + "media_format": MEDIA_FORMAT_HTML, + "content_type": 
CONTENT_TYPE_FULL_TEXT, + "publication_year": "2023", + }, + ) + + self.assertEqual(data["source_type"], "book") + self.assertEqual(data["source_id"], "q7gtd") + self.assertEqual(data["scielo_issn"], DEFAULT_SCIELO_ISSN) + self.assertEqual(data["source_main_title"], "Book Title") + self.assertEqual(data["title_pid_generic"], "BOOK:Q7GTD") + + def test_extract_item_access_data_preserves_access_url_and_free_to_read(self): + data = access.extract_item_access_data( + "books", + { + "book_id": "c2248", + "book_title": "Book Title", + "title_pid_generic": "book:c2248", + "pid_generic": "book:c2248", + "media_language": "pt", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_url": "/id/c2248/pdf/freitas-9788599662830.pdf", + "source_access_type": "free_to_read", + }, + ) + + self.assertEqual(data["access_url"], "/id/c2248/pdf/freitas-9788599662830.pdf") + self.assertEqual(data["counter_access_type"], "Free_To_Read") + + def test_extract_item_access_data_tolerates_malformed_media_language(self): + data = access.extract_item_access_data( + "books", + { + "book_id": "q7gtd", + "pid_generic": "book:q7gtd", + "media_language": "'", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + }, + ) + + self.assertEqual(data["media_language"], "un") + + def test_extract_item_access_data_normalizes_scielo_collection_document_types(self): + preprint = access.extract_item_access_data( + "preprints", + { + "pid_generic": "10.1590/SciELOPreprints.1234", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + }, + ) + dataset = access.extract_item_access_data( + "data", + { + "pid_generic": "10.48331/scielodata.abc123", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_ABSTRACT, + }, + ) + article = access.extract_item_access_data( + "scl", + { + "scielo_issn": "1234-5678", + "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + }, + ) + + self.assertEqual(preprint["source_type"], "preprint_server") + self.assertEqual(preprint["document_type"], "preprint") + self.assertEqual(dataset["source_type"], "data_repository") + self.assertEqual(dataset["document_type"], "dataset") + self.assertEqual(article["source_type"], "journal") + self.assertEqual(article["document_type"], "article") + + def test_update_results_with_item_access_data_stores_source_and_periods(self): + results = {} + item_access_data = { + "collection": "books", + "source_type": "book", + "source_id": "q7gtd", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:Q7GTD", + "title_pid_generic": "BOOK:Q7GTD", + "media_language": "en", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + "publication_year": "2023", + "source_main_title": "Book Title", + "source_subject_area_capes": [], + "source_subject_area_wos": [], + "source_acronym": None, + "source_publisher_name": ["SciELO Books"], + } + line = { + "client_name": "browser", + "client_version": "1.0", + "ip_address": "127.0.0.1", + "country_code": "BR", + "local_datetime": datetime(2024, 1, 15, 10, 0, 5), + } + + access.update_results_with_item_access_data(results, item_access_data, line) + + self.assertEqual(len(results), 1) + result = next(iter(results.values())) + self.assertEqual(result["source"]["source_type"], "book") + self.assertEqual(result["source"]["source_id"], "q7gtd") + self.assertEqual(result["source"]["main_title"], "Book Title") + 
self.assertEqual(result["access_date"], "2024-01-15") + self.assertEqual(result["access_month"], "202401") + self.assertEqual(result["access_year"], "2024") + self.assertEqual(result["access_country_code"], "BR") + self.assertEqual(result["content_language"], "en") + self.assertEqual(result["title_pid_generic"], "BOOK:Q7GTD") + self.assertIn("user_session_id", result) + + def test_update_results_with_item_access_data_rejects_invalid_local_datetime(self): + results = {} + item_access_data = { + "collection": "books", + "source_type": "book", + "source_id": "q7gtd", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_generic": "BOOK:Q7GTD", + "media_language": "en", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + } + line = { + "client_name": "browser", + "client_version": "1.0", + "ip_address": "127.0.0.1", + "country_code": "BR", + "local_datetime": None, + } + + with self.assertRaises(ValueError): + access.update_results_with_item_access_data(results, item_access_data, line) + + self.assertEqual(results, {}) + + def test_update_results_with_item_access_data_does_not_expand_book_into_segments(self): + results = {} + item_access_data = { + "collection": "books", + "source_type": "book", + "source_id": "c2248", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248", + "title_pid_generic": "BOOK:C2248", + "segment_pid_generics": [ + "BOOK:C2248/CHAPTER:00", + "BOOK:C2248/CHAPTER:01", + "BOOK:C2248/CHAPTER:02", + ], + "media_language": "pt", + "media_format": MEDIA_FORMAT_PDF, + "content_type": CONTENT_TYPE_FULL_TEXT, + "publication_year": "2018", + "source_main_title": "C2248 Book", + } + line = { + "client_name": "browser", + "client_version": "1.0", + "ip_address": "127.0.0.1", + "country_code": "BR", + "local_datetime": datetime(2024, 1, 15, 10, 0, 5), + } + + access.update_results_with_item_access_data(results, item_access_data, line) + + self.assertEqual(len(results), 1) + result = list(results.values())[0] + self.assertEqual(result["pid_generic"], "BOOK:C2248") + + def test_double_click_filter_uses_url_bucket_for_same_item(self): + results = {} + item_access_data = { + "collection": "books", + "source_type": "book", + "source_id": "c2248", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248/CHAPTER:03", + "title_pid_generic": "BOOK:C2248", + "media_language": "pt", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + "publication_year": "2018", + "source_main_title": "C2248 Book", + } + base_line = { + "client_name": "browser", + "client_version": "1.0", + "ip_address": "127.0.0.1", + "country_code": "BR", + } + + access.update_results_with_item_access_data( + results, + item_access_data, + { + **base_line, + "local_datetime": datetime(2024, 1, 15, 10, 0, 5), + "url": "/id/c2248/03", + }, + ) + access.update_results_with_item_access_data( + results, + item_access_data, + { + **base_line, + "local_datetime": datetime(2024, 1, 15, 10, 0, 20), + "url": "https://books.scielo.org/id/c2248/epub/03.html?x=1", + }, + ) + + raw = next(iter(results.values())) + self.assertEqual( + set(raw["click_timestamps_by_url"]), + {"/id/c2248/03", "/id/c2248/epub/03.html"}, + ) + + metrics_data = index_docs.convert_raw_results_to_index_documents(results) + month_item = metrics_data["month"]["books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"] + + self.assertEqual(month_item["total_requests"], 2) + self.assertEqual(month_item["unique_requests"], 
1) + + def test_double_click_filter_collapses_same_url_within_30_seconds(self): + results = {} + item_access_data = { + "collection": "books", + "source_type": "book", + "source_id": "c2248", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248/CHAPTER:03", + "title_pid_generic": "BOOK:C2248", + "media_language": "pt", + "media_format": MEDIA_FORMAT_HTML, + "content_type": CONTENT_TYPE_FULL_TEXT, + "publication_year": "2018", + "source_main_title": "C2248 Book", + } + base_line = { + "client_name": "browser", + "client_version": "1.0", + "ip_address": "127.0.0.1", + "country_code": "BR", + "url": "/id/c2248/03?from=search", + } + + access.update_results_with_item_access_data( + results, + item_access_data, + {**base_line, "local_datetime": datetime(2024, 1, 15, 10, 0, 5)}, + ) + access.update_results_with_item_access_data( + results, + item_access_data, + {**base_line, "local_datetime": datetime(2024, 1, 15, 10, 0, 20)}, + ) + + raw = next(iter(results.values())) + self.assertEqual( + raw["click_timestamps_by_url"], + {"/id/c2248/03": {"00:05": 1, "00:20": 1}}, + ) + + metrics_data = index_docs.convert_raw_results_to_index_documents(results) + month_item = metrics_data["month"]["books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"] + + self.assertEqual(month_item["total_requests"], 1) + self.assertEqual(month_item["unique_requests"], 1) + + def test_generate_index_names_for_year_and_month(self): + self.assertEqual( + generate_year_index_name("usage", "scl", "2024-01-15"), + "usage_yearly_scl_2024", + ) + self.assertEqual( + generate_month_index_name("usage", "scl", "2024-01-15"), + "usage_monthly_scl_2024", + ) + self.assertEqual( + generate_year_index_name("usage", "books", "2024-01-15"), + "usage_yearly_books", + ) + self.assertEqual( + generate_month_index_name("usage", "books", "2024-01-15"), + "usage_monthly_books", + ) + + def test_convert_raw_results_to_index_documents_creates_month_and_year_views(self): + data = { + "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|browser|1.0|127.0.0.1|BR|en|html|full_text": { + "collection": "books", + "source_key": "q7gtd", + "document_type": "chapter", + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:Q7GTD/CHAPTER:03", + "title_pid_generic": "BOOK:Q7GTD", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "en", + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_month": "202401", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "q7gtd", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "main_title": "Book Title", + "identifiers": { + "book_id": "q7gtd", + "isbn": "9788578791889", + }, + "city": "Sao Paulo", + "country": "BR", + "subject_area_capes": [], + "subject_area_wos": [], + "acronym": None, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2023", + } + } + + metrics_data = index_docs.convert_raw_results_to_index_documents(data) + + self.assertEqual(set(metrics_data.keys()), {"month", "year"}) + self.assertEqual(len(metrics_data["month"]), 2) + self.assertEqual(len(metrics_data["year"]), 2) + + month_item = metrics_data["month"]["books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|2024-01|Open|Regular|2023"] + self.assertEqual(month_item["access_month"], "2024-01") + self.assertNotIn("access_country_code", month_item) + self.assertNotIn("content_language", month_item) + self.assertEqual(month_item["document_type"], "chapter") + 
self.assertEqual(month_item["metric_scope"], "item") + self.assertEqual(month_item["counter_data_type"], "Book_Segment") + self.assertEqual(month_item["title_pid_generic"], "BOOK:Q7GTD") + self.assertEqual(month_item["total_requests"], 1) + self.assertEqual(month_item["unique_requests"], 1) + self.assertNotIn("scielo_issn", month_item["source"]) + self.assertEqual(month_item["source"]["identifiers"]["book_id"], "q7gtd") + self.assertEqual(month_item["source"]["publisher"], ["SciELO Books"]) + + month_title = metrics_data["month"]["title|books|q7gtd|||BOOK:Q7GTD|2024-01|Open|Regular|2023"] + self.assertEqual(month_title["document_type"], "book") + self.assertEqual(month_title["metric_scope"], "title") + self.assertEqual(month_title["counter_data_type"], "Book") + self.assertEqual(month_title["pid_generic"], "BOOK:Q7GTD") + self.assertEqual(month_title["total_requests"], 1) + self.assertEqual(month_title["total_investigations"], 1) + self.assertEqual(month_title["unique_requests"], 1) + self.assertEqual(month_title["unique_investigations"], 1) + + year_item = metrics_data["year"][ + "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|en|BR|2024|Open|Regular|2023" + ] + self.assertEqual(year_item["access_year"], "2024") + self.assertEqual(year_item["access_country_code"], "BR") + self.assertEqual(year_item["content_language"], "en") + self.assertEqual(year_item["metric_scope"], "item") + self.assertEqual(year_item["total_requests"], 1) + + year_title = metrics_data["year"][ + "title|books|q7gtd|||BOOK:Q7GTD|en|BR|2024|Open|Regular|2023" + ] + self.assertEqual(year_title["metric_scope"], "title") + self.assertEqual(year_title["total_requests"], 1) + self.assertEqual(year_title["total_investigations"], 1) + self.assertEqual(year_title["unique_requests"], 1) + self.assertEqual(year_title["unique_investigations"], 1) + + def test_convert_raw_results_to_index_documents_maps_counter_data_types(self): + data = { + "preprints|scielo-preprints|||10.1590/SCIELOPREPRINTS.1234|sess|BR|un|html|full_text": { + "collection": "preprints", + "source_key": "scielo-preprints", + "document_type": "preprint", + "pid_generic": "10.1590/SCIELOPREPRINTS.1234", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "un", + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_year": "2024", + "source": { + "source_type": "preprint_server", + "source_id": "scielo-preprints", + "main_title": "SciELO Preprints", + }, + "publication_year": "2024", + }, + "data|scielo-data|||10.48331/SCIELODATA.ABC123|sess|BR|un|html|abstract": { + "collection": "data", + "source_key": "scielo-data", + "document_type": "dataset", + "pid_generic": "10.48331/SCIELODATA.ABC123", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "un", + "content_type": CONTENT_TYPE_ABSTRACT, + "access_date": "2024-01-15", + "access_year": "2024", + "source": { + "source_type": "data_repository", + "source_id": "scielo-data", + "main_title": "SciELO Data", + }, + "publication_year": "2024", + }, + } + + metrics_data = index_docs.convert_raw_results_to_index_documents(data) + preprint_doc = metrics_data["month"][ + "preprints|scielo-preprints|||10.1590/SCIELOPREPRINTS.1234|2024-01|Open|Regular|2024" + ] + dataset_doc = metrics_data["month"][ + "data|scielo-data|||10.48331/SCIELODATA.ABC123|2024-01|Open|Regular|2024" + ] + + 
self.assertEqual(preprint_doc["counter_data_type"], "Article") + self.assertEqual(preprint_doc["scielo_document_type"], "preprint") + self.assertEqual(preprint_doc["article_version"], "Preprint") + self.assertEqual(dataset_doc["counter_data_type"], "Dataset") + self.assertIsNone(dataset_doc["article_version"]) + + def test_convert_raw_results_to_index_documents_dedupes_book_unique_item_across_formats(self): + data = { + "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|html|full_text": { + "collection": "books", + "source_key": "c2248", + "document_type": "chapter", + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248/CHAPTER:03", + "title_pid_generic": "BOOK:C2248", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "pt", + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_month": "202401", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "c2248", + "main_title": "C2248 Book", + "identifiers": {"book_id": "c2248", "isbn": "9788599662830"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2018", + }, + "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|pdf|full_text": { + "collection": "books", + "source_key": "c2248", + "document_type": "chapter", + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248/CHAPTER:03", + "title_pid_generic": "BOOK:C2248", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:45": 1}, + "access_country_code": "BR", + "content_language": "pt", + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_month": "202401", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "c2248", + "main_title": "C2248 Book", + "identifiers": {"book_id": "c2248", "isbn": "9788599662830"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2018", + }, + } + + metrics_data = index_docs.convert_raw_results_to_index_documents(data) + + month_item = metrics_data["month"]["books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"] + month_title = metrics_data["month"]["title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"] + + self.assertEqual(month_item["total_requests"], 2) + self.assertEqual(month_item["total_investigations"], 2) + self.assertEqual(month_item["unique_requests"], 1) + self.assertEqual(month_item["unique_investigations"], 1) + self.assertEqual(month_title["unique_requests"], 1) + self.assertEqual(month_title["unique_investigations"], 1) + + def test_convert_raw_results_to_index_documents_skips_book_landing_page_from_item_scope(self): + data = { + "books|c2248|||BOOK:C2248|sess|BR|pt|html|abstract": { + "collection": "books", + "source_key": "c2248", + "document_type": "book", + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248", + "title_pid_generic": "BOOK:C2248", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "pt", + "content_type": CONTENT_TYPE_ABSTRACT, + "access_date": "2024-01-15", + "access_month": "202401", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "c2248", + "main_title": "C2248 Book", + "identifiers": {"book_id": "c2248", "isbn": "9788599662830"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2018", + }, + } + + metrics_data = index_docs.convert_raw_results_to_index_documents(data) + + 
self.assertEqual( + set(metrics_data["month"].keys()), + {"title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"}, + ) + self.assertEqual( + set(metrics_data["year"].keys()), + {"title|books|c2248|||BOOK:C2248|pt|BR|2024|Open|Regular|2018"}, + ) + + def test_convert_raw_results_to_index_documents_counts_whole_book_without_segments_as_book_segment(self): + data = { + "books|c2248|||BOOK:C2248|sess|BR|pt|pdf|full_text": { + "collection": "books", + "source_key": "c2248", + "document_type": "book", + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248", + "title_pid_generic": "BOOK:C2248", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "pt", + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_month": "202401", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "c2248", + "main_title": "C2248 Book", + "identifiers": {"book_id": "c2248"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2018", + }, + } + + metrics_data = index_docs.convert_raw_results_to_index_documents(data) + month_item = metrics_data["month"]["books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"] + month_title = metrics_data["month"]["title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"] + + self.assertEqual(month_item["counter_data_type"], "Book_Segment") + self.assertEqual(month_item["metric_scope"], "item") + self.assertEqual(month_title["counter_data_type"], "Book") + self.assertEqual(month_title["metric_scope"], "title") + + def test_convert_raw_results_aggregates_multiple_chapters_correctly(self): + """Test that accessing multiple chapters creates correct title-level totals""" + data = { + "books|q7gtd|||BOOK:Q7GTD/CHAPTER:01|session1|BR|en|html|full_text": { + "collection": "books", + "source_key": "q7gtd", + "document_type": "chapter", + "pid_generic": "BOOK:Q7GTD/CHAPTER:01", + "title_pid_generic": "BOOK:Q7GTD", + "user_session_id": "session1", + "click_timestamps": {"00:05": 1}, + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "q7gtd", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "main_title": "Book Title", + "identifiers": {"book_id": "q7gtd"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2023", + }, + "books|q7gtd|||BOOK:Q7GTD/CHAPTER:02|session1|BR|en|html|full_text": { + "collection": "books", + "source_key": "q7gtd", + "document_type": "chapter", + "pid_generic": "BOOK:Q7GTD/CHAPTER:02", + "title_pid_generic": "BOOK:Q7GTD", + "user_session_id": "session1", # SAME SESSION + "click_timestamps": {"00:10": 1}, + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "q7gtd", + "scielo_issn": DEFAULT_SCIELO_ISSN, + "main_title": "Book Title", + "identifiers": {"book_id": "q7gtd"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2023", + }, + } + + metrics_data = index_docs.convert_raw_results_to_index_documents(data) + + # Should have 2 item documents (one per chapter) + 2 title documents (month and year) + self.assertEqual(len(metrics_data["month"]), 3) # 2 items + 1 title + self.assertEqual(len(metrics_data["year"]), 3) # 2 items + 1 title + + # Each item should have total=1, unique=1 + month_item_1 = metrics_data["month"]["books|q7gtd|||BOOK:Q7GTD/CHAPTER:01|2024-01|Open|Regular|2023"] + 
self.assertEqual(month_item_1["total_requests"], 1) + self.assertEqual(month_item_1["unique_requests"], 1) + + month_item_2 = metrics_data["month"]["books|q7gtd|||BOOK:Q7GTD/CHAPTER:02|2024-01|Open|Regular|2023"] + self.assertEqual(month_item_2["total_requests"], 1) + self.assertEqual(month_item_2["unique_requests"], 1) + + # Title should have total=2 (sum of both chapters) + # Title unique should be 1 (same session accessed book, counted once) + month_title = metrics_data["month"]["title|books|q7gtd|||BOOK:Q7GTD|2024-01|Open|Regular|2023"] + self.assertEqual(month_title["total_requests"], 2) + self.assertEqual(month_title["total_investigations"], 2) + self.assertEqual(month_title["unique_requests"], 1) + self.assertEqual(month_title["unique_investigations"], 1) + + def test_export_book_r51_monthly_metrics_writes_counter_title_columns(self): + from metrics.management.commands.export_book_r51_monthly_metrics import Command + + command = Command() + monthly_documents = command._build_monthly_documents( + { + "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|pdf|full_text": { + "collection": "books", + "source_key": "c2248", + "document_type": "chapter", + "pid_v2": None, + "pid_v3": None, + "pid_generic": "BOOK:C2248/CHAPTER:03", + "title_pid_generic": "BOOK:C2248", + "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10", + "click_timestamps": {"00:05": 1}, + "access_country_code": "BR", + "content_language": "pt", + "content_type": CONTENT_TYPE_FULL_TEXT, + "access_date": "2024-01-15", + "access_year": "2024", + "source": { + "source_type": "book", + "source_id": "c2248", + "main_title": "C2248 Book", + "identifiers": {"book_id": "c2248"}, + "publisher_name": ["SciELO Books"], + }, + "publication_year": "2018", + } + } + ) + + with TemporaryDirectory() as tmpdir: + title_path = Path(tmpdir) / "title.csv" + command._write_title_csv(title_path, monthly_documents["title"]) + + with title_path.open(newline="") as fh: + reader = csv.DictReader(fh) + rows = list(reader) + + self.assertEqual( + reader.fieldnames, + [ + "year_month", + "title_pid_generic", + "document_type", + "total_item_requests", + "total_item_investigations", + "unique_title_requests", + "unique_title_investigations", + ], + ) + self.assertNotIn("total_title_requests", reader.fieldnames) + self.assertEqual(rows[0]["year_month"], "2024-01") + self.assertEqual(rows[0]["total_item_requests"], "1") + self.assertEqual(rows[0]["unique_title_requests"], "1") diff --git a/metrics/tests/test_opensearch.py b/metrics/tests/test_opensearch.py new file mode 100644 index 0000000..80586f9 --- /dev/null +++ b/metrics/tests/test_opensearch.py @@ -0,0 +1,92 @@ +from unittest import TestCase +from unittest.mock import Mock, patch + +from django.test import override_settings + +from metrics import opensearch + + +class OpenSearchUsageClientTests(TestCase): + @patch.object(opensearch.OpenSearchUsageClient, "get_opensearch_client") + def test_create_index_sends_mappings_in_request_body(self, mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + + client = opensearch.OpenSearchUsageClient(url="https://example.org:9200") + client.create_index( + index_name="usage_monthly_books_202506", + mappings=opensearch.MONTH_INDEX_MAPPINGS, + ) + + mock_client.indices.create.assert_called_once_with( + index="usage_monthly_books_202506", + body={ + "settings": {"index": {"number_of_replicas": 0}}, + "mappings": opensearch.MONTH_INDEX_MAPPINGS, + }, + ) + + @override_settings( + OPENSEARCH_VERIFY_CERTS=True, + 
OPENSEARCH_BASIC_AUTH=None, + OPENSEARCH_API_KEY=None, + ) + @patch("metrics.opensearch.client.OpenSearch") + def test_verify_certs_false_explicitly_overrides_settings(self, mock_opensearch): + opensearch.OpenSearchUsageClient( + url="https://example.org:9200", + verify_certs=False, + ) + + mock_opensearch.assert_called_once_with( + "https://example.org:9200", + verify_certs=False, + ) + + def test_get_index_mappings_returns_books_specific_mappings(self): + self.assertIs( + opensearch.get_index_mappings("books", "month"), + opensearch.BOOKS_MONTH_INDEX_MAPPINGS, + ) + self.assertIs( + opensearch.get_index_mappings("books", "year"), + opensearch.BOOKS_YEAR_INDEX_MAPPINGS, + ) + self.assertIn("metric_scope", opensearch.BOOKS_MONTH_INDEX_MAPPINGS["properties"]) + self.assertIn("counter_data_type", opensearch.BOOKS_YEAR_INDEX_MAPPINGS["properties"]) + self.assertIn("title_pid_generic", opensearch.BOOKS_YEAR_INDEX_MAPPINGS["properties"]) + self.assertIn("applied_jobs", opensearch.BOOKS_MONTH_INDEX_MAPPINGS["properties"]) + + @patch("metrics.opensearch.client.helpers.bulk") + @patch.object(opensearch.OpenSearchUsageClient, "get_opensearch_client") + def test_increment_documents_for_daily_job_uses_applied_jobs( + self, + mock_get_client, + mock_bulk, + ): + mock_get_client.return_value = Mock() + client = opensearch.OpenSearchUsageClient(url="https://example.org:9200") + + client.increment_documents_for_daily_job( + index_name="usage_monthly_books_202506", + documents={ + "doc-1": { + "collection": "books", + "pid": "BOOK:WD", + "pid_generic": "BOOK:WD", + "access_date": "2025-06-03", + "total_requests": 3, + "total_investigations": 4, + "unique_requests": 2, + "unique_investigations": 3, + } + }, + job_id="books|2025-06-03|abc123", + ) + + actions = list(mock_bulk.call_args.args[1]) + self.assertEqual(len(actions), 1) + action = actions[0] + self.assertEqual(action["_op_type"], "update") + self.assertEqual(action["script"]["params"]["job_id"], "books|2025-06-03|abc123") + self.assertEqual(action["upsert"], {"applied_jobs": []}) diff --git a/metrics/tests/test_tasks.py b/metrics/tests/test_tasks.py new file mode 100644 index 0000000..932944f --- /dev/null +++ b/metrics/tests/test_tasks.py @@ -0,0 +1,268 @@ +from datetime import date, timedelta +from unittest.mock import patch + +from django.test import TestCase +from django.utils import timezone + +from collection.models import Collection +from log_manager import choices +from log_manager.models import LogFile +from metrics import tasks +from metrics.models import DailyMetricJob + + +class ParseLogsTaskTests(TestCase): + def setUp(self): + self.collection = Collection.objects.create(acron3="books", acron2="bk") + + def _log_file(self, hash_value, probably_date, status=choices.LOG_FILE_STATUS_QUEUED): + return LogFile.objects.create( + hash=hash_value, + path=f"/tmp/{hash_value}.log.gz", + stat_result={}, + status=status, + collection=self.collection, + date=date.fromisoformat(probably_date), + validation={"probably_date": probably_date}, + ) + + def test_task_parse_logs_enqueues_one_daily_job_per_collection_date(self): + first = self._log_file("1" * 32, "2012-03-10") + second = self._log_file("2" * 32, "2012-03-10") + third = self._log_file("3" * 32, "2012-03-15") + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + result = tasks.task_parse_logs.run( + collections=["books"], + include_logs_with_error=False, + from_date="2012-03-01", + until_date="2012-03-31", + ) + + 
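+        # Two of the three log files share 2012-03-10, so only two daily jobs
+        # (one per distinct collection/date pair) should be enqueued.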
self.assertEqual(result["enqueued_jobs"], 2) + self.assertEqual(mocked_apply_async.call_count, 2) + jobs = list(DailyMetricJob.objects.order_by("access_date")) + self.assertEqual([job.access_date for job in jobs], [date(2012, 3, 10), date(2012, 3, 15)]) + self.assertEqual(jobs[0].input_log_hashes, sorted([first.hash, second.hash])) + self.assertEqual(jobs[1].input_log_hashes, [third.hash]) + + def test_task_parse_logs_allows_queue_override_and_robots_source(self): + self._log_file("1" * 32, "2012-03-10") + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + tasks.task_parse_logs.run( + collections=["books"], + include_logs_with_error=False, + from_date="2012-03-01", + until_date="2012-03-31", + queue_name="parse_small_mult", + robots_source="counter", + ) + + mocked_apply_async.assert_called_once() + self.assertEqual(mocked_apply_async.call_args.kwargs["queue"], "parse_small_mult") + self.assertEqual(mocked_apply_async.call_args.kwargs["args"][-1], "counter") + + def test_task_parse_logs_skip_log_hashes_prevents_reprocessing_same_auto_run(self): + skipped = self._log_file("1" * 32, "2012-03-10", status=choices.LOG_FILE_STATUS_ERROR) + queued = self._log_file("2" * 32, "2012-03-11") + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + result = tasks.task_parse_logs.run( + collections=["books"], + include_logs_with_error=True, + from_date="2012-03-01", + until_date="2012-03-31", + skip_log_hashes=[skipped.hash], + ) + + mocked_apply_async.assert_called_once() + job = DailyMetricJob.objects.get() + self.assertEqual(job.input_log_hashes, [queued.hash]) + self.assertEqual(result["enqueued_jobs"], 1) + + def test_wait_parse_logs_wave_rechecks_until_daily_jobs_complete(self): + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTING, + ) + + with patch("metrics.tasks.task_wait_parse_logs_wave.apply_async") as mocked_wait_apply_async: + with patch("metrics.tasks.task_parse_logs.apply_async") as mocked_parse_logs_apply_async: + result = tasks.task_wait_parse_logs_wave.run( + wave_log_hashes=[job.pk], + collections=["books"], + include_logs_with_error=False, + max_log_files=2, + auto_reexecute=True, + ) + + self.assertEqual(result, {"wave_completed": False, "reexecution_enqueued": False}) + mocked_parse_logs_apply_async.assert_not_called() + mocked_wait_apply_async.assert_called_once() + + +class ResumeDailyMetricJobTests(TestCase): + def setUp(self): + self.collection = Collection.objects.create(acron3="books", acron2="bk") + + def test_resume_log_exports_requeues_error_daily_jobs(self): + log_file = LogFile.objects.create( + hash="1" * 32, + path="/tmp/1.log.gz", + stat_result={}, + status=choices.LOG_FILE_STATUS_ERROR, + collection=self.collection, + date=date(2012, 3, 10), + ) + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + input_log_hashes=[log_file.hash], + ) + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + result = tasks.task_resume_log_exports.run( + collections=["books"], + from_date="2012-03-01", + until_date="2012-03-31", + queue_name="parse_small_mult", + ) + + mocked_apply_async.assert_called_once() + self.assertEqual(mocked_apply_async.call_args.kwargs["args"][0], job.pk) + self.assertEqual(mocked_apply_async.call_args.kwargs["queue"], "parse_small_mult") + 
self.assertEqual(result["resumed_logs"], 1) + + def test_resume_log_exports_clears_payload_when_current_logs_change(self): + log_file = LogFile.objects.create( + hash="2" * 32, + path="/tmp/2.log.gz", + stat_result={}, + status=choices.LOG_FILE_STATUS_QUEUED, + collection=self.collection, + date=date(2012, 3, 10), + ) + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + input_log_hashes=["1" * 32], + storage_path="books/2012/03/2012-03-10.json", + payload_hash="abc", + summary={"month_document_count": 1}, + ) + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async"): + tasks.task_resume_log_exports.run( + collections=["books"], + from_date="2012-03-01", + until_date="2012-03-31", + ) + + job.refresh_from_db() + self.assertEqual(job.input_log_hashes, [log_file.hash]) + self.assertEqual(job.storage_path, "") + self.assertEqual(job.payload_hash, "") + self.assertEqual(job.summary, {}) + + def test_resume_log_exports_preserves_payload_when_current_logs_match(self): + log_file = LogFile.objects.create( + hash="1" * 32, + path="/tmp/1.log.gz", + stat_result={}, + status=choices.LOG_FILE_STATUS_ERROR, + collection=self.collection, + date=date(2012, 3, 10), + ) + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + input_log_hashes=[log_file.hash], + storage_path="books/2012/03/2012-03-10.json", + payload_hash="abc", + summary={"month_document_count": 1}, + ) + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async"): + tasks.task_resume_log_exports.run( + collections=["books"], + from_date="2012-03-01", + until_date="2012-03-31", + ) + + job.refresh_from_db() + self.assertEqual(job.storage_path, "books/2012/03/2012-03-10.json") + self.assertEqual(job.payload_hash, "abc") + self.assertEqual(job.summary, {"month_document_count": 1}) + + def test_resume_log_exports_requeues_stored_payload_without_current_logs(self): + job = DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + input_log_hashes=["1" * 32], + storage_path="books/2012/03/2012-03-10.json", + payload_hash="abc", + ) + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + result = tasks.task_resume_log_exports.run( + collections=["books"], + from_date="2012-03-01", + until_date="2012-03-31", + ) + + mocked_apply_async.assert_called_once() + self.assertEqual(mocked_apply_async.call_args.kwargs["args"][0], job.pk) + self.assertEqual(result["resumed_jobs"], 1) + + def test_resume_log_exports_skips_jobs_without_logs_or_payload(self): + DailyMetricJob.objects.create( + collection=self.collection, + access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_ERROR, + ) + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + result = tasks.task_resume_log_exports.run( + collections=["books"], + from_date="2012-03-01", + until_date="2012-03-31", + ) + + mocked_apply_async.assert_not_called() + self.assertEqual(result["resumed_jobs"], 0) + + def test_resume_log_exports_releases_stale_exporting_jobs(self): + log_file = LogFile.objects.create( + hash="1" * 32, + path="/tmp/1.log.gz", + stat_result={}, + status=choices.LOG_FILE_STATUS_ERROR, + collection=self.collection, + date=date(2012, 3, 10), + ) + job = DailyMetricJob.objects.create( + collection=self.collection, + 
access_date=date(2012, 3, 10), + status=DailyMetricJob.STATUS_EXPORTING, + input_log_hashes=[log_file.hash], + export_started_at=timezone.now() - timedelta(minutes=120), + ) + + with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async: + result = tasks.task_resume_log_exports.run( + collections=["books"], + from_date="2012-03-01", + until_date="2012-03-31", + stale_after_minutes=60, + ) + + job.refresh_from_db() + self.assertEqual(job.status, DailyMetricJob.STATUS_PENDING) + mocked_apply_async.assert_called_once() + self.assertEqual(result["released_stale_batches"], 1) diff --git a/metrics/utils/__init__.py b/metrics/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/metrics/utils/index_utils.py b/metrics/utils/index_utils.py deleted file mode 100644 index 76af8c2..0000000 --- a/metrics/utils/index_utils.py +++ /dev/null @@ -1,331 +0,0 @@ -from scielo_usage_counter.counter import compute_r5_metrics -from scielo_usage_counter.values import CONTENT_TYPE_UNDEFINED, MEDIA_FORMAT_UNDEFINED - -from core.utils import standardizer -from core.utils.date_utils import extract_minute_second_key, truncate_datetime_to_hour - - -def generate_user_session_id(client_name, client_version, ip_address, datetime, sep='|'): - """ - Generates a user session ID based on the provided parameters. - - Parameters: - client_name (str): The name of the client. - client_version (str): The version of the client. - ip_address (str): The IP address of the user. - datetime (datetime): The datetime object representing the session time. - sep (str): The separator to use in the ID. Default is '|'. - - Returns: - str: A user session ID formatted as a string. - """ - dt_year_month_day = datetime.strftime('%Y-%m-%d') - dt_hour = datetime.strftime('%H') - - return sep.join([ - str(client_name), - str(client_version), - str(ip_address), - str(dt_year_month_day), - str(dt_hour), - ]) - - -def generate_item_access_id(col_acron3, scielo_issn, pid_v2, pid_v3, pid_generic, user_session_id, country_code, media_language, media_format, content_type, sep='|'): - """ - Generates an item access ID based on the provided parameters. - - Parameters: - col_acron3 (str): The acronym of the collection. - scielo_issn (str): The ISSN of the SciELO journal. - pid_v2 (str): The PID version 2. - pid_v3 (str): The PID version 3. - pid_generic (str): The generic PID. - user_session_id (str): The user session ID. - country_code (str): The country code of the user. - media_language (str): The language of the media. - media_format (str): The format of the media. - content_type (str): The type of content. - sep (str): The separator to use in the ID. Default is '|'. - """ - return sep.join([ - col_acron3, - scielo_issn, - pid_v2 or '', - pid_v3 or '', - pid_generic or '', - user_session_id, - country_code, - media_language, - media_format, - content_type, - ]) - - -def generate_index_name(index_prefix: str, collection: str, date: str): - """ Generates an index name based on the provided parameters. - Parameters: - index_prefix (str): The prefix for the index name. - collection (str): The collection acronym. - date (str): The date string in 'YYYY-MM-DD' format. - Returns: - str: The formatted index name. 
- """ - if not date or not isinstance(date, str): - raise ValueError("Date must be a non-empty string in 'YYYY-MM-DD' format.") - - if not collection or not isinstance(collection, str): - raise ValueError("Collection must be a non-empty string.") - - if not index_prefix or not isinstance(index_prefix, str): - raise ValueError("Index prefix must be a non-empty string.") - - index_year, _, _ = date.split('-') - return f'{index_prefix}_{collection}_{index_year}' - - -def generate_index_id(collection, journal, pid_v2, pid_v3, pid_generic, media_language, country_code, date_str): - """ - Generates a unique index key based on the provided parameters. - This is different from the item access ID as it does not include user session, media_format, and content_type information. - It is used for indexing purposes. - - Parameters: - collection (str): The collection acronym. - journal (str): The journal name. - pid_v2 (str): The PID version 2. - pid_v3 (str): The PID version 3. - pid_generic (str): The generic PID. - media_language (str): The media language code. - country_code (str): The country code. - date_str (str): The date string in 'YYYY-MM-DD' format. - - Returns: - str: A unique index key formatted as a string. - """ - return '|'.join([ - collection, - journal, - pid_v2 or '', - pid_v3 or '', - pid_generic or '', - media_language, - country_code, - date_str - ]) - - -def extract_item_access_data(collection_acron3:str, translated_url: dict): - """ - Extracts item access data from the translated URL and standardizes it. - - Args: - collection_acron3 (str): The acronym of the collection. - translated_url (dict): The translated URL containing metadata. - - Returns: - dict: A dictionary containing standardized item access data, or None if the data is invalid. - """ - if not translated_url or not isinstance(translated_url, dict): - return {} - - item_access_data = { - 'collection': collection_acron3, - 'scielo_issn': translated_url.get('scielo_issn'), - 'pid_v2': standardizer.standardize_pid_v2(translated_url.get('pid_v2')), - 'pid_v3': standardizer.standardize_pid_v3(translated_url.get('pid_v3')), - 'pid_generic': standardizer.standardize_pid_generic(translated_url.get('pid_generic')), - 'media_language': standardizer.standardize_language_code(translated_url.get('media_language')), - 'media_format': translated_url.get('media_format'), - 'content_type': translated_url.get('content_type'), - 'year_of_publication': standardizer.standardize_year_of_publication(translated_url.get('year_of_publication')), - 'journal_main_title': translated_url.get('journal_main_title'), - 'journal_subject_area_capes': translated_url.get('journal_subject_area_capes'), - 'journal_subject_area_wos': translated_url.get('journal_subject_area_wos'), - 'journal_acronym': translated_url.get('journal_acronym'), - 'journal_publisher_name': translated_url.get('journal_publisher_name'), - } - - return item_access_data - - -def is_valid_item_access_data(data: dict, utm=None, ignore_utm_validation=False): - """ - Validates the item access data based on the provided parameters. - - Parameters: - data (dict): A dictionary containing the following keys: - - scielo_issn (str): The ISSN of the SciELO journal. - - pid_v2 (str): The PID version 2 of the document. - - pid_v3 (str): The PID version 3 of the document. - - media_format (str): The media format of the document. - - content_type (str): The content type of the document. 
- utm: URL translation manager for converting URLs - ignore_utm_validation (bool): If True, skips validation against the URL translation manager. - - Returns: - tuple: A tuple containing a boolean indicating whether the data is valid and a message. - If the data is valid, the first element is True and the second element is a success message. - If the data is invalid, the first element is False and the second element is an error message. - """ - if not isinstance(data, dict): - return False, {'message': 'Invalid data format. Expected a dictionary.', 'code': 'invalid_format'} - - scielo_issn = data.get('scielo_issn') - media_format = data.get('media_format') - content_type = data.get('content_type') - pid_v2 = data.get('pid_v2') - pid_v3 = data.get('pid_v3') - pid_generic = data.get('pid_generic') - - if not all([ - scielo_issn, - media_format and media_format != MEDIA_FORMAT_UNDEFINED, - content_type and content_type != CONTENT_TYPE_UNDEFINED, - pid_v2 or pid_v3 or pid_generic, - ]): - return False, {'message': 'Missing required fields in item access data.', 'code': 'missing_fields'} - - # Check ISSN and PIDs validity using the URL translation manager - if utm and not ignore_utm_validation: - if not utm.is_valid_code(scielo_issn, utm.journals_metadata['issn_set']): - return False, {'message': f'Invalid scielo_issn: {scielo_issn}', 'code': 'invalid_scielo_issn'} - - if pid_v2 and not utm.is_valid_code(pid_v2, utm.articles_metadata['pid_set']): - return False, {'message': f'Invalid pid_v2: {pid_v2}', 'code': 'invalid_pid_v2'} - - if pid_v3 and not utm.is_valid_code(pid_v3, utm.articles_metadata['pid_set']): - return False, {'message': f'Invalid pid_v3: {pid_v3}', 'code': 'invalid_pid_v3'} - - if pid_generic and not utm.is_valid_code(pid_generic, utm.articles_metadata['pid_set']): - return False, {'message': f'Invalid pid_generic: {pid_generic}', 'code': 'invalid_pid_generic'} - - return True, {'message': 'Item access data is valid.', 'code': 'valid'} - - -def update_results_with_item_access_data(results: dict, item_access_data: dict, line: dict): - """ - Updates the item access data with the information from the log line. - - Args: - data (dict): The dictionary to store item access data. - item_access_data (dict): The item access data extracted from the translated URL. - line (dict): The log line containing additional information. - - Returns: - None. 
- """ - col_acron3 = item_access_data.get('collection') - scielo_issn = item_access_data.get('scielo_issn') - pid_v2 = item_access_data.get('pid_v2') - pid_v3 = item_access_data.get('pid_v3') - pid_generic = item_access_data.get('pid_generic') - - media_format = item_access_data.get('media_format') - media_language = item_access_data.get('media_language') - content_type = item_access_data.get('content_type') - - client_name = line.get('client_name') - client_version = line.get('client_version') - local_datetime = line.get('local_datetime') - country_code = line.get('country_code') - ip_address = line.get('ip_address') - - truncated_datetime = truncate_datetime_to_hour(local_datetime) - ms_key = extract_minute_second_key(local_datetime) - - user_session_id = generate_user_session_id( - client_name, - client_version, - ip_address, - truncated_datetime, - ) - - item_access_id = generate_item_access_id( - user_session_id=user_session_id, - col_acron3=col_acron3, - scielo_issn=scielo_issn, - pid_v2=pid_v2, - pid_v3=pid_v3, - pid_generic=pid_generic, - media_language=media_language, - country_code=country_code, - media_format=media_format, - content_type=content_type, - ) - - if item_access_id not in results: - results[item_access_id] = { - 'click_timestamps': {ms_key: 0}, - 'media_format': media_format, - 'media_language': media_language, - 'content_type': content_type, - 'country_code': country_code, - 'date_str': truncated_datetime.strftime('%Y-%m-%d'), - 'date': truncated_datetime, - 'year_of_publication': item_access_data.get('year_of_publication'), - 'journal': { - 'scielo_issn': item_access_data.get('scielo_issn'), - 'main_title': item_access_data.get('journal_main_title'), - 'subject_area_capes': item_access_data.get('journal_subject_area_capes'), - 'subject_area_wos': item_access_data.get('journal_subject_area_wos'), - 'acronym': item_access_data.get('journal_acronym'), - 'publisher_name': item_access_data.get('journal_publisher_name'), - }, - } - - # Check if the click timestamp for this minute-second key exists, if not, initialize it - if ms_key not in results[item_access_id]['click_timestamps']: - results[item_access_id]['click_timestamps'][ms_key] = 0 - - # Increment the click timestamp count - results[item_access_id]['click_timestamps'][ms_key] += 1 - - -def convert_to_index_documents(data: dict, key_sep='|'): - """ - Converts the provided data into a format suitable for indexing metrics. - This function processes the data dictionary, extracting relevant fields and computing metrics. - - Args: - data (dict): A dictionary containing the metrics data to be processed. - - Returns: - dict: A dictionary containing the processed metrics data, ready for indexing. 
- """ - if not isinstance(data, dict): - return {} - - metrics_data = {} - - for key, value in data.items(): - collection, scielo_issn, pid_v2, pid_v3, pid_generic, _, _, _, _, _, country_code, media_language, _, content_type = key.split(key_sep) - - document_id = generate_index_id( - collection, - scielo_issn, - pid_v2, - pid_v3, - pid_generic, - media_language, - country_code, - value.get('date_str') - ) - - compute_r5_metrics( - document_id, - metrics_data, - collection, - value.get('journal'), - pid_v2, - pid_v3, - pid_generic, - value.get('year_of_publication'), - media_language, - value.get('country_code'), - value.get('date_str'), - value.get('click_timestamps'), - content_type, - ) - - return metrics_data diff --git a/metrics/wagtail_hooks.py b/metrics/wagtail_hooks.py new file mode 100644 index 0000000..94c2ffb --- /dev/null +++ b/metrics/wagtail_hooks.py @@ -0,0 +1,22 @@ +from django.utils.translation import gettext_lazy as _ +from wagtail.snippets.views.snippets import SnippetViewSet + +from metrics.models import DailyMetricJob + +class DailyMetricJobSnippetViewSet(SnippetViewSet): + model = DailyMetricJob + menu_label = _("Daily Metric Jobs") + icon = "history" + menu_order = 600 + list_display = ( + "collection", + "access_date", + "status", + "input_log_count", + "attempts", + "export_started_at", + "exported_at", + "updated", + ) + list_filter = ("status", "collection", "access_date") + search_fields = ("collection__acron3", "error_message") diff --git a/article/management/__init__.py b/reports/__init__.py similarity index 100% rename from article/management/__init__.py rename to reports/__init__.py diff --git a/reports/apps.py b/reports/apps.py new file mode 100644 index 0000000..119ca26 --- /dev/null +++ b/reports/apps.py @@ -0,0 +1,8 @@ +from django.apps import AppConfig +from django.utils.translation import gettext_lazy as _ + + +class ReportsConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "reports" + verbose_name = _("Reports") diff --git a/reports/migrations/0001_initial.py b/reports/migrations/0001_initial.py new file mode 100644 index 0000000..2a72923 --- /dev/null +++ b/reports/migrations/0001_initial.py @@ -0,0 +1,140 @@ +# Generated by Django 5.2.12 on 2026-05-01 15:50 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("collection", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="MonthlyLogReport", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("total_files", models.IntegerField(default=0)), + ("created_files", models.IntegerField(default=0)), + ("validated_files", models.IntegerField(default=0)), + ("invalidated_files", models.IntegerField(default=0)), + ("errored_files", models.IntegerField(default=0)), + ("lines_parsed", models.IntegerField(default=0)), + ("valid_lines", models.IntegerField(default=0)), + ("discarded_lines", models.IntegerField(default=0)), + ("ip_local_count", models.IntegerField(default=0)), + ("ip_remote_count", models.IntegerField(default=0)), + ("ip_unknown_count", models.IntegerField(default=0)), + ("generated_at", models.DateTimeField(auto_now=True)), + ("year", models.IntegerField(verbose_name="Year")), + ("month", models.IntegerField(verbose_name="Month")), + ( + "collection", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="collection.collection", + 
verbose_name="Collection", + ), + ), + ], + options={ + "verbose_name": "Monthly Log Report", + "verbose_name_plural": "Monthly Log Reports", + "ordering": ["-year", "-month", "collection__acron3"], + "unique_together": {("collection", "year", "month")}, + }, + ), + migrations.CreateModel( + name="WeeklyLogReport", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("total_files", models.IntegerField(default=0)), + ("created_files", models.IntegerField(default=0)), + ("validated_files", models.IntegerField(default=0)), + ("invalidated_files", models.IntegerField(default=0)), + ("errored_files", models.IntegerField(default=0)), + ("lines_parsed", models.IntegerField(default=0)), + ("valid_lines", models.IntegerField(default=0)), + ("discarded_lines", models.IntegerField(default=0)), + ("ip_local_count", models.IntegerField(default=0)), + ("ip_remote_count", models.IntegerField(default=0)), + ("ip_unknown_count", models.IntegerField(default=0)), + ("generated_at", models.DateTimeField(auto_now=True)), + ("year", models.IntegerField(verbose_name="Year")), + ("week", models.IntegerField(verbose_name="ISO Week")), + ( + "collection", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="collection.collection", + verbose_name="Collection", + ), + ), + ], + options={ + "verbose_name": "Weekly Log Report", + "verbose_name_plural": "Weekly Log Reports", + "ordering": ["-year", "-week", "collection__acron3"], + "unique_together": {("collection", "year", "week")}, + }, + ), + migrations.CreateModel( + name="YearlyLogReport", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("total_files", models.IntegerField(default=0)), + ("created_files", models.IntegerField(default=0)), + ("validated_files", models.IntegerField(default=0)), + ("invalidated_files", models.IntegerField(default=0)), + ("errored_files", models.IntegerField(default=0)), + ("lines_parsed", models.IntegerField(default=0)), + ("valid_lines", models.IntegerField(default=0)), + ("discarded_lines", models.IntegerField(default=0)), + ("ip_local_count", models.IntegerField(default=0)), + ("ip_remote_count", models.IntegerField(default=0)), + ("ip_unknown_count", models.IntegerField(default=0)), + ("generated_at", models.DateTimeField(auto_now=True)), + ("year", models.IntegerField(verbose_name="Year")), + ( + "collection", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="collection.collection", + verbose_name="Collection", + ), + ), + ], + options={ + "verbose_name": "Yearly Log Report", + "verbose_name_plural": "Yearly Log Reports", + "ordering": ["-year", "collection__acron3"], + "unique_together": {("collection", "year")}, + }, + ), + ] diff --git a/reports/migrations/0002_alter_monthlylogreport_options_and_more.py b/reports/migrations/0002_alter_monthlylogreport_options_and_more.py new file mode 100644 index 0000000..659215c --- /dev/null +++ b/reports/migrations/0002_alter_monthlylogreport_options_and_more.py @@ -0,0 +1,36 @@ +# Generated by Django 5.2.12 on 2026-05-01 22:23 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("reports", "0001_initial"), + ] + + operations = [ + migrations.AlterModelOptions( + name="monthlylogreport", + options={ + "ordering": ["collection__acron3", "year", "month"], + "verbose_name": "Monthly Log Report", + "verbose_name_plural": 
"Monthly Log Reports", + }, + ), + migrations.AlterModelOptions( + name="weeklylogreport", + options={ + "ordering": ["collection__acron3", "year", "week"], + "verbose_name": "Weekly Log Report", + "verbose_name_plural": "Weekly Log Reports", + }, + ), + migrations.AlterModelOptions( + name="yearlylogreport", + options={ + "ordering": ["collection__acron3", "year"], + "verbose_name": "Yearly Log Report", + "verbose_name_plural": "Yearly Log Reports", + }, + ), + ] diff --git a/article/management/commands/__init__.py b/reports/migrations/__init__.py similarity index 100% rename from article/management/commands/__init__.py rename to reports/migrations/__init__.py diff --git a/reports/models.py b/reports/models.py new file mode 100644 index 0000000..3af1ec8 --- /dev/null +++ b/reports/models.py @@ -0,0 +1,100 @@ +from django.db import models +from django.utils.translation import gettext_lazy as _ + +from collection.models import Collection + + +class AbstractLogReport(models.Model): + collection = models.ForeignKey( + Collection, + on_delete=models.CASCADE, + verbose_name=_("Collection"), + ) + total_files = models.IntegerField(default=0) + created_files = models.IntegerField(default=0) + validated_files = models.IntegerField(default=0) + invalidated_files = models.IntegerField(default=0) + errored_files = models.IntegerField(default=0) + lines_parsed = models.IntegerField(default=0) + valid_lines = models.IntegerField(default=0) + discarded_lines = models.IntegerField(default=0) + ip_local_count = models.IntegerField(default=0) + ip_remote_count = models.IntegerField(default=0) + ip_unknown_count = models.IntegerField(default=0) + generated_at = models.DateTimeField(auto_now=True) + + class Meta: + abstract = True + + @property + def pct_validated(self): + if not self.total_files: + return 0 + return round(self.validated_files / self.total_files * 100, 1) + pct_validated.fget.short_description = _("% Valid Files") + + @property + def pct_valid_lines(self): + if not self.lines_parsed: + return 0 + return round(self.valid_lines / self.lines_parsed * 100, 1) + pct_valid_lines.fget.short_description = _("% Valid Lines") + + @property + def pct_remote_ip(self): + total = self.ip_remote_count + self.ip_local_count + if not total: + return 0 + return round(self.ip_remote_count / total * 100, 1) + pct_remote_ip.fget.short_description = _("% Remote IP") + + def __str__(self): + return f"{self.collection.acron3} {self.period_label}" + + @property + def period_label(self): + raise NotImplementedError + + +class WeeklyLogReport(AbstractLogReport): + year = models.IntegerField(verbose_name=_("Year")) + week = models.IntegerField(verbose_name=_("ISO Week")) + + class Meta: + unique_together = [("collection", "year", "week")] + ordering = ["collection__acron3", "year", "week"] + verbose_name = _("Weekly Log Report") + verbose_name_plural = _("Weekly Log Reports") + + @property + def period_label(self): + return f"{self.year}-W{self.week:02d}" + + +class MonthlyLogReport(AbstractLogReport): + year = models.IntegerField(verbose_name=_("Year")) + month = models.IntegerField(verbose_name=_("Month")) + + class Meta: + unique_together = [("collection", "year", "month")] + ordering = ["collection__acron3", "year", "month"] + verbose_name = _("Monthly Log Report") + verbose_name_plural = _("Monthly Log Reports") + + @property + def period_label(self): + return f"{self.year}-{self.month:02d}" + + +class YearlyLogReport(AbstractLogReport): + year = models.IntegerField(verbose_name=_("Year")) + + class Meta: + 
unique_together = [("collection", "year")] + ordering = ["collection__acron3", "year"] + verbose_name = _("Yearly Log Report") + verbose_name_plural = _("Yearly Log Reports") + + @property + def period_label(self): + return str(self.year) diff --git a/reports/tasks.py b/reports/tasks.py new file mode 100644 index 0000000..69a53a1 --- /dev/null +++ b/reports/tasks.py @@ -0,0 +1,238 @@ +import logging +from collections import defaultdict + +from django.core.mail import send_mail +from django.conf import settings +from django.utils.translation import gettext as _ + +from config import celery_app +from core.utils import date_utils +from collection.models import Collection +from log_manager import choices +from log_manager.models import LogFile +from log_manager_config import models as lmc_models + +from reports.models import WeeklyLogReport, MonthlyLogReport, YearlyLogReport + + +def _extract_date_from_log_file(lf): + if lf.date: + return lf.date + + probably_date = (lf.validation or {}).get("probably_date") + if isinstance(probably_date, str) and probably_date: + return date_utils.get_date_obj(probably_date) + + try: + import re + match = re.search(r"(\d{4}-\d{2}-\d{2})", lf.path) + if match: + return date_utils.get_date_obj(match.group(1)) + except Exception: + pass + + return None + + +@celery_app.task(bind=True, name=_("[Reports] Populate All Reports")) +def task_populate_all_reports(self, year=None, collection_acron=None): + qs = LogFile.objects.select_related("collection") + if collection_acron: + qs = qs.filter(collection__acron3=collection_acron) + qs = qs.only( + "id", "collection_id", "date", "path", "status", "summary", "validation" + ) + + weekly = defaultdict(lambda: defaultdict(int)) + monthly = defaultdict(lambda: defaultdict(int)) + yearly = defaultdict(lambda: defaultdict(int)) + + for lf in qs.iterator(chunk_size=2000): + extracted_date = _extract_date_from_log_file(lf) + if not extracted_date: + continue + if year and extracted_date.year != int(year): + continue + + iso_year, iso_week, _ = extracted_date.isocalendar() + yr = extracted_date.year + mo = extracted_date.month + + for agg, key in [ + (weekly, (lf.collection_id, iso_year, iso_week)), + (monthly, (lf.collection_id, yr, mo)), + (yearly, (lf.collection_id, yr)), + ]: + r = agg[key] + r["total_files"] += 1 + st = lf.status + if st == "CRE": + r["created_files"] += 1 + elif st in ("QUE", "PAR", "PRO"): + r["validated_files"] += 1 + elif st == "INV": + r["invalidated_files"] += 1 + elif st == "ERR": + r["errored_files"] += 1 + + s = lf.summary or {} + lp = s.get("lines_parsed", 0) or 0 + vl = s.get("valid_lines", 0) or 0 + r["lines_parsed"] += lp + r["valid_lines"] += vl + r["discarded_lines"] += max(lp - vl, 0) + + ips = ( + (lf.validation or {}) + .get("content", {}) + .get("summary", {}) + .get("ips", {}) + ) + r["ip_local_count"] += ips.get("local", 0) or 0 + r["ip_remote_count"] += ips.get("remote", 0) or 0 + r["ip_unknown_count"] += ips.get("unknown", 0) or 0 + + w_count = _upsert_reports(WeeklyLogReport, weekly) + m_count = _upsert_reports(MonthlyLogReport, monthly) + y_count = _upsert_reports(YearlyLogReport, yearly) + + logging.info( + "Reports populated: %s weekly, %s monthly, %s yearly.", + w_count, m_count, y_count, + ) + return f"Weekly: {w_count}, Monthly: {m_count}, Yearly: {y_count}" + + +def _upsert_reports(model_class, data): + count = 0 + unique_fields = list(model_class._meta.unique_together[0]) + period_fields = unique_fields[1:] + for key, fields in data.items(): + coll_id = key[0] + period_values 
= key[1:] + lookup = {"collection_id": coll_id} + for idx, field_name in enumerate(period_fields): + lookup[field_name] = period_values[idx] + model_class.objects.update_or_create(defaults=fields, **lookup) + count += 1 + return count + + +@celery_app.task( + bind=True, + name=_("[Reports] Generate Log Report Summary (Manual)"), + queue="load", +) +def task_log_files_count_status_report( + self, + collections=None, + from_date=None, + until_date=None, + days_to_go_back=None, + user_id=None, + username=None, +): + from_date_str, until_date_str = date_utils.get_date_range_str( + from_date, until_date, days_to_go_back + ) + subject = _( + "Usage Log Report Summary " + f"({from_date_str} to {until_date_str})" + ) + + for collection_acron in (collections or Collection.acron3_list()): + try: + collection = Collection.objects.get(acron3=collection_acron) + except Collection.DoesNotExist: + logging.warning("Collection not found: %s", collection_acron) + continue + + message = _build_report_message( + collection, + from_date_str, + until_date_str, + ) + + if not message: + continue + + logging.info( + "Sending email to collection %s. Subject: %s.", + collection.main_name, subject, + ) + + _send_collection_email(subject, message, collection_acron) + + +def _build_report_message(collection, from_date_str, until_date_str): + monthly = MonthlyLogReport.objects.filter( + collection=collection, + ).order_by("-year", "-month") + + if not monthly.exists(): + return "" + + latest = monthly.first() + message = _( + f"Usage Log Report for {collection.acron3}\n" + f"Period: {from_date_str} to {until_date_str}\n\n" + ) + message += _("Latest month ({latest}):\n").format(latest=latest.period_label) + message += ( + f" Total files: {latest.total_files}\n" + f" Validated files: {latest.validated_files} ({latest.pct_validated}%)\n" + f" Invalidated files: {latest.invalidated_files}\n" + f" Errored files: {latest.errored_files}\n" + f" Lines parsed: {latest.lines_parsed}\n" + f" Valid lines: {latest.valid_lines} ({latest.pct_valid_lines}%)\n" + f" Discarded lines: {latest.discarded_lines}\n" + f" Remote IPs: {latest.ip_remote_count} ({latest.pct_remote_ip}%)\n" + f" Local IPs: {latest.ip_local_count}\n" + ) + + prev_month = latest + if len(monthly) > 1: + prev_month = monthly[1] + message += _("\nPrevious month ({prev}):\n").format(prev=prev_month.period_label) + message += ( + f" Total files: {prev_month.total_files}\n" + f" Validated files: {prev_month.validated_files} ({prev_month.pct_validated}%)\n" + f" Valid lines: {prev_month.valid_lines} ({prev_month.pct_valid_lines}%)\n" + f" Remote IPs: {prev_month.ip_remote_count} ({prev_month.pct_remote_ip}%)\n" + ) + + if prev_month.total_files: + file_diff = latest.total_files - prev_month.total_files + line_diff = latest.lines_parsed - prev_month.lines_parsed + message += _("\nMonth-over-month change:\n") + message += f" Files: {file_diff:+d}\n" + message += f" Lines: {line_diff:+d}\n" + + message += ( + f"\n---\n" + f"This report is automatically generated by SciELO Usage.\n" + ) + return message + + +def _send_collection_email(subject, message, collection): + emails = lmc_models.CollectionEmail.objects.filter( + config__collection__acron3=collection, active=True + ).values_list("email", flat=True) + + if not emails: + logging.error( + "Error. 
Please, add an E-mail Configuration for the collection %s.", + collection, + ) + return + + try: + send_mail( + subject=subject, + message=message, + from_email=settings.DEFAULT_FROM_EMAIL, + recipient_list=list(emails), + ) + except Exception as e: + logging.error("Error sending log files report for %s: %s", collection, e) diff --git a/reports/wagtail_hooks.py b/reports/wagtail_hooks.py new file mode 100644 index 0000000..b2aeac7 --- /dev/null +++ b/reports/wagtail_hooks.py @@ -0,0 +1,75 @@ +from django.contrib.auth import get_user_model +from django.utils.translation import gettext_lazy as _ +from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup +from wagtail.snippets.models import register_snippet +from wagtail.permission_policies.base import BasePermissionPolicy + +from reports.models import WeeklyLogReport, MonthlyLogReport, YearlyLogReport + + +class ReadOnlyPermissionPolicy(BasePermissionPolicy): + def user_has_permission(self, user, action): + if action in ("add", "change", "delete"): + return False + return True + + def users_with_any_permission(self, actions): + return get_user_model().objects.filter(is_active=True) + + +COMMON_LIST_DISPLAY = ( + "total_files", + "pct_validated", + "lines_parsed", + "pct_valid_lines", + "pct_remote_ip", + "generated_at", +) + + +class WeeklyLogReportSnippetViewSet(SnippetViewSet): + model = WeeklyLogReport + menu_label = _("Weekly") + icon = "info-circle" + menu_order = 100 + list_display = ("collection", "year", "week") + COMMON_LIST_DISPLAY + list_filter = ("collection", "year", "week") + search_fields = ("collection__acron3",) + permission_policy = ReadOnlyPermissionPolicy(WeeklyLogReport) + + +class MonthlyLogReportSnippetViewSet(SnippetViewSet): + model = MonthlyLogReport + menu_label = _("Monthly") + icon = "info-circle" + menu_order = 200 + list_display = ("collection", "year", "month") + COMMON_LIST_DISPLAY + list_filter = ("collection", "year", "month") + search_fields = ("collection__acron3",) + permission_policy = ReadOnlyPermissionPolicy(MonthlyLogReport) + + +class YearlyLogReportSnippetViewSet(SnippetViewSet): + model = YearlyLogReport + menu_label = _("Yearly") + icon = "info-circle" + menu_order = 300 + list_display = ("collection", "year") + COMMON_LIST_DISPLAY + list_filter = ("collection", "year") + search_fields = ("collection__acron3",) + permission_policy = ReadOnlyPermissionPolicy(YearlyLogReport) + + +class ReportsSnippetViewSetGroup(SnippetViewSetGroup): + menu_name = "usage_reports" + menu_label = _("Reports") + menu_icon = "info-circle" + menu_order = 350 + items = ( + WeeklyLogReportSnippetViewSet, + MonthlyLogReportSnippetViewSet, + YearlyLogReportSnippetViewSet, + ) + + +register_snippet(ReportsSnippetViewSetGroup) diff --git a/requirements/base.txt b/requirements/base.txt index 6ef5fba..7b5ed61 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -63,10 +63,13 @@ minio==7.2.7 reverse-geocode==1.6 # https://pypi.org/project/reverse-geocode/ # SciELO Log Validator --e git+https://github.com/scieloorg/scielo_log_validator@0.4.0#egg=scielo_log_validator +-e git+https://github.com/scieloorg/scielo_log_validator@2.0.0#egg=scielo_log_validator + +# SciELO Scholarly Data +-e git+https://github.com/scieloorg/scielo_scholarly_data@v0.1.4#egg=scielo_scholarly_data # SciELO Usage COUNTER --e git+https://github.com/scieloorg/scielo_usage_counter@1.5.1#egg=scielo_usage_counter +-e git+https://github.com/scieloorg/scielo_usage_counter@2.0.0#egg=scielo_usage_counter # Device Detector 
device-detector==0.10 # https://github.com/thinkwelltwd/device_detector @@ -93,6 +96,6 @@ tenacity==8.3.0 # https://pypi.org/project/tenacity/ # ------------------------------------------------------------------------------ articlemetaapi==1.26.7 -# ElasticSearch +# OpenSearch # ------------------------------------------------------------------------------ -elasticsearch==8.18.1 # https://elasticsearch-py.readthedocs.io/en/v8.18.1/ +opensearch-py==3.1.0 diff --git a/resources/constants.py b/resources/constants.py index feba18d..2ce64da 100644 --- a/resources/constants.py +++ b/resources/constants.py @@ -1,2 +1,2 @@ DEFAULT_COUNTER_ROBOTS_URL = 'https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json' -DEFAULT_MMDB_URL = 'https://download.db-ip.com/free/dbip-country-lite-2025-02.mmdb.gz' +DEFAULT_MMDB_URL = 'https://download.db-ip.com/free/dbip-country-lite-2026-03.mmdb.gz' diff --git a/resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py b/resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py new file mode 100644 index 0000000..80bb0cc --- /dev/null +++ b/resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py @@ -0,0 +1,61 @@ +# Generated by Django 5.2.12 on 2026-05-01 22:23 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("resources", "0001_initial"), + ] + + operations = [ + migrations.RemoveField( + model_name="mmdb", + name="creator", + ), + migrations.RemoveField( + model_name="mmdb", + name="updated_by", + ), + migrations.RemoveField( + model_name="robotuseragent", + name="creator", + ), + migrations.RemoveField( + model_name="robotuseragent", + name="updated_by", + ), + migrations.AddField( + model_name="robotuseragent", + name="is_active", + field=models.BooleanField( + db_index=True, default=True, verbose_name="Active" + ), + ), + migrations.AddField( + model_name="robotuseragent", + name="source_counter", + field=models.BooleanField( + db_index=True, default=False, verbose_name="From Atmire/COUNTER" + ), + ), + migrations.AddField( + model_name="robotuseragent", + name="source_scielo", + field=models.BooleanField( + db_index=True, default=False, verbose_name="From SciELO" + ), + ), + migrations.AddField( + model_name="robotuseragent", + name="source_url", + field=models.URLField( + blank=True, max_length=255, null=True, verbose_name="Source URL" + ), + ), + migrations.AlterField( + model_name="robotuseragent", + name="last_changed", + field=models.DateField(blank=True, null=True, verbose_name="Last Changed"), + ), + ] diff --git a/resources/models.py b/resources/models.py index a30b8d3..22663e2 100644 --- a/resources/models.py +++ b/resources/models.py @@ -2,11 +2,26 @@ from django.db import models from django.utils.translation import gettext_lazy as _ +from wagtail.admin.panels import FieldPanel -from core.models import CommonControlField +class RobotUserAgent(models.Model): + SOURCE_ALL = "all" + SOURCE_COUNTER = "counter" + SOURCE_SCIELO = "scielo" + SOURCE_CHOICES = [SOURCE_ALL, SOURCE_COUNTER, SOURCE_SCIELO] + panels = [ + FieldPanel("pattern"), + FieldPanel("source_counter"), + FieldPanel("source_scielo"), + FieldPanel("is_active"), + FieldPanel("source_url"), + FieldPanel("last_changed"), + ] + + created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True) + updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True) -class RobotUserAgent(CommonControlField): 
pattern = models.CharField( verbose_name=_('Pattern'), max_length=255, @@ -14,21 +29,77 @@ class RobotUserAgent(CommonControlField): blank=False, primary_key=True, ) + source_counter = models.BooleanField( + verbose_name=_("From Atmire/COUNTER"), + default=False, + db_index=True, + ) + source_scielo = models.BooleanField( + verbose_name=_("From SciELO"), + default=False, + db_index=True, + ) + is_active = models.BooleanField( + verbose_name=_("Active"), + default=True, + db_index=True, + ) + source_url = models.URLField( + verbose_name=_("Source URL"), + max_length=255, + null=True, + blank=True, + ) last_changed = models.DateField( verbose_name=_('Last Changed'), - null=False, - blank=False, + null=True, + blank=True, ) @classmethod def get_all_patterns(cls): - return cls.objects.values_list('pattern', flat=True) + return cls.get_patterns(source=cls.SOURCE_ALL) + + @classmethod + def normalize_source(cls, source=None): + normalized = (source or cls.SOURCE_ALL).lower() + if normalized not in cls.SOURCE_CHOICES: + raise ValueError(f"Unsupported robots source: {source}") + return normalized + + @classmethod + def get_patterns(cls, source=None): + source = cls.normalize_source(source) + queryset = cls.objects.filter(is_active=True) + + if source == cls.SOURCE_COUNTER: + queryset = queryset.filter(source_counter=True) + elif source == cls.SOURCE_SCIELO: + queryset = queryset.filter(source_scielo=True) + + return queryset.values_list("pattern", flat=True) + + @property + def source_labels(self): + labels = [] + if self.source_counter: + labels.append("Atmire/COUNTER") + if self.source_scielo: + labels.append("SciELO") + return ", ".join(labels) or "-" + + def save(self, *args, **kwargs): + if not self.source_counter and not self.source_scielo: + self.source_scielo = True + super().save(*args, **kwargs) def __str__(self): return self.pattern -class MMDB(CommonControlField): +class MMDB(models.Model): + created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True) + updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True) id = models.CharField( verbose_name=_('ID (HASH)'), max_length=64, diff --git a/resources/tasks.py b/resources/tasks.py index e67cea1..4df60a9 100644 --- a/resources/tasks.py +++ b/resources/tasks.py @@ -1,19 +1,13 @@ import logging -from django.contrib.auth import get_user_model -from django.utils import timezone from django.utils.translation import gettext as _ from config import celery_app -from core.utils.utils import _get_user from . import constants, models, utils - -User = get_user_model() - -@celery_app.task(bind=True, name=_('Load robots data')) -def task_load_robots(self, url_robots=None, user_id=None, username=None): +@celery_app.task(bind=True, name=_('[Resources] Load Robots Data')) +def task_load_robots(self, url_robots=None): """ Load robots from a given URL and save them to the database. This function fetches robot data from a specified URL (or a default URL if none is provided), @@ -32,8 +26,6 @@ def task_load_robots(self, url_robots=None, user_id=None, username=None): - Error if there is an issue downloading or saving the robots. - Debug information for each robot saved. """ - user = _get_user(self.request, username=username, user_id=user_id) - if not url_robots: url_robots = constants.DEFAULT_COUNTER_ROBOTS_URL logging.warning(f'No robots URL provided. 
Using default: {url_robots}') @@ -45,43 +37,63 @@ def task_load_robots(self, url_robots=None, user_id=None, username=None): return False cleaned_robots_data = utils.clean_robots_list(robots_data) + fetched_patterns = set() try: for r_str in cleaned_robots_data: pattern = r_str.get('pattern') last_changed = r_str.get('last_changed') + fetched_patterns.add(pattern) - r_obj, created = models.RobotUserAgent.objects.get_or_create(pattern=pattern, last_changed=last_changed) + r_obj = models.RobotUserAgent.objects.filter(pattern=pattern).first() + created = r_obj is None if created: - r_obj.creator = user - - r_obj.updated = timezone.now() - r_obj.updated_by = user + r_obj = models.RobotUserAgent( + pattern=pattern, + source_counter=True, + source_scielo=False, + ) + r_obj.source_counter = True + r_obj.is_active = True + r_obj.source_url = url_robots + r_obj.last_changed = last_changed r_obj.save() logging.debug(f'Robot saved: {r_obj}') + + stale_counter_patterns = models.RobotUserAgent.objects.filter( + source_counter=True + ).exclude(pattern__in=fetched_patterns) + + for r_obj in stale_counter_patterns: + r_obj.source_counter = False + r_obj.source_url = None + r_obj.last_changed = None + if not r_obj.source_scielo: + r_obj.is_active = False + r_obj.save() + logging.debug(f'Robot deactivated or detached from COUNTER source: {r_obj}') + return True except Exception as e: logging.error(f'Error saving robots: {e}') + return False -@celery_app.task(bind=True, name=_('Load geolocation and country data')) -def task_load_geoip(self, url_geoip=None, user_id=None, username=None, validate=True): +@celery_app.task(bind=True, name=_('[Resources] Load Geolocation Data')) +def task_load_geoip(self, url_geoip=None, validate=True): """ Load GeoIP data from a specified URL, validate it, and save it to the database. Args: url_geoip (str, optional): The URL to download the GeoIP data from. Defaults to None. - user_id (int, optional): The ID of the user performing the task. Defaults to None. - username (str, optional): The username of the user performing the task. Defaults to None. validate (bool, optional): Whether to validate the GeoIP data. Defaults to True. Returns: bool: True if the GeoIP data was successfully loaded and saved, False otherwise. Raises: Exception: If there is an error downloading, decompressing, or validating the GeoIP data. """ - user = _get_user(self.request, username=username, user_id=user_id) if not url_geoip: url_geoip = constants.DEFAULT_MMDB_URL @@ -115,10 +127,6 @@ def task_load_geoip(self, url_geoip=None, user_id=None, username=None, validate= except models.MMDB.DoesNotExist: mmdb_obj = models.MMDB.objects.create(id=mmdb_hash, data=mmdb_data) mmdb_obj.url = url_geoip or constants.DEFAULT_MMDB_URL - mmdb_obj.creator = user - - mmdb_obj.updated = timezone.now() - mmdb_obj.updated_by = user mmdb_obj.save() logging.debug(f'GeoIP data has been saved: {mmdb_obj}') diff --git a/resources/tests.py b/resources/tests.py deleted file mode 100644 index 7ce503c..0000000 --- a/resources/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. 
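
A minimal usage sketch of the reworked `RobotUserAgent` source filtering introduced above — illustrative only, not part of the changeset, and assuming nothing beyond the classmethods and `save()` behaviour visible in `resources/models.py`:

```python
# Illustrative sketch (not part of the diff): how callers are expected to
# consume the new source-aware robots API.
from resources.models import RobotUserAgent

# All active patterns, regardless of origin (replaces the old unconditional
# values_list over every row); source defaults to "all".
all_patterns = RobotUserAgent.get_patterns()

# Only patterns fetched from the Atmire/COUNTER list, or only the
# SciELO-maintained ones.
counter_patterns = RobotUserAgent.get_patterns(source="counter")
scielo_patterns = RobotUserAgent.get_patterns(source="scielo")

# Unknown sources are rejected up front by normalize_source().
try:
    RobotUserAgent.get_patterns(source="typo")
except ValueError as exc:
    print(exc)  # Unsupported robots source: typo

# A pattern created by hand (e.g. via the Wagtail snippet) with neither flag
# set is marked as SciELO-maintained by save(), and stays active.
bot = RobotUserAgent.objects.create(pattern="MyCrawler/1.0")
assert bot.source_scielo and bot.is_active
```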
diff --git a/resources/tests/__init__.py b/resources/tests/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/resources/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/resources/tests/test_robots.py b/resources/tests/test_robots.py new file mode 100644 index 0000000..4d6bf74 --- /dev/null +++ b/resources/tests/test_robots.py @@ -0,0 +1,113 @@ +from unittest.mock import patch + +from django.test import TestCase + +from resources import models, tasks + + +class RobotUserAgentModelTests(TestCase): + def test_manual_robot_defaults_to_scielo_source(self): + robot = models.RobotUserAgent.objects.create(pattern="CustomBot") + + robot.refresh_from_db() + + self.assertFalse(robot.source_counter) + self.assertTrue(robot.source_scielo) + self.assertTrue(robot.is_active) + self.assertEqual(robot.source_labels, "SciELO") + + def test_get_all_patterns_only_returns_active_patterns(self): + active = models.RobotUserAgent.objects.create( + pattern="ActiveBot", + source_scielo=True, + is_active=True, + ) + models.RobotUserAgent.objects.create( + pattern="InactiveBot", + source_scielo=True, + is_active=False, + ) + + self.assertListEqual(list(models.RobotUserAgent.get_all_patterns()), [active.pattern]) + + def test_get_patterns_can_filter_by_source(self): + counter_only = models.RobotUserAgent.objects.create( + pattern="CounterOnlyBot", + source_counter=True, + source_scielo=False, + is_active=True, + ) + shared = models.RobotUserAgent.objects.create( + pattern="SharedBot", + source_counter=True, + source_scielo=True, + is_active=True, + ) + scielo_only = models.RobotUserAgent.objects.create( + pattern="ScieloOnlyBot", + source_counter=False, + source_scielo=True, + is_active=True, + ) + + self.assertCountEqual( + list(models.RobotUserAgent.get_patterns(source="counter")), + [counter_only.pattern, shared.pattern], + ) + self.assertCountEqual( + list(models.RobotUserAgent.get_patterns(source="scielo")), + [shared.pattern, scielo_only.pattern], + ) + + def test_get_patterns_rejects_invalid_source(self): + with self.assertRaises(ValueError): + list(models.RobotUserAgent.get_patterns(source="invalid")) + + +class LoadRobotsTaskTests(TestCase): + + @patch("resources.tasks.utils.fetch_data") + def test_task_load_robots_marks_counter_source_and_deactivates_stale_counter_entries( + self, + mock_fetch_data, + ): + mock_fetch_data.return_value = [ + {"pattern": "CounterBot", "last_changed": "2025-01-15"}, + {"pattern": "SharedBot", "last_changed": "2025-01-20"}, + ] + + stale_counter = models.RobotUserAgent.objects.create( + pattern="OldCounterBot", + source_counter=True, + is_active=True, + last_changed="2024-12-01", + source_url="https://old.example.org/robots.json", + ) + shared_bot = models.RobotUserAgent.objects.create( + pattern="SharedBot", + source_scielo=True, + is_active=True, + ) + + result = tasks.task_load_robots.run( + url_robots="https://counter.example.org/robots.json", + ) + + self.assertTrue(result) + + counter_bot = models.RobotUserAgent.objects.get(pattern="CounterBot") + self.assertTrue(counter_bot.source_counter) + self.assertFalse(counter_bot.source_scielo) + self.assertTrue(counter_bot.is_active) + self.assertEqual(counter_bot.source_url, "https://counter.example.org/robots.json") + + shared_bot.refresh_from_db() + self.assertTrue(shared_bot.source_counter) + self.assertTrue(shared_bot.source_scielo) + self.assertTrue(shared_bot.is_active) + + stale_counter.refresh_from_db() + self.assertFalse(stale_counter.source_counter) + self.assertFalse(stale_counter.is_active) + 
self.assertIsNone(stale_counter.source_url) + self.assertIsNone(stale_counter.last_changed) diff --git a/resources/wagtail_hooks.py b/resources/wagtail_hooks.py index 758bb53..c347b22 100644 --- a/resources/wagtail_hooks.py +++ b/resources/wagtail_hooks.py @@ -15,13 +15,25 @@ class RobotUserAgentSnippetViewSet(SnippetViewSet): list_display = ( "pattern", + "source_labels", + "is_active", "last_changed", ) search_fields = ( "pattern", + "source_url", + ) + list_filter = ( + "source_counter", + "source_scielo", + "is_active", ) list_export = ( "pattern", + "source_counter", + "source_scielo", + "is_active", + "source_url", "last_changed", ) export_filename = "robots" diff --git a/source/__init__.py b/source/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/source/__init__.py @@ -0,0 +1 @@ + diff --git a/article/apps.py b/source/apps.py similarity index 63% rename from article/apps.py rename to source/apps.py index 8c0e2c9..06d886d 100644 --- a/article/apps.py +++ b/source/apps.py @@ -1,6 +1,6 @@ from django.apps import AppConfig -class ArticleConfig(AppConfig): +class SourceConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" - name = "article" + name = "source" diff --git a/source/migrations/0001_initial.py b/source/migrations/0001_initial.py new file mode 100644 index 0000000..cc736e3 --- /dev/null +++ b/source/migrations/0001_initial.py @@ -0,0 +1,210 @@ +# Generated by Django 5.0.7 on 2026-03-15 00:00 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("collection", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="Source", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, + verbose_name="Creation date", + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, + verbose_name="Last update date", + ), + ), + ( + "source_type", + models.CharField( + choices=[ + ("journal", "Journal"), + ("book", "Book"), + ("preprint_server", "Preprint Server"), + ("data_repository", "Data Repository"), + ("other", "Other"), + ], + db_index=True, + max_length=32, + verbose_name="Source Type", + ), + ), + ( + "source_id", + models.CharField( + db_index=True, + max_length=255, + verbose_name="Source ID", + ), + ), + ( + "scielo_issn", + models.CharField( + blank=True, + db_index=True, + max_length=9, + null=True, + verbose_name="SciELO ISSN", + ), + ), + ( + "acronym", + models.CharField( + blank=True, + default="", + max_length=64, + null=True, + verbose_name="Source Acronym", + ), + ), + ( + "title", + models.CharField( + max_length=255, + verbose_name="Source Title", + ), + ), + ( + "identifiers", + models.JSONField( + blank=True, + default=dict, + null=True, + verbose_name="Identifiers", + ), + ), + ( + "publisher_name", + models.JSONField( + blank=True, + default=list, + null=True, + verbose_name="Publisher Name", + ), + ), + ( + "subject_areas", + models.JSONField( + default=list, + verbose_name="Subject Areas (CAPES)", + ), + ), + ( + "wos_subject_areas", + models.JSONField( + default=list, + verbose_name="Subject Areas (WoS)", + ), + ), + ( + "default_lang", + models.CharField( + blank=True, + max_length=8, + null=True, + verbose_name="Default Language", + ), + ), + ( + 
"publication_date", + models.CharField( + blank=True, + max_length=32, + null=True, + verbose_name="Publication Date", + ), + ), + ( + "publication_year", + models.CharField( + blank=True, + db_index=True, + max_length=4, + null=True, + verbose_name="Publication Year", + ), + ), + ( + "extra_data", + models.JSONField( + blank=True, + default=dict, + null=True, + verbose_name="Extra Data", + ), + ), + ( + "collection", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="collection.collection", + verbose_name="Collection", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "verbose_name": "Source", + "verbose_name_plural": "Sources", + "unique_together": {("collection", "source_type", "source_id")}, + "indexes": [ + models.Index( + fields=["collection", "source_type"], + name="source_collection_type_idx", + ), + models.Index( + fields=["collection", "scielo_issn"], + name="source_collection_issn_idx", + ), + ], + }, + ), + ] diff --git a/source/migrations/0002_source_access_type.py b/source/migrations/0002_source_access_type.py new file mode 100644 index 0000000..e148c15 --- /dev/null +++ b/source/migrations/0002_source_access_type.py @@ -0,0 +1,25 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("source", "0001_initial"), + ] + + operations = [ + migrations.AddField( + model_name="source", + name="access_type", + field=models.CharField( + blank=True, + choices=[ + ("open_access", "Open Access"), + ("commercial", "Commercial"), + ], + db_index=True, + max_length=32, + null=True, + verbose_name="Access Type", + ), + ), + ] diff --git a/source/migrations/0003_alter_source_title.py b/source/migrations/0003_alter_source_title.py new file mode 100644 index 0000000..354a82a --- /dev/null +++ b/source/migrations/0003_alter_source_title.py @@ -0,0 +1,15 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("source", "0002_source_access_type"), + ] + + operations = [ + migrations.AlterField( + model_name="source", + name="title", + field=models.CharField(max_length=500, verbose_name="Source Title"), + ), + ] diff --git a/source/migrations/__init__.py b/source/migrations/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/source/migrations/__init__.py @@ -0,0 +1 @@ + diff --git a/source/models.py b/source/models.py new file mode 100644 index 0000000..48d3e00 --- /dev/null +++ b/source/models.py @@ -0,0 +1,219 @@ +from django.db import models +from django.utils.translation import gettext_lazy as _ + +from collection.models import Collection +from core.models import CommonControlField + + +class Source(CommonControlField): + SOURCE_TYPE_JOURNAL = "journal" + SOURCE_TYPE_BOOK = "book" + SOURCE_TYPE_PREPRINT_SERVER = "preprint_server" + SOURCE_TYPE_DATA_REPOSITORY = "data_repository" + SOURCE_TYPE_OTHER = "other" + SOURCE_TYPE_CHOICES = ( + (SOURCE_TYPE_JOURNAL, _("Journal")), + (SOURCE_TYPE_BOOK, _("Book")), + (SOURCE_TYPE_PREPRINT_SERVER, _("Preprint Server")), + (SOURCE_TYPE_DATA_REPOSITORY, _("Data 
Repository")), + (SOURCE_TYPE_OTHER, _("Other")), + ) + + ACCESS_TYPE_OPEN_ACCESS = "open_access" + ACCESS_TYPE_COMMERCIAL = "commercial" + ACCESS_TYPE_CHOICES = ( + (ACCESS_TYPE_OPEN_ACCESS, _("Open Access")), + (ACCESS_TYPE_COMMERCIAL, _("Commercial")), + ) + + collection = models.ForeignKey( + Collection, + verbose_name=_("Collection"), + on_delete=models.CASCADE, + blank=False, + null=False, + db_index=True, + ) + + source_type = models.CharField( + verbose_name=_("Source Type"), + max_length=32, + choices=SOURCE_TYPE_CHOICES, + blank=False, + null=False, + db_index=True, + ) + + source_id = models.CharField( + verbose_name=_("Source ID"), + max_length=255, + blank=False, + null=False, + db_index=True, + ) + + scielo_issn = models.CharField( + verbose_name=_("SciELO ISSN"), + max_length=9, + blank=True, + null=True, + db_index=True, + ) + + acronym = models.CharField( + verbose_name=_("Source Acronym"), + max_length=64, + blank=True, + null=True, + default="", + ) + + title = models.CharField( + verbose_name=_("Source Title"), + max_length=500, + blank=False, + null=False, + ) + + identifiers = models.JSONField( + verbose_name=_("Identifiers"), + null=True, + blank=True, + default=dict, + ) + + publisher_name = models.JSONField( + verbose_name=_("Publisher Name"), + blank=True, + null=True, + default=list, + ) + + subject_areas = models.JSONField( + verbose_name=_("Subject Areas (CAPES)"), + null=False, + blank=False, + default=list, + ) + + wos_subject_areas = models.JSONField( + verbose_name=_("Subject Areas (WoS)"), + null=False, + blank=False, + default=list, + ) + + default_lang = models.CharField( + verbose_name=_("Default Language"), + max_length=8, + blank=True, + null=True, + ) + + publication_date = models.CharField( + verbose_name=_("Publication Date"), + max_length=32, + blank=True, + null=True, + ) + + publication_year = models.CharField( + verbose_name=_("Publication Year"), + max_length=4, + blank=True, + null=True, + db_index=True, + ) + + access_type = models.CharField( + verbose_name=_("Access Type"), + max_length=32, + choices=ACCESS_TYPE_CHOICES, + blank=True, + null=True, + db_index=True, + ) + + extra_data = models.JSONField( + verbose_name=_("Extra Data"), + null=True, + blank=True, + default=dict, + ) + + def __str__(self): + return f"{self.collection.acron3} - {self.source_type} - {self.source_id}" + + @staticmethod + def _extract_issns(identifiers): + if not isinstance(identifiers, dict): + return set() + + return { + value + for key, value in identifiers.items() + if value and "issn" in str(key).lower() + } + + @classmethod + def metadata(cls, collection=None): + queryset = cls.objects.select_related("collection").only( + "acronym", + "collection__acron3", + "default_lang", + "extra_data", + "identifiers", + "publication_date", + "publication_year", + "access_type", + "publisher_name", + "scielo_issn", + "source_id", + "source_type", + "subject_areas", + "title", + "wos_subject_areas", + ) + + if collection: + queryset = queryset.filter(collection=collection) + + for source in queryset.iterator(): + identifiers = source.identifiers or {} + yield { + "acronym": source.acronym, + "collection": source.collection.acron3, + "default_lang": source.default_lang, + "extra_data": source.extra_data or {}, + "identifiers": identifiers, + "issns": cls._extract_issns(identifiers), + "publication_date": source.publication_date, + "publication_year": source.publication_year, + "access_type": source.access_type, + "publisher_name": source.publisher_name or [], + 
"scielo_issn": source.scielo_issn, + "source_id": source.source_id, + "source_type": source.source_type, + "subject_areas": source.subject_areas or [], + "title": source.title, + "wos_subject_areas": source.wos_subject_areas or [], + } + + class Meta: + verbose_name = _("Source") + verbose_name_plural = _("Sources") + unique_together = ( + "collection", + "source_type", + "source_id", + ) + indexes = [ + models.Index( + fields=["collection", "source_type"], + name="source_collection_type_idx", + ), + models.Index( + fields=["collection", "scielo_issn"], + name="source_collection_issn_idx", + ), + ] diff --git a/source/services/__init__.py b/source/services/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/source/services/__init__.py @@ -0,0 +1 @@ + diff --git a/source/services/books.py b/source/services/books.py new file mode 100644 index 0000000..df9bd4d --- /dev/null +++ b/source/services/books.py @@ -0,0 +1,137 @@ +from collection.models import Collection +from source.models import Source + + +BOOKS_COLLECTION_ACRONYM = "books" + + +def get_books_collection(acronym=BOOKS_COLLECTION_ACRONYM): + return Collection.objects.get(acron3=acronym) + + +def upsert_monograph_source( + payload, + collection, + user=None, + force_update=True, + source_url=None, + last_seq=None, +): + if payload.get("TYPE") != "Monograph": + return None + + source, created = Source.objects.get_or_create( + collection=collection, + source_type=Source.SOURCE_TYPE_BOOK, + source_id=str(payload.get("id")), + ) + + if created and user: + source.creator = user + + if created or force_update: + source.scielo_issn = None + source.acronym = "" + source.title = payload.get("title") or str(payload.get("id")) + source.identifiers = _build_source_identifiers(payload) + source.publisher_name = _as_list(payload.get("publisher")) + source.subject_areas = [] + source.wos_subject_areas = [] + source.default_lang = payload.get("language") or None + source.publication_date = payload.get("publication_date") or None + source.publication_year = _normalize_year(payload.get("year")) + source.access_type = _normalize_access_type(payload.get("is_comercial")) + source.extra_data = _build_source_extra_data( + payload, + source_url=source_url, + last_seq=last_seq, + ) + + if user: + source.updated_by = user + + source.save() + return source + + +def delete_book_source(collection, book_id): + return Source.objects.filter( + collection=collection, + source_type=Source.SOURCE_TYPE_BOOK, + source_id=str(book_id), + ).delete() + + +def _build_source_identifiers(payload): + identifiers = { + "book_id": str(payload.get("id")) if payload.get("id") is not None else None, + "isbn": payload.get("isbn"), + "eisbn": payload.get("eisbn"), + "doi": payload.get("doi_number"), + } + return _compact_dict(identifiers) + + +def _build_source_extra_data(payload, source_url=None, last_seq=None): + extra_data = { + "raw_type": payload.get("TYPE"), + "source_url": source_url, + "last_seq": last_seq, + "visible": payload.get("visible"), + "city": payload.get("city"), + "country": payload.get("country"), + "pages": payload.get("pages"), + "collection_data": payload.get("collection"), + "creators": payload.get("creators"), + "is_comercial": payload.get("is_comercial"), + "use_licence": payload.get("use_licence"), + "price_reais": payload.get("price_reais"), + "price_dollar": payload.get("price_dollar"), + "shopping_info": payload.get("shopping_info"), + "serie": payload.get("serie"), + "format": payload.get("format"), + "translated_titles": 
payload.get("translated_titles"), + "translated_synopses": payload.get("translated_synopses"), + "synopsis": payload.get("synopsis"), + "primary_descriptor": payload.get("primary_descriptor"), + "translated_primary_descriptors": payload.get("translated_primary_descriptors"), + } + return _compact_dict(extra_data) + + +def _as_list(value): + if not value: + return [] + + if isinstance(value, list): + return value + + return [value] + + +def _normalize_year(value): + if value in (None, ""): + return None + return str(value)[:4] + + +def _normalize_access_type(value): + if value in (None, ""): + return None + + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"true", "1", "yes", "y", "sim"}: + return Source.ACCESS_TYPE_COMMERCIAL + if normalized in {"false", "0", "no", "n", "nao", "não"}: + return Source.ACCESS_TYPE_OPEN_ACCESS + + return Source.ACCESS_TYPE_COMMERCIAL if bool(value) else Source.ACCESS_TYPE_OPEN_ACCESS + + +def _compact_dict(data): + return { + key: value + for key, value in data.items() + if value not in (None, "", [], {}, ()) + } diff --git a/source/services/journals.py b/source/services/journals.py new file mode 100644 index 0000000..ac133f6 --- /dev/null +++ b/source/services/journals.py @@ -0,0 +1,118 @@ +from django.db.models import Q + +from collection.models import Collection +from source.models import Source + + +def get_collection(acronym): + return Collection.objects.filter(acron3=acronym).first() + + +def upsert_journal_source( + journal, + collection, + user=None, + force_update=True, + load_mode=None, +): + scielo_issn = _value(journal, "scielo_issn") + if not scielo_issn: + return None + + source, created = Source.objects.get_or_create( + collection=collection, + source_type=Source.SOURCE_TYPE_JOURNAL, + source_id=scielo_issn, + ) + + if created and user: + source.creator = user + + if created or force_update: + source.scielo_issn = scielo_issn + source.acronym = _value(journal, "acronym") or "" + source.title = _value(journal, "title") or scielo_issn + source.identifiers = _build_source_identifiers(journal) + source.publisher_name = _as_list(_value(journal, "publisher_name")) + source.subject_areas = _as_list(_value(journal, "subject_areas")) + source.wos_subject_areas = _as_list(_value(journal, "wos_subject_areas")) + source.default_lang = None + source.publication_date = None + source.publication_year = None + source.extra_data = _compact_dict( + { + "collection_acronym": _value(journal, "collection_acronym"), + "load_mode": load_mode, + } + ) + + if user: + source.updated_by = user + + source.save() + return source + + +def find_journal_source_by_issns(collection, issns): + for issn in filter(None, issns or []): + source = ( + Source.objects.filter( + collection=collection, + source_type=Source.SOURCE_TYPE_JOURNAL, + ) + .filter( + Q(scielo_issn=issn) + | Q(source_id=issn) + | Q(identifiers__electronic_issn=issn) + | Q(identifiers__print_issn=issn) + | Q(identifiers__scielo_issn=issn) + ) + .first() + ) + if source: + return source + return None + + +def find_journal_source_by_acronym(collection, acronym): + if not acronym: + return None + + return Source.objects.filter( + collection=collection, + source_type=Source.SOURCE_TYPE_JOURNAL, + acronym=acronym, + ).first() + + +def _build_source_identifiers(journal): + identifiers = { + "electronic_issn": _value(journal, "electronic_issn"), + "print_issn": _value(journal, "print_issn"), + "scielo_issn": _value(journal, "scielo_issn"), + } + return _compact_dict(identifiers) + + 
+def _as_list(value): + if not value: + return [] + + if isinstance(value, list): + return value + + return [value] + + +def _value(data, key, default=None): + if isinstance(data, dict): + return data.get(key, default) + return getattr(data, key, default) + + +def _compact_dict(data): + return { + key: value + for key, value in data.items() + if value not in (None, "", [], {}, ()) + } diff --git a/source/tasks.py b/source/tasks.py new file mode 100644 index 0000000..eb1633b --- /dev/null +++ b/source/tasks.py @@ -0,0 +1,148 @@ +import logging + +from django.utils.translation import gettext as _ +from django.conf import settings + +from collection.models import Collection +from config import celery_app +from core.collectors import articlemeta as articlemeta_collector +from core.collectors import scielo_books as scielo_books_collector +from core.utils.request_utils import _get_user +from source.services import books as books_service +from source.services import journals as journal_service + + +def load_sources_from_article_meta( + collections=None, + force_update=True, + user=None, + mode="thrift", +): + collection_codes = collections or Collection.acron3_list() + + for collection_code in collection_codes: + logging.info( + "Loading sources from Article Meta. Collection: %s, Mode: %s", + collection_code, + mode, + ) + + for journal in articlemeta_collector.iter_journals( + collection=collection_code, + mode=mode, + ): + collection = journal_service.get_collection(journal.collection_acronym) + if not collection: + logging.error( + "Collection %s does not exist", + journal.collection_acronym, + ) + continue + + source = journal_service.upsert_journal_source( + journal, + collection=collection, + user=user, + force_update=force_update, + load_mode=mode, + ) + logging.info( + "Source %s upserted for collection %s", + source.source_id if source else None, + collection.acron3, + ) + + return True + + +def load_sources_from_scielo_books( + collection="books", + db_name=settings.SCIELO_BOOKS_DB_NAME, + since=0, + limit=settings.SCIELO_BOOKS_LIMIT, + force_update=True, + headers=None, + base_url=None, + user=None, +): + collection_obj = books_service.get_books_collection(collection) + + logging.info( + "Loading sources from SciELO Books. 
Collection: %s, DB: %s, Since: %s, Limit: %s", + collection, + db_name, + since, + limit, + ) + + for item in scielo_books_collector.iter_change_documents( + base_url=base_url, + db_name=db_name, + since=since, + limit=limit, + headers=headers, + ): + change = item["change"] + + if item["deleted"]: + books_service.delete_book_source(collection_obj, change.get("id")) + continue + + payload = item["payload"] or {} + if payload.get("TYPE") != "Monograph": + continue + + books_service.upsert_monograph_source( + payload, + collection=collection_obj, + user=user, + force_update=force_update, + source_url=item.get("source_url"), + last_seq=change.get("seq"), + ) + + return True + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Sources (Article Meta)"), queue="load") +def task_load_sources_from_article_meta( + self, + collections=None, + force_update=True, + user_id=None, + username=None, + mode="thrift", +): + user = _get_user(self.request, username=username, user_id=user_id) + return load_sources_from_article_meta( + collections=collections, + force_update=force_update, + user=user, + mode=mode, + ) + + +@celery_app.task(bind=True, name=_("[Metadata] Sync Sources (SciELO Books)"), queue="load") +def task_load_sources_from_scielo_books( + self, + collection="books", + db_name=settings.SCIELO_BOOKS_DB_NAME, + since=0, + limit=settings.SCIELO_BOOKS_LIMIT, + force_update=True, + headers=None, + base_url=None, + user_id=None, + username=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + return load_sources_from_scielo_books( + collection=collection, + db_name=db_name, + since=since, + limit=limit, + force_update=force_update, + headers=headers, + base_url=base_url, + user=user, + ) diff --git a/source/tests.py b/source/tests.py new file mode 100644 index 0000000..a182f4e --- /dev/null +++ b/source/tests.py @@ -0,0 +1,133 @@ +from django.test import TestCase + +from collection.models import Collection + +from .models import Source +from .services import books as books_service +from .services import journals as journal_service + + +class SourceMetadataTests(TestCase): + def test_source_type_choices_include_scielo_non_journal_sources(self): + self.assertIn( + (Source.SOURCE_TYPE_PREPRINT_SERVER, "Preprint Server"), + [(value, str(label)) for value, label in Source.SOURCE_TYPE_CHOICES], + ) + self.assertIn( + (Source.SOURCE_TYPE_DATA_REPOSITORY, "Data Repository"), + [(value, str(label)) for value, label in Source.SOURCE_TYPE_CHOICES], + ) + + def test_metadata_exposes_generic_and_journal_fields(self): + collection = Collection.objects.create(acron3="scl", acron2="sc") + Source.objects.create( + collection=collection, + source_type=Source.SOURCE_TYPE_JOURNAL, + source_id="1234-5678", + scielo_issn="1234-5678", + acronym="testjou", + title="Test Journal", + identifiers={ + "electronic_issn": "1234-5678", + "print_issn": "8765-4321", + "doi": "10.1590/example", + }, + publisher_name=["SciELO"], + subject_areas=["Health Sciences"], + wos_subject_areas=["Medicine"], + default_lang="en", + publication_date="2024-01-15", + publication_year="2024", + extra_data={"country": "BR"}, + ) + + metadata = list(Source.metadata(collection=collection)) + + self.assertEqual(len(metadata), 1) + self.assertEqual(metadata[0]["source_type"], Source.SOURCE_TYPE_JOURNAL) + self.assertEqual(metadata[0]["source_id"], "1234-5678") + self.assertEqual(metadata[0]["scielo_issn"], "1234-5678") + self.assertEqual(metadata[0]["issns"], {"1234-5678", "8765-4321"}) + 
self.assertEqual(metadata[0]["title"], "Test Journal") + + def test_upsert_monograph_source_maps_scielo_books_payload(self): + collection = Collection.objects.create(acron3="books", acron2="bk") + + source = books_service.upsert_monograph_source( + { + "TYPE": "Monograph", + "id": "abcd1", + "title": "Sample Book", + "isbn": "9788578791889", + "eisbn": "9788578791880", + "doi_number": "10.1234/book", + "language": "pt", + "publication_date": "2024-05-20", + "year": "2024", + "publisher": "SciELO Books", + "is_comercial": False, + "visible": True, + }, + collection=collection, + ) + + self.assertEqual(source.source_type, Source.SOURCE_TYPE_BOOK) + self.assertEqual(source.source_id, "abcd1") + self.assertEqual(source.identifiers["isbn"], "9788578791889") + self.assertEqual(source.default_lang, "pt") + self.assertEqual(source.publication_year, "2024") + self.assertEqual(source.access_type, Source.ACCESS_TYPE_OPEN_ACCESS) + + def test_upsert_monograph_source_accepts_long_real_world_title(self): + collection = Collection.objects.create(acron3="books", acron2="bk") + title = ( + "O Estado da Arte sobre Refugiados, Deslocados Internos, " + "Deslocados Ambientais e Apatridas no Brasil: atualizacao do " + "Diretorio Nacional do ACNUR de teses, dissertacoes, trabalhos " + "de conclusao de curso de graduacao em Joao Pessoa (Paraiba) e " + "artigos (2007 a 2017)" + ) + + source = books_service.upsert_monograph_source( + { + "TYPE": "Monograph", + "id": "9zzts", + "title": title, + }, + collection=collection, + ) + + self.assertEqual(source.title, title) + + def test_upsert_journal_source_maps_articlemeta_payload(self): + collection = Collection.objects.create(acron3="scl", acron2="sc") + + source = journal_service.upsert_journal_source( + { + "collection_acronym": "scl", + "scielo_issn": "1234-5678", + "electronic_issn": "1234-5678", + "print_issn": "8765-4321", + "acronym": "testjou", + "title": "Test Journal", + "publisher_name": "SciELO", + "subject_areas": ["Health Sciences"], + "wos_subject_areas": ["Medicine"], + }, + collection=collection, + load_mode="thrift", + ) + + self.assertEqual(source.source_type, Source.SOURCE_TYPE_JOURNAL) + self.assertEqual(source.source_id, "1234-5678") + self.assertEqual(source.identifiers["electronic_issn"], "1234-5678") + self.assertEqual(source.publisher_name, ["SciELO"]) + self.assertEqual(source.extra_data["load_mode"], "thrift") + self.assertEqual( + journal_service.find_journal_source_by_issns(collection, ["8765-4321"]).pk, + source.pk, + ) + self.assertEqual( + journal_service.find_journal_source_by_acronym(collection, "testjou").pk, + source.pk, + ) diff --git a/source/wagtail_hooks.py b/source/wagtail_hooks.py new file mode 100644 index 0000000..5ffad62 --- /dev/null +++ b/source/wagtail_hooks.py @@ -0,0 +1,32 @@ +from django.utils.translation import gettext_lazy as _ +from wagtail.snippets.views.snippets import SnippetViewSet + +from .models import Source + + +class SourceSnippetViewSet(SnippetViewSet): + model = Source + icon = "folder-open-inverse" + menu_label = _("Source") + menu_order = 200 + + list_display = ( + "collection", + "source_type", + "source_id", + "scielo_issn", + "acronym", + "title", + "publication_year", + ) + list_filter = ( + "collection", + "source_type", + "publication_year", + ) + search_fields = ( + "source_id", + "scielo_issn", + "acronym", + "title", + ) diff --git a/start-dev.sh b/start-dev.sh deleted file mode 100644 index 92d064a..0000000 --- a/start-dev.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -# Change this value 
to the local ethernet. -ethernet=wlp0s20f3 - -# Linux IP. -export IP=$(/sbin/ip -o -4 addr list $ethernet | awk '{print $4}' | cut -d/ -f1) - -# Mac OS IP. -#export IP=$(ifconfig $ethernet | grep inet | grep -v inet6 | awk '{print $2}') - -export DATABASE_URL=postgres://GVRFlLmcCNfGLhsFvSnCioYOPJPYpyfj:BQ4hSUL4rdj5WZLdR8ilDLRQMvCtzo0caMaXDO0olGsmycQjlcZlTVK9DepZR8kk@$IP:5432/scielo_core -export CELERY_BROKER_URL=redis://$IP:6379/0 -export USE_DOCKER=no -export IPYTHONDIR=/app/.ipython -export REDIS_URL=redis://$IP:6379/0 -export CELERY_FLOWER_USER=PhFRdLexbrsBvrrbSXxjcMMOcVOavCrZ -export CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xbVYZbrwR -export EMAIL_HOST=$IP -export SOLR_URL=http://$IP:8983/solr/ - - -docker stop scielo_core_local_django -# workon scms -python manage.py runserver_plus 0.0.0.0:8000 diff --git a/tracker/choices.py b/tracker/choices.py index e2c80e2..dfc562c 100644 --- a/tracker/choices.py +++ b/tracker/choices.py @@ -1,54 +1,16 @@ from django.utils.translation import gettext_lazy as _ -ERROR = "ERROR" -EXCEPTION = "EXCEPTION" -INFO = "INFO" -WARNING = "WARNING" - -EVENT_MSG_TYPE = [ - (ERROR, _("error")), - (WARNING, _("warning")), - (INFO, _("info")), - (EXCEPTION, _("exception")), -] - - -PROGRESS_STATUS_IGNORED = "IGNORED" -PROGRESS_STATUS_REPROC = "REPROC" -PROGRESS_STATUS_TODO = "TODO" -PROGRESS_STATUS_DOING = "DOING" -PROGRESS_STATUS_DONE = "DONE" -PROGRESS_STATUS_PENDING = "PENDING" - -PROGRESS_STATUS = ( - (PROGRESS_STATUS_REPROC, _("To reprocess")), - (PROGRESS_STATUS_TODO, _("To do")), - (PROGRESS_STATUS_DONE, _("Done")), - (PROGRESS_STATUS_DOING, _("Doing")), - (PROGRESS_STATUS_PENDING, _("Pending")), - (PROGRESS_STATUS_IGNORED, _("ignored")), -) - LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA = 'MET' -LOG_FILE_DISCARDED_LINE_REASON_MISSING_ARTICLE = 'ART' -LOG_FILE_DISCARDED_LINE_REASON_MISSING_JOURNAL = 'JOU' +LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT = 'DOC' +LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE = 'SRC' LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION = 'URL' LOG_FILE_DISCARDED_LINE_REASON_DATABASE_ERROR = 'DBE' LOG_FILE_DISCARDED_LINE_REASON = [ (LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA, _("Missing Metadata")), - (LOG_FILE_DISCARDED_LINE_REASON_MISSING_ARTICLE, _("Missing PIDv2 or PIDv3 or PID Generic")), - (LOG_FILE_DISCARDED_LINE_REASON_MISSING_JOURNAL, _("Missing ISSN")), + (LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT, _("Missing Document")), + (LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE, _("Missing Source")), (LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION, _("URL Translation")), (LOG_FILE_DISCARDED_LINE_REASON_DATABASE_ERROR, _("Database Error")), ] - - -ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED = 'MUL' -ARTICLE_EVENT_TYPE_DATA_ERROR = 'ERR' - -ARTICLE_EVENT_TYPE = [ - (ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, _("Multiple Articles Returned")), - (ARTICLE_EVENT_TYPE_DATA_ERROR, _("Data Error")), -] diff --git a/tracker/exceptions.py b/tracker/exceptions.py index 31ed8c8..9ef3267 100644 --- a/tracker/exceptions.py +++ b/tracker/exceptions.py @@ -1,26 +1,2 @@ -class ProcEventCreateError(Exception): - ... - -class UnexpectedEventCreateError(Exception): - ... - -class EventCreateError(Exception): - ... - -class EventReportCreateError(Exception): - ... - -class EventReportSaveFileError(Exception): - ... - -class EventReportCreateError(Exception): - ... - -class EventReportDeleteEventsError(Exception): - ... - class LogFileDiscardedLineCreateError(Exception): ... 
- -class ArticleEventError(Exception): - ... diff --git a/tracker/migrations/0001_initial.py b/tracker/migrations/0001_initial.py index f207722..04fdc35 100644 --- a/tracker/migrations/0001_initial.py +++ b/tracker/migrations/0001_initial.py @@ -1,13 +1,18 @@ -# Generated by Django 5.0.7 on 2024-08-30 00:52 +# Generated by Codex on 2026-04-27 +import django.db.models.deletion import uuid +from django.conf import settings from django.db import migrations, models class Migration(migrations.Migration): initial = True - dependencies = [] + dependencies = [ + ("log_manager", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] operations = [ migrations.CreateModel( @@ -24,21 +29,15 @@ class Migration(migrations.Migration): ), ( "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), + models.DateTimeField(auto_now_add=True, verbose_name="Creation date"), ), ( "exception_type", - models.TextField( - blank=True, null=True, verbose_name="Exception Type" - ), + models.TextField(blank=True, null=True, verbose_name="Exception Type"), ), ( "exception_msg", - models.TextField( - blank=True, null=True, verbose_name="Exception Msg" - ), + models.TextField(blank=True, null=True, verbose_name="Exception Msg"), ), ("traceback", models.JSONField(blank=True, null=True)), ("detail", models.JSONField(blank=True, null=True)), @@ -46,9 +45,148 @@ class Migration(migrations.Migration): options={ "indexes": [ models.Index( - fields=["exception_type"], name="tracker_une_excepti_47ede4_idx" + fields=["exception_type"], + name="tracker_une_excepti_47ede4_idx", ) ], }, ), + migrations.CreateModel( + name="ArticleEvent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField(auto_now_add=True, verbose_name="Creation date"), + ), + ( + "updated", + models.DateTimeField(auto_now=True, verbose_name="Last update date"), + ), + ( + "event_type", + models.CharField( + blank=True, + choices=[ + ("MUL", "Multiple Articles Returned"), + ("ERR", "Data Error"), + ], + max_length=3, + null=True, + verbose_name="Event Type", + ), + ), + ( + "message", + models.TextField(blank=True, null=True, verbose_name="Message"), + ), + ("data", models.JSONField(default=dict, verbose_name="Data")), + ("handled", models.BooleanField(default=False, verbose_name="Handled")), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + ), + migrations.CreateModel( + name="LogFileDiscardedLine", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField(auto_now_add=True, verbose_name="Creation date"), + ), + ( + "updated", + models.DateTimeField(auto_now=True, verbose_name="Last update date"), + ), + ( + "error_type", + models.CharField( + blank=True, + choices=[ + ("MET", "Missing Metadata"), + ("DOC", "Missing Document"), + ("SRC", "Missing Source"), + ("URL", "URL Translation"), + ("DBE", "Database Error"), + ], + max_length=3, + null=True, + 
verbose_name="Error Type", + ), + ), + ("data", models.JSONField(default=dict, verbose_name="Data")), + ( + "message", + models.TextField(blank=True, null=True, verbose_name="Message"), + ), + ("handled", models.BooleanField(default=False, verbose_name="Handled")), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "log_file", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="log_manager.logfile", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + ), ] diff --git a/tracker/migrations/0002_remove_articleevent_creator_and_more.py b/tracker/migrations/0002_remove_articleevent_creator_and_more.py new file mode 100644 index 0000000..ee23c85 --- /dev/null +++ b/tracker/migrations/0002_remove_articleevent_creator_and_more.py @@ -0,0 +1,38 @@ +# Generated by Django 5.2.12 on 2026-05-01 22:23 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("tracker", "0001_initial"), + ] + + operations = [ + migrations.RemoveField( + model_name="articleevent", + name="creator", + ), + migrations.RemoveField( + model_name="articleevent", + name="updated_by", + ), + migrations.DeleteModel( + name="UnexpectedEvent", + ), + migrations.RemoveField( + model_name="logfilediscardedline", + name="creator", + ), + migrations.RemoveField( + model_name="logfilediscardedline", + name="updated", + ), + migrations.RemoveField( + model_name="logfilediscardedline", + name="updated_by", + ), + migrations.DeleteModel( + name="ArticleEvent", + ), + ] diff --git a/tracker/migrations/0002_top100articlesfileevent.py b/tracker/migrations/0002_top100articlesfileevent.py deleted file mode 100644 index 230fb8a..0000000 --- a/tracker/migrations/0002_top100articlesfileevent.py +++ /dev/null @@ -1,93 +0,0 @@ -# Generated by Django 5.0.7 on 2024-08-30 21:52 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("metrics", "0002_alter_top100articlesfile_status"), - ("tracker", "0001_initial"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name="Top100ArticlesFileEvent", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), - ), - ( - "updated", - models.DateTimeField( - auto_now=True, verbose_name="Last update date" - ), - ), - ( - "status", - models.CharField( - blank=True, max_length=64, null=True, verbose_name="Status" - ), - ), - ( - "lines", - models.IntegerField( - blank=True, default=0, null=True, verbose_name="Lines" - ), - ), - ( - "message", - models.TextField(blank=True, null=True, verbose_name="Message"), - ), - ( - "creator", - models.ForeignKey( - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_creator", - to=settings.AUTH_USER_MODEL, - verbose_name="Creator", - ), - ), - ( - "file", - models.ForeignKey( - blank=True, - null=True, - 
on_delete=django.db.models.deletion.SET_NULL, - to="metrics.top100articlesfile", - ), - ), - ( - "updated_by", - models.ForeignKey( - blank=True, - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_last_mod_user", - to=settings.AUTH_USER_MODEL, - verbose_name="Updater", - ), - ), - ], - options={ - "verbose_name_plural": "Top 100 Article File Events", - }, - ), - ] diff --git a/tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py b/tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py deleted file mode 100644 index 6e37a9f..0000000 --- a/tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py +++ /dev/null @@ -1,98 +0,0 @@ -# Generated by Django 5.0.7 on 2025-03-07 16:55 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("log_manager", "0002_alter_collectionconfig_unique_together_and_more"), - ("tracker", "0002_top100articlesfileevent"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name="LogFileDiscardedLine", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "created", - models.DateTimeField( - auto_now_add=True, verbose_name="Creation date" - ), - ), - ( - "updated", - models.DateTimeField( - auto_now=True, verbose_name="Last update date" - ), - ), - ( - "error_type", - models.CharField( - blank=True, - choices=[ - ("MET", "Missing Metadata"), - ("ART", "Missing Article"), - ("JOU", "Missing Journal"), - ], - max_length=3, - null=True, - verbose_name="Error Type", - ), - ), - ("data", models.JSONField(default=dict, verbose_name="Data")), - ( - "message", - models.TextField(blank=True, null=True, verbose_name="Message"), - ), - ("handled", models.BooleanField(default=False, verbose_name="Handled")), - ( - "creator", - models.ForeignKey( - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_creator", - to=settings.AUTH_USER_MODEL, - verbose_name="Creator", - ), - ), - ( - "log_file", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="log_manager.logfile", - ), - ), - ( - "updated_by", - models.ForeignKey( - blank=True, - editable=False, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="%(class)s_last_mod_user", - to=settings.AUTH_USER_MODEL, - verbose_name="Updater", - ), - ), - ], - options={ - "abstract": False, - }, - ), - migrations.DeleteModel( - name="Top100ArticlesFileEvent", - ), - ] diff --git a/tracker/migrations/0004_alter_logfilediscardedline_error_type.py b/tracker/migrations/0004_alter_logfilediscardedline_error_type.py deleted file mode 100644 index 1061793..0000000 --- a/tracker/migrations/0004_alter_logfilediscardedline_error_type.py +++ /dev/null @@ -1,28 +0,0 @@ -# Generated by Django 5.0.7 on 2025-03-27 20:40 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("tracker", "0003_logfilediscardedline_delete_top100articlesfileevent"), - ] - - operations = [ - migrations.AlterField( - model_name="logfilediscardedline", - name="error_type", - field=models.CharField( - blank=True, - choices=[ - ("MET", "Missing Metadata"), - ("ART", "Missing Article"), - ("JOU", "Missing Journal"), - ("URL", "URL Translation"), - 
-                ],
-                max_length=3,
-                null=True,
-                verbose_name="Error Type",
-            ),
-        ),
-    ]
diff --git a/tracker/migrations/0005_articleevent.py b/tracker/migrations/0005_articleevent.py
deleted file mode 100644
index 859910e..0000000
--- a/tracker/migrations/0005_articleevent.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Generated by Django 5.0.7 on 2025-05-23 17:27
-
-import django.db.models.deletion
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0004_alter_logfilediscardedline_error_type"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name="ArticleEvent",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
-                ),
-                (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
-                ),
-                (
-                    "event_type",
-                    models.CharField(
-                        blank=True,
-                        choices=[
-                            ("MUL", "Multiple Articles Returned"),
-                            ("ERR", "Data Error"),
-                        ],
-                        max_length=3,
-                        null=True,
-                        verbose_name="Event Type",
-                    ),
-                ),
-                (
-                    "message",
-                    models.TextField(blank=True, null=True, verbose_name="Message"),
-                ),
-                ("data", models.JSONField(default=dict, verbose_name="Data")),
-                ("handled", models.BooleanField(default=False, verbose_name="Handled")),
-                (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
-                    ),
-                ),
-                (
-                    "updated_by",
-                    models.ForeignKey(
-                        blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
-                    ),
-                ),
-            ],
-            options={
-                "abstract": False,
-            },
-        ),
-    ]
diff --git a/tracker/migrations/0006_alter_logfilediscardedline_error_type.py b/tracker/migrations/0006_alter_logfilediscardedline_error_type.py
deleted file mode 100644
index fb7f74a..0000000
--- a/tracker/migrations/0006_alter_logfilediscardedline_error_type.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Generated by Django 5.0.7 on 2025-06-14 10:46
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0005_articleevent"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="logfilediscardedline",
-            name="error_type",
-            field=models.CharField(
-                blank=True,
-                choices=[
-                    ("MET", "Missing Metadata"),
-                    ("ART", "Missing Article"),
-                    ("JOU", "Missing Journal"),
-                    ("URL", "URL Translation"),
-                    ("DBE", "Database Error"),
-                ],
-                max_length=3,
-                null=True,
-                verbose_name="Error Type",
-            ),
-        ),
-    ]
diff --git a/tracker/migrations/0007_alter_logfilediscardedline_error_type.py b/tracker/migrations/0007_alter_logfilediscardedline_error_type.py
deleted file mode 100644
index f9ffebe..0000000
--- a/tracker/migrations/0007_alter_logfilediscardedline_error_type.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Generated by Django 5.0.7 on 2025-08-09 21:04
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0006_alter_logfilediscardedline_error_type"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="logfilediscardedline",
-            name="error_type",
-            field=models.CharField(
-                blank=True,
-                choices=[
-                    ("MET", "Missing Metadata"),
-                    ("ART", "Missing PIDv2 or PIDv3 or PID Generic"),
-                    ("JOU", "Missing ISSN"),
-                    ("URL", "URL Translation"),
-                    ("DBE", "Database Error"),
-                ],
-                max_length=3,
-                null=True,
-                verbose_name="Error Type",
-            ),
-        ),
-    ]
diff --git a/tracker/models.py b/tracker/models.py
index 77086ee..a394ed6 100644
--- a/tracker/models.py
+++ b/tracker/models.py
@@ -1,65 +1,13 @@
-import json
-import logging
-import traceback
-import uuid
-
-from datetime import datetime
-
-from django.core.files.base import ContentFile
 from django.db import models
 from django.utils.translation import gettext_lazy as _
 
-from core.models import CommonControlField
 from log_manager.models import LogFile
 from tracker import choices
-
-from .exceptions import *
+from .exceptions import LogFileDiscardedLineCreateError
 
 
-class ArticleEvent(CommonControlField):
-    event_type = models.CharField(
-        _("Event Type"),
-        choices=choices.ARTICLE_EVENT_TYPE,
-        max_length=3,
-        null=True,
-        blank=True,
-    )
-
-    message = models.TextField(
-        _("Message"),
-        null=True,
-        blank=True,
-    )
-
-    data = models.JSONField(
-        _("Data"),
-        default=dict,
-    )
-
-    handled = models.BooleanField(
-        _("Handled"),
-        default=False
-    )
-
-    @classmethod
-    def create(cls, event_type, message, data):
-        try:
-            obj = cls()
-            obj.event_type = event_type
-            obj.message = message
-            obj.data = data
-            obj.save()
-        except Exception as exc:
-            raise ArticleEventError(
-                f"Unable to create ArticleEvent ({data} - {event_type} - {message}). EXCEPTION {exc}"
-            )
-        return obj
-
-    def __str__(self):
-        return f"{self.event_type} - {self.message}"
-
-
-class LogFileDiscardedLine(CommonControlField):
+class LogFileDiscardedLine(models.Model):
+    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
     log_file = models.ForeignKey(
         LogFile,
         on_delete=models.CASCADE,
@@ -108,174 +56,4 @@
     def __str__(self):
         return f"{self.data} - {self.message}"
 
 
-class UnexpectedEvent(models.Model):
-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
-    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
-    exception_type = models.TextField(_("Exception Type"), null=True, blank=True)
-    exception_msg = models.TextField(_("Exception Msg"), null=True, blank=True)
-    traceback = models.JSONField(null=True, blank=True)
-    detail = models.JSONField(null=True, blank=True)
-
-    class Meta:
-        indexes = [
-            models.Index(fields=["exception_type"]),
-        ]
-
-    def __str__(self):
-        return f"{self.exception_msg}"
-
-    @property
-    def data(self):
-        return dict(
-            created=self.created.isoformat(),
-            exception_type=self.exception_type,
-            exception_msg=self.exception_msg,
-            traceback=json.dumps(self.traceback),
-            detail=json.dumps(self.detail),
-        )
-
-    @classmethod
-    def create(
-        cls,
-        exception=None,
-        exc_traceback=None,
-        detail=None,
-    ):
-        try:
-            if exception:
-                logging.exception(exception)
-
-            obj = cls()
-            obj.exception_msg = str(exception)
-            obj.exception_type = str(type(exception))
-            try:
-                json.dumps(detail)
-                obj.detail = detail
-            except Exception as e:
-                obj.detail = str(detail)
-            if exc_traceback:
-                obj.traceback = traceback.format_tb(exc_traceback)
-            obj.save()
-            return obj
-        except Exception as exc:
-            raise UnexpectedEventCreateError(
-                f"Unable to create unexpected event ({exception} {exc_traceback}). EXCEPTION {exc}"
-            )
-
-
-class Event(CommonControlField):
-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
-    message = models.TextField(_("Message"), null=True, blank=True)
-    message_type = models.CharField(
-        _("Message type"),
-        choices=choices.EVENT_MSG_TYPE,
-        max_length=16,
-        null=True,
-        blank=True,
-    )
-    detail = models.JSONField(null=True, blank=True)
-    unexpected_event = models.ForeignKey(
-        'UnexpectedEvent', on_delete=models.SET_NULL, null=True, blank=True
-    )
-
-    class Meta:
-        abstract = True
-        indexes = [
-            models.Index(fields=["message_type"]),
-        ]
-
-    @property
-    def data(self):
-        d = {}
-        d["created"] = self.created.isoformat()
-        d["user"] = self.user.username
-        d.update(
-            dict(
-                message=self.message, message_type=self.message_type, detail=self.detail
-            )
-        )
-        if self.unexpected_event:
-            d.update(self.unexpected_event.data)
-        return d
-
-    @classmethod
-    def create(
-        cls,
-        user=None,
-        message_type=None,
-        message=None,
-        e=None,
-        exc_traceback=None,
-        detail=None,
-    ):
-        try:
-            obj = cls()
-            obj.creator = user
-            obj.message = message
-            obj.message_type = message_type
-            obj.detail = detail
-            obj.save()
-
-            if e:
-                logging.exception(f"{message}: {e}")
-                obj.unexpected_event = UnexpectedEvent.create(
-                    exception=e,
-                    exc_traceback=exc_traceback,
-                )
-                obj.save()
-        except Exception as exc:
-            raise EventCreateError(
-                f"Unable to create Event ({message} {e}). EXCEPTION: {exc}"
-            )
-        return obj
-
-
-def tracker_file_directory_path(instance, filename):
-    d = datetime.now(datetime.timezone.utc)
-    return f"tracker/{d.year}/{d.month}/{d.day}/{filename}"
-
-
-class EventReport(CommonControlField):
-    file = models.FileField(
-        upload_to=tracker_file_directory_path, null=True, blank=True
-    )
-
-    class Meta:
-        abstract = True
-
-    def save_file(self, events, ext=None):
-        if not events:
-            return
-        try:
-            ext = ".json"
-            content = json.dumps(list([item.data for item in events]))
-            name = datetime.now(datetime.timezone.utc).isoformat() + ext
-            self.file.save(name, ContentFile(content))
-            self.delete_events(events)
-        except Exception as e:
-            raise EventReportSaveFileError(
-                f"Unable to save EventReport.file ({name}). Exception: {e}"
-            )
-
-    def delete_events(self, events):
-        for item in events:
-            try:
-                item.unexpected_event.delete()
-            except:
-                pass
-            try:
-                item.delete()
-            except:
-                pass
-
-    @classmethod
-    def create(cls, user):
-        try:
-            obj = cls()
-            obj.creator = user
-            obj.save()
-        except Exception as e:
-            raise EventReportCreateError(
-                f"Unable to create EventReport. Exception: {e}"
-            )
diff --git a/tracker/tasks.py b/tracker/tasks.py
deleted file mode 100644
index ace8145..0000000
--- a/tracker/tasks.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# tasks.py
-from datetime import datetime
-
-from django.contrib.auth import get_user_model
-
-from config import celery_app
-from core.utils.utils import _get_user
-
-from .models import UnexpectedEvent
-
-
-User = get_user_model()
-
-
-@celery_app.task(bind=True, name="Cleanup unexpected events")
-def delete_unexpected_events(self, exception_type, start_date=None, end_date=None, user_id=None, username=None):
-    """
-    Delete UnexpectedEvent records based on exception type and optional date range.
-    """
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    if exception_type == '__all__':
-        UnexpectedEvent.objects.all().delete()
-        return
-
-    filters = {'exception_type__icontains': exception_type}
-    if start_date:
-        start_date = datetime.fromisoformat(start_date)
-        filters['created__gte'] = start_date
-    if end_date:
-        end_date = datetime.fromisoformat(end_date)
-        filters['created__lte'] = end_date
-
-    UnexpectedEvent.objects.filter(**filters).delete()
diff --git a/tracker/wagtail_hooks.py b/tracker/wagtail_hooks.py
index ce1b30f..1ceb9c7 100644
--- a/tracker/wagtail_hooks.py
+++ b/tracker/wagtail_hooks.py
@@ -4,35 +4,9 @@
 
 from config.menu import get_menu_order
 
-from .models import UnexpectedEvent, LogFileDiscardedLine, ArticleEvent
+from .models import LogFileDiscardedLine
 
 
-class UnexpectedEventSnippetViewSet(SnippetViewSet):
-    model = UnexpectedEvent
-    menu_label = _("Unexpected Events")
-    icon = 'warning'
-    menu_order = get_menu_order("tracker")
-    add_to_admin_menu = False
-
-    list_display = (
-        "exception_type",
-        "exception_msg",
-        "traceback",
-        "created",
-    )
-    list_filter = ("exception_type",)
-    search_fields = (
-        "exception_msg",
-        "detail",
-    )
-    inspect_view_fields = (
-        "exception_type",
-        "exception_msg",
-        "traceback",
-        "detail",
-        "created",
-    )
-
 class LogFileDiscardedLineSnippetViewSet(SnippetViewSet):
     model = LogFileDiscardedLine
     menu_label = _("Discarded Lines")
@@ -64,34 +38,7 @@ class LogFileDiscardedLineSnippetViewSet(SnippetViewSet):
         "handled",
     )
 
 
-class ArticleEventSnippetViewSet(SnippetViewSet):
-    model = ArticleEvent
-    menu_label = _("Article Events")
-    icon = 'warning'
-    menu_order = get_menu_order("tracker")
-    add_to_admin_menu = False
-
-    list_display = (
-        "event_type",
-        "message",
-        "data",
-        "handled",
-    )
-
-    list_filter = (
-        "event_type",
-        "handled",
-    )
-    search_fields = (
-        "message",
-    )
-    inspect_view_fields = (
-        "event_type",
-        "message",
-        "data",
-        "handled",
-    )
 
 
 class TrackerSnippetViewSetGroup(SnippetViewSetGroup):
@@ -101,9 +48,7 @@
     menu_order = get_menu_order("tracker")
 
     items = (
-        UnexpectedEventSnippetViewSet,
         LogFileDiscardedLineSnippetViewSet,
-        ArticleEventSnippetViewSet,
    )