diff --git a/.gitignore b/.gitignore index 73bd851ebbdd..67f5004899ee 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,8 @@ GEMINI.md # WANT TO ADD MORE? You can tell Git without adding to this file: # See https://git-scm.com/docs/gitignore # In particular, if you have tools you use, add to $GIT_DIR/info/exclude or use core.excludesFile + +# Azure Blob Storage testing artifacts (local testing only) +AzuriteConfig +__azurite_db_*.json +__blobstorage__/ diff --git a/changelog/unreleased/SOLR-17949-azure-blob-repository.yml b/changelog/unreleased/SOLR-17949-azure-blob-repository.yml new file mode 100644 index 000000000000..c00344a629ff --- /dev/null +++ b/changelog/unreleased/SOLR-17949-azure-blob-repository.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Add Azure Blob Storage backup repository module +type: added +authors: + - name: Prateek Singhal +links: + - name: SOLR-17949 + url: https://issues.apache.org/jira/browse/SOLR-17949 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index f1e1d2c08e3e..65731cac576c 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -52,6 +52,7 @@ asciidoctor-mathjax = "0.0.9" # @keep Asciidoctor tabs version used in ref-guide asciidoctor-tabs = "1.0.0-beta.6" azagniotov-langdetect = "12.5.2" +azure-sdk-bom = "1.3.6" # @keep bats-assert version used in packaging bats-assert = "2.0.0" # @keep bats-core version used in packaging @@ -297,6 +298,13 @@ apache-zookeeper-zookeeper = { module = "org.apache.zookeeper:zookeeper", versio # @keep transitive dependency for version alignment apiguardian-api = { module = "org.apiguardian:apiguardian-api", version.ref = "apiguardian" } azagniotov-langdetect = { module = "io.github.azagniotov:language-detection", version.ref = "azagniotov-langdetect" } +azure-core = { module = "com.azure:azure-core" } +azure-core-http-okhttp = { module = "com.azure:azure-core-http-okhttp" } +azure-identity = { module = 
"com.azure:azure-identity" } +azure-sdk-bom = { module = "com.azure:azure-sdk-bom", version.ref = "azure-sdk-bom" } +azure-storage-blob = { module = "com.azure:azure-storage-blob" } +azure-storage-blob-batch = { module = "com.azure:azure-storage-blob-batch" } +azure-storage-common = { module = "com.azure:azure-storage-common" } bc-jose4j = { module = "org.bitbucket.b_c:jose4j", version.ref = "bc-jose4j" } benmanes-caffeine = { module = "com.github.ben-manes.caffeine:caffeine", version.ref = "benmanes-caffeine" } bouncycastle-bcpkix = { module = "org.bouncycastle:bcpkix-jdk18on", version.ref = "bouncycastle" } @@ -502,6 +510,8 @@ ow2-asm-commons = { module = "org.ow2.asm:asm-commons", version.ref = "ow2-asm" ow2-asm-tree = { module = "org.ow2.asm:asm-tree", version.ref = "ow2-asm" } # @keep transitive dependency for version alignment perfmark-api = { module = "io.perfmark:perfmark-api", version.ref = "perfmark" } +# Version managed by azure-sdk-bom +projectreactor-core = { module = "io.projectreactor:reactor-core" } prometheus-metrics-expositionformats = { module = "io.prometheus:prometheus-metrics-exposition-formats", version.ref = "prometheus-metrics" } prometheus-metrics-model = { module = "io.prometheus:prometheus-metrics-model", version.ref = "prometheus-metrics" } quicktheories-quicktheories = { module = "org.quicktheories:quicktheories", version.ref = "quicktheories" } diff --git a/settings.gradle b/settings.gradle index 782edec43251..ed296cde5ec0 100644 --- a/settings.gradle +++ b/settings.gradle @@ -45,6 +45,7 @@ include "solr:core" include "solr:cross-dc-manager" include "solr:server" include "solr:modules:analysis-extras" +include "solr:modules:azure-blob-repository" include "solr:modules:clustering" include "solr:modules:cross-dc" include "solr:modules:cuvs" diff --git a/solr/licenses/azure-LICENSE-MIT.txt b/solr/licenses/azure-LICENSE-MIT.txt new file mode 100644 index 000000000000..b8b569d7746d --- /dev/null +++ b/solr/licenses/azure-LICENSE-MIT.txt @@ 
-0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Microsoft + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/solr/licenses/azure-core-1.57.1.jar.sha1 b/solr/licenses/azure-core-1.57.1.jar.sha1 new file mode 100644 index 000000000000..d17089dd72ff --- /dev/null +++ b/solr/licenses/azure-core-1.57.1.jar.sha1 @@ -0,0 +1 @@ +abbbea38f58a257ea125450b2e8faa79a55062f5 diff --git a/solr/licenses/azure-core-http-okhttp-1.13.3.jar.sha1 b/solr/licenses/azure-core-http-okhttp-1.13.3.jar.sha1 new file mode 100644 index 000000000000..7988f219ff4d --- /dev/null +++ b/solr/licenses/azure-core-http-okhttp-1.13.3.jar.sha1 @@ -0,0 +1 @@ +32029fabf625aa0ad9109038e080d94976148d9d diff --git a/solr/licenses/azure-identity-1.18.2.jar.sha1 b/solr/licenses/azure-identity-1.18.2.jar.sha1 new file mode 100644 index 000000000000..500b3596aa6a --- /dev/null +++ b/solr/licenses/azure-identity-1.18.2.jar.sha1 @@ -0,0 +1 @@ +5a057c0d1e2ea2105a53a79d70420207d7e03f17 diff --git a/solr/licenses/azure-json-1.5.1.jar.sha1 b/solr/licenses/azure-json-1.5.1.jar.sha1 new file mode 100644 index 000000000000..450773c15e31 --- /dev/null +++ b/solr/licenses/azure-json-1.5.1.jar.sha1 @@ -0,0 +1 @@ +29c6d074e9c72d877e0a6bfd65e725b9e34c7a4c diff --git a/solr/licenses/azure-storage-blob-12.33.3.jar.sha1 b/solr/licenses/azure-storage-blob-12.33.3.jar.sha1 new file mode 100644 index 000000000000..35e91009d860 --- /dev/null +++ b/solr/licenses/azure-storage-blob-12.33.3.jar.sha1 @@ -0,0 +1 @@ +0dd4a9ec49ec0c9e0420757374ee747ea37c54ae diff --git a/solr/licenses/azure-storage-blob-batch-12.29.3.jar.sha1 b/solr/licenses/azure-storage-blob-batch-12.29.3.jar.sha1 new file mode 100644 index 000000000000..5dca5706a8db --- /dev/null +++ b/solr/licenses/azure-storage-blob-batch-12.29.3.jar.sha1 @@ -0,0 +1 @@ +6cdfd2e89fc2ecb7278b02aba490d71e938ceacf diff --git a/solr/licenses/azure-storage-common-12.32.2.jar.sha1 b/solr/licenses/azure-storage-common-12.32.2.jar.sha1 new file mode 100644 index 000000000000..7f60233c8735 --- /dev/null +++ b/solr/licenses/azure-storage-common-12.32.2.jar.sha1 @@ -0,0 +1 @@ 
+44a842f25175000c8678daacdccf11829d3dcf4d diff --git a/solr/licenses/azure-storage-internal-avro-12.18.2.jar.sha1 b/solr/licenses/azure-storage-internal-avro-12.18.2.jar.sha1 new file mode 100644 index 000000000000..c669fbe3420f --- /dev/null +++ b/solr/licenses/azure-storage-internal-avro-12.18.2.jar.sha1 @@ -0,0 +1 @@ +bde92a7cd189bbc27ee0f87ef72ea36884ee1b1b diff --git a/solr/licenses/azure-xml-1.2.1.jar.sha1 b/solr/licenses/azure-xml-1.2.1.jar.sha1 new file mode 100644 index 000000000000..9e82d9dabf14 --- /dev/null +++ b/solr/licenses/azure-xml-1.2.1.jar.sha1 @@ -0,0 +1 @@ +053ffe8a1d5cb26a0fd94a40db7eeb7b6ae715f3 diff --git a/solr/licenses/msal4j-1.23.1.jar.sha1 b/solr/licenses/msal4j-1.23.1.jar.sha1 new file mode 100644 index 000000000000..04c49543b817 --- /dev/null +++ b/solr/licenses/msal4j-1.23.1.jar.sha1 @@ -0,0 +1 @@ +6c722b514873b24a4e1ce9c22dca36ea3c22bdbe diff --git a/solr/licenses/msal4j-LICENSE-MIT.txt b/solr/licenses/msal4j-LICENSE-MIT.txt new file mode 100644 index 000000000000..ad22b888b221 --- /dev/null +++ b/solr/licenses/msal4j-LICENSE-MIT.txt @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE + diff --git a/solr/licenses/reactor-LICENSE-ASL.txt b/solr/licenses/reactor-LICENSE-ASL.txt new file mode 100644 index 000000000000..1eef70a9b9f4 --- /dev/null +++ b/solr/licenses/reactor-LICENSE-ASL.txt @@ -0,0 +1,206 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Note: Other license terms may apply to certain, identified software files contained within or distributed + with the accompanying software if such terms are included in the directory containing the accompanying software. + Such other license terms will then apply in lieu of the terms of the software license above. diff --git a/solr/licenses/reactor-NOTICE.txt b/solr/licenses/reactor-NOTICE.txt new file mode 100644 index 000000000000..990ac4433824 --- /dev/null +++ b/solr/licenses/reactor-NOTICE.txt @@ -0,0 +1,7 @@ +Project Reactor +Copyright (c) 2011-2024 VMware Inc. or its affiliates, All Rights Reserved. + +This product includes software developed at +VMware Inc. (https://github.com/reactor) + +Licensed under the Apache License 2.0 diff --git a/solr/licenses/reactor-core-3.7.14.jar.sha1 b/solr/licenses/reactor-core-3.7.14.jar.sha1 new file mode 100644 index 000000000000..4b3338d7b682 --- /dev/null +++ b/solr/licenses/reactor-core-3.7.14.jar.sha1 @@ -0,0 +1 @@ +0fbc7e6ce98e3e4a4d9d061b386b3baf410e9bf0 diff --git a/solr/modules/azure-blob-repository/README.md b/solr/modules/azure-blob-repository/README.md new file mode 100644 index 000000000000..806547c1201c --- /dev/null +++ b/solr/modules/azure-blob-repository/README.md @@ -0,0 +1,33 @@ + + +# Apache Solr Azure Blob Storage Backup Repository + +A `BackupRepository` implementation for storing Solr backups in Azure Blob Storage. 
+ +Enable the module: + +```bash +export SOLR_MODULES=azure-blob-repository +``` + +End-user documentation -- configuration, the supported authentication methods (connection +string, account key, SAS token, and Azure Identity), the Security Manager limitation, and +troubleshooting -- lives in the Solr Reference Guide, under the "Backup/Restore" page in the +`AzureBlobBackupRepository` section: + +https://solr.apache.org/guide/solr/latest/deployment-guide/backup-restore.html diff --git a/solr/modules/azure-blob-repository/build.gradle b/solr/modules/azure-blob-repository/build.gradle new file mode 100644 index 000000000000..10f9817dab59 --- /dev/null +++ b/solr/modules/azure-blob-repository/build.gradle @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +apply plugin: 'java-library' + +description = 'Azure Blob Storage Repository' + +ext { + // Disable security manager for azure-blob-repository module tests + // Required because Testcontainers needs access to Docker socket and system properties + useSecurityManager = false +} + +dependencies { + implementation platform(project(':platform')) + api(project(':solr:core')) + implementation project(':solr:solrj') + + implementation libs.apache.lucene.core + + implementation platform(libs.azure.sdk.bom) + + // Azure Storage SDK dependencies (versions are managed by the azure-sdk-bom platform above); azure-core-http-netty is excluded from each one and the OkHttp HTTP client is declared as runtimeOnly below + implementation(libs.azure.storage.blob) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + implementation(libs.azure.storage.blob.batch) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + implementation(libs.azure.identity) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + exclude group: 'com.microsoft.azure', module: 'msal4j-persistence-extension' + exclude group: 'net.java.dev.jna', module: 'jna-platform' + } + implementation(libs.azure.core) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + implementation(libs.azure.storage.common) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + + implementation libs.google.guava + implementation libs.slf4j.api + + runtimeOnly libs.azure.core.http.okhttp + runtimeOnly libs.fasterxml.woodstox.core + runtimeOnly libs.codehaus.woodstox.stax2api + + testImplementation project(':solr:test-framework') + testImplementation libs.junit.junit + testImplementation libs.commonsio.commonsio + + testImplementation libs.azure.core.http.okhttp + testImplementation libs.squareup.okhttp3.okhttp.jvm + + // Testcontainers for Azurite integration testing + testImplementation libs.testcontainers + + // Explicit transitive test dependencies for dependency analyzer + testImplementation libs.carrotsearch.randomizedtesting.runner + testImplementation libs.apache.lucene.testframework +} diff
--git a/solr/modules/azure-blob-repository/gradle.lockfile b/solr/modules/azure-blob-repository/gradle.lockfile new file mode 100644 index 000000000000..84ab8d019ee5 --- /dev/null +++ b/solr/modules/azure-blob-repository/gradle.lockfile @@ -0,0 +1,192 @@ +# This is a Gradle generated file for dependency locking. +# Manual edits can break the build and are not advised. +# This file is expected to be part of source control. +# To regenerate this file, run: ./gradlew :solr:modules:azure-blob-repository:dependencies --write-locks +com.azure:azure-core-http-okhttp:1.13.3=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-core:1.57.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-identity:1.18.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-json:1.5.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-sdk-bom:1.3.6=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-blob-batch:12.29.3=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-blob:12.33.3=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-common:12.32.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-internal-avro:12.18.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-xml:1.2.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath 
+com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.8.4=jarValidation,testCompileClasspath,testRuntimeClasspath +com.carrotsearch:hppc:0.10.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson.core:jackson-annotations:2.22=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.core:jackson-core:2.22.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.core:jackson-databind:2.22.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.22.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.22.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.22.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.module:jackson-module-jakarta-xmlbind-annotations:2.22.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson:jackson-bom:2.22.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.woodstox:woodstox-core:7.2.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.github.ben-manes.caffeine:caffeine:3.2.3=annotationProcessor,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testRuntimeClasspath +com.github.docker-java:docker-java-api:3.7.1=jarValidation,testCompileClasspath,testRuntimeClasspath 
+com.github.docker-java:docker-java-transport-zerodep:3.7.1=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport:3.7.1=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.kevinstern:software-and-algorithms:1.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.auto.service:auto-service-annotations:1.0.1=annotationProcessor,errorprone,testAnnotationProcessor +com.google.auto.value:auto-value-annotations:1.11.1=annotationProcessor,errorprone,testAnnotationProcessor +com.google.auto:auto-common:1.2.2=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_annotation:2.41.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_annotations:2.47.0=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.errorprone:error_prone_check_api:2.41.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_core:2.41.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.googlejavaformat:google-java-format:1.27.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.guava:failureaccess:1.0.3=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.guava:guava:33.6.0-jre=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath 
+com.google.j2objc:j2objc-annotations:3.1=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.protobuf:protobuf-java:4.35.1=annotationProcessor,errorprone,testAnnotationProcessor +com.j256.simplemagic:simplemagic:1.17=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.jayway.jsonpath:json-path:3.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.lmax:disruptor:4.0.0=solrPlatformLibs +com.microsoft.azure:msal4j:1.23.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.squareup.okhttp3:okhttp-jvm:5.4.0=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.squareup.okhttp3:okhttp:5.4.0=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.squareup.okio:okio-jvm:3.17.0=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.squareup.okio:okio:3.17.0=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.tdunning:t-digest:3.3=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +commons-cli:commons-cli:1.11.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +commons-codec:commons-codec:1.22.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +commons-io:commons-io:2.22.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.dropwizard.metrics:metrics-core:4.2.39=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.github.eisop:dataflow-errorprone:3.41.0-eisop1=annotationProcessor,errorprone,testAnnotationProcessor 
+io.github.java-diff-utils:java-diff-utils:4.12=annotationProcessor,errorprone,testAnnotationProcessor +io.netty:netty-buffer:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-codec-base:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-common:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-handler:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-resolver:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-tcnative-boringssl-static:2.0.79.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-tcnative-classes:2.0.79.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport-classes-epoll:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport-native-epoll:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport-native-unix-common:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport:4.2.15.Final=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+io.opentelemetry.instrumentation:opentelemetry-instrumentation-api-incubator:2.22.0-alpha=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-instrumentation-api:2.22.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-runtime-telemetry-java17:2.22.0-alpha=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-runtime-telemetry-java8:2.22.0-alpha=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.semconv:opentelemetry-semconv:1.37.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-api-incubator:1.56.0-alpha=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-api:1.56.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.opentelemetry:opentelemetry-common:1.56.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.opentelemetry:opentelemetry-context:1.56.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.opentelemetry:opentelemetry-exporter-prometheus:1.56.0-alpha=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk-common:1.56.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk-metrics:1.56.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk-trace:1.56.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+io.opentelemetry:opentelemetry-sdk:1.56.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.projectreactor:reactor-core:3.7.14=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +io.prometheus:prometheus-metrics-exposition-formats:1.1.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.prometheus:prometheus-metrics-model:1.1.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.sgr:s2-geometry-library-java:1.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.swagger.core.v3:swagger-annotations-jakarta:2.2.52=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +jakarta.activation:jakarta.activation-api:2.1.3=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.annotation:jakarta.annotation-api:3.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.inject:jakarta.inject-api:2.0.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.servlet:jakarta.servlet-api:6.1.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.validation:jakarta.validation-api:3.1.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.ws.rs:jakarta.ws.rs-api:4.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.xml.bind:jakarta.xml.bind-api:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +javax.inject:javax.inject:1=annotationProcessor,errorprone,testAnnotationProcessor +junit:junit:4.13.2=jarValidation,testCompileClasspath,testRuntimeClasspath +net.java.dev.jna:jna:5.19.1=jarValidation,testCompileClasspath,testRuntimeClasspath 
+org.antlr:antlr4-runtime:4.13.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.commons:commons-compress:1.28.0=jarValidation,testCompileClasspath,testRuntimeClasspath +org.apache.commons:commons-exec:1.6.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.commons:commons-lang3:3.20.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.commons:commons-math3:3.6.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.curator:curator-client:5.9.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.curator:curator-framework:5.9.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.curator:curator-test:5.9.0=jarValidation,testRuntimeClasspath +org.apache.logging.log4j:log4j-1.2-api:2.26.0=solrPlatformLibs +org.apache.logging.log4j:log4j-api:2.26.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.logging.log4j:log4j-core:2.26.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.logging.log4j:log4j-layout-template-json:2.26.0=solrPlatformLibs +org.apache.logging.log4j:log4j-slf4j2-impl:2.26.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.logging.log4j:log4j-web:2.26.0=solrPlatformLibs +org.apache.lucene:lucene-analysis-common:10.4.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.lucene:lucene-analysis-kuromoji:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.apache.lucene:lucene-analysis-nori:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-analysis-phonetic:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-backward-codecs:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-classification:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-codecs:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-core:10.4.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.lucene:lucene-expressions:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-facet:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-grouping:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-highlighter:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-join:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-memory:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-misc:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-queries:10.4.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.lucene:lucene-queryparser:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.apache.lucene:lucene-sandbox:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-spatial-extras:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-spatial3d:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-suggest:10.4.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-test-framework:10.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath +org.apache.zookeeper:zookeeper-jute:3.9.5=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.zookeeper:zookeeper:3.9.5=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apiguardian:apiguardian-api:1.1.2=jarValidation,testRuntimeClasspath +org.codehaus.woodstox:stax2-api:4.3.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty.ee10:jetty-ee10-servlet:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-client-transport:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-client:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-common:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-hpack:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-server:12.0.34=jarValidation,testRuntimeClasspath 
+org.eclipse.jetty:jetty-alpn-client:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-java-client:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-java-server:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-server:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-client:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-http:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-io:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-rewrite:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-security:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-server:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-session:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-util:12.0.34=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.glassfish.hk2.external:aopalliance-repackaged:4.0.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:hk2-api:4.0.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:hk2-locator:4.0.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:hk2-utils:4.0.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:osgi-resource-locator:3.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.glassfish.jersey.containers:jersey-container-jetty-http:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.core:jersey-client:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.core:jersey-common:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.core:jersey-server:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.ext:jersey-entity-filtering:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.inject:jersey-hk2:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.media:jersey-media-json-jackson:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey:jersey-bom:4.0.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.hamcrest:hamcrest:3.0=jarValidation,testCompileClasspath,testRuntimeClasspath +org.javassist:javassist:3.30.2-GA=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.jetbrains.kotlin:kotlin-stdlib:2.2.21=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.jetbrains:annotations:26.0.2=jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.jspecify:jspecify:1.0.0=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +org.junit.jupiter:junit-jupiter-api:5.6.2=jarValidation,testRuntimeClasspath +org.junit.platform:junit-platform-commons:1.6.2=jarValidation,testRuntimeClasspath +org.junit:junit-bom:5.6.2=jarValidation,testRuntimeClasspath 
+org.locationtech.spatial4j:spatial4j:0.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.opentest4j:opentest4j:1.2.0=jarValidation,testRuntimeClasspath +org.ow2.asm:asm-commons:9.10.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.ow2.asm:asm-tree:9.10.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.ow2.asm:asm:9.10.1=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.pcollections:pcollections:4.0.1=annotationProcessor,errorprone,testAnnotationProcessor +org.reactivestreams:reactive-streams:1.0.4=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.rnorth.duct-tape:duct-tape:1.0.8=jarValidation,testCompileClasspath,testRuntimeClasspath +org.semver4j:semver4j:6.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.slf4j:jcl-over-slf4j:2.0.17=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.slf4j:jul-to-slf4j:2.0.17=solrPlatformLibs +org.slf4j:slf4j-api:2.0.17=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.testcontainers:testcontainers:2.0.5=jarValidation,testCompileClasspath,testRuntimeClasspath +org.xerial.snappy:snappy-java:1.1.10.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +empty=missingdoclet,packaging diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java new file mode 100644 index 000000000000..f11a47f02f47 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java @@ -0,0 +1,410 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or 
more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import com.google.common.annotations.VisibleForTesting; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.invoke.MethodHandles; +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.time.Instant; +import java.util.Collection; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.backup.repository.AbstractBackupRepository; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A concrete implementation of {@link BackupRepository} interface supporting backup/restore of Solr + * indexes to Azure Blob 
Storage. + */ +public class AzureBlobBackupRepository extends AbstractBackupRepository { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** URI scheme that resolve and the blob-path conversion require of every URI they receive. */ + static final String BLOB_SCHEME = "blob"; + /** Size, in bytes, of the buffer used when streaming index files to and from blob storage. */ + private static final int COPY_BUFFER_SIZE = 8192; + + private AzureBlobStorageClient client; + + /** + * Builds the storage client from the repository configuration. A client that is already held is + * closed before it is replaced. + */ + @Override + public void init(NamedList args) { + super.init(args); + AzureBlobBackupRepositoryConfig backupConfig = new AzureBlobBackupRepositoryConfig(this.config); + + if (client != null) { + client.close(); + } + + this.client = backupConfig.buildClient(); + } + + /** Replaces the storage client without closing the current one; intended for tests. */ + @VisibleForTesting + public void setClient(AzureBlobStorageClient client) { + this.client = client; + } + + /** Returns the value stored under {@code name} in the repository config, via an unchecked cast. */ + @Override + @SuppressWarnings("unchecked") + public T getConfigProperty(String name) { + return (T) this.config.get(name); + } + + /** + * Converts {@code location} into a {@code blob:} URI. A location that already starts with the + * scheme is parsed as-is; any other location becomes the path of a new {@code blob:} URI, with a + * leading slash added when it is missing. + * + * @throws IllegalArgumentException if {@code location} is null or empty + * @throws SolrException with {@code BAD_REQUEST} if the location is not valid URI syntax + */ + @Override + public URI createURI(String location) { + if (StrUtils.isNullOrEmpty(location)) { + throw new IllegalArgumentException("cannot create URI with an empty location"); + } + + URI result; + try { + if (location.startsWith(BLOB_SCHEME + ":")) { + result = new URI(location); + } else if (location.startsWith("/")) { + result = new URI(BLOB_SCHEME, "", location, null); + } else { + result = new URI(BLOB_SCHEME, "", "/" + location, null); + } + return result; + } catch (URISyntaxException ex) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ex); + } + } + + /** Same as createURI, after appending a trailing slash to {@code location} when it is missing. */ + @Override + public URI createDirectoryURI(String location) { + if (StrUtils.isNullOrEmpty(location)) { + throw new IllegalArgumentException("cannot create URI with an empty location"); + } + + if (!location.endsWith("/")) { + location += "/"; + } + + return createURI(location); + } + + /** + * Appends the path components to {@code baseUri}, separated by slashes, and normalizes the result. + * + * @throws IllegalArgumentException if {@code baseUri} does not use the {@code blob} scheme + */ + @Override + public URI resolve(URI baseUri, String...
pathComponents) { + if (!BLOB_SCHEME.equalsIgnoreCase(baseUri.getScheme())) { + throw new IllegalArgumentException("URI must begin with 'blob:' scheme"); + } + + // If paths contain unnecessary '/' separators, they'll be removed by URI.normalize() + String path = baseUri + "/" + String.join("/", pathComponents); + return URI.create(path).normalize(); + } + + /** + * Directory variant of resolve: makes sure the last path component ends with a slash, or, when + * no components are given, that the base URI does, and then resolves as usual. The last element + * of the pathComponents array passed in is overwritten in place when a slash is appended. + */ + @Override + public URI resolveDirectory(URI baseUri, String... pathComponents) { + if (pathComponents.length > 0) { + if (!pathComponents[pathComponents.length - 1].endsWith("/")) { + pathComponents[pathComponents.length - 1] = pathComponents[pathComponents.length - 1] + "/"; + } + } else { + if (!baseUri.toString().endsWith("/")) { + baseUri = URI.create(baseUri + "/"); + } + } + return resolve(baseUri, pathComponents); + } + + /** + * Asks the storage client to create the directory at {@code path}. + * + * @throws NullPointerException if {@code path} is null + */ + @Override + public void createDirectory(URI path) throws IOException { + Objects.requireNonNull(path, "cannot create directory to a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Create directory '{}'", blobPath); + } + + client.createDirectory(blobPath); + } + + /** + * Asks the storage client to delete the directory at {@code path}. + * + * @throws NullPointerException if {@code path} is null + */ + @Override + public void deleteDirectory(URI path) throws IOException { + Objects.requireNonNull(path, "cannot delete directory with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Delete directory '{}'", blobPath); + } + + client.deleteDirectory(blobPath); + } + + /** + * Deletes {@code files}, each resolved against {@code path}. An AzureBlobNotFoundException raised + * by the client is logged at debug level and not rethrown. + * + * @throws NullPointerException if {@code path} or {@code files} is null + */ + @Override + public void delete(URI path, Collection files) throws IOException { + Objects.requireNonNull(path, "cannot delete with a null URI"); + Objects.requireNonNull(files, "cannot delete with a null files collection"); + + Set fullPaths = + files.stream() + .map(file -> resolve(path, file)) + .map(this::getBlobPath) + .collect(Collectors.toSet()); + + if (log.isDebugEnabled()) { + log.debug("Delete files '{}'", fullPaths); + } + + try { + client.delete(fullPaths); + } catch (AzureBlobNotFoundException e) { + // Deleting files that are already absent is a no-op
at the repository level, matching the + // lenient behavior of the local-filesystem and S3 repositories. Any present files in the + // batch were still removed before this was thrown. + if (log.isDebugEnabled()) { + log.debug("Some files requested for deletion were already absent", e); + } + } + } + + /** + * Returns whether the storage client reports that {@code path} exists. + * + * @throws NullPointerException if {@code path} is null + */ + @Override + public boolean exists(URI path) throws IOException { + Objects.requireNonNull(path, "cannot check existence with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Check existence '{}'", blobPath); + } + + return client.pathExists(blobPath); + } + + /** + * Classifies {@code path} as a directory or a file. No existence check is made here: every path + * that the client does not report as a directory is returned as {@code FILE}. + * + * @throws NullPointerException if {@code path} is null + */ + @Override + public PathType getPathType(URI path) throws IOException { + Objects.requireNonNull(path, "cannot get path type with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Get path type '{}'", blobPath); + } + + if (client.isDirectory(blobPath)) { + return BackupRepository.PathType.DIRECTORY; + } else { + return BackupRepository.PathType.FILE; + } + } + + /** + * Returns what the storage client lists for the directory at {@code path}. + * + * @throws NullPointerException if {@code path} is null + */ + @Override + public String[] listAll(URI path) throws IOException { + Objects.requireNonNull(path, "cannot list with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("List all '{}'", blobPath); + } + + return client.listDir(blobPath); + } + + /** + * Opens {@code fileName} under {@code dirPath} for reading. The blob length is fetched from the + * client up front; {@code ctx} is not used. + * + * @throws NullPointerException if {@code dirPath} or {@code fileName} is null + */ + @Override + public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws IOException { + Objects.requireNonNull(dirPath, "cannot open input with a null URI"); + Objects.requireNonNull(fileName, "cannot open input with a null fileName"); + + String base = getBlobPath(dirPath); + String blobPath = base.endsWith("/") ?
base + fileName : base + "/" + fileName; + + if (log.isDebugEnabled()) { + log.debug("Open input '{}'", blobPath); + } + + return new AzureBlobIndexInput(client, blobPath, client.length(blobPath)); + } + + /** + * Returns the stream obtained from the client pushStream call for the blob at {@code path}. + * + * @throws NullPointerException if {@code path} is null + */ + @Override + public OutputStream createOutput(URI path) throws IOException { + Objects.requireNonNull(path, "cannot create output with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Create output '{}'", blobPath); + } + + return client.pushStream(blobPath); + } + + /** + * Copies {@code sourceFileName} from a Lucene directory to the blob {@code destFileName} under + * {@code dest}. The parent directory is created first on a best-effort basis. When + * shouldVerifyChecksum is set, the file body is read through a checksum input, the source footer + * is verified and a footer carrying the computed checksum is written to the blob; otherwise the + * whole file is copied byte for byte. + * + * NOTE(review): the minimum-length check runs in both modes, so a file that is not longer than + * the codec footer is rejected as corrupt even when checksum verification is disabled. Confirm + * that this is intended. + * + * @throws NullPointerException if {@code sourceDir} or {@code dest} is null + * @throws IllegalArgumentException if either file name is null or empty + * @throws CorruptIndexException if the source file is not longer than the codec footer + */ + @Override + public void copyIndexFileFrom( + Directory sourceDir, String sourceFileName, URI dest, String destFileName) + throws IOException { + Objects.requireNonNull(sourceDir, "cannot copy with a null sourceDir"); + Objects.requireNonNull(dest, "cannot copy with a null dest"); + if (StrUtils.isNullOrEmpty(sourceFileName)) { + throw new IllegalArgumentException("must have a valid source file name to copy"); + } + if (StrUtils.isNullOrEmpty(destFileName)) { + throw new IllegalArgumentException("must have a valid destination file name to copy"); + } + + URI filePath = resolve(dest, destFileName); + String blobPath = getBlobPath(filePath); + + if (log.isDebugEnabled()) { + log.debug("Copy index file from '{}' to '{}'", sourceFileName, blobPath); + } + + String parentDir = + blobPath.contains("/") ? blobPath.substring(0, blobPath.lastIndexOf('/') + 1) : ""; + try { + if (!parentDir.isEmpty()) { + client.createDirectory(parentDir); + } + } catch (AzureBlobException e) { + // ignore; write will surface real issues + } + + try (IndexInput input = + shouldVerifyChecksum + ?
sourceDir.openChecksumInput(sourceFileName) + : sourceDir.openInput(sourceFileName, IOContext.READONCE)) { + if (input.length() <= CodecUtil.footerLength()) { + throw new CorruptIndexException("file is too small:" + input.length(), input); + } + + try (OutputStream output = client.pushStream(blobPath)) { + byte[] buffer = new byte[COPY_BUFFER_SIZE]; + long remaining = + shouldVerifyChecksum ? input.length() - CodecUtil.footerLength() : input.length(); + while (remaining > 0) { + int toRead = (int) Math.min(buffer.length, remaining); + input.readBytes(buffer, 0, toRead); + output.write(buffer, 0, toRead); + remaining -= toRead; + } + if (shouldVerifyChecksum) { + long checksum = CodecUtil.checkFooter((ChecksumIndexInput) input); + writeFooter(checksum, output); + } + } + } + } + + /** + * Downloads a blob into {@code dest} as {@code destFileName}, copying the stream byte for byte, + * and logs the elapsed time at info level. If the blob path of {@code sourceDir} already ends + * with {@code sourceFileName}, it is taken to be the blob itself; otherwise the file name is + * resolved against it. + * + * @throws IllegalArgumentException if either file name is null or empty + */ + @Override + public void copyIndexFileTo( + URI sourceDir, String sourceFileName, Directory dest, String destFileName) + throws IOException { + if (StrUtils.isNullOrEmpty(sourceFileName)) { + throw new IllegalArgumentException("must have a valid source file name to copy"); + } + if (StrUtils.isNullOrEmpty(destFileName)) { + throw new IllegalArgumentException("must have a valid destination file name to copy"); + } + + String basePath = getBlobPath(sourceDir); + String blobPath; + if (basePath.endsWith("/" + sourceFileName) + || basePath.equals(sourceFileName) + || basePath.equals("/" + sourceFileName)) { + blobPath = basePath; + } else { + URI filePath = resolve(sourceDir, sourceFileName); + blobPath = getBlobPath(filePath); + } + + Instant start = Instant.now(); + if (log.isDebugEnabled()) { + log.debug("Download started from blob '{}'", blobPath); + } + + try (InputStream inputStream = client.pullStream(blobPath); + IndexOutput indexOutput = dest.createOutput(destFileName, IOContext.DEFAULT)) { + byte[] buffer = new byte[COPY_BUFFER_SIZE]; + int len; + while ((len = inputStream.read(buffer)) != -1) { + indexOutput.writeBytes(buffer, 0, len); + } + } + + long timeElapsed =
Duration.between(start, Instant.now()).toMillis(); + + if (log.isInfoEnabled()) { + log.info("Download from Azure Blob Storage '{}' finished in {}ms", blobPath, timeElapsed); + } + } + + /** Closes the storage client, if one is held. */ + @Override + public void close() throws IOException { + if (client != null) { + client.close(); + } + } + + /** + * Converts a {@code blob:} URI into the path string handed to the storage client. + * + * @throws IllegalArgumentException if the URI does not use the {@code blob} scheme + */ + private String getBlobPath(URI uri) { + if (!BLOB_SCHEME.equalsIgnoreCase(uri.getScheme())) { + throw new IllegalArgumentException("URI must begin with 'blob:' scheme"); + } + // Depending on the scheme, the first path element may be parsed as the URI host (e.g. + // "blob://dir/file" -> host="dir"). Fold it back into the path, mirroring S3BackupRepository. + String host = uri.getHost(); + return host == null ? uri.getPath() : host + uri.getPath(); + } + + /** + * Writes a codec footer to {@code outputStream} through a minimal IndexOutput adapter: its write + * methods forward to the stream and its getChecksum method returns the supplied checksum. + */ + private void writeFooter(long checksum, OutputStream outputStream) throws IOException { + IndexOutput out = + new IndexOutput("", "") { + @Override + public void writeByte(byte b) throws IOException { + outputStream.write(b); + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + outputStream.write(b, offset, length); + } + + @Override + public void close() {} + + @Override + public long getFilePointer() { + return 0; + } + + @Override + public long getChecksum() { + return checksum; + } + }; + CodecUtil.writeFooter(out); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java new file mode 100644 index 000000000000..a4cfac3f4a9f --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import org.apache.solr.common.util.EnvUtils; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; + +/** + * Holds the settings of the Azure Blob backup repository and builds an AzureBlobStorageClient from + * them. Each setting is looked up under one of the property names declared below, see + * getStringConfig for the lookup order. + */ +public class AzureBlobBackupRepositoryConfig { + + /** Property naming the blob container; the only setting that buildClient itself validates. */ + public static final String CONTAINER_NAME = "azure.blob.container.name"; + public static final String CONNECTION_STRING = "azure.blob.connection.string"; + public static final String ENDPOINT = "azure.blob.endpoint"; + public static final String ACCOUNT_NAME = "azure.blob.account.name"; + public static final String ACCOUNT_KEY = "azure.blob.account.key"; + public static final String SAS_TOKEN = "azure.blob.sas.token"; + public static final String TENANT_ID = "azure.blob.tenant.id"; + public static final String CLIENT_ID = "azure.blob.client.id"; + public static final String CLIENT_SECRET = "azure.blob.client.secret"; + + private final String containerName; + private final String connectionString; + private final String endpoint; + private final String accountName; + private final String accountKey; + private final String sasToken; + private final String tenantId; + private final String clientId; + private final String clientSecret; + + /** Reads every setting through getStringConfig; a setting that is not defined is kept as null. */ + public AzureBlobBackupRepositoryConfig(NamedList config) { + containerName = getStringConfig(config, CONTAINER_NAME); + connectionString = getStringConfig(config, CONNECTION_STRING); + endpoint = getStringConfig(config,
ENDPOINT); + accountName = getStringConfig(config, ACCOUNT_NAME); + accountKey = getStringConfig(config, ACCOUNT_KEY); + sasToken = getStringConfig(config, SAS_TOKEN); + tenantId = getStringConfig(config, TENANT_ID); + clientId = getStringConfig(config, CLIENT_ID); + clientSecret = getStringConfig(config, CLIENT_SECRET); + } + + /** + * Creates the storage client from the settings read at construction time. All nine values are + * passed through as read; only the container name is checked here. + * + * @throws IllegalArgumentException if the container name is null or empty + */ + public AzureBlobStorageClient buildClient() { + if (StrUtils.isNullOrEmpty(containerName)) { + throw new IllegalArgumentException( + "Missing required configuration '" + + CONTAINER_NAME + + "' for the Azure Blob backup repository"); + } + return new AzureBlobStorageClient( + containerName, + connectionString, + endpoint, + accountName, + accountKey, + sasToken, + tenantId, + clientId, + clientSecret); + } + + /** + * Looks up {@code property}. A non-null value returned by EnvUtils.getProperty takes precedence; + * otherwise the entry stored in {@code config} is used, converted with toString. + * + * @return the value as a string, or null when neither source defines the property + */ + static String getStringConfig(NamedList config, String property) { + String envProp = EnvUtils.getProperty(property); + if (envProp == null) { + Object configProp = config.get(property); + return configProp == null ? null : configProp.toString(); + } else { + return envProp; + } + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobException.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobException.java new file mode 100644 index 000000000000..ebdcda28a338 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import java.io.IOException; + +/** + * Generic exception for Blob Storage related failures. Could originate from the {@link + * AzureBlobBackupRepository} or from its underlying {@link AzureBlobStorageClient}. + */ +public class AzureBlobException extends IOException { + public AzureBlobException(Throwable cause) { + super(cause); + } + + public AzureBlobException(String message) { + super(message); + } + + public AzureBlobException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobIndexInput.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobIndexInput.java new file mode 100644 index 000000000000..15879c059e3a --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobIndexInput.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.Locale; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.BufferedIndexInput; +import org.apache.lucene.store.IndexInput; + +/** + * {@link BufferedIndexInput} implementation that reads from a single blob in Azure Blob Storage, + * lazily opening per-instance HTTP range streams via {@link AzureBlobStorageClient}. + */ +class AzureBlobIndexInput extends BufferedIndexInput { + + static final int DEFAULT_BUFFER_SIZE = 64 * 1024; + static final int LOCAL_BUFFER_SIZE = 16 * 1024; + + private final AzureBlobStorageClient client; + private final String path; + + private final long absoluteOffset; + private final long length; + + private InputStream inputStream; + private long streamAbsolutePos = -1L; + private boolean closed = false; + + AzureBlobIndexInput(AzureBlobStorageClient client, String path, long length) { + this(client, path, 0L, length, "AzureBlobIndexInput(" + path + ")", DEFAULT_BUFFER_SIZE); + } + + private AzureBlobIndexInput( + AzureBlobStorageClient client, + String path, + long absoluteOffset, + long length, + String resourceDescription, + int bufferSize) { + super(resourceDescription, bufferSize); + this.client = client; + this.path = path; + this.absoluteOffset = absoluteOffset; + this.length = length; + } + + @Override + protected void readInternal(ByteBuffer dst) throws IOException { + if (closed) { + throw new 
AlreadyClosedException("Already closed: " + this); + } + + int expectedLength = dst.remaining(); + if (expectedLength == 0) { + return; + } + + long targetAbsolutePos = absoluteOffset + getFilePointer(); + ensureStreamAt(targetAbsolutePos); + + byte[] localBuffer = null; + try { + while (dst.hasRemaining()) { + int read; + if (dst.hasArray()) { + read = inputStream.read(dst.array(), dst.arrayOffset() + dst.position(), dst.remaining()); + } else { + if (localBuffer == null) { + localBuffer = new byte[LOCAL_BUFFER_SIZE]; + } + read = inputStream.read(localBuffer, 0, Math.min(dst.remaining(), localBuffer.length)); + } + + if (read <= 0) { + break; + } + + if (dst.hasArray()) { + dst.position(dst.position() + read); + } else { + dst.put(localBuffer, 0, read); + } + streamAbsolutePos += read; + } + + if (dst.remaining() > 0) { + throw new EOFException( + String.format( + Locale.ROOT, + "read past EOF: expected %d bytes at pos %d but only got %d (length=%d): %s", + expectedLength, + targetAbsolutePos, + expectedLength - dst.remaining(), + length, + this)); + } + } catch (IOException | RuntimeException e) { + closeStream(); + throw e; + } + } + + @Override + protected void seekInternal(long pos) throws IOException { + if (closed) { + throw new AlreadyClosedException("Already closed: " + this); + } + if (pos < 0 || pos > length) { + throw new EOFException("read past EOF: pos=" + pos + " vs length=" + length + ": " + this); + } + + closeStream(); + } + + private void ensureStreamAt(long targetAbsolutePos) throws IOException { + if (inputStream != null && streamAbsolutePos == targetAbsolutePos) { + return; + } + + closeStream(); + + long remaining = (absoluteOffset + length) - targetAbsolutePos; + if (remaining <= 0) { + throw new EOFException( + "read past EOF: pos=" + targetAbsolutePos + " vs end=" + (absoluteOffset + length)); + } + + inputStream = client.pullRangeStream(path, targetAbsolutePos, remaining); + streamAbsolutePos = targetAbsolutePos; + } + + private void 
closeStream() { + if (inputStream != null) { + try { + inputStream.close(); + } catch (IOException ignored) { + // best-effort + } + inputStream = null; + streamAbsolutePos = -1L; + } + } + + @Override + public final long length() { + return length; + } + + @Override + public AzureBlobIndexInput clone() { + AzureBlobIndexInput clone = (AzureBlobIndexInput) super.clone(); + clone.inputStream = null; + clone.streamAbsolutePos = -1L; + return clone; + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + if (closed) { + throw new AlreadyClosedException("Already closed: " + this); + } + + if (offset < 0 || length < 0 || length > this.length - offset) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "slice() %s out of bounds: offset=%d,length=%d,fileLength=%d: %s", + sliceDescription, + offset, + length, + this.length, + this)); + } + return new AzureBlobIndexInput( + client, + path, + this.absoluteOffset + offset, + length, + getFullSliceDescription(sliceDescription), + getBufferSize()); + } + + @Override + public void close() throws IOException { + closed = true; + closeStream(); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobNotFoundException.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobNotFoundException.java new file mode 100644 index 000000000000..e28e6a7bd480 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobNotFoundException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +/** Exception thrown when a blob is not found in Azure Blob Storage. */ +public class AzureBlobNotFoundException extends AzureBlobException { + public AzureBlobNotFoundException(Throwable cause) { + super(cause); + } + + public AzureBlobNotFoundException(String message) { + super(message); + } + + public AzureBlobNotFoundException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java new file mode 100644 index 000000000000..f8363ca8d7d1 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import com.azure.core.util.BinaryData; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.specialized.BlockBlobClient; +import java.io.IOException; +import java.io.OutputStream; +import java.lang.invoke.MethodHandles; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * OutputStream implementation for Azure Blob Storage using block blobs. Supports chunked uploads + * for large files. + */ +class AzureBlobOutputStream extends OutputStream { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final int BLOCK_SIZE = 4 * 1024 * 1024; + + private final BlobClient blobClient; + private final String blobPath; + private boolean closed; + private final ByteBuffer buffer; + private BlockUpload blockUpload; + + AzureBlobOutputStream(BlobClient blobClient, String blobPath) { + this.blobClient = blobClient; + this.blobPath = blobPath; + this.closed = false; + this.buffer = ByteBuffer.allocate(BLOCK_SIZE); + this.blockUpload = null; + + if (log.isDebugEnabled()) { + log.debug("Created BlobOutputStream for blobPath '{}'", blobPath); + } + } + + @Override + public void write(int b) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + buffer.put((byte) b); + + if (!buffer.hasRemaining()) { + uploadBlock(); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if (outOfRange(off, b.length) || len < 0 || outOfRange(off + len, b.length)) { + throw new 
IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + int currentOffset = off; + int lenRemaining = len; + while (buffer.remaining() < lenRemaining) { + int firstPart = buffer.remaining(); + buffer.put(b, currentOffset, firstPart); + uploadBlock(); + + currentOffset += firstPart; + lenRemaining -= firstPart; + } + if (lenRemaining > 0) { + buffer.put(b, currentOffset, lenRemaining); + } + } + + private static boolean outOfRange(int off, int len) { + return off < 0 || off > len; + } + + private void uploadBlock() throws IOException { + int size = buffer.position(); + + if (size == 0) { + return; + } + + if (blockUpload == null) { + if (log.isDebugEnabled()) { + log.debug("New block upload for blobPath '{}'", blobPath); + } + + blockUpload = new BlockUpload(); + } + + BinaryData data = BinaryData.fromByteBuffer(ByteBuffer.wrap(buffer.array(), 0, size)); + try { + blockUpload.uploadBlock(data); + } catch (IOException | RuntimeException e) { + blockUpload.markFailed(); + if (log.isDebugEnabled()) { + log.debug("Block upload marked as failed for blobPath '{}'.", blobPath); + } + throw e; + } + + buffer.clear(); + } + + @Override + public void flush() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + // Intentionally a no-op. Full blocks are staged as the buffer fills in write(), and the + // partial tail is staged in close(). Staging on every flush() would create tiny blocks and a + // frequently-flushing caller could exhaust Azure's 50,000-committed-block limit on small files. + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + + try { + if (blockUpload != null && blockUpload.failed) { + blockUpload = null; + return; + } + + // Stage any remaining buffered bytes as the final block. 
+ uploadBlock(); + + if (blockUpload != null) { + blockUpload.complete(); + blockUpload = null; + } else { + try { + blobClient.upload(BinaryData.fromBytes(new byte[0]), true); + } catch (BlobStorageException e) { + throw new IOException( + "Failed to create empty blob", AzureBlobStorageClient.handleBlobException(e)); + } + } + } finally { + closed = true; + } + } + + private class BlockUpload { + private final List blockIds; + private boolean failed = false; + + BlockUpload() { + this.blockIds = new ArrayList<>(); + if (log.isDebugEnabled()) { + log.debug("Initiated block upload for blobPath '{}'", blobPath); + } + } + + void uploadBlock(BinaryData data) throws IOException { + if (failed) { + throw new IllegalStateException( + "Can't upload new blocks on a BlockUpload that previously failed"); + } + + String blockId = + Base64.getEncoder() + .encodeToString(UUID.randomUUID().toString().getBytes(StandardCharsets.UTF_8)); + + if (log.isDebugEnabled()) { + log.debug("Uploading block {} for blobPath '{}'", blockId, blobPath); + } + + try { + BlockBlobClient blockBlobClient = blobClient.getBlockBlobClient(); + blockBlobClient.stageBlock(blockId, data); + blockIds.add(blockId); + } catch (BlobStorageException e) { + throw new IOException( + "Failed to upload block", AzureBlobStorageClient.handleBlobException(e)); + } + } + + void complete() throws IOException { + if (failed) { + throw new IllegalStateException("Can't complete a BlockUpload that previously failed"); + } + + if (log.isDebugEnabled()) { + log.debug("Completing block upload for blobPath '{}'", blobPath); + } + + try { + BlockBlobClient blockBlobClient = blobClient.getBlockBlobClient(); + blockBlobClient.commitBlockList(blockIds, true); + } catch (BlobStorageException e) { + throw new IOException( + "Failed to commit block list", AzureBlobStorageClient.handleBlobException(e)); + } + } + + void markFailed() { + if (log.isWarnEnabled()) { + log.warn("Marking block upload as failed for blobPath '{}'", 
blobPath); + } + + failed = true; + } + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java new file mode 100644 index 000000000000..c8e69cf64590 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java @@ -0,0 +1,636 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import com.azure.core.http.HttpRequest; +import com.azure.core.http.HttpResponse; +import com.azure.core.util.Context; +import com.azure.identity.ClientSecretCredentialBuilder; +import com.azure.identity.DefaultAzureCredentialBuilder; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.batch.BlobBatchClient; +import com.azure.storage.blob.batch.BlobBatchClientBuilder; +import com.azure.storage.blob.batch.BlobBatchStorageException; +import com.azure.storage.blob.models.BlobItem; +import com.azure.storage.blob.models.BlobProperties; +import com.azure.storage.blob.models.BlobRange; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.ListBlobsOptions; +import com.azure.storage.blob.options.BlobParallelUploadOptions; +import com.azure.storage.blob.specialized.BlobInputStream; +import com.azure.storage.common.StorageSharedKeyCredential; +import com.google.common.annotations.VisibleForTesting; +import java.io.ByteArrayInputStream; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.invoke.MethodHandles; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.solr.common.util.CollectionUtil; +import org.apache.solr.common.util.ResumableInputStream; +import org.apache.solr.common.util.StrUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Creates a {@link BlobServiceClient} for communicating with Azure Blob Storage. 
Utilizes the + * default Azure credential provider chain. + */ +public class AzureBlobStorageClient { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + static final String BLOB_FILE_PATH_DELIMITER = "/"; + private static final int HTTP_NOT_FOUND = 404; + private static final int HTTP_CONFLICT = 409; + private static final int SKIP_BUFFER_SIZE = 8192; + // Azure Blob Storage caps batch operations at 256 sub-requests per HTTP request: + // https://learn.microsoft.com/rest/api/storageservices/blob-batch + // Package-private so tests can reference the boundary directly. + static final int DELETE_BATCH_SIZE = 256; + + private final BlobContainerClient containerClient; + private final BlobBatchClient batchClient; + + AzureBlobStorageClient( + String containerName, + String connectionString, + String endpoint, + String accountName, + String accountKey, + String sasToken, + String tenantId, + String clientId, + String clientSecret) { + this( + createInternalClient( + connectionString, + endpoint, + accountName, + accountKey, + sasToken, + tenantId, + clientId, + clientSecret), + containerName); + } + + AzureBlobStorageClient(BlobServiceClient blobServiceClient, String containerName) { + this.containerClient = blobServiceClient.getBlobContainerClient(containerName); + this.batchClient = new BlobBatchClientBuilder(blobServiceClient).buildClient(); + } + + private static BlobServiceClient createInternalClient( + String connectionString, + String endpoint, + String accountName, + String accountKey, + String sasToken, + String tenantId, + String clientId, + String clientSecret) { + + BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); + + if (StrUtils.isNotNullOrEmpty(connectionString)) { + builder.connectionString(connectionString); + } else if (StrUtils.isNotNullOrEmpty(endpoint)) { + builder.endpoint(endpoint); + if (StrUtils.isNotNullOrEmpty(accountName) && StrUtils.isNotNullOrEmpty(accountKey)) { + 
builder.credential(new StorageSharedKeyCredential(accountName, accountKey)); + } else if (StrUtils.isNotNullOrEmpty(sasToken)) { + builder.sasToken(sasToken); + } else if (StrUtils.isNotNullOrEmpty(tenantId) + && StrUtils.isNotNullOrEmpty(clientId) + && StrUtils.isNotNullOrEmpty(clientSecret)) { + builder.credential( + new ClientSecretCredentialBuilder() + .tenantId(tenantId) + .clientId(clientId) + .clientSecret(clientSecret) + .build()); + } else { + DefaultAzureCredentialBuilder dac = new DefaultAzureCredentialBuilder(); + if (StrUtils.isNotNullOrEmpty(tenantId)) { + dac.tenantId(tenantId); + } + + builder.credential(dac.build()); + } + } else { + throw new IllegalArgumentException("Either connectionString or endpoint must be provided"); + } + + return builder.buildClient(); + } + + void createDirectory(String path) throws AzureBlobException { + String sanitizedDirPath = sanitizedDirPath(path); + + if (!pathExists(sanitizedDirPath)) { + String parent = getParentDirectory(sanitizedDirPath); + if (!parent.isEmpty() && !parent.equals(BLOB_FILE_PATH_DELIMITER)) { + createDirectory(parent); + } + + try { + BlobClient blobClient = containerClient.getBlobClient(sanitizedDirPath); + Map metadata = new HashMap<>(); + metadata.put("hdi_isfolder", "true"); + BlobParallelUploadOptions options = + new BlobParallelUploadOptions(new ByteArrayInputStream(new byte[0])) + .setMetadata(metadata); + blobClient.uploadWithResponse(options, null, Context.NONE); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + } + + /** + * Strict delete: throws {@link AzureBlobNotFoundException} if any path was missing. Use {@link + * #deleteDirectory(String)} for lenient semantics. Not atomic — present paths may still be + * deleted server-side when this throws. 
+ */ + void delete(Collection paths) throws AzureBlobException { + Set entries = new HashSet<>(); + for (String path : paths) { + entries.add(sanitizedFilePath(path)); + } + + Collection deletedPaths = deleteBlobs(entries); + + if (entries.size() != deletedPaths.size()) { + Set missing = new HashSet<>(entries); + missing.removeAll(deletedPaths); + throw new AzureBlobNotFoundException("Blobs not found: " + missing); + } + } + + void deleteDirectory(String path) throws AzureBlobException { + path = sanitizedDirPath(path); + + Set entries = listAll(path); + if (pathExists(path)) { + entries.add(path); + } + + deleteBlobs(entries); + } + + String[] listDir(String path) throws AzureBlobException { + path = sanitizedDirPath(path); + + try { + ListBlobsOptions options = new ListBlobsOptions().setPrefix(path).setMaxResultsPerPage(1000); + + final String finalPath = path; + return containerClient.listBlobs(options, null).stream() + .map(BlobItem::getName) + .filter(s -> s.startsWith(finalPath)) + .map(s -> s.substring(finalPath.length())) + .filter(s -> !s.isEmpty()) + .filter( + s -> { + int slashIndex = s.indexOf(BLOB_FILE_PATH_DELIMITER); + return slashIndex == -1 || slashIndex == s.length() - 1; + }) + .map(s -> s.endsWith(BLOB_FILE_PATH_DELIMITER) ? s.substring(0, s.length() - 1) : s) + .distinct() + .toArray(String[]::new); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + /** + * Checks existence by resolving the exact blob (a HEAD request). This module always writes {@code + * hdi_isfolder} marker blobs for directories, so it is self-consistent. Note the asymmetry with + * {@link #isDirectory(String)}: a marker-less "virtual" directory created by an external tool + * (e.g. azcopy) returns {@code false} here even though {@code isDirectory} reports it as a + * directory via prefix listing. 
+ */ + boolean pathExists(String path) throws AzureBlobException { + final String blobPath = sanitizedPath(path); + + if (blobPath.isEmpty() || BLOB_FILE_PATH_DELIMITER.equals(blobPath)) { + return true; + } + + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + return blobClient.exists(); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + boolean isDirectory(String path) throws AzureBlobException { + final String dirPrefix = sanitizedDirPath(path); + + try { + ListBlobsOptions options = + new ListBlobsOptions().setPrefix(dirPrefix).setMaxResultsPerPage(1); + if (containerClient.listBlobs(options, null).iterator().hasNext()) { + return true; + } + + BlobClient markerClient = containerClient.getBlobClient(dirPrefix); + if (markerClient.exists()) { + BlobProperties props = markerClient.getProperties(); + if (props.getBlobSize() == 0) { + return true; + } + + Map md = props.getMetadata(); + return md != null && md.containsKey("hdi_isfolder"); + } + + return false; + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + long length(String path) throws AzureBlobException { + String blobPath = sanitizedFilePath(path); + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + return blobClient.getProperties().getBlobSize(); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + InputStream pullStream(String path) throws AzureBlobException { + final String blobPath = sanitizedFilePath(path); + + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + BlobInputStream blobInputStream = blobClient.openInputStream(); + + try { + final long contentLength = blobInputStream.getProperties().getBlobSize(); + InputStream initial = new IdempotentCloseInputStream(blobInputStream); + return new ResumableInputStream( + initial, + bytesRead -> { + if (bytesRead >= contentLength) { + return null; + } + try { + return pullRangeStream(path, 
bytesRead, contentLength - bytesRead); + } catch (AzureBlobException e) { + throw new RuntimeException(e); + } + }); + } catch (RuntimeException | Error t) { + blobInputStream.close(); + throw t; + } + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + InputStream pullRangeStream(String path, long offset, long length) throws AzureBlobException { + final String blobPath = sanitizedFilePath(path); + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + BlobRange range = new BlobRange(offset, length); + return new IdempotentCloseInputStream(blobClient.openInputStream(range, null)); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + private static final class IdempotentCloseInputStream extends FilterInputStream { + private boolean closed; + + IdempotentCloseInputStream(InputStream in) { + super(in); + this.closed = false; + } + + @Override + public int read() throws IOException { + if (closed) { + throw new IOException("Stream is already closed"); + } + try { + return super.read(); + } catch (RuntimeException re) { + if (isAlreadyClosed(re)) { + throw new IOException("Stream is already closed", re); + } + throw re; + } + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream is already closed"); + } + try { + return super.read(b, off, len); + } catch (RuntimeException re) { + if (isAlreadyClosed(re)) { + throw new IOException("Stream is already closed", re); + } + throw re; + } + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + try { + super.close(); + } catch (IOException e) { + String msg = e.getMessage(); + if (msg == null || !msg.toLowerCase(Locale.ROOT).contains("already closed")) { + throw e; + } + // swallow "already closed" to make close idempotent + } finally { + closed = true; + } + } + + @Override + public long skip(long n) throws IOException { + if (closed) { + 
throw new IOException("Stream is already closed"); + } + if (n <= 0) { + return 0L; + } + long remaining = n; + byte[] discard = new byte[SKIP_BUFFER_SIZE]; + try { + while (remaining > 0) { + int toRead = (int) Math.min(discard.length, remaining); + int read = super.read(discard, 0, toRead); + if (read < 0) { + break; + } + remaining -= read; + } + return n - remaining; + } catch (RuntimeException re) { + throw new IOException(re); + } + } + + private static boolean isAlreadyClosed(Throwable t) { + String msg = t.getMessage(); + return msg != null && msg.toLowerCase(Locale.ROOT).contains("already closed"); + } + } + + OutputStream pushStream(String path) throws AzureBlobException { + path = sanitizedFilePath(path); + + if (!parentDirectoryExist(path)) { + String parentDirectory = getParentDirectory(path); + if (!parentDirectory.isEmpty() && !parentDirectory.equals(BLOB_FILE_PATH_DELIMITER)) { + createDirectory(parentDirectory); + } + } + + try { + BlobClient blobClient = containerClient.getBlobClient(path); + return new AzureBlobOutputStream(blobClient, path); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + void close() { + // No-op: the underlying OkHttp client is SPI-loaded and shared process-wide, so there is + // nothing per-instance to release here. 
+ } + + @VisibleForTesting + void createContainerForTests() { + try { + containerClient.create(); + } catch (BlobStorageException e) { + if (e.getStatusCode() != HTTP_CONFLICT) { + throw e; + } + } + } + + @VisibleForTesting + void deleteContainerForTests() { + try { + containerClient.delete(); + } catch (BlobStorageException e) { + if (e.getStatusCode() != HTTP_NOT_FOUND) { + throw e; + } + } + } + + private Collection deleteBlobs(Collection entries) throws AzureBlobException { + if (entries.isEmpty()) { + return Set.of(); + } + + Set deletedPaths = new HashSet<>(); + List all = new ArrayList<>(entries); + + for (int start = 0; start < all.size(); start += DELETE_BATCH_SIZE) { + List chunk = all.subList(start, Math.min(start + DELETE_BATCH_SIZE, all.size())); + + // The batch API addresses sub-requests by full blob URL, not container-relative path; keep + // an inverse map so we can identify which chunk entries 404'd from sub-exception URLs. + List blobUrls = new ArrayList<>(chunk.size()); + Map urlToPath = CollectionUtil.newHashMap(chunk.size()); + for (String path : chunk) { + String url = containerClient.getBlobClient(path).getBlobUrl(); + blobUrls.add(url); + urlToPath.put(url, path); + } + + try { + batchClient.deleteBlobs(blobUrls, null).forEach(r -> {}); + deletedPaths.addAll(chunk); + } catch (BlobBatchStorageException e) { + Set notFound = new HashSet<>(); + int subExceptionCount = 0; + for (BlobStorageException sub : e.getBatchExceptions()) { + subExceptionCount++; + if (sub.getStatusCode() != HTTP_NOT_FOUND) { + throw new AzureBlobException( + String.format( + Locale.ROOT, + "Batch delete failed (HTTP %d on %s)", + sub.getStatusCode(), + subRequestUrl(sub)), + e); + } + String path = urlToPath.get(subRequestUrl(sub)); + if (path != null) { + notFound.add(path); + } else if (log.isWarnEnabled()) { + log.warn( + "Could not map batch sub-response URL {} back to a chunk path", subRequestUrl(sub)); + } + } + + // URL attribution missed a sub-exception 
(canonical-form drift): fall back to + // "whole chunk not deleted" so the strict check in delete() still fires. + if (notFound.size() != subExceptionCount) { + notFound.addAll(chunk); + } + + if (log.isDebugEnabled()) { + log.debug("Batch delete tolerated {} not-found sub-responses", notFound.size()); + } + + for (String path : chunk) { + if (!notFound.contains(path)) { + deletedPaths.add(path); + } + } + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + return deletedPaths; + } + + /** + * Extracts the request URL from a batch sub-exception; returns {@code ""} when the response, + * its request, or the request URL is {@code null}. + */ + private static String subRequestUrl(BlobStorageException sub) { + HttpResponse response = sub.getResponse(); + HttpRequest request = response == null ? null : response.getRequest(); + URL url = request == null ? null : request.getUrl(); + return url == null ? "" : url.toString(); + } + + /** + * Lists the names of all blobs under the directory prefix that {@code sanitizedDirPath} derives + * from {@code path}. Results are requested in pages of up to 1000 items, filtered once more + * against the prefix, and collected into a set. + * + * @param path directory path; normalized to end with the delimiter before listing + * @return the blob names that start with the normalized prefix + * @throws AzureBlobException when a {@code BlobStorageException} is translated by {@code + * handleBlobException} + */ + private Set listAll(String path) throws AzureBlobException { + String prefix = sanitizedDirPath(path); + + try { + ListBlobsOptions options = + new ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1000); + + return containerClient.listBlobs(options, null).stream() + .map(BlobItem::getName) + .filter(s -> s.startsWith(prefix)) + .collect(Collectors.toSet()); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + /** + * Returns {@code true} when {@code path} has no parent to check (the computed parent is empty + * or is just the delimiter), otherwise whatever {@code pathExists} reports for the parent. + */ + private boolean parentDirectoryExist(String path) throws AzureBlobException { + String parentDirectory = getParentDirectory(path); + + if (parentDirectory.isEmpty() || parentDirectory.equals(BLOB_FILE_PATH_DELIMITER)) { + return true; + } + + return pathExists(parentDirectory); + } + + /** + * Returns the parent prefix of {@code path}, including its trailing delimiter, or {@code ""} + * when there is none. One trailing delimiter on {@code path} is ignored, so (assuming the + * delimiter is "/") both "a/b" and "a/b/" yield "a/", while "a" and "a/" yield "". + */ + private String getParentDirectory(String path) { + if (!path.contains(BLOB_FILE_PATH_DELIMITER)) { + return ""; + } + + int fromEnd = path.length() - 1; + if (path.endsWith(BLOB_FILE_PATH_DELIMITER)) { + fromEnd -= 1; + } + return fromEnd > 0 + ? 
path.substring(0, path.lastIndexOf(BLOB_FILE_PATH_DELIMITER, fromEnd) + 1) + : ""; + } + + /** + * Trims surrounding whitespace and strips every leading delimiter from {@code path}, trimming + * again after each stripped delimiter. A trailing delimiter is left in place. + */ + String sanitizedPath(String path) { + String sanitizedPath = path.trim(); + while (sanitizedPath.startsWith(BLOB_FILE_PATH_DELIMITER)) { + sanitizedPath = sanitizedPath.substring(1).trim(); + } + + return sanitizedPath; + } + + /** + * Normalizes {@code path} with {@code sanitizedPath} and validates it as a file path. + * + * @return the sanitized path + * @throws AzureBlobException if the sanitized path ends with the delimiter or is empty + */ + String sanitizedFilePath(String path) throws AzureBlobException { + String sanitizedPath = sanitizedPath(path); + + if (sanitizedPath.endsWith(BLOB_FILE_PATH_DELIMITER)) { + throw new AzureBlobException("Invalid Path. Path for file can't end with '/'"); + } + + if (sanitizedPath.isEmpty()) { + throw new AzureBlobException("Invalid Path. Path cannot be empty"); + } + + return sanitizedPath; + } + + /** + * Normalizes {@code path} with {@code sanitizedPath} and appends the delimiter when it is not + * already the last character. Note that an empty or delimiter-only input therefore yields the + * bare delimiter rather than an empty string. + */ + String sanitizedDirPath(String path) throws AzureBlobException { + String sanitizedPath = sanitizedPath(path); + + if (!sanitizedPath.endsWith(BLOB_FILE_PATH_DELIMITER)) { + sanitizedPath += BLOB_FILE_PATH_DELIMITER; + } + + return sanitizedPath; + } + + /** + * Translates an SDK {@code BlobStorageException} into this module's exception type. The + * returned exception carries a message with the status code, error code and SDK message, and + * keeps {@code e} as its cause. This method only builds the exception; the caller throws it. + * + * @param e the SDK exception to translate + * @return an {@code AzureBlobNotFoundException} (logged at debug level) when the status code + * equals {@code HTTP_NOT_FOUND}, otherwise an {@code AzureBlobException} (logged at error + * level) + */ + static AzureBlobException handleBlobException(BlobStorageException e) { + String errMessage = + String.format( + Locale.ROOT, + "Azure Blob Storage error: [statusCode=%s] [errorCode=%s] [message=%s]", + e.getStatusCode(), + e.getErrorCode(), + e.getMessage()); + + if (e.getStatusCode() == HTTP_NOT_FOUND) { + if (log.isDebugEnabled()) { + log.debug(errMessage); + } + return new AzureBlobNotFoundException(errMessage, e); + } + + log.error(errMessage); + return new AzureBlobException(errMessage, e); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/package-info.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/package-info.java new file mode 100644 index 000000000000..c76136b3e788 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Solr Azure Blob Storage backup repository */ +package org.apache.solr.azureblob; diff --git a/solr/modules/azure-blob-repository/src/test-files/conf/schema.xml b/solr/modules/azure-blob-repository/src/test-files/conf/schema.xml new file mode 100644 index 000000000000..a3a7cc465c27 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test-files/conf/schema.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/modules/azure-blob-repository/src/test-files/conf/solrconfig.xml b/solr/modules/azure-blob-repository/src/test-files/conf/solrconfig.xml new file mode 100644 index 000000000000..853ba6562416 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test-files/conf/solrconfig.xml @@ -0,0 +1,51 @@ + + + + + + + + + ${solr.data.dir:} + + + + + ${tests.luceneMatchVersion:LATEST} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + explicit + true + text + + + + + +: + + diff --git a/solr/modules/azure-blob-repository/src/test-files/log4j2.xml b/solr/modules/azure-blob-repository/src/test-files/log4j2.xml new file mode 100644 index 000000000000..528299e3e0bd --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test-files/log4j2.xml @@ -0,0 +1,40 @@ + + + + + + + + + %maxLen{%-4r %-5p (%t) [%notEmpty{n:%X{node_name}}%notEmpty{ 
c:%X{collection}}%notEmpty{ s:%X{shard}}%notEmpty{ r:%X{replica}}%notEmpty{ x:%X{core}}%notEmpty{ t:%X{trace_id}}] %c{1.} %m%notEmpty{ + =>%ex{short}}}{10240}%n + + + + + + + + + + + + + + + diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AbstractAzureBlobClientTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AbstractAzureBlobClientTest.java new file mode 100644 index 000000000000..064859aac29c --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AbstractAzureBlobClientTest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import com.azure.core.http.HttpClient; +import com.azure.core.http.okhttp.OkHttpAsyncHttpClientBuilder; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.carrotsearch.randomizedtesting.ThreadFilter; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.util.UUID; +import okhttp3.OkHttpClient; +import org.apache.lucene.tests.util.QuickPatchThreadsFilter; +import org.apache.solr.SolrIgnoredThreadsFilter; +import org.apache.solr.SolrTestCase; +import org.apache.solr.util.SocketProxy; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.Before; +import org.junit.BeforeClass; + +/** Abstract class for tests with Azure Blob Storage emulator. */ +@ThreadLeakFilters( + defaultFilters = true, + filters = { + SolrIgnoredThreadsFilter.class, + QuickPatchThreadsFilter.class, + AbstractAzureBlobClientTest.OkHttpThreadLeakFilterTest.class, + }) +public class AbstractAzureBlobClientTest extends SolrTestCase { + + private static AzuriteTestContainer azurite; + private static OkHttpClient sharedOkHttpClient; + private static String connectionString; + + protected String containerName; + protected SocketProxy proxy; + + protected AzureBlobStorageClient client; + + /** + * Starts the Azurite container and builds the OkHttp client shared by all tests of the class. + * Any failure is converted into a JUnit assumption failure, so the suite is skipped rather + * than failed when the emulator cannot be started. + */ + @BeforeClass + public static void setUpClass() { + try { + azurite = AzuriteTestContainer.start(); + sharedOkHttpClient = new OkHttpClient.Builder().build(); + } catch (Throwable t) { + Assume.assumeNoException("Docker/Testcontainers not available; skipping Azure tests", t); + } + } + + /** + * Per-test setup: sets the dummy Azure credential properties, opens a {@code SocketProxy} in + * front of the emulator's blob endpoint, builds a service client whose connection string + * points at the proxy port, and creates a container with a random {@code test-} name. + */ + @Before + public void setUpClient() throws Exception { + setAzureTestCredentials(); + + URI blobServiceUri = new URI(getBlobServiceUrl()); + connectionString = azurite.connectionString(); + + proxy = new SocketProxy(); + 
proxy.open(blobServiceUri); + + HttpClient httpClient = new OkHttpAsyncHttpClientBuilder(sharedOkHttpClient).build(); + + // Route the client through the proxy so tests can simulate connection loss. + String proxiedConn = + connectionString.replace(":" + blobServiceUri.getPort(), ":" + proxy.getListenPort()); + + BlobServiceClient blobServiceClient = + new BlobServiceClientBuilder() + .connectionString(proxiedConn) + .httpClient(httpClient) + .buildClient(); + + containerName = "test-" + UUID.randomUUID(); + client = new AzureBlobStorageClient(blobServiceClient, containerName); + client.createContainerForTests(); + } + + /** + * Sets placeholder values for the {@code AZURE_CLIENT_ID}, {@code AZURE_TENANT_ID} and {@code + * AZURE_CLIENT_SECRET} system properties. The values are fixed dummy strings, not real + * credentials. + */ + public static void setAzureTestCredentials() { + System.setProperty("AZURE_CLIENT_ID", "test-client-id"); + System.setProperty("AZURE_TENANT_ID", "test-tenant-id"); + System.setProperty("AZURE_CLIENT_SECRET", "test-client-secret"); + } + + /** + * Per-test cleanup: deletes the test container on a best-effort basis (any {@code Throwable} + * from the deletion is ignored), closes the client, then closes and clears the proxy. + */ + @After + public void tearDownClient() { + if (client != null) { + try { + client.deleteContainerForTests(); + } catch (Throwable ignored) { + } + client.close(); + } + if (proxy != null) { + proxy.close(); + proxy = null; + } + } + + /** Simulate a connection loss on the proxy. 
*/ + void initiateBlobConnectionLoss() { + if (proxy != null) { + proxy.halfClose(); + } + } + + @AfterClass + public static void afterAll() { + if (azurite != null) { + azurite.stop(); + azurite = null; + } + sharedOkHttpClient = null; + } + + void pushContent(String path, String content) throws AzureBlobException { + pushContent(path, content.getBytes(StandardCharsets.UTF_8)); + } + + void pushContent(String path, byte[] content) throws AzureBlobException { + try (OutputStream output = client.pushStream(path)) { + output.write(content); + } catch (IOException e) { + throw new AzureBlobException("Failed to write content", e); + } + } + + static String getConnectionString() { + return connectionString; + } + + String getBlobServiceUrl() { + return azurite.blobEndpoint(); + } + + public static class OkHttpThreadLeakFilterTest implements ThreadFilter { + + @Override + public boolean reject(Thread t) { + String name = t.getName(); + if (name == null) { + return false; + } + // OkHttp connection pool / dispatcher and Okio watchdog threads, plus the Reactor scheduler + // daemon threads the Azure SDK initializes. These are process-wide and outlive individual + // tests, so we filter them instead of force-shutting them down. + return name.contains("OkHttp") + || name.contains("Okio Watchdog") + || name.startsWith("reactor-") + || name.startsWith("parallel-") + || name.startsWith("boundedElastic-") + || name.startsWith("single-"); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobBackupRepositoryTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobBackupRepositoryTest.java new file mode 100644 index 000000000000..a8cbe90e8321 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobBackupRepositoryTest.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import com.azure.core.util.BinaryData; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import org.apache.lucene.tests.util.QuickPatchThreadsFilter; +import org.apache.solr.SolrIgnoredThreadsFilter; +import org.apache.solr.cloud.api.collections.AbstractBackupRepositoryTest; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Runs the shared {@link AbstractBackupRepositoryTest} suite against a real {@link + * AzureBlobBackupRepository} that is created through its normal {@link + * AzureBlobBackupRepository#init(NamedList)} code path, backed by an Azurite emulator. 
+ */ +@ThreadLeakFilters( + defaultFilters = true, + filters = { + SolrIgnoredThreadsFilter.class, + QuickPatchThreadsFilter.class, + AbstractAzureBlobClientTest.OkHttpThreadLeakFilterTest.class, + }) +public class AzureBlobBackupRepositoryTest extends AbstractBackupRepositoryTest { + + private static final String CONTAINER_NAME = "test-backup-repository"; + + private static AzuriteTestContainer azurite; + private static String connectionString; + + /** + * Starts the Azurite container (a start failure becomes a JUnit assumption failure, skipping + * the suite), then caches its connection string and ensures the test container exists. + */ + @BeforeClass + public static void setupClass() { + try { + azurite = AzuriteTestContainer.start(); + } catch (Throwable t) { + Assume.assumeNoException("Docker/Testcontainers not available; skipping Azure tests", t); + } + connectionString = azurite.connectionString(); + azurite.createContainerIfMissing(CONTAINER_NAME); + } + + /** Stops the Azurite container if it was started and clears the cached static state. */ + @AfterClass + public static void tearDownClass() { + if (azurite != null) { + azurite.stop(); + azurite = null; + } + connectionString = null; + } + + /** Returns the repository implementation under test. */ + @Override + protected Class getRepositoryClass() { + return AzureBlobBackupRepository.class; + } + + /** Builds a new repository and initializes it with the base configuration of this test. */ + @Override + protected BackupRepository getRepository() { + AzureBlobBackupRepository repository = new AzureBlobBackupRepository(); + repository.init(getBaseBackupRepositoryConfiguration()); + return repository; + } + + /** Returns the root URI of the repository: the blob scheme followed by {@code ":/"}. */ + @Override + protected URI getBaseUri() throws URISyntaxException { + return new URI(AzureBlobBackupRepository.BLOB_SCHEME + ":/"); + } + + /** + * Returns the minimal repository configuration: the test container name and the emulator's + * connection string. + */ + @Override + protected NamedList getBaseBackupRepositoryConfiguration() { + NamedList args = new NamedList<>(); + args.add(AzureBlobBackupRepositoryConfig.CONTAINER_NAME, CONTAINER_NAME); + args.add(AzureBlobBackupRepositoryConfig.CONNECTION_STRING, connectionString); + return args; + } + + /** + * Azure-specific coverage not exercised by the shared suite: an external tool (e.g. azcopy) + * writes a child blob without this module's {@code hdi_isfolder} marker for its parent + * "directory". 
{@code getPathType} reports it as a directory via prefix listing, while {@code + * exists} resolves the exact marker-less blob and returns false. This documents the intentional + * asymmetry; the module stays self-consistent because it always writes markers itself. + */ + @Test + public void testExistsVsGetPathTypeForExternalVirtualDirectory() + throws IOException, URISyntaxException { + BlobServiceClient serviceClient = + new BlobServiceClientBuilder().connectionString(connectionString).buildClient(); + BlobContainerClient containerClient = serviceClient.getBlobContainerClient(CONTAINER_NAME); + containerClient + .getBlobClient("external-dir/child.txt") + .upload(BinaryData.fromString("external data"), true); + + try (BackupRepository repo = getRepository()) { + URI dirUri = repo.resolveDirectory(getBaseUri(), "external-dir"); + + assertEquals( + "Marker-less directory should be detected as a directory", + BackupRepository.PathType.DIRECTORY, + repo.getPathType(dirUri)); + + assertFalse( + "exists() returns false for a marker-less external directory", repo.exists(dirUri)); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIncrementalBackupTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIncrementalBackupTest.java new file mode 100644 index 000000000000..80a8c97ee7a2 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIncrementalBackupTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.QuickPatchThreadsFilter; +import org.apache.solr.SolrIgnoredThreadsFilter; +import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest; +import org.apache.solr.util.LogLevel; +import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.BeforeClass; + +/** + * Runs the shared {@link AbstractIncrementalBackupTest} SolrCloud suite against {@link + * AzureBlobBackupRepository}, backed by an Azurite emulator. 
+ */ +// Backups do checksum validation against a footer value not present in 'SimpleText' +@LuceneTestCase.SuppressCodecs({"SimpleText"}) +@ThreadLeakLingering(linger = 10) +@ThreadLeakFilters( + defaultFilters = true, + filters = { + SolrIgnoredThreadsFilter.class, + QuickPatchThreadsFilter.class, + AbstractAzureBlobClientTest.OkHttpThreadLeakFilterTest.class, + }) +@LogLevel( + value = + "org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.api.collections=DEBUG;org.apache.solr.cloud.overseer=DEBUG") +public class AzureBlobIncrementalBackupTest extends AbstractIncrementalBackupTest { + + private static final String CONTAINER_NAME = "incremental-backup-test"; + + private static AzuriteTestContainer azurite; + + private static final String SOLR_XML = + "\n" + + "\n" + + " ${shareSchema:false}\n" + + " ${configSetBaseDir:configsets}\n" + + " ${coreRootDirectory:.}\n" + + "\n" + + " \n" + + " ${urlScheme:}\n" + + " ${socketTimeout:90000}\n" + + " ${connTimeout:15000}\n" + + " \n" + + "\n" + + " \n" + + " 127.0.0.1\n" + + " ${hostPort:8983}\n" + + " ${solr.zookeeper.client.timeout:30000}\n" + + " 10000\n" + + " ${distribUpdateConnTimeout:45000}\n" + + " ${distribUpdateSoTimeout:340000}\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " azure\n" + + " ${hostPort:8983}\n" + + " \n" + + " \n" + + " azure\n" + + " \n" + + " \n" + + " CONTAINER\n" + + " CONNECTION_STRING\n" + + " \n" + + " \n" + + " \n" + + "\n"; + + @BeforeClass + public static void ensureCompatibleLocale() { + // TODO: Find incompatible locales + } + + @BeforeClass + public static void setupClass() throws Exception { + try { + azurite = AzuriteTestContainer.start(); + } catch (Throwable t) { + Assume.assumeNoException("Docker/Testcontainers not available; skipping Azure tests", t); + } + azurite.createContainerIfMissing(CONTAINER_NAME); + + // Enable parallel backup/restore for cloud storage tests + System.setProperty("solr.backup.maxparalleluploads", "2"); + 
System.setProperty("solr.backup.maxparalleldownloads", "2"); + + configureCluster(NUM_NODES) // nodes + .addConfig("conf1", getFile("conf/solrconfig.xml").getParent()) + .withSolrXml( + SOLR_XML + .replace("CONTAINER", CONTAINER_NAME) + .replace("CONNECTION_STRING", azurite.connectionString())) + .configure(); + } + + @AfterClass + public static void tearDownClass() { + if (azurite != null) { + azurite.stop(); + azurite = null; + } + } + + @Override + public String getCollectionNamePrefix() { + return "backuprestore"; + } + + @Override + public String getBackupLocation() { + return "/"; + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIndexInputTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIndexInputTest.java new file mode 100644 index 000000000000..bb432031f72a --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIndexInputTest.java @@ -0,0 +1,396 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Locale; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.BufferedIndexInput; +import org.apache.lucene.store.IndexInput; +import org.junit.Test; + +public class AzureBlobIndexInputTest extends AbstractAzureBlobClientTest { + + /** Sequential read of a small blob via {@code readBytes} returns the full content unchanged. */ + @Test + public void testBasicIndexInput() throws Exception { + String path = "index-input-test.txt"; + String content = "Index input test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + byte[] buffer = new byte[1024]; + input.readBytes(buffer, 0, content.length()); + String readContent = new String(buffer, 0, content.length(), StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + /** Forward {@code seek()} into the middle of the blob, then read returns the suffix. */ + @Test + public void testIndexInputSeek() throws Exception { + String path = "index-input-seek-test.txt"; + String content = "Index input seek test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + long seekPosition = content.length() / 2; + input.seek(seekPosition); + + byte[] buffer = new byte[1024]; + String expectedContent = content.substring((int) seekPosition); + input.readBytes(buffer, 0, expectedContent.length()); + String readContent = new String(buffer, 0, expectedContent.length(), StandardCharsets.UTF_8); + assertEquals("Content from seek position should match", expectedContent, readContent); + } + } + + /** {@code length()} reports the blob's content length. 
*/ + @Test + public void testIndexInputLength() throws Exception { + String path = "index-input-length-test.txt"; + String content = "Length test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + assertEquals("Length should match", content.length(), input.length()); + } + } + + /** Byte-by-byte sequential read via {@code readByte()} reconstructs the original content. */ + @Test + public void testIndexInputReadByte() throws Exception { + String path = "index-input-byte-test.txt"; + String content = "Byte read test"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + StringBuilder readContent = new StringBuilder(); + for (int i = 0; i < content.length(); i++) { + byte b = input.readByte(); + readContent.append((char) b); + } + + assertEquals("Byte by byte content should match", content, readContent.toString()); + } + } + + /** Chunked reads with a small buffer cover the whole file across multiple buffer refills. */ + @Test + public void testIndexInputReadBytes() throws Exception { + String path = "index-input-bytes-test.txt"; + String content = "Bytes read test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + byte[] buffer = new byte[10]; + StringBuilder readContent = new StringBuilder(); + + long remaining = input.length(); + while (remaining > 0) { + int toRead = (int) Math.min(buffer.length, remaining); + input.readBytes(buffer, 0, toRead); + readContent.append(new String(buffer, 0, toRead, StandardCharsets.UTF_8)); + remaining -= toRead; + } + + assertEquals("Bytes content should match", content, readContent.toString()); + } + } + + /** Seeking exactly to {@code length} is allowed; the next {@code readByte()} throws EOF. 
*/ + @Test + public void testIndexInputSeekToEnd() throws Exception { + String path = "index-input-seek-end-test.txt"; + String content = "Seek to end test"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + input.seek(content.length()); + expectThrows(IOException.class, input::readByte); + } + } + + /** Seeking past {@code length} throws {@link IOException}. */ + @Test + public void testIndexInputSeekBeyondEnd() throws Exception { + String path = "index-input-seek-beyond-test.txt"; + String content = "Seek beyond end test"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + long invalidPosition = content.length() + 1L; + expectThrows(IOException.class, () -> input.seek(invalidPosition)); + } + } + + /** {@code getFilePointer()} reflects both incremental reads and explicit seeks. */ + @Test + public void testIndexInputGetFilePointer() throws Exception { + String path = "index-input-pointer-test.txt"; + String content = "File pointer test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + assertEquals("Initial position should be 0", 0, input.getFilePointer()); + + byte[] buffer = new byte[5]; + input.readBytes(buffer, 0, buffer.length); + assertEquals("Position should be 5 after reading 5 bytes", 5, input.getFilePointer()); + + input.seek(10); + assertEquals("Position should be 10 after seek", 10, input.getFilePointer()); + } + } + + /** + * Reading a multi-hundred-KB blob in 8 KB chunks exercises the buffer-refill / range-stream + * draining path end-to-end. 
+ */ + @Test + public void testIndexInputLargeFile() throws Exception { + String path = "index-input-large-test.txt"; + StringBuilder contentBuilder = new StringBuilder(); + + for (int i = 0; i < 10000; i++) { + contentBuilder.append("This is line ").append(i).append(" of the large file.\n"); + } + String content = contentBuilder.toString(); + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + assertEquals("Length should match", content.length(), input.length()); + + byte[] buffer = new byte[8192]; + StringBuilder readContent = new StringBuilder(); + long remaining = input.length(); + while (remaining > 0) { + int toRead = (int) Math.min(buffer.length, remaining); + input.readBytes(buffer, 0, toRead); + readContent.append(new String(buffer, 0, toRead, StandardCharsets.UTF_8)); + remaining -= toRead; + } + + assertEquals("Large content should match", content, readContent.toString()); + } + } + + /** On a 0-byte blob: {@code length} and {@code getFilePointer} are 0, and any read throws EOF. */ + @Test + public void testIndexInputEmptyFile() throws Exception { + String path = "index-input-empty-test.txt"; + String content = ""; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + assertEquals("Length should be 0", 0, input.length()); + assertEquals("Position should be 0", 0, input.getFilePointer()); + expectThrows(IOException.class, input::readByte); + } + } + + /** + * After {@code close()}, both {@code readByte} and {@code slice} throw {@link + * AlreadyClosedException} rather than silently re-opening a fresh stream. 
+ */ + @Test + public void testIndexInputClose() throws Exception { + String path = "index-input-close-test.txt"; + String content = "Close test content"; + + pushContent(path, content); + + AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path)); + input.close(); /* both a read and a slice on the closed input must be rejected */ + expectThrows(AlreadyClosedException.class, input::readByte); + expectThrows(AlreadyClosedException.class, () -> input.slice("after-close", 0L, 1L)); + } + + /** {@code close()} is idempotent: calling it twice does not throw. */ + @Test + public void testIndexInputMultipleClose() throws Exception { + String path = "index-input-multiple-close-test.txt"; + String content = "Multiple close test content"; + + pushContent(path, content); + + AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path)); + input.close(); + input.close(); /* the test passes simply by this second call not throwing */ + } + + /** + * Lucene's {@code CodecUtil.retrieveChecksum} and several other codec routines seek backward to + * positions before the current buffer (e.g. {@code seek(0)} after reading the trailing footer). + * Verify that an interleaved forward/backward seek pattern returns correct data. + */ + @Test + public void testIndexInputBackwardSeek() throws Exception { + String path = "index-input-backward-seek-test.txt"; + // Content larger than the default buffer so seeks cross buffer boundaries.
+ StringBuilder contentBuilder = new StringBuilder(); + for (int i = 0; i < 5000; i++) { + contentBuilder.append(String.format(Locale.ROOT, "line%04d ", i)); + } + String content = contentBuilder.toString(); + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + + pushContent(path, contentBytes); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + // Read tail + long tailLength = 16; + input.seek(contentBytes.length - tailLength); + byte[] tail = new byte[(int) tailLength]; + input.readBytes(tail, 0, tail.length); + byte[] expectedTail = new byte[(int) tailLength]; + System.arraycopy( + contentBytes, contentBytes.length - (int) tailLength, expectedTail, 0, (int) tailLength); + assertArrayEquals("Tail should match", expectedTail, tail); + + // Seek back to the start and re-read + input.seek(0); + assertEquals("Position should be 0 after backward seek", 0, input.getFilePointer()); + byte[] head = new byte[32]; + input.readBytes(head, 0, head.length); + byte[] expectedHead = new byte[32]; + System.arraycopy(contentBytes, 0, expectedHead, 0, 32); + assertArrayEquals("Head bytes after backward seek should match", expectedHead, head); + + // Seek somewhere in the middle, both backward and forward, several times + int[] offsets = {2000, 100, 4000, 500, 3000}; + byte[] sample = new byte[8]; + for (int off : offsets) { + input.seek(off); + input.readBytes(sample, 0, sample.length); + byte[] expected = new byte[sample.length]; + System.arraycopy(contentBytes, off, expected, 0, sample.length); + assertArrayEquals("Sample at offset " + off, expected, sample); + } + } + } + + /** + * Verify that {@code IndexInput.slice(...)} produces an independent view of a portion of the blob + * with correct length and bytes.
+ */ + @Test + public void testIndexInputSlice() throws Exception { + String path = "index-input-slice-test.txt"; + String content = "abcdefghijklmnopqrstuvwxyz0123456789"; + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + + pushContent(path, contentBytes); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + long sliceOffset = 10; + long sliceLength = 20; + try (IndexInput slice = input.slice("middle", sliceOffset, sliceLength)) { + assertEquals("Slice length", sliceLength, slice.length()); + assertEquals("Initial pointer", 0, slice.getFilePointer()); + + byte[] buf = new byte[(int) sliceLength]; + slice.readBytes(buf, 0, buf.length); + byte[] expected = new byte[(int) sliceLength]; + System.arraycopy(contentBytes, (int) sliceOffset, expected, 0, (int) sliceLength); + assertArrayEquals("Slice content", expected, buf); + + // backward seek inside the slice + slice.seek(0); + byte first = slice.readByte(); + assertEquals(contentBytes[(int) sliceOffset], first); + + // slicing the parent past its end, or from a negative offset, should throw + expectThrows( + IllegalArgumentException.class, () -> input.slice("oob", 0, content.length() + 1L)); + expectThrows(IllegalArgumentException.class, () -> input.slice("neg", -1, 1)); + } + } + } + + /** + * A clone has an independent file pointer (per {@link BufferedIndexInput#clone()}): seeks and + * reads on the clone do not move the parent's position, and vice versa.
+ */ + @Test + public void testIndexInputCloneIndependent() throws Exception { + String path = "index-input-clone-test.txt"; + String content = "abcdefghijklmnopqrstuvwxyz0123456789"; + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + + pushContent(path, contentBytes); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + input.seek(5); + IndexInput clone = input.clone(); + assertEquals("Clone starts at parent position", 5, clone.getFilePointer()); + + // Move clone forward; parent should be unaffected + clone.seek(20); + byte fromClone = clone.readByte(); + assertEquals(contentBytes[20], fromClone); + assertEquals("Parent position unchanged after clone read", 5, input.getFilePointer()); + + // Read from parent + byte fromParent = input.readByte(); + assertEquals(contentBytes[5], fromParent); + + // Clone can also seek backward independently + clone.seek(0); + byte cloneFirst = clone.readByte(); + assertEquals(contentBytes[0], cloneFirst); + } + } + + /** + * {@code readByte(long pos)} from arbitrary offsets after the buffer is seeded — exercises + * backward {@code seekInternal} that crosses the buffered window.
+ */ + @Test + public void testIndexInputRandomAccessReads() throws Exception { + String path = "index-input-random-access-test.txt"; + // 256 bytes of well-known data: byte at offset i has value (byte)(i & 0xFF) + byte[] contentBytes = new byte[256]; + for (int i = 0; i < contentBytes.length; i++) { + contentBytes[i] = (byte) i; + } + pushContent(path, contentBytes); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(client, path, client.length(path))) { + // Seed the buffer with a forward read first + input.seek(200); + input.readByte(); + + // Now exercise readByte(pos) backward — this calls seekInternal() with a smaller pos + assertEquals((byte) 0, input.readByte(0)); + assertEquals((byte) 1, input.readByte(1)); + assertEquals((byte) 100, input.readByte(100)); + assertEquals((byte) 255, input.readByte(255)); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobInstallShardTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobInstallShardTest.java new file mode 100644 index 000000000000..6d146d4272e7 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobInstallShardTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.QuickPatchThreadsFilter; +import org.apache.solr.SolrIgnoredThreadsFilter; +import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest; /* NOTE(review): not referenced anywhere in this class as shown; confirm and drop if unused */ +import org.apache.solr.cloud.api.collections.AbstractInstallShardTest; +import org.apache.solr.handler.admin.api.InstallShardData; +import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.BeforeClass; + +/** + * Tests validating that the 'Install Shard API' works when used with {@link + * AzureBlobBackupRepository}, backed by an Azurite emulator. + * + * @see org.apache.solr.cloud.api.collections.AbstractInstallShardTest + * @see InstallShardData + */ +// Backups do checksum validation against a footer value not present in 'SimpleText' +@LuceneTestCase.SuppressCodecs({"SimpleText"}) +@ThreadLeakLingering(linger = 10) +@ThreadLeakFilters( + defaultFilters = true, + filters = { + SolrIgnoredThreadsFilter.class, + QuickPatchThreadsFilter.class, + AbstractAzureBlobClientTest.OkHttpThreadLeakFilterTest.class, + }) +public class AzureBlobInstallShardTest extends AbstractInstallShardTest { + + private static final String CONTAINER_NAME = "install-shard-test"; + + private static AzuriteTestContainer azurite; + + private static final String BACKUP_REPOSITORY_XML = /* CONTAINER and CONNECTION_STRING are placeholders replaced in setupClass(). NOTE(review): no XML element markup is visible in these fragments here; confirm against the real file. */ + " \n" + + " \n" + + " azure\n" + + " \n" + + " \n" + + " azure\n" + + " ${hostPort:8983}\n" + + " \n" + + " \n" + + " CONTAINER\n" + + " CONNECTION_STRING\n" + + " \n" + + " \n"; + + private static final String SOLR_XML = + AbstractInstallShardTest.defaultSolrXmlTextWithBackupRepository(BACKUP_REPOSITORY_XML); + + @BeforeClass + public static void setupClass() 
throws Exception { + try { + azurite = AzuriteTestContainer.start(); + } catch (Throwable t) { + Assume.assumeNoException("Docker/Testcontainers not available; skipping Azure tests", t); + } /* assumeNoException throws for a non-null cause, so the class is skipped and azurite is set past this point */ + azurite.createContainerIfMissing(CONTAINER_NAME); + + configureCluster(2) // nodes + .addConfig("conf1", getFile("conf/solrconfig.xml").getParent()) + .withSolrXml( + SOLR_XML + .replace("CONTAINER", CONTAINER_NAME) + .replace("CONNECTION_STRING", azurite.connectionString())) + .configure(); + + bootstrapBackupRepositoryData("/"); + } + + @AfterClass + public static void tearDownClass() { + if (azurite != null) { /* still null when start() failed in setupClass() */ + azurite.stop(); + azurite = null; + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobOutputStreamTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobOutputStreamTest.java new file mode 100644 index 000000000000..6a891b91d577 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobOutputStreamTest.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.solr.azureblob; + +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.models.BlockList; +import com.azure.storage.blob.models.BlockListType; +import com.azure.storage.blob.specialized.BlockBlobClient; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import org.junit.Test; + +public class AzureBlobOutputStreamTest extends AbstractAzureBlobClientTest { + + @Test + public void testBasicOutputStream() throws Exception { + String path = "output-stream-test.txt"; + String content = "Output stream test content"; + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.readNBytes(buffer, 0, buffer.length); /* a single read() may return fewer bytes than the blob holds; readNBytes reads until EOF or the buffer is full */ + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testOutputStreamWriteByte() throws Exception { + String path = "output-stream-byte-test.txt"; + String content = "Byte by byte write test"; + + try (OutputStream output = client.pushStream(path)) { + for (byte b : content.getBytes(StandardCharsets.UTF_8)) { + output.write(b); + } + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.readNBytes(buffer, 0, buffer.length); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testOutputStreamWriteByteArray() throws Exception { + String path = "output-stream-array-test.txt"; + String content = "Byte array 
write test"; + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + + try (OutputStream output = client.pushStream(path)) { + output.write(contentBytes); + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.readNBytes(buffer, 0, buffer.length); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testOutputStreamWriteByteArrayWithOffset() throws Exception { + String path = "output-stream-offset-test.txt"; + String fullContent = "Full content for offset test"; + String partialContent = "offset test"; // Last part + byte[] fullBytes = fullContent.getBytes(StandardCharsets.UTF_8); + int offset = fullContent.indexOf(partialContent); + + try (OutputStream output = client.pushStream(path)) { + output.write(fullBytes, offset, partialContent.length()); + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.readNBytes(buffer, 0, buffer.length); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", partialContent, readContent); + } + } + + @Test + public void testOutputStreamFlush() throws Exception { + String path = "output-stream-flush-test.txt"; + String content = "Flush test content"; + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.flush(); + // flush() is a no-op: nothing is staged or committed until close(), so the blob is not yet + // visible.
+ assertFalse( + "File should not be visible after flush(); commit happens on close()", + client.pathExists(path)); + } + + assertTrue("File should exist after close", client.pathExists(path)); + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.readNBytes(buffer, 0, buffer.length); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content written before flush should be preserved", content, readContent); + } + } + + @Test + public void testFlushDoesNotStageBlocks() throws Exception { + String path = "output-stream-flush-noop-test.bin"; + + // Write several sub-block records, flushing after each. Because flush() is a no-op, all + // records accumulate in a single buffer and are staged as ONE block on close() rather than one + // tiny block per flush (which could otherwise exhaust Azure's 50,000-committed-block limit). + int records = 50; + byte[] record = new byte[1024]; + Arrays.fill(record, (byte) 'x'); + + try (OutputStream output = client.pushStream(path)) { + for (int i = 0; i < records; i++) { + output.write(record); + output.flush(); + } + } + + assertTrue("File should exist after close", client.pathExists(path)); + assertEquals( + "Length should match total bytes written", + (long) records * record.length, + client.length(path)); + + BlobServiceClient serviceClient = + new BlobServiceClientBuilder().connectionString(getConnectionString()).buildClient(); + BlockBlobClient blockBlobClient = + serviceClient + .getBlobContainerClient(containerName) + .getBlobClient(path) + .getBlockBlobClient(); + BlockList blockList = blockBlobClient.listBlocks(BlockListType.COMMITTED); + assertEquals( + "flush() must not stage extra blocks; sub-block writes commit as a single block", + 1, + blockList.getCommittedBlocks().size()); + } + + @Test + public void testOutputStreamClose() throws Exception { + String path = "output-stream-close-test.txt"; + String content = "Close test content"; + + 
OutputStream output = client.pushStream(path); + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.close(); + + assertTrue("File should exist after close", client.pathExists(path)); + + OutputStream closedOutput = output; + expectThrows(IOException.class, () -> closedOutput.write(1)); + expectThrows(IOException.class, () -> closedOutput.flush()); + } + + @Test + public void testOutputStreamMultipleClose() throws Exception { + String path = "output-stream-multiple-close-test.txt"; + String content = "Multiple close test content"; + + OutputStream output = client.pushStream(path); + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.close(); + output.close(); + + assertTrue("File should exist", client.pathExists(path)); + } + + @Test + public void testOutputStreamLargeData() throws Exception { + String path = "output-stream-large-test.txt"; + StringBuilder contentBuilder = new StringBuilder(); + + for (int i = 0; i < 20000; i++) { + contentBuilder.append("This is line ").append(i).append(" of the large file.\n"); + } + String content = contentBuilder.toString(); + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("Large file should exist", client.pathExists(path)); + assertEquals("File length should match", content.length(), client.length(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[8192]; + StringBuilder readContentBuilder = new StringBuilder(); + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + readContentBuilder.append(new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); + } + assertEquals("Large content should match", content, readContentBuilder.toString()); + } + } + + @Test + public void testOutputStreamChunkedWrite() throws Exception { + String path = "output-stream-chunked-test.txt"; + String content = "Chunked write test content"; + byte[] contentBytes = 
content.getBytes(StandardCharsets.UTF_8); + + try (OutputStream output = client.pushStream(path)) { + int chunkSize = 5; + for (int i = 0; i < contentBytes.length; i += chunkSize) { + int remaining = Math.min(chunkSize, contentBytes.length - i); + output.write(contentBytes, i, remaining); + } + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.readNBytes(buffer, 0, buffer.length); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Chunked content should match", content, readContent); + } + } + + @Test + public void testOutputStreamBinaryData() throws Exception { + String path = "output-stream-binary-test.bin"; + byte[] binaryData = new byte[1024]; + + for (int i = 0; i < binaryData.length; i++) { + binaryData[i] = (byte) (i % 256); + } + + try (OutputStream output = client.pushStream(path)) { + output.write(binaryData); + } + + assertTrue("Binary file should exist", client.pathExists(path)); + assertEquals("Binary file length should match", binaryData.length, client.length(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] readData = new byte[binaryData.length]; + int bytesRead = input.readNBytes(readData, 0, readData.length); /* the exact-count assertion below is only valid for a read that fills the buffer, which a single read() does not promise */ + assertEquals("Should read all bytes", binaryData.length, bytesRead); + + for (int i = 0; i < binaryData.length; i++) { + assertEquals("Binary data should match at position " + i, binaryData[i], readData[i]); + } + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobPathsTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobPathsTest.java new file mode 100644 index 000000000000..a0043c0a0852 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobPathsTest.java @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import java.net.URI; +import java.util.HashSet; +import java.util.Set; +import java.util.UUID; +import org.junit.Test; + +public class AzureBlobPathsTest extends AbstractAzureBlobClientTest { + + /** {@code pathExists()} returns false before push, true after. */ + @Test + public void testPathExists() throws Exception { + String path = "path-exists-test-" + UUID.randomUUID() + ".txt"; + + assertFalse("Path should not exist initially", client.pathExists(path)); + + pushContent(path, "test content"); + + assertTrue("Path should exist after creation", client.pathExists(path)); + } + + /** {@code pathExists()} reports a freshly-created directory marker as present. */ + @Test + public void testDirectoryExists() throws Exception { + String dirPath = "test-directory-" + UUID.randomUUID() + "/"; + + assertFalse("Directory should not exist initially", client.pathExists(dirPath)); + + client.createDirectory(dirPath); + + assertTrue("Directory should exist after creation", client.pathExists(dirPath)); + } + + /** {@code isDirectory()} distinguishes directory markers from regular blobs.
*/ + @Test + public void testIsDirectory() throws Exception { + String dirPath = "is-directory-test/"; + String filePath = "is-directory-test.txt"; + + client.createDirectory(dirPath); + assertTrue("Should be a directory", client.isDirectory(dirPath)); + + pushContent(filePath, "test content"); + assertFalse("Should not be a directory", client.isDirectory(filePath)); + } + + /** {@code length()} returns the exact byte count of the uploaded content. */ + @Test + public void testFileLength() throws Exception { + String path = "file-length-test.txt"; + String content = "File length test content"; + + pushContent(path, content); + + assertEquals("File length should match", content.length(), client.length(path)); + } + + /** {@code length()} on a directory throws — directory markers have no meaningful size. */ + @Test + public void testDirectoryLength() throws Exception { + String dirPath = "directory-length-test/"; + + client.createDirectory(dirPath); + + expectThrows(AzureBlobException.class, () -> client.length(dirPath)); + } + + /** + * {@code listDir()} returns immediate children only — both files and sub-directories, with + * sub-directory entries returned as bare names (no trailing delimiter). + */ + @Test + public void testListDirectory() throws Exception { + String dirPath = "list-directory-test/"; + + client.createDirectory(dirPath); + + String[] files = client.listDir(dirPath); + assertEquals("Directory should be empty initially", 0, files.length); + + String[] toCreate = {"file1.txt", "file2.txt", "subdir/"}; + for (String fileName : toCreate) { + String fullPath = dirPath + fileName; + if (fileName.endsWith("/")) { + client.createDirectory(fullPath); + } else { + pushContent(fullPath, "Content of " + fileName); + } + } + + files = client.listDir(dirPath); + assertEquals("Should list all files and directories", toCreate.length, files.length); + + // Directory children are listed without a trailing slash.
+ String[] expectedNames = {"file1.txt", "file2.txt", "subdir"}; + for (String expected : expectedNames) { + boolean found = false; + for (String listedFile : files) { + if (expected.equals(listedFile)) { + found = true; + break; + } + } + assertTrue("Should find entry: " + expected, found); + } + } + + /** Recursive walk via {@code listDir()} reaches files nested under multiple sub-directories. */ + @Test + public void testListAll() throws Exception { + String dirPath = "list-all-test/"; + + client.createDirectory(dirPath); + client.createDirectory(dirPath + "subdir1/"); + client.createDirectory(dirPath + "subdir2/"); + + pushContent(dirPath + "file1.txt", "Content 1"); + pushContent(dirPath + "file2.txt", "Content 2"); + pushContent(dirPath + "subdir1/file3.txt", "Content 3"); + pushContent(dirPath + "subdir2/file4.txt", "Content 4"); + + Set<String> allFiles = new HashSet<>(); + listAllRecursive(dirPath, allFiles); + + assertTrue("Should find file1.txt", allFiles.contains(dirPath + "file1.txt")); + assertTrue("Should find file2.txt", allFiles.contains(dirPath + "file2.txt")); + assertTrue("Should find subdir1/file3.txt", allFiles.contains(dirPath + "subdir1/file3.txt")); + assertTrue("Should find subdir2/file4.txt", allFiles.contains(dirPath + "subdir2/file4.txt")); + } + + /** Happy path: {@code delete()} of a single existing file removes it. */ + @Test + public void testDeleteFile() throws Exception { + String path = "delete-file-test.txt"; + + pushContent(path, "test content"); + assertTrue("File should exist", client.pathExists(path)); + + client.delete(Set.of(path)); + + assertFalse("File should not exist after deletion", client.pathExists(path)); + } + + /** {@code deleteDirectory()} recursively removes a directory and its contents.
*/ + @Test + public void testDeleteDirectory() throws Exception { + String dirPath = "delete-directory-test/"; + String filePath = dirPath + "nested-file.txt"; + + client.createDirectory(dirPath); + pushContent(filePath, "nested content"); + + assertTrue("Directory should exist", client.pathExists(dirPath)); + assertTrue("File should exist", client.pathExists(filePath)); + + client.deleteDirectory(dirPath); + + assertFalse("Directory should not exist after deletion", client.pathExists(dirPath)); + assertFalse("File should not exist after deletion", client.pathExists(filePath)); + } + + /** Strict {@code delete()}: a single missing path raises {@link AzureBlobNotFoundException}. */ + @Test + public void testDeleteNonExistentFile() throws Exception { + String path = "non-existent-file.txt"; + + assertFalse("File should not exist", client.pathExists(path)); + + AzureBlobNotFoundException thrown = + expectThrows(AzureBlobNotFoundException.class, () -> client.delete(Set.of(path))); + assertTrue( + "Exception message should reference the missing path: " + thrown.getMessage(), + thrown.getMessage().contains(path)); + } + + /** Lenient {@code deleteDirectory()}: a missing directory is a silent no-op (no exception). */ + @Test + public void testDeleteNonExistentDirectory() throws Exception { + String dirPath = "non-existent-directory/"; + + assertFalse("Directory should not exist", client.pathExists(dirPath)); + + client.deleteDirectory(dirPath); + } + + /** + * Deeply-nested directories + files are all observable via {@code pathExists()} after creation.
+ */ + @Test + public void testNestedDirectories() throws Exception { + String rootDir = "nested-test/"; + String subDir1 = rootDir + "subdir1/"; + String subDir2 = rootDir + "subdir2/"; + String deepDir = subDir1 + "deepdir/"; + + client.createDirectory(rootDir); + client.createDirectory(subDir1); + client.createDirectory(subDir2); + client.createDirectory(deepDir); + + assertTrue("Root directory should exist", client.pathExists(rootDir)); + assertTrue("Sub directory 1 should exist", client.pathExists(subDir1)); + assertTrue("Sub directory 2 should exist", client.pathExists(subDir2)); + assertTrue("Deep directory should exist", client.pathExists(deepDir)); + + pushContent(rootDir + "root-file.txt", "Root file content"); + pushContent(subDir1 + "sub-file.txt", "Sub file content"); + pushContent(deepDir + "deep-file.txt", "Deep file content"); + + assertTrue("Root file should exist", client.pathExists(rootDir + "root-file.txt")); + assertTrue("Sub file should exist", client.pathExists(subDir1 + "sub-file.txt")); + assertTrue("Deep file should exist", client.pathExists(deepDir + "deep-file.txt")); + } + + /** {@code sanitizedPath()} strips leading slashes from a variety of input shapes. */ + @Test + public void testPathSanitization() throws Exception { + String[] testPaths = { + "simple-file.txt", + "/leading-slash.txt", + "trailing-slash/", + "/both-slashes/", + "nested/path/file.txt", + "//double-slash.txt", + " spaced-file.txt ", + "special-chars!@#$%^&*().txt" + }; + + for (String testPath : testPaths) { + String sanitizedPath = client.sanitizedPath(testPath); + assertNotNull("Sanitized path should not be null", sanitizedPath); + assertFalse("Sanitized path should not start with slash", sanitizedPath.startsWith("/")); + } + } + + /** + * {@code sanitizedFilePath()} accepts valid file paths and rejects trailing-slash / blank input.
+ */ + @Test + public void testFilePathSanitization() throws Exception { + String[] validFilePaths = { + "simple-file.txt", "nested/path/file.txt", "file-with-dashes.txt", "file_with_underscores.txt" + }; + + for (String filePath : validFilePaths) { + String sanitizedPath = client.sanitizedFilePath(filePath); + assertNotNull("Sanitized file path should not be null", sanitizedPath); + assertFalse("Sanitized file path should not end with slash", sanitizedPath.endsWith("/")); + } + + String[] invalidFilePaths = {"file-with-trailing-slash/", "", " "}; + + for (String filePath : invalidFilePaths) { + final String path = filePath; + expectThrows(AzureBlobException.class, () -> client.sanitizedFilePath(path)); + } + } + + /** {@code sanitizedDirPath()} always appends a trailing slash to dir-shaped input. */ + @Test + public void testDirectoryPathSanitization() throws Exception { + String[] testDirPaths = { + "simple-dir", "nested/path/dir", "dir-with-dashes", "dir_with_underscores" + }; + + for (String dirPath : testDirPaths) { + String sanitizedPath = client.sanitizedDirPath(dirPath); + assertNotNull("Sanitized directory path should not be null", sanitizedPath); + assertTrue("Sanitized directory path should end with slash", sanitizedPath.endsWith("/")); + } + } + + /** {@code createURI()} normalizes plain, leading-slash, and already-schemed locations alike. */ + @Test + public void testCreateUri() throws Exception { + try (AzureBlobBackupRepository repository = new AzureBlobBackupRepository()) { + assertEquals("/loc", repository.createURI("loc").getPath()); + assertEquals("/loc", repository.createURI("/loc").getPath()); + assertEquals("/loc", repository.createURI("blob:/loc").getPath()); + assertEquals("blob", repository.createURI("loc").getScheme()); + + // createDirectoryURI appends a trailing slash. + assertEquals("/loc/", repository.createDirectoryURI("loc").getPath()); + } + } + + /** {@code resolve()} joins nested components in order under the base path.
*/ + @Test + public void testResolveNestedComponents() throws Exception { + try (AzureBlobBackupRepository repository = new AzureBlobBackupRepository()) { + URI base = repository.createURI("loc"); + + assertEquals("/loc/a/b/c", repository.resolve(base, "a", "b", "c").getPath()); + } + } + + /** Redundant slashes in components are collapsed by {@code resolve()} (no {@code //}). */ + @Test + public void testResolveCollapsesRedundantSlashes() throws Exception { + try (AzureBlobBackupRepository repository = new AzureBlobBackupRepository()) { + URI base = repository.createURI("loc"); + + URI resolved = repository.resolve(base, "a/", "b"); + assertEquals("/loc/a/b", resolved.getPath()); + assertFalse("resolved path should not contain '//'", resolved.getPath().contains("//")); + } + } + + /** {@code resolveDirectory()} guarantees a trailing slash on the final component. */ + @Test + public void testResolveDirectoryAppendsTrailingSlash() throws Exception { + try (AzureBlobBackupRepository repository = new AzureBlobBackupRepository()) { + URI base = repository.createURI("loc"); + + assertEquals("/loc/sub/", repository.resolveDirectory(base, "sub").getPath()); + } + } + + /** + * {@code resolveDirectory()} with no components keeps a single trailing slash and never produces + * a doubled separator.
+ */ + @Test + public void testResolveDirectoryEmptyComponents() throws Exception { + try (AzureBlobBackupRepository repository = new AzureBlobBackupRepository()) { + URI base = repository.createDirectoryURI("loc"); + + URI resolved = repository.resolveDirectory(base); + assertEquals("/loc/", resolved.getPath()); + assertFalse("resolved path should not contain '//'", resolved.getPath().contains("//")); + } + } + + private void listAllRecursive(String dirPath, Set<String> allFiles) throws AzureBlobException { /* depth-first walk from dirPath (which must end in '/'): files are added as-is, directories with a trailing slash */ + String[] files = client.listDir(dirPath); + for (String file : files) { + String fullPath = dirPath + file; + // listDir returns bare names, so probe the type to decide whether to recurse. + if (client.isDirectory(fullPath)) { + String dirFullPath = fullPath + "/"; + allFiles.add(dirFullPath); + listAllRecursive(dirFullPath, allFiles); + } else { + allFiles.add(fullPath); + } + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobReadWriteTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobReadWriteTest.java new file mode 100644 index 000000000000..66390aff85d5 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobReadWriteTest.java @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.azureblob;
+
+import com.carrotsearch.randomizedtesting.generators.RandomBytes;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.Test;
+
+/**
+ * Read/write round-trip and batched-delete tests that exercise the {@code client} fixture and the
+ * {@code pushContent} / {@code initiateBlobConnectionLoss} helpers inherited from {@link
+ * AbstractAzureBlobClientTest}.
+ *
+ * <p>Round-trip tests drain the stream with {@code readAllBytes} / {@code readNBytes} rather than
+ * a single {@code read(byte[])}, because one {@code read} call is allowed to return fewer bytes
+ * than are available.
+ */
+public class AzureBlobReadWriteTest extends AbstractAzureBlobClientTest {
+
+  /**
+   * Small UTF-8 string round-trips byte-for-byte through {@code pushContent} / {@code pullStream}.
+   */
+  @Test
+  public void testBasicReadWrite() throws Exception {
+    String path = "test-file.txt";
+    String content = "Hello, Azure Blob Storage!";
+
+    pushContent(path, content);
+
+    try (InputStream input = client.pullStream(path)) {
+      String readContent = new String(input.readAllBytes(), StandardCharsets.UTF_8);
+      assertEquals("Content should match", content, readContent);
+    }
+  }
+
+  /**
+   * Large (~369 KB) ASCII payload: byte-exact readback in 8 KB chunks; {@code length()} matches
+   * the encoded byte count.
+   */
+  @Test
+  public void testLargeFileReadWrite() throws Exception {
+    String path = "large-file.txt";
+    StringBuilder contentBuilder = new StringBuilder();
+
+    for (int i = 0; i < 10000; i++) {
+      contentBuilder.append("This is line ").append(i).append(" of the large file.\n");
+    }
+    String content = contentBuilder.toString();
+    // Blob length is measured in bytes, so compare against the encoded form, not the char count.
+    byte[] expected = content.getBytes(StandardCharsets.UTF_8);
+
+    pushContent(path, content);
+
+    assertTrue("File should exist", client.pathExists(path));
+    assertEquals("File length should match", expected.length, client.length(path));
+
+    try (InputStream input = client.pullStream(path)) {
+      byte[] actual = new byte[expected.length];
+      byte[] buffer = new byte[8192];
+      int total = 0;
+      int bytesRead;
+      while ((bytesRead = input.read(buffer)) != -1) {
+        assertTrue("Read more bytes than were written", total + bytesRead <= actual.length);
+        System.arraycopy(buffer, 0, actual, total, bytesRead);
+        total += bytesRead;
+      }
+      assertEquals("Should read all bytes", expected.length, total);
+      // Decode once at the end so a chunk boundary can never split a multi-byte sequence.
+      assertEquals("Content should match", content, new String(actual, StandardCharsets.UTF_8));
+    }
+  }
+
+  /**
+   * 1 KB binary payload (every byte value 0..255 cycled) round-trips byte-exact, with no charset
+   * translation.
+   */
+  @Test
+  public void testBinaryDataReadWrite() throws Exception {
+    String path = "binary-file.bin";
+    byte[] binaryData = new byte[1024];
+
+    for (int i = 0; i < binaryData.length; i++) {
+      binaryData[i] = (byte) (i % 256);
+    }
+
+    pushContent(path, binaryData);
+
+    try (InputStream input = client.pullStream(path)) {
+      byte[] readData = new byte[binaryData.length];
+      // readNBytes keeps reading until the buffer is full or EOF, unlike a single read(byte[]).
+      int bytesRead = input.readNBytes(readData, 0, readData.length);
+      assertEquals("Should read all bytes", binaryData.length, bytesRead);
+
+      for (int i = 0; i < binaryData.length; i++) {
+        assertEquals("Binary data should match at position " + i, binaryData[i], readData[i]);
+      }
+    }
+  }
+
+  /**
+   * Two independent {@code pullStream} instances against the same blob have isolated read state.
+   */
+  @Test
+  public void testConcurrentReadWrite() throws Exception {
+    String path = "concurrent-file.txt";
+    String content = "Concurrent read/write test content";
+
+    pushContent(path, content);
+
+    // Both streams are open at the same time; draining one must not affect the other.
+    try (InputStream input1 = client.pullStream(path);
+        InputStream input2 = client.pullStream(path)) {
+
+      String readContent1 = new String(input1.readAllBytes(), StandardCharsets.UTF_8);
+      String readContent2 = new String(input2.readAllBytes(), StandardCharsets.UTF_8);
+
+      assertEquals("Both reads should get same content", readContent1, readContent2);
+      assertEquals("Content should match original", content, readContent1);
+    }
+  }
+
+  /** Repeat {@code close()} and post-close {@code read()} on a resumable stream do not throw. */
+  @Test
+  public void testStreamClose() throws Exception {
+    String path = "stream-close-test.txt";
+    String content = "Stream close test content";
+
+    pushContent(path, content);
+
+    InputStream input = client.pullStream(path);
+    input.close();
+    input.close();
+
+    // Any value read() can legally return satisfies the assertion below; what this test really
+    // verifies is that neither the repeated close() nor the post-close read() throws.
+    int firstByte = input.read();
+    assertTrue(
+        "Stream should be resumable after close (got byte: " + firstByte + ")",
+        firstByte >= 0 || firstByte == -1);
+
+    input.close();
+  }
+
+  /** Zero-byte blob exists with length 0; first {@code read()} returns {@code -1} (EOF). */
+  @Test
+  public void testEmptyFileReadWrite() throws Exception {
+    String path = "empty-file.txt";
+    String content = "";
+
+    pushContent(path, content);
+
+    assertTrue("Empty file should exist", client.pathExists(path));
+    assertEquals("Empty file should have zero length", 0, client.length(path));
+
+    try (InputStream input = client.pullStream(path)) {
+      int bytesRead = input.read();
+      assertEquals("Should return -1 for empty file", -1, bytesRead);
+    }
+  }
+
+  /** Multi-byte UTF-8 content (CJK, emoji, Greek) round-trips byte-for-byte. */
+  @Test
+  public void testUnicodeContentReadWrite() throws Exception {
+    String path = "unicode-file.txt";
+    String content = "Hello 世界! 🌍 Unicode test: αβγδε";
+
+    pushContent(path, content);
+
+    try (InputStream input = client.pullStream(path)) {
+      String readContent = new String(input.readAllBytes(), StandardCharsets.UTF_8);
+      assertEquals("Unicode content should match", content, readContent);
+    }
+  }
+
+  /** {@code OutputStream.flush()} only stages bytes; the blob is committed by {@code close()}. */
+  @Test
+  public void testOutputStreamFlush() throws Exception {
+    String path = "flush-test.txt";
+    String content = "Flush test content";
+
+    try (OutputStream output = client.pushStream(path)) {
+      output.write(content.getBytes(StandardCharsets.UTF_8));
+      output.flush();
+    }
+
+    // OutputStream.flush() only stages buffered bytes; the block list is committed by close(),
+    // so the blob becomes visible only after the try-with-resources exits.
+    assertTrue("File should exist after close", client.pathExists(path));
+
+    try (InputStream input = client.pullStream(path)) {
+      String readContent = new String(input.readAllBytes(), StandardCharsets.UTF_8);
+      assertEquals("Content should match after flush", content, readContent);
+    }
+  }
+
+  /**
+   * ~2 MB read with randomized read/skip and periodic forced connection drops still accounts for
+   * every byte.
+   */
+  @Test
+  public void testReadWithConnectionLoss() throws Exception {
+    String key = "flush-very-large";
+
+    int numBytes = 2_000_000;
+    pushContent(key, RandomBytes.randomBytesOfLength(random(), numBytes));
+
+    int numExceptions = 5;
+    int bytesPerException = numBytes / numExceptions;
+
+    int maxBuffer = 100;
+    byte[] buffer = new byte[maxBuffer];
+    boolean done = false;
+    try (InputStream input = client.pullStream(key)) {
+      long byteCount = 0;
+      long lastResetBucket = -1;
+      while (!done) {
+        int numBytesToRead = random().nextInt(maxBuffer) + 1;
+        switch (random().nextInt(3)) {
+          case 0:
+            {
+              // Single-byte reads.
+              for (int i = 0; i < numBytesToRead && !done; i++) {
+                done = input.read() == -1;
+                if (!done) {
+                  byteCount++;
+                }
+              }
+            }
+            break;
+          case 1:
+            {
+              // Bulk read; numBytesToRead is at least 1, so a non-positive result means EOF.
+              int readLen = input.read(buffer, 0, numBytesToRead);
+              if (readLen > 0) {
+                byteCount += readLen;
+              } else {
+                done = true;
+              }
+            }
+            break;
+          case 2:
+            {
+              long bytesSkipped = input.skip(numBytesToRead);
+              byteCount += bytesSkipped;
+              if (bytesSkipped < numBytesToRead) {
+                // A short skip is permitted by the InputStream contract and does not by itself
+                // signal EOF; probe with a single-byte read to tell the two cases apart.
+                if (input.read() == -1) {
+                  done = true;
+                } else {
+                  byteCount++;
+                }
+              }
+            }
+            break;
+        }
+
+        // Initiate a connection loss at the beginning of every "bytesPerException" cycle.
+        // The input stream will not immediately see an error, it will have pre-loaded some data.
+        long currentBucket = byteCount / bytesPerException;
+        if (currentBucket != lastResetBucket && (byteCount % bytesPerException <= maxBuffer)) {
+          initiateBlobConnectionLoss();
+          lastResetBucket = currentBucket;
+        }
+      }
+
+      assertEquals("Wrong amount of data found from InputStream", numBytes, byteCount);
+    }
+  }
+
+  /**
+   * Happy path: a batch larger than the per-batch cap ({@code DELETE_BATCH_SIZE}) deletes every
+   * blob across multiple chunks.
+   */
+  @Test
+  public void testBatchedDeleteAllPresent() throws Exception {
+    final int totalFiles = AzureBlobStorageClient.DELETE_BATCH_SIZE + 4; // crosses chunk boundary
+    final String prefix = "batch-delete-all/file-";
+
+    List<String> paths = new ArrayList<>(totalFiles);
+    for (int i = 0; i < totalFiles; i++) {
+      String path = prefix + i + ".txt";
+      pushContent(path, "x");
+      paths.add(path);
+    }
+
+    assertTrue("First blob should exist before delete", client.pathExists(paths.get(0)));
+    assertTrue(
+        "Last blob should exist before delete", client.pathExists(paths.get(totalFiles - 1)));
+
+    client.delete(paths);
+
+    for (int i = 0; i < totalFiles; i++) {
+      String path = prefix + i + ".txt";
+      assertFalse("Blob should be gone after batched delete: " + path, client.pathExists(path));
+    }
+  }
+
+  /**
+   * Strict {@code delete}: a batch with any missing path throws {@link AzureBlobNotFoundException}.
+   * Note: the batch is issued before the size-mismatch check, so the present blobs are still
+   * deleted server-side even though the call throws — this asserts that surprising partial effect.
+   */
+  @Test
+  public void testDeleteThrowsWhenAnyPathMissing() throws Exception {
+    final int totalFiles = AzureBlobStorageClient.DELETE_BATCH_SIZE + 4; // crosses chunk boundary
+    final String prefix = "batch-delete-mixed/file-";
+    final String missing1 = prefix + "missing-1.txt";
+    final String missing2 = prefix + "missing-2.txt";
+
+    List<String> paths = new ArrayList<>(totalFiles + 2);
+    for (int i = 0; i < totalFiles; i++) {
+      String path = prefix + i + ".txt";
+      pushContent(path, "x");
+      paths.add(path);
+    }
+    // Pre-conditions: real blobs exist, missing paths do not.
+    assertTrue("First real blob should exist", client.pathExists(paths.get(0)));
+    assertFalse("Missing-1 must not exist", client.pathExists(missing1));
+    assertFalse("Missing-2 must not exist", client.pathExists(missing2));
+    paths.addAll(Arrays.asList(missing1, missing2));
+
+    AzureBlobNotFoundException thrown =
+        expectThrows(AzureBlobNotFoundException.class, () -> client.delete(paths));
+    assertTrue(
+        "Exception message should list missing-1: " + thrown.getMessage(),
+        thrown.getMessage().contains(missing1));
+    assertTrue(
+        "Exception message should list missing-2: " + thrown.getMessage(),
+        thrown.getMessage().contains(missing2));
+
+    // Only the blobs that were actually uploaded are checked; they must be gone despite the throw.
+    for (int i = 0; i < totalFiles; i++) {
+      String path = prefix + i + ".txt";
+      assertFalse(
+          "Real blob should be gone after batched delete: " + path, client.pathExists(path));
+    }
+  }
+}
diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzuriteTestContainer.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzuriteTestContainer.java
new file mode 100644
index 000000000000..6ef5e4bdd9ac
--- /dev/null
+++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzuriteTestContainer.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.azureblob;
+
+import com.azure.storage.blob.BlobServiceClient;
+import com.azure.storage.blob.BlobServiceClientBuilder;
+import com.azure.storage.blob.models.BlobStorageException;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.utility.DockerImageName;
+
+/**
+ * Lifecycle helper for a single Azurite (Azure Blob Storage emulator) Testcontainer. Used by tests
+ * that cannot extend {@link AbstractAzureBlobClientTest} because they already extend a shared Solr
+ * abstract test suite (e.g. the SolrCloud backup/restore and install-shard suites).
+ *
+ * <p>Typical use: {@link #start()} once per suite, {@link #createContainerIfMissing(String)} for
+ * each blob container the test needs, and {@link #stop()} during teardown.
+ */
+final class AzuriteTestContainer {
+
+  static final String AZURITE_IMAGE = "mcr.microsoft.com/azure-storage/azurite:3.35.0";
+  static final int BLOB_SERVICE_PORT = 10000;
+  // Account name and key passed to the emulator in connectionString(); test-only values.
+  static final String ACCOUNT_NAME = "devstoreaccount1";
+  static final String ACCOUNT_KEY =
+      "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
+  // Status code tolerated by createContainerIfMissing when the container already exists.
+  private static final int HTTP_CONFLICT = 409;
+
+  private final GenericContainer<?> container;
+
+  private AzuriteTestContainer(GenericContainer<?> container) {
+    this.container = container;
+  }
+
+  /** Starts a fresh Azurite container. Throws if Docker/Testcontainers is unavailable. */
+  @SuppressWarnings("resource")
+  static AzuriteTestContainer start() {
+    // Ownership of the container passes to the returned wrapper, which releases it in stop().
+    GenericContainer<?> container =
+        new GenericContainer<>(DockerImageName.parse(AZURITE_IMAGE))
+            .withExposedPorts(BLOB_SERVICE_PORT)
+            .withCommand("azurite-blob", "--blobHost", "0.0.0.0", "--skipApiVersionCheck");
+    container.start();
+    return new AzuriteTestContainer(container);
+  }
+
+  /** Returns the base HTTP URL of the blob service, using the host port mapped by Docker. */
+  String blobEndpoint() {
+    return "http://" + container.getHost() + ":" + container.getMappedPort(BLOB_SERVICE_PORT);
+  }
+
+  /**
+   * Returns an Azure Storage connection string for this container. The blob endpoint carries the
+   * account name as its first path segment.
+   */
+  String connectionString() {
+    return "DefaultEndpointsProtocol=http;AccountName="
+        + ACCOUNT_NAME
+        + ";AccountKey="
+        + ACCOUNT_KEY
+        + ";BlobEndpoint="
+        + blobEndpoint()
+        + "/"
+        + ACCOUNT_NAME
+        + ";";
+  }
+
+  /** Creates the given blob container, tolerating the case where it already exists. */
+  void createContainerIfMissing(String containerName) {
+    BlobServiceClient serviceClient =
+        new BlobServiceClientBuilder().connectionString(connectionString()).buildClient();
+    try {
+      serviceClient.getBlobContainerClient(containerName).create();
+    } catch (BlobStorageException e) {
+      // Only a 409 is treated as "already exists"; every other failure is propagated.
+      if (e.getStatusCode() != HTTP_CONFLICT) {
+        throw e;
+      }
+    }
+  }
+
+  /**
+   * Stops and closes the container. Cleanup is best-effort: ordinary exceptions are ignored so
+   * that teardown never masks a test result, but JVM {@link Error}s are allowed to propagate.
+   */
+  void stop() {
+    try {
+      container.stop();
+      container.close();
+    } catch (Exception ignored) {
+      // best-effort cleanup
+    }
+  }
+}
diff --git a/solr/server/etc/security.policy b/solr/server/etc/security.policy
index f932cc0b461c..25ce5a337ff1 100644
--- a/solr/server/etc/security.policy
+++ b/solr/server/etc/security.policy
@@ -221,6 +221,9 @@ grant {
 };
 
 // Permissions for OTEL Runtime Java 17 telemetry and metrics
+// Also needed for Reactor (used by Azure SDK with OkHttp)
 grant {
   permission jdk.jfr.FlightRecorderPermission "accessFlightRecorder";
+  permission jdk.jfr.FlightRecorderPermission "registerEvent";
+  permission java.lang.RuntimePermission "accessClassInPackage.jdk.jfr.internal.event";
 };
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc index 7e9cdd35bf24..c36762c4edd6 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc @@ -383,7 +383,7 @@ If the status is anything other than "success", an error message will explain wh Solr provides a repository abstraction to allow users to backup and restore their data to a variety of different storage systems. For example, a Solr cluster running on a local filesystem (e.g., EXT3) can store backup data on the same disk, on a remote network-mounted drive, or in some popular "cloud storage" providers, depending on the 'repository' implementation chosen. -Solr offers multiple different repository implementations out of the box (`LocalFileSystemRepository`, `GCSBackupRepository` and `S3BackupRepository`), and allows users to create plugins for their own storage systems as needed. It is also possible to create a `DelegatingBackupRepository` that delegates to another `BackupRepository` and adds or modifies some behavior on top of it. +Solr offers multiple different repository implementations out of the box (`LocalFileSystemRepository`, `GCSBackupRepository`, `S3BackupRepository`, and `AzureBlobBackupRepository`), and allows users to create plugins for their own storage systems as needed. It is also possible to create a `DelegatingBackupRepository` that delegates to another `BackupRepository` and adds or modifies some behavior on top of it. Users can define any number of repositories in their `solr.xml` file. The backup and restore APIs described above allow users to select which of these definitions they want to use at runtime via the `repository` parameter. 
@@ -826,3 +826,157 @@ https://docs.aws.amazon.com/sdkref/latest/guide/settings-global.html[These optio * Retries ** RetryMode (`LEGACY`, `STANDARD`, `ADAPTIVE`) ** Max Attempts + +=== AzureBlobBackupRepository + +Stores and retrieves backup files in a Microsoft Azure Blob Storage container. + +This is provided via the `azure-blob-repository` xref:configuration-guide:solr-modules.adoc[Solr Module] that needs to be enabled before use. + +This plugin supports multiple authentication methods: connection strings, account keys, SAS tokens, and Azure Identity (Managed Identity, Service Principal, Azure CLI). +For Azure Identity, ensure the identity has the "Storage Blob Data Contributor" role on the storage account. + +An example configuration, placed in `solr.xml`, can be seen below: + +[source,xml] +---- +<backup> + <repository name="azure_blob" class="org.apache.solr.azureblob.AzureBlobBackupRepository" default="false"> + <str name="azure.blob.container.name">solr-backup</str> + <str name="azure.blob.connection.string">DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=mykey;EndpointSuffix=core.windows.net</str> + </repository> +</backup> +---- + +[NOTE] +==== +To avoid keeping secrets (connection strings, account keys, SAS tokens) in `solr.xml`, any of the options below may instead be supplied as a Java system property (or environment variable) of the same name. +A value provided that way takes precedence over the one in `solr.xml`, so the sensitive element can be omitted from the file entirely. +For example, set `-Dazure.blob.connection.string=...` on the Solr command line and leave only `azure.blob.container.name` in `solr.xml`. +==== + +AzureBlobBackupRepository accepts the following options (in `solr.xml`) for configuration: + +`azure.blob.container.name`:: ++ +[%autowidth,frame=none] +|=== +|Required |Default: none +|=== ++ +The name of the Azure Blob Storage container. The container must exist before performing backup operations. + +`azure.blob.connection.string`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Complete Azure Storage connection string. Mutually exclusive with other authentication methods.
+ +`azure.blob.account.name`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Azure Storage account name. Used with account key or SAS token authentication. + +`azure.blob.account.key`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Azure Storage account access key. Mutually exclusive with SAS token and Azure Identity. + +`azure.blob.sas.token`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +SAS token for time-limited access. Must include `srt=sco` and `sp=rwdlac` permissions. +The `&` characters must be XML-escaped as `&amp;`. + +`azure.blob.endpoint`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Azure Blob Storage endpoint URL (e.g., `https://myaccount.blob.core.windows.net`). +Required for Azure Identity authentication. + +`azure.blob.tenant.id`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Azure AD tenant ID for Service Principal authentication. + +`azure.blob.client.id`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Azure AD application (client) ID for Service Principal authentication. + +`azure.blob.client.secret`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Azure AD application secret for Service Principal authentication. + +`location`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Default path prefix within the container for backup storage. + +The target container must already exist; it is not created automatically. + +==== Azure Identity and the Java Security Manager + +Solr enables the Java Security Manager by default (`SOLR_SECURITY_MANAGER_ENABLED=true`). +The Connection String, Account Key, and SAS Token methods are unaffected.
+Azure Identity behaves as follows: + +* Managed Identity and Service Principal (`azure.blob.tenant.id`, `azure.blob.client.id`, `azure.blob.client.secret`) obtain tokens over the network and are not blocked by the Security Manager; they work with the default `security.policy`. +* The Azure CLI and Azure PowerShell developer credentials in the `DefaultAzureCredential` chain spawn a child process (for example `az`), which the Security Manager blocks with `access denied ("java.io.FilePermission" "/bin/sh" "execute")`. + +If you must use the Azure CLI credential (typically only for local development), add the following to `server/etc/security.policy`, adjusting the shell path for your platform: + +[source] +---- +permission java.io.FilePermission "/bin/sh", "execute"; +permission java.io.FilePermission "/dev/null", "read,write"; +---- + +For production, prefer Managed Identity or a Service Principal. Alternatively, set `SOLR_SECURITY_MANAGER_ENABLED=false` if you depend on the CLI credential. + +==== Troubleshooting + +`403 Forbidden`:: +Check the SAS token permissions (`srt=sco`, `sp=rwdlac`) or the RBAC role assignment on the storage account. + +`Signature did not match`:: +Ensure `&` is escaped as `&amp;` in XML and that the SAS token contains no surrounding whitespace. + +`DefaultAzureCredential failed to retrieve a token`:: +Run `az login`, or verify the Service Principal credentials (`azure.blob.tenant.id`, `azure.blob.client.id`, `azure.blob.client.secret`). See the Security Manager limitation above.