From ac64a5b26f7f7203721e363a2eea8a69acb40df7 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Mon, 25 May 2026 11:51:16 +0200 Subject: [PATCH] #1631 - Drop HDFS FS module - Remove the module - Bit of cleaning up --- .../core/api/io/JCasFileWriter_ImplBase.java | 3 +- dkpro-core-asl/pom.xml | 2 - .../resources/dkpro-core/version-rules.xml | 17 -- .../asciidoc/developer-guide/modules.adoc | 2 - dkpro-core-fs-hdfs-asl/LICENSE.txt | 202 ------------------ dkpro-core-fs-hdfs-asl/pom.xml | 166 -------------- .../fs/hdfs/HdfsResourceLoaderLocator.java | 70 ------ .../hdfs/HdfsResourceLoaderLocatorTest.java | 98 --------- .../src/test/resources/log4j2.xml | 16 -- 9 files changed, 2 insertions(+), 574 deletions(-) delete mode 100644 dkpro-core-fs-hdfs-asl/LICENSE.txt delete mode 100644 dkpro-core-fs-hdfs-asl/pom.xml delete mode 100644 dkpro-core-fs-hdfs-asl/src/main/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocator.java delete mode 100644 dkpro-core-fs-hdfs-asl/src/test/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocatorTest.java delete mode 100644 dkpro-core-fs-hdfs-asl/src/test/resources/log4j2.xml diff --git a/dkpro-core-api-io-asl/src/main/java/org/dkpro/core/api/io/JCasFileWriter_ImplBase.java b/dkpro-core-api-io-asl/src/main/java/org/dkpro/core/api/io/JCasFileWriter_ImplBase.java index 3ed63f196a..0266b95ecf 100644 --- a/dkpro-core-api-io-asl/src/main/java/org/dkpro/core/api/io/JCasFileWriter_ImplBase.java +++ b/dkpro-core-api-io-asl/src/main/java/org/dkpro/core/api/io/JCasFileWriter_ImplBase.java @@ -220,7 +220,8 @@ else if (targetLocation.startsWith(JAR_PREFIX)) { // Begin new entry String entryName = zipEntryPrefix + aRelativePath + aExtension + compression.getExtension(); - if (Paths.get(entryName).normalize().startsWith("..")) { + if (entryName.contains("..") || entryName.startsWith("/") || entryName.startsWith("\\") + || Paths.get(entryName).isAbsolute()) { throw new IOException( "ZIP entry name [" + entryName + "] would escape archive root"); } diff --git a/dkpro-core-asl/pom.xml b/dkpro-core-asl/pom.xml index 9682c44f35..bf8798d383 100644 --- a/dkpro-core-asl/pom.xml +++ b/dkpro-core-asl/pom.xml @@ -60,8 +60,6 @@ ../dkpro-core-api-ner-asl ../dkpro-core-api-frequency-asl ../dkpro-core-api-xml-asl - - ../dkpro-core-fs-hdfs-asl ../dkpro-core-io-aclanthology-asl ../dkpro-core-io-ancora-asl diff --git a/dkpro-core-build/src/main/resources/dkpro-core/version-rules.xml b/dkpro-core-build/src/main/resources/dkpro-core/version-rules.xml index 283e83f984..f61e6496c2 100644 --- a/dkpro-core-build/src/main/resources/dkpro-core/version-rules.xml +++ b/dkpro-core-build/src/main/resources/dkpro-core/version-rules.xml @@ -89,23 +89,6 @@ - - - - ^3\.[5-9].* - ^3\.[1-9][0-9]+.* - ^[4-9].* - ^[1-9][0-9]+.* - - - - ../dkpro-core-fs-hdfs-asl ../dkpro-core-io-aclanthology-asl ../dkpro-core-io-ancora-asl diff --git a/dkpro-core-fs-hdfs-asl/LICENSE.txt b/dkpro-core-fs-hdfs-asl/LICENSE.txt deleted file mode 100644 index d645695673..0000000000 --- a/dkpro-core-fs-hdfs-asl/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/dkpro-core-fs-hdfs-asl/pom.xml b/dkpro-core-fs-hdfs-asl/pom.xml deleted file mode 100644 index fa55d89405..0000000000 --- a/dkpro-core-fs-hdfs-asl/pom.xml +++ /dev/null @@ -1,166 +0,0 @@ - - - 4.0.0 - - dkpro-core-asl - org.dkpro.core - 3.0.0-SNAPSHOT - ../dkpro-core-asl - - - dkpro-core-fs-hdfs-asl - jar - DKPro Core ASL - FS - HDFS (v ${hadoop.version}) - https://dkpro.github.io/dkpro-core/ - - - 3.4.3 - - - - - org.apache.uima - uimafit-core - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - - - org.slf4j - slf4j-log4j12 - - - ch.qos.reload4j - reload4j - - - org.slf4j - slf4j-reload4j - - - log4j - log4j - - - - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version} - - - ch.qos.reload4j - reload4j - - - - - org.springframework.data - spring-data-hadoop-core - 2.5.0.RELEASE - - - org.apache.uima - uimaj-core - - - org.springframework - spring-core - - - org.dkpro.core - dkpro-core-io-text-asl - ${project.version} - test - - - org.mockito - mockito-core - test - - - org.apache.hadoop - hadoop-hdfs-client - ${hadoop.version} - test - - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version} - tests - test - - - ch.qos.reload4j - reload4j - - - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - tests - test - - - org.slf4j - slf4j-log4j12 - - - ch.qos.reload4j - reload4j - - - org.slf4j - slf4j-reload4j - - - log4j - log4j - - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - - org.apache.hadoop:hadoop-common - org.apache.hadoop:hadoop-hdfs - org.mockito:mockito-core - - - - - - - \ No newline at end of file diff --git a/dkpro-core-fs-hdfs-asl/src/main/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocator.java b/dkpro-core-fs-hdfs-asl/src/main/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocator.java deleted file mode 100644 index 481c9b651c..0000000000 --- a/dkpro-core-fs-hdfs-asl/src/main/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocator.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.fs.hdfs; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.uima.fit.component.Resource_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.ExternalResourceLocator; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceSpecifier; -import org.springframework.core.io.support.PathMatchingResourcePatternResolver; -import org.springframework.data.hadoop.fs.HdfsResourceLoader; - -public class HdfsResourceLoaderLocator - extends Resource_ImplBase - implements ExternalResourceLocator -{ - public static final String PARAM_FILESYSTEM = "fileSystem"; - @ConfigurationParameter(name = PARAM_FILESYSTEM, mandatory = false) - private String fileSystem; - - private HdfsResourceLoader resolverInstance; - - @Override - public boolean initialize(ResourceSpecifier aSpecifier, Map aAdditionalParams) - throws ResourceInitializationException - { - super.initialize(aSpecifier, aAdditionalParams); - try { - if (fileSystem == null) { - new HdfsResourceLoader(new Configuration(true)); - } - else { - resolverInstance = new HdfsResourceLoader(new Configuration(), new URI(fileSystem)); - resolverInstance - .setResourcePatternResolver(new PathMatchingResourcePatternResolver()); - } - } - catch (URISyntaxException e) { - throw new ResourceInitializationException(e); - } - return true; - - } - - @Override - public Object getResource() - { - return resolverInstance; - } -} diff --git a/dkpro-core-fs-hdfs-asl/src/test/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocatorTest.java b/dkpro-core-fs-hdfs-asl/src/test/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocatorTest.java deleted file mode 100644 index bb66f93852..0000000000 --- a/dkpro-core-fs-hdfs-asl/src/test/java/org/dkpro/core/fs/hdfs/HdfsResourceLoaderLocatorTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.fs.hdfs; - -import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader; -import static org.apache.uima.fit.factory.ExternalResourceFactory.createResourceDescription; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assumptions.assumeFalse; - -import java.io.File; -import java.io.OutputStreamWriter; -import java.util.Locale; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.uima.fit.factory.JCasFactory; -import org.dkpro.core.io.text.TextReader; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class HdfsResourceLoaderLocatorTest -{ - private MiniDFSCluster hdfsCluster; - - @BeforeEach - public void startCluster(@TempDir File target, @TempDir File hadoopTmp) throws Exception - { - assumeFalse(System.getProperty("os.name").toLowerCase(Locale.US).contains("win"), - "HDFS on Windows would require native libs which we do not supply."); - - var baseDir = new File(target, "hdfs").getAbsoluteFile(); - FileUtil.fullyDelete(baseDir); - Configuration conf = new Configuration(); - conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath()); - conf.set("hadoop.tmp.dir", hadoopTmp.getAbsolutePath()); - MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); - hdfsCluster = builder.build(); - } - - @AfterEach - public void shutdownCluster() - { - if (hdfsCluster != null) { - hdfsCluster.shutdown(); - } - } - - @Test - public void testExternalLoaderLocator() throws Exception - { - var hdfsURI = "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"; - - var document = "This is a test."; - - // Write test document - hdfsCluster.getFileSystem().mkdirs(new Path("/user/test")); - try (var os = new OutputStreamWriter( - hdfsCluster.getFileSystem().create(new Path("/user/test/file.txt")), "UTF-8")) { - os.write(document); - } - - // Set up HDFS resource locator - var locator = createResourceDescription(HdfsResourceLoaderLocator.class, - HdfsResourceLoaderLocator.PARAM_FILESYSTEM, hdfsURI); - - // Configure reader to read from HDFS - var reader = createReader(TextReader.class, // - TextReader.PARAM_SOURCE_LOCATION, "hdfs:/user/test", // - TextReader.PARAM_PATTERNS, "file.txt", // - TextReader.KEY_RESOURCE_RESOLVER, locator); - - // Read data - var cas = JCasFactory.createJCas(); - reader.getNext(cas.getCas()); - - // Verify content - assertEquals(document, cas.getDocumentText()); - } -} diff --git a/dkpro-core-fs-hdfs-asl/src/test/resources/log4j2.xml b/dkpro-core-fs-hdfs-asl/src/test/resources/log4j2.xml deleted file mode 100644 index 31c71b9dc4..0000000000 --- a/dkpro-core-fs-hdfs-asl/src/test/resources/log4j2.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - -