diff --git a/parquet-cli/README.md b/parquet-cli/README.md index c7b3540a4b..4e9aea10e1 100644 --- a/parquet-cli/README.md +++ b/parquet-cli/README.md @@ -137,6 +137,7 @@ Usage: parquet [options] [command] [command options] ### Configuration Options - `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times. +- `--config-file`: Path to a configuration file (`.properties` or `.xml` format). Examples: ```bash @@ -147,4 +148,7 @@ parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list- # Multiple options parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false +# Using config file +parquet convert input.avro -o output.parquet --config-file config.properties + ``` diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java index e93a21e899..0f4932f3d9 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java @@ -25,12 +25,16 @@ import com.beust.jcommander.Parameters; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; +import java.io.FileInputStream; +import java.io.InputStream; import java.util.List; +import java.util.Properties; import java.util.Set; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Level; @@ -73,6 +77,11 @@ public class Main extends Configured implements Tool { description = "Set a configuration property (format: key=value). Can be specified multiple times.") private List confProperties; + @Parameter( + names = {"--config-file"}, + description = "Path to a configuration file (properties or Hadoop XML format).") + private String configFilePath; + @VisibleForTesting @Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true) String programName = DEFAULT_PROGRAM_NAME; @@ -172,6 +181,24 @@ public int run(String[] args) throws Exception { // If the command does not support the configs, it would simply be ignored. if (command instanceof Configurable) { Configuration merged = new Configuration(getConf()); + + if (configFilePath != null) { + try { + if (isXmlConfigFile(configFilePath)) { + loadXmlConfiguration(merged, configFilePath); + } else if (isPropertiesConfigFile(configFilePath)) { + loadPropertiesConfiguration(merged, configFilePath); + } else { + throw new IllegalArgumentException( + "Unsupported config file format. Only .xml and .properties files are supported: " + + configFilePath); + } + } catch (Exception e) { + throw new IllegalArgumentException( + "Failed to load config file '" + configFilePath + "': " + e.getMessage(), e); + } + } + if (confProperties != null) { for (String prop : confProperties) { String[] parts = prop.split("=", 2); @@ -218,4 +245,27 @@ public static void main(String[] args) throws Exception { int rc = ToolRunner.run(new Configuration(), new Main(console), args); System.exit(rc); } + + private boolean isXmlConfigFile(String filePath) { + return filePath.toLowerCase().endsWith(".xml"); + } + + private boolean isPropertiesConfigFile(String filePath) { + String lowerPath = filePath.toLowerCase(); + return lowerPath.endsWith(".properties"); + } + + private void loadXmlConfiguration(Configuration config, String filePath) { + config.addResource(new Path(filePath)); + console.debug("Loaded XML configuration from file: {}", filePath); + } + + private void loadPropertiesConfiguration(Configuration config, String filePath) throws Exception { + try (InputStream in = new FileInputStream(filePath)) { + Properties props = new Properties(); + props.load(in); + props.forEach((key, value) -> config.set(key.toString(), value.toString())); + console.debug("Loaded properties configuration from file: {}", filePath); + } + } } diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java index ec4f8cc1f2..6bf54bdf05 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java @@ -18,6 +18,8 @@ */ package org.apache.parquet.cli; +import java.io.File; +import java.io.FileWriter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ToolRunner; import org.junit.Assert; @@ -31,4 +33,38 @@ public void mainTest() throws Exception { ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {}); Assert.assertTrue("we simply verify there are no errors here", true); } + + @Test + public void testConfigFileLoading() throws Exception { + File configFile = File.createTempFile("test-config", ".properties"); + configFile.deleteOnExit(); + + try (FileWriter writer = new FileWriter(configFile)) { + writer.write("test.key=test.value\n"); + } + + try { + new Main(LoggerFactory.getLogger(MainTest.class)) + .run(new String[] {"--config-file", configFile.getAbsolutePath(), "help"}); + Assert.assertTrue("Config file loading should not throw exception", true); + } catch (IllegalArgumentException e) { + Assert.fail("Config file loading failed: " + e.getMessage()); + } + } + + @Test + public void testLocalPropertiesFile() throws Exception { + String configFile = getClass().getResource("/test-config.properties").getPath(); + ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] { + "--config-file", configFile, "version" + }); + } + + @Test + public void testLocalXmlFile() throws Exception { + String configFile = getClass().getResource("/test-config.xml").getPath(); + ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] { + "--config-file", configFile, "version" + }); + } } diff --git a/parquet-cli/src/test/resources/test-config.properties b/parquet-cli/src/test/resources/test-config.properties new file mode 100644 index 0000000000..40d792fcab --- /dev/null +++ b/parquet-cli/src/test/resources/test-config.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test.key=test.value +parquet.avro.write-old-list-structure=false +parquet.compression=SNAPPY +parquet.block.size=134217728 diff --git a/parquet-cli/src/test/resources/test-config.xml b/parquet-cli/src/test/resources/test-config.xml new file mode 100644 index 0000000000..b7d17e917e --- /dev/null +++ b/parquet-cli/src/test/resources/test-config.xml @@ -0,0 +1,36 @@ + + + + + test.key + test.value + + + + parquet.avro.write-old-list-structure + false + + + + parquet.compression + SNAPPY + + +