Use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
From the class SimpleFileIOOutputErrorTest, method testTryToOverwrite:
/**
 * Basic unit test using all default values (except for the path) on an in-memory DFS cluster.
 */
@Test
public void testTryToOverwrite() throws IOException, URISyntaxException {
    Path parent = new Path(mini.newFolder().toString());
    Path dst = new Path(parent, "output");
    String fileSpec = mini.getLocalFs().getUri().resolve(dst.toUri()).toString();
    // Write something to the file before trying to run.
    try (OutputStream out = mini.getLocalFs().create(new Path(dst, "part-00000"))) {
        out.write(0);
    }
    // Trying to write to an existing destination throws an exception.
    thrown.expect(TalendRuntimeException.class);
    thrown.expect(hasProperty("code", is(SimpleFileIOErrorCode.OUTPUT_ALREADY_EXISTS)));
    thrown.expectMessage("The path " + fileSpec + " already exists. Please remove it manually.");
    // Now try using the component.
    try {
        // Configure the component.
        SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
        props.getDatasetProperties().path.setValue(fileSpec);
        // Create the runtime.
        SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
        runtime.initialize(null, props);
        // Use the runtime in a direct pipeline to test.
        final Pipeline p = beam.createPipeline();
        PCollection<IndexedRecord> input = p.apply( //
                Create.of( //
                        ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                        ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
        input.apply(runtime);
        // And run the test.
        p.run().waitUntilFinish();
    } catch (Pipeline.PipelineExecutionException e) {
        if (e.getCause() instanceof TalendRuntimeException)
            throw (TalendRuntimeException) e.getCause();
        throw e;
    }
}
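The test above pins down the default policy: the component refuses to write over an existing destination and raises OUTPUT_ALREADY_EXISTS. The opposite policy is opt-in via the overwrite property, the same flag that testUnauthorizedOverwrite sets below. A minimal sketch, assuming the test-class context above (the fileSpec destination may already contain part files); every call is taken from the tests in this listing:

    // Sketch: configure the output to replace an existing destination
    // instead of failing with OUTPUT_ALREADY_EXISTS.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec); // destination that may already exist
    props.overwrite.setValue(true);                       // opt in to overwriting
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);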
Use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
From the class SimpleFileIOOutputErrorTest, method testUnauthorizedOverwrite:
/**
 * Basic unit test using all default values (except for the path and the overwrite flag) on an in-memory DFS
 * cluster.
 */
@Test
public void testUnauthorizedOverwrite() throws IOException, URISyntaxException {
    Path parent = new Path(mini.newFolder().toString());
    Path dst = new Path(parent, "output");
    String fileSpec = mini.getLocalFs().getUri().resolve(dst.toUri()).toString();
    // Write something to the file before trying to run.
    try (OutputStream out = mini.getLocalFs().create(new Path(dst, "part-00000"))) {
        out.write(0);
    }
    // Ensure that the destination is unwritable.
    FileUtil.chmod(dst.toUri().toString(), "000", true);
    // Trying to overwrite an unmodifiable destination throws an exception.
    thrown.expect(TalendRuntimeException.class);
    thrown.expect(hasProperty("code", is(SimpleFileIOErrorCode.OUTPUT_NOT_AUTHORIZED)));
    thrown.expectMessage("Can not write to " + fileSpec
            + ". Please check user permissions or existence of base directory.");
    // Now try using the component.
    try {
        // Configure the component.
        SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
        props.getDatasetProperties().path.setValue(fileSpec);
        props.overwrite.setValue(true);
        // Create the runtime.
        SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
        runtime.initialize(null, props);
        // Use the runtime in a direct pipeline to test.
        final Pipeline p = beam.createPipeline();
        PCollection<IndexedRecord> input = p.apply( //
                Create.of( //
                        ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                        ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
        input.apply(runtime);
        // And run the test.
        runtime.runAtDriver(null);
        p.run().waitUntilFinish();
    } catch (Pipeline.PipelineExecutionException e) {
        if (e.getCause() instanceof TalendRuntimeException)
            throw (TalendRuntimeException) e.getCause();
        throw e;
    }
}
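Both error tests end with the same catch block, because Beam wraps exceptions thrown during execution in a Pipeline.PipelineExecutionException; the TalendRuntimeException must be rethrown from the cause for JUnit's ExpectedException rule to match its class, error code, and message. A sketch of how that unwrapping could be factored into a shared helper (the helper name is hypothetical; the body is verbatim from the tests):

    // Hypothetical helper: run the pipeline and rethrow a wrapped
    // TalendRuntimeException so the ExpectedException rule can match it.
    private static void runAndUnwrap(Pipeline p) {
        try {
            p.run().waitUntilFinish();
        } catch (Pipeline.PipelineExecutionException e) {
            if (e.getCause() instanceof TalendRuntimeException)
                throw (TalendRuntimeException) e.getCause();
            throw e;
        }
    }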
Use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
From the class SimpleFileIORoundTripRuntimeTest, method testParquet:
/**
 * Basic Parquet round-trip test.
 */
@Test
public void testParquet() throws IOException {
    // The file that we will be creating.
    RecordSet rs = getSimpleTestData(0);
    String fileSpec = mini.getLocalFsNewFolder() + "output/";
    // Configure the components.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.setDatasetProperties(outputProps.getDatasetProperties());
    List<IndexedRecord> actual = runRoundTripPipelines(beam, rs.getAllData(), outputProps, inputProps);
    // Generate the set of expected records. Unlike CSV (which turns all columns into String and loses the
    // original column names), Parquet preserves the schema, so the records should round-trip unchanged.
    List<IndexedRecord> expected = rs.getAllData();
    assertThat(actual, containsInAnyOrder(expected.toArray()));
    // Verify that the file on the filesystem was correctly written.
    // TODO(rskraba): verify independently from
    // mini.assertReadFile(
    //     mini.getLocalFs(),
    //     fileSpec,
    //     rewriteRecordsAsCsvLines(expected, inputProps.getDatasetProperties().recordDelimiter.getValue(),
    //         inputProps.getDatasetProperties().fieldDelimiter.getValue()));
}
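Because the input component reuses the output's dataset properties, the round trip differs between formats only in the format value. A minimal sketch, assuming the same helpers and test-class context as testParquet, of what the Avro variant would look like:

    // Sketch: the same round trip with Avro instead of Parquet. Path and
    // format are configured once on the shared dataset properties.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.setDatasetProperties(outputProps.getDatasetProperties());
    List<IndexedRecord> actual = runRoundTripPipelines(beam, rs.getAllData(), outputProps, inputProps);
    assertThat(actual, containsInAnyOrder(rs.getAllData().toArray()));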
Use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
From the class SparkSimpleFileIOOutputRuntimeTestIT, method testAvro_merge:
@Test
public void testAvro_merge() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String fileSpec = fs.getUri().resolve(new Path(tmp.getRoot().toString(), "output.avro").toUri()).toString();
    // Configure the component.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    props.mergeOutput.setValue(true);
    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);
    // Use the runtime in a Spark pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> input = p.apply( //
            Create.of( //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    input.apply(runtime);
    // And run the test.
    p.run().waitUntilFinish();
    // Check the expected values.
    MiniDfsResource.assertReadAvroFile(fs, fileSpec, new HashSet<IndexedRecord>( //
            Arrays.asList( //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))), false);
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1);
}
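testAvro_merge above and testParquet_merge below differ only in the format value and the read assertion; everything else, including the mergeOutput flag that collapses the per-bundle part files into a single output file, is identical. A hedged sketch of a shared driver (the helper name runMergePipeline is hypothetical; every call inside it appears in the two merge tests):

    // Hypothetical helper: build and run the merge pipeline for a given format.
    private void runMergePipeline(SimpleFileIOFormat format, String fileSpec) {
        SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
        props.getDatasetProperties().path.setValue(fileSpec);
        props.getDatasetProperties().format.setValue(format);
        props.mergeOutput.setValue(true); // collapse part files into one output file
        SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
        runtime.initialize(null, props);
        final Pipeline p = spark.createPipeline();
        PCollection<IndexedRecord> input = p.apply( //
                Create.of( //
                        ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                        ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
        input.apply(runtime);
        p.run().waitUntilFinish();
    }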
Use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
From the class SparkSimpleFileIOOutputRuntimeTestIT, method testParquet_merge:
@Test
public void testParquet_merge() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String fileSpec = fs.getUri().resolve(new Path(tmp.getRoot().toString(), "output.parquet").toUri()).toString();
    // Configure the component.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    props.mergeOutput.setValue(true);
    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);
    // Use the runtime in a Spark pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> input = p.apply( //
            Create.of( //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    input.apply(runtime);
    // And run the test.
    p.run().waitUntilFinish();
    // Check the expected values.
    MiniDfsResource.assertReadParquetFile(fs, fileSpec, new HashSet<IndexedRecord>( //
            Arrays.asList( //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))), false);
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1);
}
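With a helper like the hypothetical runMergePipeline sketched after testAvro_merge, the Parquet variant reduces to the run plus the two assertions the test already makes:

    // Sketch: the Parquet merge test expressed via the hypothetical helper.
    HashSet<IndexedRecord> expected = new HashSet<IndexedRecord>(Arrays.asList( //
            ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    runMergePipeline(SimpleFileIOFormat.PARQUET, fileSpec);
    MiniDfsResource.assertReadParquetFile(fs, fileSpec, expected, false);
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1); // merged into a single part file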