use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
the class SparkSimpleFileIOOutputRuntimeTestIT method testCsv_merge.
@Test
public void testCsv_merge() throws IOException {
    // Build the destination URI on the file system derived from the Spark-provided Hadoop configuration.
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    Path target = new Path(tmp.getRoot().toString(), "output.csv");
    String fileSpec = fs.getUri().resolve(target.toUri()).toString();

    // Component configuration: CSV format with the merge-output option switched on.
    SimpleFileIOOutputProperties outputProps = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.CSV);
    outputProps.mergeOutput.setValue(true);

    // Runtime under test.
    SimpleFileIOOutputRuntime writer = new SimpleFileIOOutputRuntime();
    writer.initialize(null, outputProps);

    // Feed two records through the runtime inside a Spark pipeline.
    final Pipeline pipeline = spark.createPipeline();
    IndexedRecord first = ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" });
    IndexedRecord second = ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" });
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(first, second));
    records.apply(writer);

    // Block until the pipeline has completed.
    pipeline.run().waitUntilFinish();

    // Verify the written lines, then the file count passed as 1 for the output location.
    MiniDfsResource.assertReadFile(fs, fileSpec, "1;one", "2;two");
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1);
}
use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testInputParquetByteBufferSerialization.
/**
 * Reads a Parquet input and dumps it to CSV. This is a special case that exercises the coding/decoding of
 * ByteBuffer values.
 *
 * NOTE (from the original author, not verified here): this test was reported as not working because logging in
 * the Beam class ExecutorServiceParallelExecutor moves the offset of any ByteBuffer.
 *
 * @throws IOException if the fixture cannot be copied to the mini file system or the output cannot be read.
 * @throws URISyntaxException declared for the URI handling done by the test fixtures.
 */
@Test
public void testInputParquetByteBufferSerialization() throws IOException, URISyntaxException {
    // Copy the classpath fixture onto the mini file system. Both streams are declared in the
    // try-with-resources header so that the classpath InputStream is closed as well as the target stream.
    try (InputStream in = getClass().getResourceAsStream("two_lines.snappy.parquet");
            OutputStream inOnMinFS = mini.getFs().create(new Path("/user/test/two_lines.snappy.parquet"))) {
        inOnMinFS.write(IOUtils.toByteArray(in));
    }
    String fileSpec = mini.getFs().getUri().resolve("/user/test/two_lines.snappy.parquet").toString();
    String fileSpecOutput = mini.getLocalFs().getUri()
            .resolve(new Path(mini.newFolder().toString(), "output.csv").toUri()).toString();

    // Configure the input component to read the Parquet file that was just written.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    inputProps.getDatasetProperties().path.setValue(fileSpec);

    // Create the input runtime.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // Configure the output component to write CSV to the local file system.
    SimpleFileIOOutputProperties outputProps = new SimpleFileIOOutputProperties(null);
    outputProps.init();
    outputProps.setDatasetProperties(SimpleFileIODatasetRuntimeTest.createDatasetProperties());
    outputProps.getDatasetProperties().path.setValue(fileSpecOutput);
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.CSV);

    // Create the output runtime.
    SimpleFileIOOutputRuntime runtimeO = new SimpleFileIOOutputRuntime();
    runtimeO.initialize(null, outputProps);

    // Chain input and output in a direct pipeline and wait for completion.
    final Pipeline p = beam.createPipeline(1);
    p.apply(runtime).apply(runtimeO);
    p.run().waitUntilFinish();

    // Check the expected values.
    mini.assertReadFile(mini.getLocalFs(), fileSpecOutput, "1;rdubois", "2;clombard");
}
use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
the class SimpleFileIOOutputRuntimeTest method testBasicAvroBytes.
/**
 * Writes two records whose "key" field has the Avro type BYTES to an Avro file through the output runtime. The
 * test only runs the pipeline to completion; the written file is not read back.
 */
@Test
public void testBasicAvroBytes() throws IOException, URISyntaxException {
    // Destination: output.avro inside a new folder, resolved against the local file system URI.
    Path target = new Path(mini.newFolder().toString(), "output.avro");
    String fileSpec = mini.getLocalFs().getUri().resolve(target.toUri()).toString();

    // Component configuration: Avro format at the destination above.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Runtime under test.
    SimpleFileIOOutputRuntime writer = new SimpleFileIOOutputRuntime();
    writer.initialize(null, outputProps);

    // Record schema: a BYTES "key" followed by a STRING "value", neither with a default.
    Schema bytesType = Schema.create(Schema.Type.BYTES);
    Schema stringType = Schema.create(Schema.Type.STRING);
    Schema recordSchema = SchemaBuilder.record("test").fields() //
            .name("key").type(bytesType).noDefault() //
            .name("value").type(stringType).noDefault() //
            .endRecord();

    IndexedRecord first = new GenericData.Record(recordSchema);
    first.put(0, ByteBuffer.wrap(new byte[] { 0x00, 0x01, 0x02 }));
    first.put(1, "012");

    IndexedRecord second = new GenericData.Record(recordSchema);
    second.put(0, ByteBuffer.wrap(new byte[] { 0x01, 0x02, 0x03 }));
    second.put(1, "123");

    // Push both records through the runtime in a direct pipeline and wait for it to finish.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(first, second));
    records.apply(writer);
    pipeline.run().waitUntilFinish();

    // TODO(rskraba): Implement a comparison for the file on disk; nothing is asserted about its content yet.
}
use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
the class SimpleFileIOOutputRuntimeTest method testBasicCsvFormat.
/**
 * Writes the three-column CSV examples through the output runtime, setting only the path on the properties
 * returned by createOutputComponentProperties(), and checks that the file contains the expected output lines.
 */
@Test
public void testBasicCsvFormat() throws IOException, URISyntaxException {
    // Collect the input records together with the line each one is expected to produce.
    List<IndexedRecord> records = new ArrayList<>();
    List<String> expectedLines = new ArrayList<>();
    for (CsvExample example : CsvExample.getCsvExamples()) {
        // Skip examples that do not share the three-column schema.
        if (example.getValues().length != 3) {
            continue;
        }
        expectedLines.add(example.getExpectedOutputLine());
        records.add(ConvertToIndexedRecord.convertToAvro(example.getValues()));
    }

    // Destination: output.csv inside a new folder, resolved against the local file system URI.
    Path target = new Path(mini.newFolder().toString(), "output.csv");
    String fileSpec = mini.getLocalFs().getUri().resolve(target.toUri()).toString();

    // Component configuration: only the path is set explicitly.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().path.setValue(fileSpec);

    // Runtime under test.
    SimpleFileIOOutputRuntime writer = new SimpleFileIOOutputRuntime();
    writer.initialize(null, outputProps);

    // Push the records through the runtime in a direct pipeline and wait for it to finish.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> source = pipeline.apply(Create.of(records));
    source.apply(writer);
    pipeline.run().waitUntilFinish();

    // Compare the written file with the expected lines.
    String[] expectedArray = expectedLines.toArray(new String[0]);
    mini.assertReadFile(mini.getLocalFs(), fileSpec, expectedArray);
}
use of org.talend.components.simplefileio.output.SimpleFileIOOutputProperties in project components by Talend.
the class SimpleFileIOOutputRuntimeTest method testBasicAvro.
/**
 * Writes two records converted from string pairs to an Avro file through the output runtime. The test only runs
 * the pipeline to completion; the written file is not read back.
 */
@Test
public void testBasicAvro() throws IOException, URISyntaxException {
    // Destination: output.avro inside a new folder, resolved against the local file system URI.
    Path target = new Path(mini.newFolder().toString(), "output.avro");
    String fileSpec = mini.getLocalFs().getUri().resolve(target.toUri()).toString();

    // Component configuration: Avro format at the destination above.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Runtime under test.
    SimpleFileIOOutputRuntime writer = new SimpleFileIOOutputRuntime();
    writer.initialize(null, outputProps);

    // Push two records through the runtime in a direct pipeline and wait for it to finish.
    final Pipeline pipeline = beam.createPipeline();
    IndexedRecord first = ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" });
    IndexedRecord second = ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" });
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(first, second));
    records.apply(writer);
    pipeline.run().waitUntilFinish();

    // TODO(rskraba): Implement a comparison for the file on disk; nothing is asserted about its content yet.
}
Aggregations