use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testInputParquetByteBufferSerialization.
/**
 * Reads a Parquet input and dumps it as CSV. This is a special case to check the support of ByteBuffer
 * coding/decoding.
 * <p>
 * The {@code two_lines.snappy.parquet} classpath resource is copied to {@code /user/test/} on the mini cluster
 * file system, read through {@link SimpleFileIOInputRuntime}, and written as {@code output.csv} into a new folder
 * on the local file system, where the two resulting lines are checked.
 * <p>
 * Known issue noted by the original author: this test is reported as not working because of logging in the Beam
 * class ExecutorServiceParallelExecutor, which moves the offset of any ByteBuffer.
 *
 * @throws IOException if the test file cannot be copied or the output cannot be read back.
 * @throws URISyntaxException declared for compatibility with the test fixture helpers.
 */
@Test
public void testInputParquetByteBufferSerialization() throws IOException, URISyntaxException {
    // Copy the classpath resource onto the mini cluster. Both streams are declared in the try-with-resources
    // header so that the resource stream is closed as well as the destination stream.
    try (InputStream in = getClass().getResourceAsStream("two_lines.snappy.parquet");
            OutputStream inOnMinFS = mini.getFs().create(new Path("/user/test/two_lines.snappy.parquet"))) {
        inOnMinFS.write(IOUtils.toByteArray(in));
    }
    String fileSpec = mini.getFs().getUri().resolve("/user/test/two_lines.snappy.parquet").toString();
    String fileSpecOutput = mini.getLocalFs().getUri()
            .resolve(new Path(mini.newFolder().toString(), "output.csv").toUri()).toString();

    // Configure the input component to read the Parquet file.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    inputProps.getDatasetProperties().path.setValue(fileSpec);

    // Create the input runtime.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // Configure the output component to write CSV to the local file system.
    SimpleFileIOOutputProperties outputProps = new SimpleFileIOOutputProperties(null);
    outputProps.init();
    outputProps.setDatasetProperties(SimpleFileIODatasetRuntimeTest.createDatasetProperties());
    outputProps.getDatasetProperties().path.setValue(fileSpecOutput);
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.CSV);

    // Create the output runtime.
    SimpleFileIOOutputRuntime runtimeO = new SimpleFileIOOutputRuntime();
    runtimeO.initialize(null, outputProps);

    // Use the runtimes in a direct pipeline to test.
    final Pipeline p = beam.createPipeline(1);
    p.apply(runtime).apply(runtimeO);
    p.run().waitUntilFinish();

    // The CSV output must contain exactly the two rows stored in the Parquet file.
    mini.assertReadFile(mini.getLocalFs(), fileSpecOutput, "1;rdubois", "2;clombard");
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class GSRoundTripRuntimeTestIT method testCsv.
/**
 * Round trip test: writes two records to the location held in {@code gsPath} with the output runtime, then reads
 * everything matching {@code gsPath + "*"} back with the input runtime and checks that the same records are
 * returned, in any order.
 */
@Test
public void testCsv() {
    // The records that are pushed through the round trip.
    List<IndexedRecord> expected = new ArrayList<>();
    for (String[] row : new String[][] { { "1", "one" }, { "2", "two" } }) {
        expected.add(ConvertToIndexedRecord.convertToAvro(row));
    }

    // Write phase: configure the sink and run the write pipeline to completion.
    SimpleFileIOOutputProperties sinkProps = createOutputProps();
    sinkProps.getDatasetProperties().path.setValue(gsPath);
    SimpleFileIOOutputRuntime sink = new SimpleFileIOOutputRuntime();
    sink.initialize(null, sinkProps);
    writeP.apply(Create.of(expected)).apply(sink);
    writeP.run(pipelineOptions).waitUntilFinish();

    // Read phase: configure the source with a glob over everything written under the path.
    SimpleFileIOInputProperties sourceProps = createInputProps();
    sourceProps.getDatasetProperties().path.setValue(gsPath + "*");
    SimpleFileIOInputRuntime source = new SimpleFileIOInputRuntime();
    source.initialize(null, sourceProps);

    // The assertion is registered on the read pipeline and evaluated when it runs.
    PCollection<IndexedRecord> roundTripped = readP.apply(source);
    PAssert.that(roundTripped).containsInAnyOrder(expected);
    readP.run(pipelineOptions).waitUntilFinish();
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputErrorTest method testUnauthorizedRead.
/**
 * Checks that reading a file as a user without access to its parent directory fails with a
 * {@link TalendRuntimeException} carrying the {@link SimpleFileIOErrorCode#INPUT_NOT_AUTHORIZED} code.
 * <p>
 * A CSV file is written to the mini cluster, its parent directory is restricted to its owner ("gooduser"), and
 * the input component is configured to read as "baduser".
 */
@Test
public void testUnauthorizedRead() throws IOException, URISyntaxException {
    String csvContent = writeRandomCsvFile(mini.getFs(), "/user/test/input.csv", 0, 0, 10, 10, 6, ";", "\n");
    String inputUri = mini.getFs().getUri().resolve("/user/test/input.csv").toString();

    // Make the parent directory accessible to its owner only, and hand ownership to "gooduser".
    Path parentDir = new Path(inputUri).getParent();
    mini.getFs().setPermission(parentDir, new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE));
    mini.getFs().setOwner(parentDir, "gooduser", "gooduser");

    // Configure the component to read the file as a different user.
    SimpleFileIOInputProperties inputProps = SimpleFileIOInputRuntimeTest.createInputComponentProperties();
    inputProps.getDatasetProperties().path.setValue(inputUri);
    inputProps.getDatasetProperties().getDatastoreProperties().userName.setValue("baduser");

    // Create the runtime.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // The failure this test expects.
    thrown.expect(TalendRuntimeException.class);
    thrown.expect(hasProperty("code", is(SimpleFileIOErrorCode.INPUT_NOT_AUTHORIZED)));
    thrown.expectMessage("baduser can not read from " + inputUri + ". Please check user permissions or existence of base directory.");

    try {
        // Use the runtime in a direct pipeline to test.
        final Pipeline pipeline = beam.createPipeline();
        PCollection<IndexedRecord> readLines = pipeline.apply(runtime);

        // Build the records that a successful read would have produced.
        List<IndexedRecord> expected = new ArrayList<>();
        for (String line : csvContent.split("\n")) {
            String[] fields = line.split(";");
            expected.add(ConvertToIndexedRecord.convertToAvro(fields));
        }
        PAssert.that(readLines).containsInAnyOrder(expected);

        // And run the test.
        pipeline.run().waitUntilFinish();
    } catch (Pipeline.PipelineExecutionException e) {
        // Unwrap the pipeline exception so the expectations above see the component's own exception.
        Throwable cause = e.getCause();
        if (cause instanceof TalendRuntimeException) {
            throw (TalendRuntimeException) cause;
        }
        throw e;
    }
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIORoundTripRuntimeTest method testCsvWithDelimiters.
/**
 * Round trip test for CSV using a custom record delimiter ("---") and a custom field delimiter ("|").
 * <p>
 * The records are written and read back through {@code runRoundTripPipelines}, compared against the records
 * rewritten with the CSV schema, and the raw file content is then checked on the local file system.
 */
@Test
public void testCsvWithDelimiters() throws IOException {
    // The data to write and the folder it is written to.
    RecordSet testData = getSimpleTestData(0);
    String outputFolder = mini.getLocalFsNewFolder() + "output/";

    // Configure the output component for CSV with both delimiters overridden.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.CSV);
    outputProps.getDatasetProperties().path.setValue(outputFolder);
    outputProps.getDatasetProperties().recordDelimiter.setValue(RecordDelimiterType.OTHER);
    outputProps.getDatasetProperties().specificRecordDelimiter.setValue("---");
    outputProps.getDatasetProperties().fieldDelimiter.setValue(FieldDelimiterType.OTHER);
    outputProps.getDatasetProperties().specificFieldDelimiter.setValue("|");

    // The input component shares the dataset configuration of the output component.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.setDatasetProperties(outputProps.getDatasetProperties());

    List<IndexedRecord> actual = runRoundTripPipelines(beam, testData.getAllData(), outputProps, inputProps);

    // Generate the set of expected records. By default, CSV turns all columns into String and loses the
    // original column name.
    List<IndexedRecord> expected = rewriteRecordsWithCsvSchema(testData.getAllData());
    // NOTE(review): the arguments are in (expected, actual) order, the reverse of the usual Hamcrest
    // convention. Possibly deliberate if record equality is not symmetric; confirm before swapping.
    assertThat(expected, containsInAnyOrder(actual.toArray()));

    // Verify that the file on the filesystem was correctly written, splitting its content on "---".
    mini.assertReadFile("---", mini.getLocalFs(), outputFolder,
            rewriteRecordsAsCsvLines(expected,
                    inputProps.getDatasetProperties().getRecordDelimiter(),
                    inputProps.getDatasetProperties().getFieldDelimiter()));
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIORoundTripRuntimeTest method testAvro.
/**
 * Basic Avro round trip test: the records are written in the AVRO format and read back using the same dataset
 * configuration, then compared with the original records in any order.
 */
@Test
public void testAvro() throws IOException {
    // The data to write and the folder it is written to.
    RecordSet testData = getSimpleTestData(0);
    String outputFolder = mini.getLocalFsNewFolder() + "output/";

    // Configure the output component for Avro.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    outputProps.getDatasetProperties().path.setValue(outputFolder);

    // The input component shares the dataset configuration of the output component.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.setDatasetProperties(outputProps.getDatasetProperties());

    List<IndexedRecord> actual = runRoundTripPipelines(beam, testData.getAllData(), outputProps, inputProps);

    // The records read back are compared directly with the records that were written.
    List<IndexedRecord> expected = testData.getAllData();
    assertThat(actual, containsInAnyOrder(expected.toArray()));

    // TODO(rskraba): verify independently that the file on the filesystem was correctly written.
    // The check below was left disabled by the original author:
    // mini.assertReadFile(
    // mini.getLocalFs(),
    // fileSpec,
    // rewriteRecordsAsCsvLines(expected, inputProps.getDatasetProperties().recordDelimiter.getValue(),
    // inputProps.getDatasetProperties().fieldDelimiter.getValue()));
}
Aggregations