use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIORoundTripRuntimeTest method testParquet.
/**
 * Basic Parquet test: passes the simple test data through {@code runRoundTripPipelines} with the
 * output and input components both configured for the Parquet format on the same path.
 *
 * @throws IOException declared for the file system and pipeline helpers used by this test.
 */
@Test
public void testParquet() throws IOException {
    // Source records and the folder that the output component will write into.
    RecordSet testData = getSimpleTestData(0);
    String outputFolder = mini.getLocalFsNewFolder() + "output/";

    // Output side: Parquet format, written to the folder above.
    SimpleFileIOOutputProperties writerProps = createOutputComponentProperties();
    writerProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    writerProps.getDatasetProperties().path.setValue(outputFolder);

    // Input side: shares the writer's dataset properties, so it uses the same format and path.
    SimpleFileIOInputProperties readerProps = createInputComponentProperties();
    readerProps.setDatasetProperties(writerProps.getDatasetProperties());

    List<IndexedRecord> roundTripped =
            runRoundTripPipelines(beam, testData.getAllData(), writerProps, readerProps);

    // The records coming back are compared, in any order, against the unmodified test data.
    List<IndexedRecord> expected = testData.getAllData();
    Object[] expectedRecords = expected.toArray();
    assertThat(roundTripped, containsInAnyOrder(expectedRecords));

    // TODO(rskraba): verify the file written to the filesystem independently of the input component.
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicAvro.
/**
 * Basic Avro read test: writes an Avro file to the file system exposed by {@code mini.getFs()},
 * reads it through {@code SimpleFileIOInputRuntime} in a Beam pipeline and checks the records.
 * Only the format and the path are set on the input properties.
 *
 * @throws IOException declared for the file system helpers used by this test.
 * @throws URISyntaxException declared by the test signature.
 */
@Test
public void testBasicAvro() throws IOException, URISyntaxException {
    // Single definition of the fixture location, used for both writing and reading.
    String avroPath = "/user/test/input.avro";

    // Write the fixture, then resolve its path against the file system URI.
    RecordSet testData = getSimpleTestData(0);
    writeRandomAvroFile(mini.getFs(), avroPath, testData);
    String inputUri = mini.getFs().getUri().resolve(avroPath).toString();

    // Point the input component at the Avro file.
    SimpleFileIOInputProperties readerProps = createInputComponentProperties();
    readerProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    readerProps.getDatasetProperties().path.setValue(inputUri);

    // Build and initialize the runtime under test.
    SimpleFileIOInputRuntime inputRuntime = new SimpleFileIOInputRuntime();
    inputRuntime.initialize(null, readerProps);

    // Apply the runtime as the source transform of a fresh pipeline.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> recordsRead = pipeline.apply(inputRuntime);

    // The pipeline output must contain exactly the written records, in any order.
    PAssert.that(recordsRead).containsInAnyOrder(testData.getAllData());

    // Execute the pipeline and block until it completes.
    pipeline.run().waitUntilFinish();
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicParquet.
/**
 * Placeholder for the basic Parquet read test; currently disabled via {@code @Ignore}.
 *
 * NOTE(review): the fixture is still produced by {@code writeRandomAvroFile} at an {@code .avro}
 * path while the component is configured for PARQUET. Presumably a Parquet fixture writer is
 * needed before this test can be enabled -- confirm.
 *
 * @throws IOException declared for the file system helpers used by this test.
 * @throws URISyntaxException declared by the test signature.
 */
@Ignore("To implement.")
@Test
public void testBasicParquet() throws IOException, URISyntaxException {
    // Single definition of the fixture location, used for both writing and reading.
    String fixturePath = "/user/test/input.avro";

    // Write the fixture, then resolve its path against the file system URI.
    RecordSet testData = getSimpleTestData(0);
    writeRandomAvroFile(mini.getFs(), fixturePath, testData);
    String inputUri = mini.getFs().getUri().resolve(fixturePath).toString();

    // Point the input component at the file, selecting the Parquet format.
    SimpleFileIOInputProperties readerProps = createInputComponentProperties();
    readerProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    readerProps.getDatasetProperties().path.setValue(inputUri);

    // Build and initialize the runtime under test.
    SimpleFileIOInputRuntime inputRuntime = new SimpleFileIOInputRuntime();
    inputRuntime.initialize(null, readerProps);

    // Apply the runtime as the source transform of a fresh pipeline.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> recordsRead = pipeline.apply(inputRuntime);

    // The pipeline output must contain exactly the test records, in any order.
    PAssert.that(recordsRead).containsInAnyOrder(testData.getAllData());

    // Execute the pipeline and block until it completes.
    pipeline.run().waitUntilFinish();
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIORoundTripRuntimeTest method testCsvWithDelimiters.
/**
 * Test CSV with custom delimiters: the output component writes with a custom record delimiter
 * ({@code "---"}) and field delimiter ({@code "|"}), the input component shares the same dataset
 * properties, and the records returned by {@code runRoundTripPipelines} are compared with the
 * test data rewritten by {@code rewriteRecordsWithCsvSchema}. The written file is then checked
 * against the expected CSV lines.
 *
 * @throws IOException declared for the file system and pipeline helpers used by this test.
 */
@Test
public void testCsvWithDelimiters() throws IOException {
    // Custom delimiters, defined once so the configuration and the file check cannot drift apart.
    final String recordDelimiter = "---";
    final String fieldDelimiter = "|";

    // The file that we will be creating.
    RecordSet rs = getSimpleTestData(0);
    String fileSpec = mini.getLocalFsNewFolder() + "output/";

    // Configure the components.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.CSV);
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    outputProps.getDatasetProperties().recordDelimiter.setValue(RecordDelimiterType.OTHER);
    outputProps.getDatasetProperties().specificRecordDelimiter.setValue(recordDelimiter);
    outputProps.getDatasetProperties().fieldDelimiter.setValue(FieldDelimiterType.OTHER);
    outputProps.getDatasetProperties().specificFieldDelimiter.setValue(fieldDelimiter);
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.setDatasetProperties(outputProps.getDatasetProperties());

    List<IndexedRecord> actual = runRoundTripPipelines(beam, rs.getAllData(), outputProps, inputProps);

    // Generate the set of expected records. By default, CSV turns all columns into String and loses the
    // original column name.
    List<IndexedRecord> expected = rewriteRecordsWithCsvSchema(rs.getAllData());

    // Value under test first, expected values inside the matcher, so that a failure message labels
    // the two sides correctly (same argument order as the Parquet and Avro round-trip tests).
    assertThat(actual, containsInAnyOrder(expected.toArray()));

    // Verify that the file on the filesystem was correctly written.
    mini.assertReadFile(
            recordDelimiter,
            mini.getLocalFs(),
            fileSpec,
            rewriteRecordsAsCsvLines(
                    expected,
                    inputProps.getDatasetProperties().getRecordDelimiter(),
                    inputProps.getDatasetProperties().getFieldDelimiter()));
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIORoundTripRuntimeTest method testAvro.
/**
 * Basic Avro test: passes the simple test data through {@code runRoundTripPipelines} with the
 * output and input components both configured for the Avro format on the same path.
 *
 * @throws IOException declared for the file system and pipeline helpers used by this test.
 */
@Test
public void testAvro() throws IOException {
    // Source records and the folder that the output component will write into.
    RecordSet testData = getSimpleTestData(0);
    String outputFolder = mini.getLocalFsNewFolder() + "output/";

    // Output side: Avro format, written to the folder above.
    SimpleFileIOOutputProperties writerProps = createOutputComponentProperties();
    writerProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    writerProps.getDatasetProperties().path.setValue(outputFolder);

    // Input side: shares the writer's dataset properties, so it uses the same format and path.
    SimpleFileIOInputProperties readerProps = createInputComponentProperties();
    readerProps.setDatasetProperties(writerProps.getDatasetProperties());

    List<IndexedRecord> roundTripped =
            runRoundTripPipelines(beam, testData.getAllData(), writerProps, readerProps);

    // The records coming back are compared, in any order, against the unmodified test data.
    List<IndexedRecord> expected = testData.getAllData();
    Object[] expectedRecords = expected.toArray();
    assertThat(roundTripped, containsInAnyOrder(expectedRecords));

    // TODO(rskraba): verify the file written to the filesystem independently of the input component.
}
Aggregations