use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIORoundTripRuntimeTest method testBasicDefaults.
/**
 * Round-trip test that keeps every component setting at its default, apart from the dataset path.
 *
 * <p>The test data is written through the output component and read back through the input component
 * (both share the same dataset properties). The records read back are compared with the CSV-rewritten
 * source data, and the written file is then checked directly on the filesystem obtained from
 * {@code mini.getLocalFs()}.
 *
 * @throws IOException declared for the helpers invoked by this test
 */
@Test
public void testBasicDefaults() throws IOException {
    // Source data, and the folder that the output component will write into.
    RecordSet sourceData = getSimpleTestData(0);
    String outputFolder = mini.getLocalFsNewFolder() + "output/";

    // Output side: defaults everywhere, only the path is set.
    SimpleFileIOOutputProperties sinkProps = createOutputComponentProperties();
    sinkProps.getDatasetProperties().path.setValue(outputFolder);

    // Input side: reuse the very same dataset properties so it reads what was just written.
    SimpleFileIOInputProperties sourceProps = createInputComponentProperties();
    sourceProps.setDatasetProperties(sinkProps.getDatasetProperties());

    List<IndexedRecord> roundTripped =
            runRoundTripPipelines(beam, sourceData.getAllData(), sinkProps, sourceProps);

    // With the default (CSV) settings every column becomes a String and the original column names
    // are lost, so the expectation is built from the rewritten records.
    List<IndexedRecord> csvExpected = rewriteRecordsWithCsvSchema(sourceData.getAllData());
    // NOTE(review): the expected list is the asserted value and the round-tripped records feed the
    // matcher; this argument order is kept as-is -- confirm before swapping to the usual convention.
    assertThat(csvExpected, containsInAnyOrder(roundTripped.toArray()));

    // Finally, check the content of the file that landed on the filesystem.
    mini.assertReadFile(
            mini.getLocalFs(),
            outputFolder,
            rewriteRecordsAsCsvLines(
                    csvExpected,
                    sourceProps.getDatasetProperties().getRecordDelimiter(),
                    sourceProps.getDatasetProperties().getFieldDelimiter()));
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class S3SparkRuntimeTestIT method test_noEncryption.
/**
 * Writes the simple test data with an S3 output component, reads it back with an S3 input component
 * configured on the same dataset, and then checks the round-tripped records, the dataset sample and
 * the dataset schema against the source data.
 *
 * @param datasetProps dataset properties shared by the output and the input component
 * @throws IOException declared for the helpers invoked by this test
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // Data to push through the round trip.
    RecordSet sourceData = getSimpleTestData(0);

    // Both components point at the same dataset.
    S3OutputProperties sinkProps = new S3OutputProperties("out");
    sinkProps.setDatasetProperties(datasetProps);
    S3InputProperties sourceProps = new S3InputProperties("in");
    sourceProps.setDatasetProperties(datasetProps);

    List<IndexedRecord> roundTripped =
            runRoundTripPipelines(sourceData.getAllData(), sinkProps, sourceProps);
    List<IndexedRecord> reference = sourceData.getAllData();
    assertThat(roundTripped, containsInAnyOrder(reference.toArray()));

    // The dataset sample must hold the same records, in any order.
    List<IndexedRecord> sampled = getSample(datasetProps);
    assertThat(sampled, containsInAnyOrder(reference.toArray()));

    // The dataset schema must equal the schema carried by the first source record.
    Schema datasetSchema = getSchema(datasetProps);
    assertEquals(reference.get(0).getSchema(), datasetSchema);
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class S3RoundTripRuntimeTestIT method test_noEncryption.
/**
 * Round-trips the simple test data through an S3 output and an S3 input component that share the
 * given dataset, then verifies three things against the source records: the records read back, the
 * sample returned for the dataset, and the schema returned for the dataset.
 *
 * @param datasetProps dataset properties applied to both components
 * @throws IOException declared for the helpers invoked by this test
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // Records that will be written and read back.
    RecordSet testData = getSimpleTestData(0);

    // Writer and reader are bound to the same dataset properties.
    S3OutputProperties writerProps = new S3OutputProperties("out");
    writerProps.setDatasetProperties(datasetProps);
    S3InputProperties readerProps = new S3InputProperties("in");
    readerProps.setDatasetProperties(datasetProps);

    List<IndexedRecord> readBack =
            runRoundTripPipelines(testData.getAllData(), writerProps, readerProps);
    List<IndexedRecord> wanted = testData.getAllData();
    assertThat(readBack, containsInAnyOrder(wanted.toArray()));

    // Sampling the dataset must yield the same records (order is irrelevant).
    List<IndexedRecord> sampleRecords = getSample(datasetProps);
    assertThat(sampleRecords, containsInAnyOrder(wanted.toArray()));

    // The schema reported for the dataset must match the schema of the first source record.
    Schema reportedSchema = getSchema(datasetProps);
    assertEquals(wanted.get(0).getSchema(), reportedSchema);
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIODatasetRuntimeTest method testGetSampleCsv_multipleSources.
/**
 * Samples a CSV dataset whose path is a folder containing three part files and checks that the
 * sample holds exactly the 15 requested records, on two consecutive calls.
 *
 * @throws Exception declared for the helpers invoked by this test
 */
@Test
public void testGetSampleCsv_multipleSources() throws Exception {
    // Three part files written into the same input folder.
    RecordSet partZero = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), "/user/test/input/part-00000", partZero);
    RecordSet partOne = getSimpleTestData(100);
    writeRandomCsvFile(mini.getFs(), "/user/test/input/part-00001", partOne);
    RecordSet partTwo = getSimpleTestData(100);
    writeRandomCsvFile(mini.getFs(), "/user/test/input/part-00002", partTwo);
    String inputFolder = mini.getFs().getUri().resolve("/user/test/input/").toString();

    // Dataset: CSV format, pointing at the folder rather than at a single file.
    SimpleFileIODatasetProperties datasetProps = createDatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.CSV);
    datasetProps.path.setValue(inputFolder);

    SimpleFileIODatasetRuntime datasetRuntime = new SimpleFileIODatasetRuntime();
    datasetRuntime.initialize(null, datasetProps);

    // Two identical sampling passes on the same runtime: the first checks that the sample is
    // limited to the requested size (presumably the three files hold more than 15 records in
    // total -- verify against the test data helper), the second checks that no static state
    // left over from the first call affects the result.
    for (int pass = 0; pass < 2; pass++) {
        final List<IndexedRecord> sampled = new ArrayList<>();
        datasetRuntime.getSample(15, new Consumer<IndexedRecord>() {
            @Override
            public void accept(IndexedRecord record) {
                sampled.add(record);
            }
        });
        assertThat(sampled, hasSize(15));
    }
}
use of org.talend.components.test.RecordSet in project components by Talend.
the class SimpleFileIODatasetRuntimeTest method testGetSampleCsv.
/**
 * Samples a CSV dataset backed by a single file and checks the number of records returned when the
 * requested limit (100) is larger than the 10 records the test expects back.
 *
 * @throws Exception declared for the helpers invoked by this test
 */
@Test
public void testGetSampleCsv() throws Exception {
    // One CSV file holding the simple test data.
    RecordSet sourceData = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), "/user/test/input.csv", sourceData);
    String inputFile = mini.getFs().getUri().resolve("/user/test/input.csv").toString();

    // Dataset: CSV format, pointing at that file.
    SimpleFileIODatasetProperties datasetProps = createDatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.CSV);
    datasetProps.path.setValue(inputFile);

    SimpleFileIODatasetRuntime datasetRuntime = new SimpleFileIODatasetRuntime();
    datasetRuntime.initialize(null, datasetProps);

    // Collect every record handed to the consumer by the sampling call.
    final List<IndexedRecord> sampled = new ArrayList<>();
    datasetRuntime.getSample(100, new Consumer<IndexedRecord>() {
        @Override
        public void accept(IndexedRecord record) {
            sampled.add(record);
        }
    });

    // Only the size of the sample is verified here; the record contents are not compared with
    // the source data.
    assertThat(sampled, hasSize(10));
}
Aggregations