Use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in the project "components" by Talend.
From the class SimpleFileIORoundTripRuntimeTest, method testBasicDefaults.
/**
 * Round-trip test that leaves every option at its default value except for the path. The test data
 * and both component property sets are handed to {@code runRoundTripPipelines}, and the file found
 * at the target location on the local filesystem is verified afterwards.
 */
@Test
public void testBasicDefaults() throws IOException {
    // Source records for the round trip, and the folder they are written to.
    RecordSet testData = getSimpleTestData(0);
    String outputFolder = mini.getLocalFsNewFolder() + "output/";

    // Point the output component at the folder; the input component is given the dataset
    // properties obtained from the output component.
    SimpleFileIOOutputProperties writerProps = createOutputComponentProperties();
    writerProps.getDatasetProperties().path.setValue(outputFolder);
    SimpleFileIOInputProperties readerProps = createInputComponentProperties();
    readerProps.setDatasetProperties(writerProps.getDatasetProperties());

    List<IndexedRecord> roundTripped =
            runRoundTripPipelines(beam, testData.getAllData(), writerProps, readerProps);

    // By default, CSV turns all columns into String and loses the original column name, so the
    // source records are rewritten with the CSV schema before being compared.
    List<IndexedRecord> csvView = rewriteRecordsWithCsvSchema(testData.getAllData());
    assertThat(csvView, containsInAnyOrder(roundTripped.toArray()));

    // The file on the filesystem must hold the same records, rendered as CSV lines with the
    // delimiters configured on the dataset.
    mini.assertReadFile(
            mini.getLocalFs(),
            outputFolder,
            rewriteRecordsAsCsvLines(
                    csvView,
                    readerProps.getDatasetProperties().getRecordDelimiter(),
                    readerProps.getDatasetProperties().getFieldDelimiter()));
}
Use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in the project "components" by Talend.
From the class SimpleFileIOInputRuntimeTest, method testBasicCsvLimit.
/**
 * Reads a generated CSV file with the input component's limit set to 2 and expects the pipeline to
 * produce the records built from the first two generated lines.
 */
@Test
public void testBasicCsvLimit() throws IOException, URISyntaxException {
    final String csvPath = "/user/test/input.csv";
    final int maxRecords = 2;

    // Generate the input file; the returned text is used below to build the expected records.
    String csvContent = writeRandomCsvFile(mini.getFs(), csvPath, 0, 0, 10, 10, 6, ";", "\n");
    String csvUri = mini.getFs().getUri().resolve(csvPath).toString();

    // Component configuration: the location of the file plus the record limit.
    SimpleFileIOInputProperties readerProps = createInputComponentProperties();
    readerProps.getDatasetProperties().path.setValue(csvUri);
    readerProps.limit.setValue(maxRecords);

    // Runtime under test.
    SimpleFileIOInputRuntime readerRuntime = new SimpleFileIOInputRuntime();
    readerRuntime.initialize(null, readerProps);

    // Apply the runtime in a pipeline created by the beam fixture.
    // TODO(rskraba): This fails for certain values of targetParallelism! To fix.
    final Pipeline pipeline = beam.createPipeline(1);
    PCollection<IndexedRecord> output = pipeline.apply(readerRuntime);

    // Convert every generated line, then keep only as many records as the limit allows.
    String[] csvLines = csvContent.split("\n");
    List<IndexedRecord> allRecords = new ArrayList<>();
    for (int i = 0; i < csvLines.length; i++) {
        String[] fields = csvLines[i].split(";");
        allRecords.add(ConvertToIndexedRecord.convertToAvro(fields));
    }
    List<IndexedRecord> firstRecords = allRecords.subList(0, maxRecords);

    PAssert.that(output).containsInAnyOrder(firstRecords);
    pipeline.run().waitUntilFinish();
}
Use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in the project "components" by Talend.
From the class SparkSimpleFileIOInputRuntimeTestIT, method testBasicDefaults.
/**
 * Basic test using all default values (except for the path): a generated CSV file is read through
 * a pipeline created by the {@code spark} fixture, and every generated line is expected back as a
 * record.
 */
@Category(ValidatesRunner.class)
@Test
public void testBasicDefaults() throws IOException {
    // The filesystem comes from the Hadoop configuration supplied by the spark fixture.
    FileSystem hadoopFs = FileSystem.get(spark.createHadoopConfiguration());
    String csvContent = writeRandomCsvFile(hadoopFs, "/tmp/test/input.csv", 0, 0, 10, 10, 6, ";", "\n");
    String csvUri = hadoopFs.getUri().resolve("/tmp/test/input.csv").toString();

    // Component configuration: only the path is set explicitly.
    SimpleFileIOInputProperties readerProps = SimpleFileIOInputRuntimeTest.createInputComponentProperties();
    readerProps.getDatasetProperties().path.setValue(csvUri);

    // Runtime under test.
    SimpleFileIOInputRuntime readerRuntime = new SimpleFileIOInputRuntime();
    readerRuntime.initialize(null, readerProps);

    // Apply the runtime in a pipeline obtained from the spark fixture.
    final Pipeline pipeline = spark.createPipeline();
    PCollection<IndexedRecord> output = pipeline.apply(readerRuntime);

    // One expected record per generated line, with fields split on the ';' delimiter.
    String[] csvLines = csvContent.split("\n");
    List<IndexedRecord> wanted = new ArrayList<>();
    for (int i = 0; i < csvLines.length; i++) {
        String[] fields = csvLines[i].split(";");
        wanted.add(ConvertToIndexedRecord.convertToAvro(fields));
    }
    PAssert.that(output).containsInAnyOrder(wanted);

    // Execute the pipeline and wait for it to finish.
    pipeline.run().waitUntilFinish();
}
Use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in the project "components" by Talend.
From the class GSRoundTripRuntimeTestIT, method createInputProps.
/**
 * Builds input component properties for the test.
 *
 * @return a newly created and initialized {@code SimpleFileIOInputProperties} whose dataset
 *         properties come from {@code SimpleFileIODatasetRuntimeTest.createDatasetProperties()}.
 */
private SimpleFileIOInputProperties createInputProps() {
    final SimpleFileIOInputProperties props = new SimpleFileIOInputProperties(null);
    props.init();
    // Attach the shared test dataset definition after initialization.
    props.setDatasetProperties(SimpleFileIODatasetRuntimeTest.createDatasetProperties());
    return props;
}
Use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in the project "components" by Talend.
From the class SimpleFileIODatasetRuntime, method getSample.
/**
 * Feeds a sample of the dataset's records to the given consumer by running an input component in
 * a local pipeline.
 *
 * @param limit the value set on the input component's limit property and passed to the sampling
 *        transform.
 * @param consumer receives the sampled records.
 * @throws TalendRuntimeException when the pipeline execution fails with such an exception as its
 *         cause; other pipeline execution failures are rethrown unchanged.
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Set up an input runtime from this dataset's properties.
    // NOTE(review): the limit is assigned before init() is called; confirm init() keeps the value.
    SimpleFileIOInputRuntime sampleRuntime = new SimpleFileIOInputRuntime();
    SimpleFileIOInputProperties sampleProps = new SimpleFileIOInputProperties(null);
    sampleProps.limit.setValue(limit);
    sampleProps.init();
    sampleProps.setDatasetProperties(properties);
    sampleRuntime.initialize(null, sampleProps);

    // Pipeline built with the local runner options.
    DirectOptions localOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(localOptions);

    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // Read, sample, and hand the sampled records to the collector.
        pipeline.apply(sampleRuntime)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        try {
            pipeline.run().waitUntilFinish();
        } catch (Pipeline.PipelineExecutionException e) {
            // Surface the component's own exception when it is the cause of the failure.
            Throwable cause = e.getCause();
            if (cause instanceof TalendRuntimeException) {
                throw (TalendRuntimeException) cause;
            }
            throw e;
        }
    }
}
Aggregations