use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicCSV_changeSeparator2.
/**
 * Reads a tab-separated file whose records end with a carriage return from the in-memory DFS
 * cluster, with the record and field delimiters overridden on the dataset, and asserts that each
 * line of the generated content comes back as one record.
 */
@Test
public void testBasicCSV_changeSeparator2() throws IOException, URISyntaxException {
    final String fieldSeparator = "\t";
    final String recordSeparator = "\r";
    final String csvPath = "/user/test/input.csv";

    // Generate the input file; the returned string is split below to build the expectation.
    String generatedContent = writeRandomCsvFile(mini.getFs(), csvPath, 0, 0, 10, 10, 6, fieldSeparator,
            recordSeparator);
    String inputUri = mini.getFs().getUri().resolve(csvPath).toString();

    // Point the component at the file and declare the non-default delimiters.
    SimpleFileIOInputProperties properties = createInputComponentProperties();
    properties.getDatasetProperties().path.setValue(inputUri);
    properties.getDatasetProperties().recordDelimiter.setValue(RecordDelimiterType.CR);
    properties.getDatasetProperties().fieldDelimiter.setValue(FieldDelimiterType.TABULATION);

    // Build the runtime under test.
    SimpleFileIOInputRuntime inputRuntime = new SimpleFileIOInputRuntime();
    inputRuntime.initialize(null, properties);

    // Apply the runtime as the source of a direct pipeline.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> actualRecords = pipeline.apply(inputRuntime);

    // One expected record per generated line, one field per tab-separated token.
    List<IndexedRecord> expectedRecords = new ArrayList<>();
    String[] generatedLines = generatedContent.split(recordSeparator);
    for (int i = 0; i < generatedLines.length; i++) {
        String[] fields = generatedLines[i].split(fieldSeparator);
        expectedRecords.add(ConvertToIndexedRecord.convertToAvro(fields));
    }
    PAssert.that(actualRecords).containsInAnyOrder(expectedRecords);

    // Execute the pipeline and block until the assertions have been evaluated.
    pipeline.run().waitUntilFinish();
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicCsvFormatting.
/**
 * Writes every possible input line of the three-column CSV examples to a single file and asserts
 * that reading it with the default dataset settings yields the example values once per line.
 */
@Test
public void testBasicCsvFormatting() throws IOException, URISyntaxException {
    final String csvPath = "/user/test/input.csv";

    // Collect the input lines and, in lockstep, the record each one is expected to produce.
    List<IndexedRecord> expectedRecords = new ArrayList<>();
    List<String> inputLines = new ArrayList<>();
    for (CsvExample example : CsvExample.getCsvExamples()) {
        // Examples that do not share the 3-column schema are skipped.
        if (example.getValues().length != 3) {
            continue;
        }
        for (String line : example.getPossibleInputLines()) {
            inputLines.add(line);
            expectedRecords.add(ConvertToIndexedRecord.convertToAvro(example.getValues()));
        }
    }
    String[] fileContent = inputLines.toArray(new String[0]);
    mini.writeFile(mini.getFs(), csvPath, fileContent);
    String inputUri = mini.getFs().getUri().resolve(csvPath).toString();

    // Only the path is configured on the component.
    SimpleFileIOInputProperties properties = createInputComponentProperties();
    properties.getDatasetProperties().path.setValue(inputUri);

    // Build the runtime under test.
    SimpleFileIOInputRuntime inputRuntime = new SimpleFileIOInputRuntime();
    inputRuntime.initialize(null, properties);

    // Apply the runtime as the source of a direct pipeline and compare the output.
    final Pipeline pipeline = beam.createPipeline(3);
    PCollection<IndexedRecord> actualRecords = pipeline.apply(inputRuntime);
    PAssert.that(actualRecords).containsInAnyOrder(expectedRecords);
    pipeline.run().waitUntilFinish();
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicParquet.
/**
 * Placeholder for a Parquet read test on the in-memory DFS cluster; currently disabled
 * ("To implement.").
 */
@Ignore("To implement.")
@Test
public void testBasicParquet() throws IOException, URISyntaxException {
    final String inputPath = "/user/test/input.avro";

    // NOTE(review): the fixture is produced by the Avro writer helper under an ".avro" name while
    // the dataset format below is PARQUET - presumably to be replaced when the test is implemented.
    RecordSet testData = getSimpleTestData(0);
    writeRandomAvroFile(mini.getFs(), inputPath, testData);
    String inputUri = mini.getFs().getUri().resolve(inputPath).toString();

    // Select the Parquet format and point the component at the file.
    SimpleFileIOInputProperties properties = createInputComponentProperties();
    properties.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    properties.getDatasetProperties().path.setValue(inputUri);

    // Build the runtime under test.
    SimpleFileIOInputRuntime inputRuntime = new SimpleFileIOInputRuntime();
    inputRuntime.initialize(null, properties);

    // Apply the runtime as the source of a direct pipeline.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> actualRecords = pipeline.apply(inputRuntime);

    // The records read must match the generated data, in any order.
    PAssert.that(actualRecords).containsInAnyOrder(testData.getAllData());

    // Execute the pipeline and block until the assertions have been evaluated.
    pipeline.run().waitUntilFinish();
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicDefaultsInvalidColumnNumber.
/**
 * Reads a file that is meant to have 32 columns, although some of its lines do not contain 32
 * columns, and asserts that every line is returned as a record of its ';'-separated fields.
 *
 * <p>The test is currently disabled because of encoding trouble.
 *
 * @throws IOException if the resource cannot be copied to the mini cluster or read back
 */
@Ignore("Encoding trouble in the test.")
@Test
public void testBasicDefaultsInvalidColumnNumber() throws IOException {
    final String resourceName = "invalidColumnNumber.txt";
    final String dfsPath = "/user/test/invalidColumnNumber.txt";

    // Copy the classpath resource onto the mini cluster; both streams are closed on exit.
    try (InputStream in = getClass().getResourceAsStream(resourceName);
            OutputStream inOnMinFS = mini.getFs().create(new Path(dfsPath))) {
        inOnMinFS.write(IOUtils.toByteArray(in));
    }
    String fileSpec = mini.getFs().getUri().resolve(dfsPath).toString();

    // Configure the component.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.getDatasetProperties().path.setValue(fileSpec);

    // Create the runtime.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> readLines = p.apply(runtime);

    // Build the expected values by re-reading the resource line by line.
    // NOTE(review): the reader decodes with the platform default charset; this may be related to
    // the "Encoding trouble" that keeps the test ignored - confirm the resource encoding and pass
    // it explicitly.
    List<IndexedRecord> expected = new ArrayList<>();
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(getClass().getResourceAsStream(resourceName)))) {
        String currentLine;
        while ((currentLine = br.readLine()) != null) {
            expected.add(ConvertToIndexedRecord.convertToAvro(currentLine.split(";")));
        }
    }
    PAssert.that(readLines).containsInAnyOrder(expected);

    // And run the test.
    p.run().waitUntilFinish();
}
use of org.talend.components.simplefileio.input.SimpleFileIOInputProperties in project components by Talend.
the class SimpleFileIOInputRuntimeTest method testBasicCSV_changeSeparator.
/**
 * Reads a space-separated file whose records end with CRLF from the in-memory DFS cluster, with
 * the record and field delimiters overridden on the dataset, and asserts that each line of the
 * generated content comes back as one record.
 */
@Test
public void testBasicCSV_changeSeparator() throws IOException, URISyntaxException {
    final String fieldSeparator = " ";
    final String recordSeparator = "\r\n";
    final String csvPath = "/user/test/input.csv";

    // Generate the input file; the returned string is split below to build the expectation.
    String generatedContent = writeRandomCsvFile(mini.getFs(), csvPath, 0, 0, 10, 10, 6, fieldSeparator,
            recordSeparator);
    String inputUri = mini.getFs().getUri().resolve(csvPath).toString();

    // Point the component at the file and declare the non-default delimiters.
    SimpleFileIOInputProperties properties = createInputComponentProperties();
    properties.getDatasetProperties().path.setValue(inputUri);
    properties.getDatasetProperties().recordDelimiter.setValue(RecordDelimiterType.CRLF);
    properties.getDatasetProperties().fieldDelimiter.setValue(FieldDelimiterType.SPACE);

    // Build the runtime under test.
    SimpleFileIOInputRuntime inputRuntime = new SimpleFileIOInputRuntime();
    inputRuntime.initialize(null, properties);

    // Apply the runtime as the source of a direct pipeline.
    final Pipeline pipeline = beam.createPipeline();
    PCollection<IndexedRecord> actualRecords = pipeline.apply(inputRuntime);

    // One expected record per generated line, one field per space-separated token.
    List<IndexedRecord> expectedRecords = new ArrayList<>();
    String[] generatedLines = generatedContent.split(recordSeparator);
    for (int i = 0; i < generatedLines.length; i++) {
        String[] fields = generatedLines[i].split(fieldSeparator);
        expectedRecords.add(ConvertToIndexedRecord.convertToAvro(fields));
    }
    PAssert.that(actualRecords).containsInAnyOrder(expectedRecords);

    // Execute the pipeline and block until the assertions have been evaluated.
    pipeline.run().waitUntilFinish();
}
Aggregations