Search in sources :

Example 6 with RecordSet

use of org.talend.components.test.RecordSet in project components by Talend.

the class SimpleFileIORoundTripRuntimeTest method testBasicDefaults.

/**
 * Basic unit test using all default values (except for the path) on an in-memory DFS cluster.
 *
 * Writes the simple test data with the output component, reads it back with the input component, and then checks
 * both the records that were read and the file that was written.
 */
@Test
public void testBasicDefaults() throws IOException {
    // The file that we will be creating.
    RecordSet rs = getSimpleTestData(0);
    String fileSpec = mini.getLocalFsNewFolder() + "output/";
    // Configure the components. The input reuses the output's dataset properties so it reads what was written.
    SimpleFileIOOutputProperties outputProps = createOutputComponentProperties();
    outputProps.getDatasetProperties().path.setValue(fileSpec);
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.setDatasetProperties(outputProps.getDatasetProperties());
    List<IndexedRecord> actual = runRoundTripPipelines(beam, rs.getAllData(), outputProps, inputProps);
    // Generate the set of expected records. By default, CSV turns all columns into String and loses the original
    // column name.
    List<IndexedRecord> expected = rewriteRecordsWithCsvSchema(rs.getAllData());
    // The value under test goes first and the matcher is built from the reference values, so that a failure
    // report labels the round-tripped records as the actual value rather than the expected one.
    assertThat(actual, containsInAnyOrder(expected.toArray()));
    // Verify that the file on the filesystem was correctly written.
    mini.assertReadFile(mini.getLocalFs(), fileSpec,
            rewriteRecordsAsCsvLines(expected, inputProps.getDatasetProperties().getRecordDelimiter(),
                    inputProps.getDatasetProperties().getFieldDelimiter()));
}
Also used : SimpleFileIOOutputProperties(org.talend.components.simplefileio.output.SimpleFileIOOutputProperties) ConvertToIndexedRecord(org.talend.components.adapter.beam.transform.ConvertToIndexedRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) SimpleFileIOInputProperties(org.talend.components.simplefileio.input.SimpleFileIOInputProperties) RecordSet(org.talend.components.test.RecordSet) Test(org.junit.Test)

Example 7 with RecordSet

use of org.talend.components.test.RecordSet in project components by Talend.

the class S3SparkRuntimeTestIT method test_noEncryption.

/**
 * Round-trips the simple test data through the S3 output and input components configured with the given dataset,
 * then checks the records read back, the dataset sample and the dataset schema against the original data.
 *
 * @param datasetProps the S3 dataset configuration shared by the output and input components.
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // Source records for the round trip.
    final RecordSet recordSet = getSimpleTestData(0);

    // Writer and reader both point at the dataset under test.
    final S3OutputProperties writerProps = new S3OutputProperties("out");
    writerProps.setDatasetProperties(datasetProps);
    final S3InputProperties readerProps = new S3InputProperties("in");
    readerProps.setDatasetProperties(datasetProps);

    // Write the records, read them back, and compare with the source data regardless of order.
    final List<IndexedRecord> roundTripped = runRoundTripPipelines(recordSet.getAllData(), writerProps, readerProps);
    final List<IndexedRecord> reference = recordSet.getAllData();
    assertThat(roundTripped, containsInAnyOrder(reference.toArray()));

    // The dataset sample must contain the same records.
    final List<IndexedRecord> sampled = getSample(datasetProps);
    assertThat(sampled, containsInAnyOrder(reference.toArray()));

    // The dataset schema must equal the schema of the first source record.
    final Schema datasetSchema = getSchema(datasetProps);
    assertEquals(reference.get(0).getSchema(), datasetSchema);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) Schema(org.apache.avro.Schema) RecordSet(org.talend.components.test.RecordSet) S3InputProperties(org.talend.components.simplefileio.s3.input.S3InputProperties)

Example 8 with RecordSet

use of org.talend.components.test.RecordSet in project components by Talend.

the class S3RoundTripRuntimeTestIT method test_noEncryption.

/**
 * Writes the simple test data to the given S3 dataset and reads it back, verifying that the round-tripped records,
 * the dataset sample and the dataset schema all agree with the original data.
 *
 * @param datasetProps the S3 dataset configuration used by both the output and the input component.
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // Data that will be written and then read back.
    final RecordSet sourceData = getSimpleTestData(0);

    // Set up the output and input components on the same dataset.
    final S3OutputProperties sinkProps = new S3OutputProperties("out");
    sinkProps.setDatasetProperties(datasetProps);
    final S3InputProperties sourceProps = new S3InputProperties("in");
    sourceProps.setDatasetProperties(datasetProps);

    // Run the write/read pipelines and compare the result with the source data, ignoring order.
    final List<IndexedRecord> readBack = runRoundTripPipelines(sourceData.getAllData(), sinkProps, sourceProps);
    final List<IndexedRecord> wanted = sourceData.getAllData();
    assertThat(readBack, containsInAnyOrder(wanted.toArray()));

    // Sampling the dataset must yield the same records.
    final List<IndexedRecord> sampleRecords = getSample(datasetProps);
    assertThat(sampleRecords, containsInAnyOrder(wanted.toArray()));

    // The schema reported for the dataset must match that of the first source record.
    final Schema reportedSchema = getSchema(datasetProps);
    assertEquals(wanted.get(0).getSchema(), reportedSchema);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) Schema(org.apache.avro.Schema) RecordSet(org.talend.components.test.RecordSet) S3InputProperties(org.talend.components.simplefileio.s3.input.S3InputProperties)

Example 9 with RecordSet

use of org.talend.components.test.RecordSet in project components by Talend.

the class SimpleFileIODatasetRuntimeTest method testGetSampleCsv_multipleSources.

/**
 * Writes three CSV part files into one folder and checks that sampling the folder is capped at the requested limit,
 * both on the first call and on a repeated call against the same runtime.
 */
@Test
public void testGetSampleCsv_multipleSources() throws Exception {
    // Three part files under the same input folder.
    RecordSet firstPart = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), "/user/test/input/part-00000", firstPart);
    RecordSet secondPart = getSimpleTestData(100);
    writeRandomCsvFile(mini.getFs(), "/user/test/input/part-00001", secondPart);
    RecordSet thirdPart = getSimpleTestData(100);
    writeRandomCsvFile(mini.getFs(), "/user/test/input/part-00002", thirdPart);
    String inputFolder = mini.getFs().getUri().resolve("/user/test/input/").toString();

    // Point a CSV dataset at the folder.
    SimpleFileIODatasetProperties datasetProps = createDatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.CSV);
    datasetProps.path.setValue(inputFolder);

    // Build the runtime under test.
    SimpleFileIODatasetRuntime datasetRuntime = new SimpleFileIODatasetRuntime();
    datasetRuntime.initialize(null, datasetProps);

    // Sample twice with the same runtime: the first pass checks that the result is limited (15 out of 30
    // records), the second pass verifies that the static state is not retained between calls.
    for (int pass = 0; pass < 2; pass++) {
        final List<IndexedRecord> sampled = new ArrayList<>();
        datasetRuntime.getSample(15, new Consumer<IndexedRecord>() {

            @Override
            public void accept(IndexedRecord record) {
                sampled.add(record);
            }
        });
        assertThat(sampled, hasSize(15));
    }
}
Also used : Consumer(org.talend.daikon.java8.Consumer) IndexedRecord(org.apache.avro.generic.IndexedRecord) ArrayList(java.util.ArrayList) List(java.util.List) RecordSet(org.talend.components.test.RecordSet) SimpleFileIODatasetProperties(org.talend.components.simplefileio.SimpleFileIODatasetProperties) Test(org.junit.Test)

Example 10 with RecordSet

use of org.talend.components.test.RecordSet in project components by Talend.

the class SimpleFileIODatasetRuntimeTest method testGetSampleCsv.

/**
 * Writes a single CSV file and checks that sampling it through the dataset runtime returns every record when the
 * requested limit is larger than the file.
 */
@Test
public void testGetSampleCsv() throws Exception {
    // Create the CSV file to sample from.
    RecordSet sourceData = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), "/user/test/input.csv", sourceData);
    String csvLocation = mini.getFs().getUri().resolve("/user/test/input.csv").toString();

    // Point a CSV dataset at the file.
    SimpleFileIODatasetProperties datasetProps = createDatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.CSV);
    datasetProps.path.setValue(csvLocation);

    // Build the runtime under test.
    SimpleFileIODatasetRuntime datasetRuntime = new SimpleFileIODatasetRuntime();
    datasetRuntime.initialize(null, datasetProps);

    // Collect everything the runtime hands back for a limit of 100.
    final List<IndexedRecord> collected = new ArrayList<>();
    Consumer<IndexedRecord> collector = new Consumer<IndexedRecord>() {

        @Override
        public void accept(IndexedRecord record) {
            collected.add(record);
        }
    };
    datasetRuntime.getSample(100, collector);

    // Only the number of sampled records is verified; the content comparison below is disabled.
    assertThat(collected, hasSize(10));
    // assertThat(actual, (Matcher) equalTo(rs.getAllData()));
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) ArrayList(java.util.ArrayList) RecordSet(org.talend.components.test.RecordSet) SimpleFileIODatasetProperties(org.talend.components.simplefileio.SimpleFileIODatasetProperties) Test(org.junit.Test)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord): 11; RecordSet (org.talend.components.test.RecordSet): 11; Test (org.junit.Test): 9; ConvertToIndexedRecord (org.talend.components.adapter.beam.transform.ConvertToIndexedRecord): 6; SimpleFileIOInputProperties (org.talend.components.simplefileio.input.SimpleFileIOInputProperties): 6; SimpleFileIOOutputProperties (org.talend.components.simplefileio.output.SimpleFileIOOutputProperties): 4; ArrayList (java.util.ArrayList): 3; SimpleFileIODatasetProperties (org.talend.components.simplefileio.SimpleFileIODatasetProperties): 3; Schema (org.apache.avro.Schema): 2; Pipeline (org.apache.beam.sdk.Pipeline): 2; S3InputProperties (org.talend.components.simplefileio.s3.input.S3InputProperties): 2; S3OutputProperties (org.talend.components.simplefileio.s3.output.S3OutputProperties): 2; List (java.util.List): 1; Ignore (org.junit.Ignore): 1; Consumer (org.talend.daikon.java8.Consumer): 1