Search in sources:

Example 1 with S3InputProperties

use of org.talend.components.simplefileio.s3.input.S3InputProperties in project components by Talend.

In the class S3DatasetRuntime, the method getSample:

/**
 * Reads a sample of at most {@code limit} records from the dataset and hands them to
 * {@code consumer}.
 *
 * An S3 input runtime is configured from this dataset's properties and executed in a
 * pipeline built with the options supplied by {@code BeamLocalRunnerOption}.
 *
 * @param limit maximum number of records to sample; also set as the input's limit
 * @param consumer destination wrapped by the collector that ends the pipeline
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Configure an input component that reads from this dataset.
    S3InputRuntime runtime = new S3InputRuntime();
    S3InputProperties inputProps = new S3InputProperties(null);
    inputProps.limit.setValue(limit);
    inputProps.init();
    inputProps.setDatasetProperties(properties);
    runtime.initialize(null, inputProps);

    // Build the pipeline that will run the input component.
    DirectOptions directOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(directOptions);

    // The collector is a resource: it is closed once the pipeline has finished.
    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // read -> keep any `limit` records -> forward to the collector
        pipeline
                .apply(runtime)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        pipeline.run().waitUntilFinish();
    }
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) S3InputProperties(org.talend.components.simplefileio.s3.input.S3InputProperties) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Pipeline(org.apache.beam.sdk.Pipeline)

Example 2 with S3InputProperties

use of org.talend.components.simplefileio.s3.input.S3InputProperties in project components by Talend.

In the class S3SparkRuntimeTestIT, the method test_noEncryption:

/**
 * Writes the simple test data through the output component, reads it back through the
 * input component, and checks the round-tripped records, the dataset sample and the
 * dataset schema against the original data.
 *
 * @param datasetProps dataset configuration shared by the output and input components
 * @throws IOException declared for the helper calls made by this test
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // Test records that will be written to the file.
    RecordSet testData = getSimpleTestData(0);

    // Output and input components both target the same dataset.
    S3OutputProperties sink = new S3OutputProperties("out");
    sink.setDatasetProperties(datasetProps);
    S3InputProperties source = new S3InputProperties("in");
    source.setDatasetProperties(datasetProps);

    // Round trip: everything written must be read back, in any order.
    List<IndexedRecord> roundTripped = runRoundTripPipelines(testData.getAllData(), sink, source);
    List<IndexedRecord> expected = testData.getAllData();
    Object[] expectedAfterRoundTrip = expected.toArray();
    assertThat(roundTripped, containsInAnyOrder(expectedAfterRoundTrip));

    // The dataset sample must contain the same records.
    List<IndexedRecord> sampled = getSample(datasetProps);
    Object[] expectedInSample = expected.toArray();
    assertThat(sampled, containsInAnyOrder(expectedInSample));

    // The dataset schema must match the schema of the written records.
    Schema datasetSchema = getSchema(datasetProps);
    assertEquals(expected.get(0).getSchema(), datasetSchema);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) Schema(org.apache.avro.Schema) RecordSet(org.talend.components.test.RecordSet) S3InputProperties(org.talend.components.simplefileio.s3.input.S3InputProperties)

Example 3 with S3InputProperties

use of org.talend.components.simplefileio.s3.input.S3InputProperties in project components by Talend.

In the class S3RoundTripRuntimeTestIT, the method test_noEncryption:

/**
 * Round-trip test: the simple test data is written with the output component and read
 * back with the input component. The records read back, the sample taken from the
 * dataset and the dataset's schema are each compared with the original data.
 *
 * @param datasetProps dataset configuration applied to both components
 * @throws IOException declared for the helper calls made by this test
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // Source data for the file that gets created.
    RecordSet recordSet = getSimpleTestData(0);

    // Wire the writer and the reader to the dataset under test.
    S3OutputProperties writerProps = new S3OutputProperties("out");
    writerProps.setDatasetProperties(datasetProps);
    S3InputProperties readerProps = new S3InputProperties("in");
    readerProps.setDatasetProperties(datasetProps);

    // Write then read; order of the records is not significant.
    List<IndexedRecord> readBack =
            runRoundTripPipelines(recordSet.getAllData(), writerProps, readerProps);
    List<IndexedRecord> written = recordSet.getAllData();
    assertThat(readBack, containsInAnyOrder(written.toArray()));

    // Sampling the dataset yields the same records.
    List<IndexedRecord> sampleRecords = getSample(datasetProps);
    assertThat(sampleRecords, containsInAnyOrder(written.toArray()));

    // Schema reported for the dataset equals the schema of the first written record.
    Schema reportedSchema = getSchema(datasetProps);
    Schema writtenSchema = written.get(0).getSchema();
    assertEquals(writtenSchema, reportedSchema);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) Schema(org.apache.avro.Schema) RecordSet(org.talend.components.test.RecordSet) S3InputProperties(org.talend.components.simplefileio.s3.input.S3InputProperties)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord): 3, S3InputProperties (org.talend.components.simplefileio.s3.input.S3InputProperties): 3, Schema (org.apache.avro.Schema): 2, S3OutputProperties (org.talend.components.simplefileio.s3.output.S3OutputProperties): 2, RecordSet (org.talend.components.test.RecordSet): 2, DirectOptions (org.apache.beam.runners.direct.DirectOptions): 1, Pipeline (org.apache.beam.sdk.Pipeline): 1