Search in sources :

Example 11 with S3DatasetProperties

Use of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend components project.

From the class S3DatasetRuntimeTestIT, the method listBuckets:

@Test
@Ignore("It's slow (10 or more mins), our account doesn't allow to create this amount of buckets")
public void listBuckets() {
    // Random suffix so concurrently running builds do not collide on bucket names.
    String suffix = UUID.randomUUID().toString().substring(0, 8);
    String nameTemplate = "tcomp-s3-dataset-test-%s-" + suffix;
    S3DatasetProperties datasetProps = s3.createS3DatasetProperties();
    runtime.initialize(null, datasetProps);
    AmazonS3 client = S3Connection.createClient(s3.createS3DatastoreProperties());
    for (S3Region region : getTestableS3Regions()) {
        String bucketName = String.format(nameTemplate, region.getValue());
        client.setEndpoint(region.toEndpoint());
        // US_EAST_1 uses the single-argument overload; other regions pass an explicit region value.
        if (region.equals(S3Region.US_EAST_1)) {
            client.createBucket(bucketName);
        } else {
            client.createBucket(bucketName, region.getValue());
        }
        // Point the dataset at this region and verify the runtime sees the new bucket.
        datasetProps.region.setValue(region);
        Set<String> bucketNames = runtime.listBuckets();
        assertTrue(bucketNames.size() > 0);
        assertThat(bucketNames, hasItems(bucketName));
        // Clean up the bucket created for this region.
        client.setEndpoint(region.toEndpoint());
        client.deleteBucket(bucketName);
    }
}
Also used : AmazonS3(com.talend.shaded.com.amazonaws.services.s3.AmazonS3) S3DatasetProperties(org.talend.components.simplefileio.s3.S3DatasetProperties) S3Region(org.talend.components.simplefileio.s3.S3Region) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 12 with S3DatasetProperties

Use of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend components project.

From the class S3OutputRuntimeTestIT, the method testCsv_merge:

@Test
public void testCsv_merge() throws IOException {
    // Dataset configured for CSV with LF record delimiter and semicolon field delimiter.
    S3DatasetProperties dataset = s3.createS3DatasetProperties();
    dataset.format.setValue(SimpleFileIOFormat.CSV);
    dataset.recordDelimiter.setValue(SimpleFileIODatasetProperties.RecordDelimiterType.LF);
    dataset.fieldDelimiter.setValue(SimpleFileIODatasetProperties.FieldDelimiterType.SEMICOLON);

    // Output component with merge enabled, so part files collapse into a single output file.
    S3OutputProperties props = new S3OutputProperties("out");
    props.init();
    props.setDatasetProperties(dataset);
    props.mergeOutput.setValue(true);

    // Create the runtime.
    S3OutputRuntime outputRuntime = new S3OutputRuntime();
    outputRuntime.initialize(null, props);

    // Drive two records through a Spark pipeline and into the output.
    final Pipeline pipeline = spark.createPipeline();
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(
            ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }),
            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    records.apply(outputRuntime);
    pipeline.run().waitUntilFinish();

    // The merged result must hold both rows in exactly one file.
    FileSystem fs = S3Connection.createFileSystem(dataset);
    MiniDfsResource.assertReadFile(fs, s3.getS3APath(dataset), "1;one", "2;two");
    MiniDfsResource.assertFileNumber(fs, s3.getS3APath(dataset), 1);
}
Also used : S3DatasetProperties(org.talend.components.simplefileio.s3.S3DatasetProperties) ConvertToIndexedRecord(org.talend.components.adapter.beam.transform.ConvertToIndexedRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) FileSystem(org.apache.hadoop.fs.FileSystem) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 13 with S3DatasetProperties

Use of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend components project.

From the class S3OutputRuntimeTestIT, the method testAvro_merge:

@Test
public void testAvro_merge() throws IOException {
    // Dataset configured for Avro output.
    S3DatasetProperties dataset = s3.createS3DatasetProperties();
    dataset.format.setValue(SimpleFileIOFormat.AVRO);

    // Output component with merge enabled, so part files collapse into a single output file.
    S3OutputProperties props = new S3OutputProperties("out");
    props.init();
    props.setDatasetProperties(dataset);
    props.mergeOutput.setValue(true);

    // Create the runtime.
    S3OutputRuntime outputRuntime = new S3OutputRuntime();
    outputRuntime.initialize(null, props);

    // Drive two records through a Spark pipeline and into the output.
    final Pipeline pipeline = spark.createPipeline();
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(
            ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }),
            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    records.apply(outputRuntime);
    pipeline.run().waitUntilFinish();

    // Both records must be readable from exactly one merged Avro file.
    FileSystem fs = S3Connection.createFileSystem(dataset);
    MiniDfsResource.assertReadAvroFile(fs, s3.getS3APath(dataset),
            new HashSet<IndexedRecord>(Arrays.asList(
                    ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }),
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))),
            false);
    MiniDfsResource.assertFileNumber(fs, s3.getS3APath(dataset), 1);
}
Also used : S3DatasetProperties(org.talend.components.simplefileio.s3.S3DatasetProperties) ConvertToIndexedRecord(org.talend.components.adapter.beam.transform.ConvertToIndexedRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) FileSystem(org.apache.hadoop.fs.FileSystem) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 14 with S3DatasetProperties

Use of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend components project.

From the class S3SparkRuntimeTestIT, the method testAvro_sseKmsEncryption:

/**
 * Round-trips Avro data through a dataset created with SSE-KMS enabled and then
 * checks that the stored object reports the expected KMS encryption metadata.
 */
@Test
public void testAvro_sseKmsEncryption() throws IOException {
    S3DatasetProperties dataset = s3.createS3DatasetProperties(true, false);
    dataset.format.setValue(SimpleFileIOFormat.AVRO);
    // Reuse the plain round-trip scenario with the encrypted dataset.
    test_noEncryption(dataset);
    // Inspect the written object's metadata for the KMS settings.
    ObjectMetadata metadata = s3.getObjectMetadata(dataset);
    assertThat(metadata.getSSEAlgorithm(), is("aws:kms"));
    assertThat(metadata.getSSEAwsKmsKeyId(), is(dataset.kmsForDataAtRest.getValue()));
}
Also used : S3DatasetProperties(org.talend.components.simplefileio.s3.S3DatasetProperties) ObjectMetadata(com.talend.shaded.com.amazonaws.services.s3.model.ObjectMetadata) Test(org.junit.Test)

Example 15 with S3DatasetProperties

Use of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend components project.

From the class S3SparkRuntimeTestIT, the method test_noEncryption:

/**
 * Writes a simple record set through the S3 output, reads it back through the
 * S3 input, and verifies the round-tripped data, the sample, and the schema.
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // The file that we will be creating.
    RecordSet testData = getSimpleTestData(0);

    // Wire output and input components to the same dataset.
    S3OutputProperties writeProps = new S3OutputProperties("out");
    writeProps.setDatasetProperties(datasetProps);
    S3InputProperties readProps = new S3InputProperties("in");
    readProps.setDatasetProperties(datasetProps);

    // Written data must come back unchanged (order-insensitive).
    List<IndexedRecord> roundTripped = runRoundTripPipelines(testData.getAllData(), writeProps, readProps);
    List<IndexedRecord> expected = testData.getAllData();
    assertThat(roundTripped, containsInAnyOrder(expected.toArray()));

    // Sampling the dataset must yield the same records.
    List<IndexedRecord> sampled = getSample(datasetProps);
    assertThat(sampled, containsInAnyOrder(expected.toArray()));

    // The schema reported for the dataset must match the records' schema.
    Schema reportedSchema = getSchema(datasetProps);
    assertEquals(expected.get(0).getSchema(), reportedSchema);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) S3OutputProperties(org.talend.components.simplefileio.s3.output.S3OutputProperties) Schema(org.apache.avro.Schema) RecordSet(org.talend.components.test.RecordSet) S3InputProperties(org.talend.components.simplefileio.s3.input.S3InputProperties)

Aggregations

S3DatasetProperties (org.talend.components.simplefileio.s3.S3DatasetProperties)22 Test (org.junit.Test)16 Ignore (org.junit.Ignore)7 S3OutputProperties (org.talend.components.simplefileio.s3.output.S3OutputProperties)6 IndexedRecord (org.apache.avro.generic.IndexedRecord)5 ObjectMetadata (com.talend.shaded.com.amazonaws.services.s3.model.ObjectMetadata)4 Pipeline (org.apache.beam.sdk.Pipeline)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 ConvertToIndexedRecord (org.talend.components.adapter.beam.transform.ConvertToIndexedRecord)3 S3DatastoreProperties (org.talend.components.simplefileio.s3.S3DatastoreProperties)3 Schema (org.apache.avro.Schema)2 S3InputProperties (org.talend.components.simplefileio.s3.input.S3InputProperties)2 RecordSet (org.talend.components.test.RecordSet)2 AWSCredentialsProvider (com.amazonaws.auth.AWSCredentialsProvider)1 StaticCredentialsProvider (com.amazonaws.internal.StaticCredentialsProvider)1 Region (com.amazonaws.regions.Region)1 AmazonS3 (com.amazonaws.services.s3.AmazonS3)1 AmazonS3Client (com.amazonaws.services.s3.AmazonS3Client)1 AmazonS3EncryptionClient (com.amazonaws.services.s3.AmazonS3EncryptionClient)1 CryptoConfiguration (com.amazonaws.services.s3.model.CryptoConfiguration)1