Usage of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend "components" project.
Taken from class S3DatasetRuntimeTestIT, method listBuckets.
@Test
@Ignore("It's slow (10 or more mins), our account doesn't allow to create this amount of buckets")
public void listBuckets() {
    // Unique suffix so concurrent runs of this IT do not collide on bucket names.
    String uuid = UUID.randomUUID().toString().substring(0, 8);
    String bucketFormat = "tcomp-s3-dataset-test-%s-" + uuid;
    S3DatasetProperties s3DatasetProperties = s3.createS3DatasetProperties();
    runtime.initialize(null, s3DatasetProperties);
    AmazonS3 client = S3Connection.createClient(s3.createS3DatastoreProperties());
    for (S3Region s3Region : getTestableS3Regions()) {
        // Hoisted: the bucket name for this region is used for create, assert and delete.
        String bucketName = String.format(bucketFormat, s3Region.getValue());
        client.setEndpoint(s3Region.toEndpoint());
        // US_EAST_1 is the default region and must not be passed explicitly on creation.
        if (s3Region.equals(S3Region.US_EAST_1)) {
            client.createBucket(bucketName);
        } else {
            client.createBucket(bucketName, s3Region.getValue());
        }
        try {
            s3DatasetProperties.region.setValue(s3Region);
            Set<String> bucketNames = runtime.listBuckets();
            assertTrue(bucketNames.size() > 0);
            assertThat(bucketNames, hasItems(bucketName));
        } finally {
            // Always delete the bucket, even when an assertion above fails,
            // so repeated runs do not leak buckets in the test account.
            // (The client endpoint is already set for this region above.)
            client.deleteBucket(bucketName);
        }
    }
}
Usage of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend "components" project.
Taken from class S3OutputRuntimeTestIT, method testCsv_merge.
/**
 * Writes two CSV records through the S3 output runtime with merge enabled and
 * verifies that exactly one merged file is produced with the expected content.
 */
@Test
public void testCsv_merge() throws IOException {
    // Dataset: CSV with LF record delimiter and ';' as the field delimiter.
    S3DatasetProperties datasetProps = s3.createS3DatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.CSV);
    datasetProps.recordDelimiter.setValue(SimpleFileIODatasetProperties.RecordDelimiterType.LF);
    datasetProps.fieldDelimiter.setValue(SimpleFileIODatasetProperties.FieldDelimiterType.SEMICOLON);

    // Output component configured to merge part files into a single output file.
    S3OutputProperties outputProperties = new S3OutputProperties("out");
    outputProperties.init();
    outputProperties.setDatasetProperties(datasetProps);
    outputProperties.mergeOutput.setValue(true);

    S3OutputRuntime runtime = new S3OutputRuntime();
    runtime.initialize(null, outputProperties);

    // Run a small Spark pipeline that writes two records through the runtime.
    final Pipeline pipeline = spark.createPipeline();
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(
            ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }),
            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    records.apply(runtime);
    pipeline.run().waitUntilFinish();

    // Verify: both rows present with ';' separators, and only one output file.
    FileSystem s3FileSystem = S3Connection.createFileSystem(datasetProps);
    MiniDfsResource.assertReadFile(s3FileSystem, s3.getS3APath(datasetProps), "1;one", "2;two");
    MiniDfsResource.assertFileNumber(s3FileSystem, s3.getS3APath(datasetProps), 1);
}
Usage of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend "components" project.
Taken from class S3OutputRuntimeTestIT, method testAvro_merge.
/**
 * Writes two Avro records through the S3 output runtime with merge enabled and
 * verifies that exactly one merged file holds both records.
 */
@Test
public void testAvro_merge() throws IOException {
    // Dataset in Avro format.
    S3DatasetProperties datasetProps = s3.createS3DatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.AVRO);

    // Output component configured to merge part files into a single output file.
    S3OutputProperties outputProperties = new S3OutputProperties("out");
    outputProperties.init();
    outputProperties.setDatasetProperties(datasetProps);
    outputProperties.mergeOutput.setValue(true);

    S3OutputRuntime runtime = new S3OutputRuntime();
    runtime.initialize(null, outputProperties);

    // Run a small Spark pipeline that writes two records through the runtime.
    final Pipeline pipeline = spark.createPipeline();
    PCollection<IndexedRecord> records = pipeline.apply(Create.of(
            ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }),
            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" })));
    records.apply(runtime);
    pipeline.run().waitUntilFinish();

    // Verify: the Avro file contains exactly the two written records (order-insensitive),
    // and merging produced a single output file.
    FileSystem s3FileSystem = S3Connection.createFileSystem(datasetProps);
    MiniDfsResource.assertReadAvroFile(s3FileSystem, s3.getS3APath(datasetProps),
            new HashSet<IndexedRecord>(Arrays.asList(
                    ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }),
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))),
            false);
    MiniDfsResource.assertFileNumber(s3FileSystem, s3.getS3APath(datasetProps), 1);
}
Usage of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend "components" project.
Taken from class S3SparkRuntimeTestIT, method testAvro_sseKmsEncryption.
/**
 * Basic Avro round trip with SSE-KMS encryption enabled on the dataset.
 */
@Test
public void testAvro_sseKmsEncryption() throws IOException {
    // Dataset with server-side KMS encryption for data at rest (first flag),
    // no encryption for data in motion (second flag) — see createS3DatasetProperties.
    S3DatasetProperties datasetProps = s3.createS3DatasetProperties(true, false);
    datasetProps.format.setValue(SimpleFileIOFormat.AVRO);

    // Reuse the plain round-trip scenario; SSE-KMS is transparent to readers/writers.
    test_noEncryption(datasetProps);

    // The stored object must report the KMS algorithm and the configured key id.
    ObjectMetadata md = s3.getObjectMetadata(datasetProps);
    assertThat(md.getSSEAlgorithm(), is("aws:kms"));
    assertThat(md.getSSEAwsKmsKeyId(), is(datasetProps.kmsForDataAtRest.getValue()));
}
Usage of org.talend.components.simplefileio.s3.S3DatasetProperties in the Talend "components" project.
Taken from class S3SparkRuntimeTestIT, method test_noEncryption.
/**
 * Round-trips a simple record set through the S3 output and input components
 * configured with the given dataset, then checks the read-back data, the
 * sample fetch, and the inferred schema against the written records.
 */
public void test_noEncryption(S3DatasetProperties datasetProps) throws IOException {
    // The records that will be written and read back.
    RecordSet rs = getSimpleTestData(0);

    // Output and input components share the same dataset properties.
    S3OutputProperties outputProps = new S3OutputProperties("out");
    outputProps.setDatasetProperties(datasetProps);
    S3InputProperties inputProps = new S3InputProperties("in");
    inputProps.setDatasetProperties(datasetProps);

    // Write then read back; ordering is not guaranteed, so compare as a bag.
    List<IndexedRecord> actual = runRoundTripPipelines(rs.getAllData(), outputProps, inputProps);
    List<IndexedRecord> expected = rs.getAllData();
    assertThat(actual, containsInAnyOrder(expected.toArray()));

    // Sampling the dataset must surface the same records.
    List<IndexedRecord> samples = getSample(datasetProps);
    assertThat(samples, containsInAnyOrder(expected.toArray()));

    // Schema inference must match the schema of the written records.
    Schema schema = getSchema(datasetProps);
    assertEquals(expected.get(0).getSchema(), schema);
}
Aggregations