Use of software.amazon.awssdk.services.s3.model.SelectObjectContentRequest in project presto by prestodb.
Class S3SelectCsvRecordReader, method buildSelectObjectRequest.
/**
 * Assembles the S3 Select request used to run {@code query} against the CSV object at {@code path}.
 *
 * <p>The bucket and key are derived from {@code path}. The field delimiter comes from
 * {@code getFieldDelimiter(schema)}; the quote and escape characters are read from the
 * {@code QUOTE_CHAR} and {@code ESCAPE_CHAR} schema properties and are {@code null} when the
 * property is absent. The same delimiters and quoting are applied to both the input and the
 * output CSV serialization.
 *
 * @param schema table properties supplying the CSV delimiter, quote and escape settings
 * @param query  SQL expression to send to S3 Select
 * @param path   location of the object to query
 * @return the populated request
 * @throws PrestoException with {@code NOT_SUPPORTED} when the codec resolved for {@code path}
 *         is neither gzip nor bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    // What to query: object location plus the SQL expression.
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    URI location = path.toUri();
    request.setBucketName(PrestoS3FileSystem.getBucketName(location));
    request.setKey(PrestoS3FileSystem.keyFromPath(path));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);

    // CSV dialect shared by the input and output sides.
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    // How S3 should parse the stored object.
    CSVInput csvIn = new CSVInput();
    csvIn.setRecordDelimiter(lineDelimiter);
    csvIn.setFieldDelimiter(delimiter);
    csvIn.setComments(COMMENTS_CHAR_STR);
    csvIn.setQuoteCharacter(quote);
    csvIn.setQuoteEscapeCharacter(escape);

    InputSerialization input = new InputSerialization();
    CompressionCodec codec = compressionCodecFactory.getCodec(path);
    boolean gzip = codec instanceof GzipCodec;
    boolean bzip2 = codec instanceof BZip2Codec;
    // Any codec other than gzip/bzip2 is rejected; no codec means no compression type is set.
    if (codec != null && !gzip && !bzip2) {
        throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
    }
    if (gzip) {
        input.setCompressionType(CompressionType.GZIP);
    } else if (bzip2) {
        input.setCompressionType(CompressionType.BZIP2);
    }
    input.setCsv(csvIn);
    request.setInputSerialization(input);

    // How S3 should format the returned records.
    CSVOutput csvOut = new CSVOutput();
    csvOut.setRecordDelimiter(lineDelimiter);
    csvOut.setFieldDelimiter(delimiter);
    csvOut.setQuoteCharacter(quote);
    csvOut.setQuoteEscapeCharacter(escape);

    OutputSerialization output = new OutputSerialization();
    output.setCsv(csvOut);
    request.setOutputSerialization(output);

    return request;
}
Use of software.amazon.awssdk.services.s3.model.SelectObjectContentRequest in project urban-eureka by errir503.
Class S3SelectCsvRecordReader, method buildSelectObjectRequest.
/**
 * Creates the S3 Select request that evaluates {@code query} over the CSV object at {@code path}.
 *
 * <p>Input and output CSV serialization use the same record delimiter ({@code lineDelimiter}),
 * the field delimiter returned by {@code getFieldDelimiter(schema)}, and the quote/escape
 * characters read from the {@code QUOTE_CHAR} / {@code ESCAPE_CHAR} schema properties
 * ({@code null} when unset). The input side additionally sets the comment character and,
 * for gzip or bzip2 objects, the compression type.
 *
 * @param schema table properties holding the CSV settings
 * @param query  SQL expression to execute
 * @param path   object to run the expression against
 * @return the fully configured request
 * @throws PrestoException with {@code NOT_SUPPORTED} if a codec is resolved for {@code path}
 *         that is not gzip or bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest selectRequest = new SelectObjectContentRequest();
    URI objectUri = path.toUri();
    selectRequest.setBucketName(PrestoS3FileSystem.getBucketName(objectUri));
    selectRequest.setKey(PrestoS3FileSystem.keyFromPath(path));
    selectRequest.setExpression(query);
    selectRequest.setExpressionType(ExpressionType.SQL);

    String fieldSeparator = getFieldDelimiter(schema);
    String quoteCharacter = schema.getProperty(QUOTE_CHAR, null);
    String quoteEscapeCharacter = schema.getProperty(ESCAPE_CHAR, null);

    // Input side: CSV dialect of the stored object.
    CSVInput inputCsv = new CSVInput();
    inputCsv.setRecordDelimiter(lineDelimiter);
    inputCsv.setFieldDelimiter(fieldSeparator);
    inputCsv.setComments(COMMENTS_CHAR_STR);
    inputCsv.setQuoteCharacter(quoteCharacter);
    inputCsv.setQuoteEscapeCharacter(quoteEscapeCharacter);

    InputSerialization inputSerialization = new InputSerialization();
    CompressionCodec codec = compressionCodecFactory.getCodec(path);

    // Map the resolved codec to an S3 Select compression type; stays null for uncompressed objects.
    CompressionType compression = null;
    if (codec instanceof GzipCodec) {
        compression = CompressionType.GZIP;
    } else if (codec instanceof BZip2Codec) {
        compression = CompressionType.BZIP2;
    } else if (codec != null) {
        throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
    }
    // Leave the compression type untouched when no codec applies.
    if (compression != null) {
        inputSerialization.setCompressionType(compression);
    }
    inputSerialization.setCsv(inputCsv);
    selectRequest.setInputSerialization(inputSerialization);

    // Output side: mirror the input dialect (no comment character on output).
    OutputSerialization outputSerialization = new OutputSerialization();
    CSVOutput outputCsv = new CSVOutput();
    outputCsv.setRecordDelimiter(lineDelimiter);
    outputCsv.setFieldDelimiter(fieldSeparator);
    outputCsv.setQuoteCharacter(quoteCharacter);
    outputCsv.setQuoteEscapeCharacter(quoteEscapeCharacter);
    outputSerialization.setCsv(outputCsv);
    selectRequest.setOutputSerialization(outputSerialization);

    return selectRequest;
}
Use of software.amazon.awssdk.services.s3.model.SelectObjectContentRequest in project aws-sdk-java-v2 by aws.
Class SelectObjectContentTest, method runSimpleQuery.
/**
 * Issues a fixed select-object-content call against {@code s3} and hands the response
 * events to {@code handler}.
 *
 * <p>The request targets bucket {@code "test-bucket"}, key {@code "test-key"}, with the SQL
 * expression {@code "test-query"}, using default CSV input (no compression) and default CSV output.
 *
 * @param s3      async client used to send the request
 * @param handler receiver for the select response
 * @return the future returned by {@code s3.selectObjectContent}
 */
private static CompletableFuture<Void> runSimpleQuery(S3AsyncClient s3, SelectObjectContentResponseHandler handler) {
    CSVInput csvInput = CSVInput.builder().build();
    InputSerialization input = InputSerialization.builder()
            .csv(csvInput)
            .compressionType(CompressionType.NONE)
            .build();

    CSVOutput csvOutput = CSVOutput.builder().build();
    OutputSerialization output = OutputSerialization.builder()
            .csv(csvOutput)
            .build();

    SelectObjectContentRequest request = SelectObjectContentRequest.builder()
            .bucket("test-bucket")
            .key("test-key")
            .expression("test-query")
            .expressionType(ExpressionType.SQL)
            .inputSerialization(input)
            .outputSerialization(output)
            .build();

    return s3.selectObjectContent(request, handler);
}
Use of software.amazon.awssdk.services.s3.model.SelectObjectContentRequest in project pxf by greenplum-db.
Class S3SelectAccessorTest, method testCorrectlyParsesDataSourceWithNoKey.
/**
 * A data source consisting only of a bucket ({@code s3a://my-bucket}) must yield a request
 * whose bucket name is {@code "my-bucket"} and whose key is the empty string.
 */
@Test
public void testCorrectlyParsesDataSourceWithNoKey() {
    // Arrange: request context pointing at a bucket with no object key.
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.setConfig("default");
    requestContext.setUser("test-user");
    requestContext.setDataSource("s3a://my-bucket");
    requestContext.setConfiguration(new Configuration());

    S3SelectAccessor s3SelectAccessor = new S3SelectAccessor();
    s3SelectAccessor.setRequestContext(requestContext);
    s3SelectAccessor.afterPropertiesSet();

    // Act
    SelectObjectContentRequest baseRequest = s3SelectAccessor.generateBaseCSVRequest(requestContext);

    // Assert
    assertEquals("my-bucket", baseRequest.getBucketName());
    assertEquals("", baseRequest.getKey());
}
Use of software.amazon.awssdk.services.s3.model.SelectObjectContentRequest in project pxf by greenplum-db.
Class S3SelectAccessor, method openForRead.
/**
 * Sends the S3 Select request built from {@code context} and prepares {@code reader} over
 * the returned records stream.
 *
 * <p>While the stream is consumed, a stats event is logged at debug level and an end event
 * flips {@code isResultComplete} to {@code true}.
 *
 * @return {@code true} when a records stream was obtained and {@code reader} was created;
 *         {@code false} when no records stream is available
 */
@Override
public boolean openForRead() {
    isResultComplete = new AtomicBoolean(false);
    SelectObjectContentRequest request = generateBaseCSVRequest(context);
    result = s3Client.selectObjectContent(request);
    resultInputStream = result.getPayload().getRecordsInputStream(new SelectObjectContentEventVisitor() {
        @Override
        public void visit(SelectObjectContentEvent.StatsEvent event) {
            LOG.debug("Received Stats, Bytes Scanned: {}. Bytes Processed: {}", event.getDetails().getBytesScanned(), event.getDetails().getBytesProcessed());
        }

        /*
         * An End Event informs that the request has finished successfully.
         */
        @Override
        public void visit(SelectObjectContentEvent.EndEvent event) {
            isResultComplete.set(true);
            LOG.debug("Received End Event. Result is complete.");
        }
    });

    // Check before wrapping: InputStreamReader rejects a null stream with an NPE, which
    // previously made the "stream != null" return value unreachable as false.
    if (resultInputStream == null) {
        return false;
    }

    // Decode explicitly as UTF-8 instead of relying on the JVM's platform default charset.
    // Fully qualified so the block does not depend on an additional import.
    reader = new BufferedReader(new InputStreamReader(resultInputStream, java.nio.charset.StandardCharsets.UTF_8));
    return true;
}
Aggregations