Search in sources :

Example 1 with SelectObjectContentRequest

Use of com.amazonaws.services.s3.model.SelectObjectContentRequest (AWS SDK for Java v1) in project presto by prestodb.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Assembles the S3 Select request for a CSV object.
 *
 * <p>The bucket and key are derived from {@code path}, the SQL expression is {@code query},
 * and the CSV quote/escape characters are read from {@code schema} (left {@code null} when
 * the property is absent). Input and output use the same record delimiter, field delimiter,
 * quote character and escape character.
 *
 * @param schema table properties consulted for the field delimiter, quote and escape characters
 * @param query SQL expression to send to S3 Select
 * @param path location of the object to query
 * @return the populated request
 * @throws PrestoException with {@code NOT_SUPPORTED} when the path resolves to a compression
 *         codec other than gzip or bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    // --- target object and expression ---
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    URI location = path.toUri();
    request.setBucketName(PrestoS3FileSystem.getBucketName(location));
    request.setKey(PrestoS3FileSystem.keyFromPath(path));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);

    // --- CSV dialect shared by input and output ---
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    // --- input side ---
    CSVInput csvIn = new CSVInput();
    csvIn.setRecordDelimiter(lineDelimiter);
    csvIn.setFieldDelimiter(delimiter);
    csvIn.setComments(COMMENTS_CHAR_STR);
    csvIn.setQuoteCharacter(quote);
    csvIn.setQuoteEscapeCharacter(escape);

    InputSerialization input = new InputSerialization();
    CompressionCodec codec = compressionCodecFactory.getCodec(path);
    if (codec != null) {
        // Only gzip and bzip2 are mapped to an S3 Select compression type here.
        if (codec instanceof GzipCodec) {
            input.setCompressionType(CompressionType.GZIP);
        }
        else if (codec instanceof BZip2Codec) {
            input.setCompressionType(CompressionType.BZIP2);
        }
        else {
            throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
        }
    }
    input.setCsv(csvIn);
    request.setInputSerialization(input);

    // --- output side ---
    OutputSerialization output = new OutputSerialization();
    CSVOutput csvOut = new CSVOutput();
    csvOut.setRecordDelimiter(lineDelimiter);
    csvOut.setFieldDelimiter(delimiter);
    csvOut.setQuoteCharacter(quote);
    csvOut.setQuoteEscapeCharacter(escape);
    output.setCsv(csvOut);
    request.setOutputSerialization(output);

    return request;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(com.facebook.presto.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 2 with SelectObjectContentRequest

Use of com.amazonaws.services.s3.model.SelectObjectContentRequest (AWS SDK for Java v1) in project urban-eureka by errir503.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Creates the S3 Select request used to read a CSV object.
 *
 * <p>Bucket and key come from {@code path}; the expression is {@code query} with expression
 * type SQL. Quote and escape characters are looked up in {@code schema} and default to
 * {@code null}. The same delimiters and quoting are applied to the input and the output
 * serialization.
 *
 * @param schema table properties holding the CSV settings
 * @param query SQL expression for S3 Select
 * @param path location of the object to query
 * @return the fully configured request
 * @throws PrestoException with {@code NOT_SUPPORTED} if the codec resolved for {@code path}
 *         is neither gzip nor bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest selectRequest = new SelectObjectContentRequest();
    URI objectUri = path.toUri();
    selectRequest.setBucketName(PrestoS3FileSystem.getBucketName(objectUri));
    selectRequest.setKey(PrestoS3FileSystem.keyFromPath(path));
    selectRequest.setExpression(query);
    selectRequest.setExpressionType(ExpressionType.SQL);

    String fieldSeparator = getFieldDelimiter(schema);
    String quoteCharacter = schema.getProperty(QUOTE_CHAR, null);
    String quoteEscapeCharacter = schema.getProperty(ESCAPE_CHAR, null);

    // Describe how S3 Select should parse the stored CSV.
    CSVInput inputFormat = new CSVInput();
    inputFormat.setRecordDelimiter(lineDelimiter);
    inputFormat.setFieldDelimiter(fieldSeparator);
    inputFormat.setComments(COMMENTS_CHAR_STR);
    inputFormat.setQuoteCharacter(quoteCharacter);
    inputFormat.setQuoteEscapeCharacter(quoteEscapeCharacter);

    InputSerialization inputSettings = new InputSerialization();
    CompressionCodec detectedCodec = compressionCodecFactory.getCodec(path);

    // Resolve the compression type first; null means "leave the compression type unset".
    CompressionType compression = null;
    if (detectedCodec instanceof GzipCodec) {
        compression = CompressionType.GZIP;
    }
    else if (detectedCodec instanceof BZip2Codec) {
        compression = CompressionType.BZIP2;
    }
    else if (detectedCodec != null) {
        throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
    }
    if (compression != null) {
        inputSettings.setCompressionType(compression);
    }
    inputSettings.setCsv(inputFormat);
    selectRequest.setInputSerialization(inputSettings);

    // Describe how S3 Select should format the returned records.
    CSVOutput outputFormat = new CSVOutput();
    outputFormat.setRecordDelimiter(lineDelimiter);
    outputFormat.setFieldDelimiter(fieldSeparator);
    outputFormat.setQuoteCharacter(quoteCharacter);
    outputFormat.setQuoteEscapeCharacter(quoteEscapeCharacter);

    OutputSerialization outputSettings = new OutputSerialization();
    outputSettings.setCsv(outputFormat);
    selectRequest.setOutputSerialization(outputSettings);

    return selectRequest;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(com.facebook.presto.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 3 with SelectObjectContentRequest

use of software.amazon.awssdk.services.s3.model.SelectObjectContentRequest in project aws-sdk-java-v2 by aws.

the class SelectObjectContentTest method runSimpleQuery.

/**
 * Issues a fixed CSV-in/CSV-out SQL select against {@code test-bucket}/{@code test-key}.
 *
 * @param s3 async client used to send the request
 * @param handler response handler passed through to {@code selectObjectContent}
 * @return the future returned by {@code selectObjectContent}
 */
private static CompletableFuture<Void> runSimpleQuery(S3AsyncClient s3, SelectObjectContentResponseHandler handler) {
    // Uncompressed CSV input with default CSV settings.
    CSVInput csvInput = CSVInput.builder().build();
    InputSerialization input = InputSerialization.builder()
            .csv(csvInput)
            .compressionType(CompressionType.NONE)
            .build();

    // CSV output with default CSV settings.
    CSVOutput csvOutput = CSVOutput.builder().build();
    OutputSerialization output = OutputSerialization.builder()
            .csv(csvOutput)
            .build();

    SelectObjectContentRequest request = SelectObjectContentRequest.builder()
            .bucket("test-bucket")
            .key("test-key")
            .expression("test-query")
            .expressionType(ExpressionType.SQL)
            .inputSerialization(input)
            .outputSerialization(output)
            .build();

    return s3.selectObjectContent(request, handler);
}
Also used : SelectObjectContentRequest(software.amazon.awssdk.services.s3.model.SelectObjectContentRequest) InputSerialization(software.amazon.awssdk.services.s3.model.InputSerialization) OutputSerialization(software.amazon.awssdk.services.s3.model.OutputSerialization)

Example 4 with SelectObjectContentRequest

Use of com.amazonaws.services.s3.model.SelectObjectContentRequest (AWS SDK for Java v1) in project pxf by greenplum-db.

the class S3SelectAccessorTest method testCorrectlyParsesDataSourceWithNoKey.

/**
 * A data source that names only a bucket ({@code s3a://my-bucket}) must produce a request
 * whose bucket name is {@code my-bucket} and whose key is the empty string.
 */
@Test
public void testCorrectlyParsesDataSourceWithNoKey() {
    // Arrange: a request context whose data source has a bucket but no object key.
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.setConfig("default");
    requestContext.setUser("test-user");
    requestContext.setDataSource("s3a://my-bucket");
    requestContext.setConfiguration(new Configuration());

    S3SelectAccessor selectAccessor = new S3SelectAccessor();
    selectAccessor.setRequestContext(requestContext);
    selectAccessor.afterPropertiesSet();

    // Act
    SelectObjectContentRequest generated = selectAccessor.generateBaseCSVRequest(requestContext);

    // Assert
    assertEquals("my-bucket", generated.getBucketName());
    assertEquals("", generated.getKey());
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) Configuration(org.apache.hadoop.conf.Configuration) RequestContext(org.greenplum.pxf.api.model.RequestContext) Test(org.junit.jupiter.api.Test)

Example 5 with SelectObjectContentRequest

Use of com.amazonaws.services.s3.model.SelectObjectContentRequest (AWS SDK for Java v1) in project pxf by greenplum-db.

the class S3SelectAccessor method openForRead.

/**
 * Sends the S3 Select request and prepares a reader over the returned records.
 *
 * <p>Resets {@code isResultComplete} to {@code false}, builds the request with
 * {@code generateBaseCSVRequest(context)}, stores the service result, and obtains the records
 * input stream while registering a visitor that logs stats events and flips
 * {@code isResultComplete} when the end event arrives. When a stream is obtained it is wrapped
 * in a UTF-8 {@link java.io.BufferedReader} stored in {@code reader}.
 *
 * @return {@code true} if a records input stream was obtained and {@code reader} was created;
 *         {@code false} if the records input stream is {@code null}
 */
@Override
public boolean openForRead() {
    isResultComplete = new AtomicBoolean(false);
    SelectObjectContentRequest request = generateBaseCSVRequest(context);
    result = s3Client.selectObjectContent(request);
    resultInputStream = result.getPayload().getRecordsInputStream(new SelectObjectContentEventVisitor() {

        @Override
        public void visit(SelectObjectContentEvent.StatsEvent event) {
            LOG.debug("Received Stats, Bytes Scanned: {}. Bytes Processed: {}", event.getDetails().getBytesScanned(), event.getDetails().getBytesProcessed());
        }

        /*
         * An End Event informs that the request has finished successfully.
         */
        @Override
        public void visit(SelectObjectContentEvent.EndEvent event) {
            isResultComplete.set(true);
            LOG.debug("Received End Event. Result is complete.");
        }
    });

    // Check for a missing stream BEFORE wrapping it: InputStreamReader rejects null with a
    // NullPointerException, so testing afterwards could never yield false.
    if (resultInputStream == null) {
        return false;
    }

    // Decode explicitly as UTF-8 instead of relying on the JVM's platform default charset.
    reader = new BufferedReader(new InputStreamReader(resultInputStream, java.nio.charset.StandardCharsets.UTF_8));
    return true;
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) SelectObjectContentEventVisitor(com.amazonaws.services.s3.model.SelectObjectContentEventVisitor)

Aggregations

- SelectObjectContentRequest (com.amazonaws.services.s3.model.SelectObjectContentRequest): 9
- InputSerialization (com.amazonaws.services.s3.model.InputSerialization): 6
- OutputSerialization (com.amazonaws.services.s3.model.OutputSerialization): 6
- CSVInput (com.amazonaws.services.s3.model.CSVInput): 5
- CSVOutput (com.amazonaws.services.s3.model.CSVOutput): 5
- URI (java.net.URI): 5
- BZip2Codec (org.apache.hadoop.io.compress.BZip2Codec): 4
- CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 4
- GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 4
- PrestoException (com.facebook.presto.spi.PrestoException): 2
- PrestoException (io.prestosql.spi.PrestoException): 2
- Configuration (org.apache.hadoop.conf.Configuration): 2
- RequestContext (org.greenplum.pxf.api.model.RequestContext): 2
- Test (org.junit.jupiter.api.Test): 2
- InputSerialization (software.amazon.awssdk.services.s3.model.InputSerialization): 2
- OutputSerialization (software.amazon.awssdk.services.s3.model.OutputSerialization): 2
- SelectObjectContentRequest (software.amazon.awssdk.services.s3.model.SelectObjectContentRequest): 2
- SelectObjectContentEventVisitor (com.amazonaws.services.s3.model.SelectObjectContentEventVisitor): 1
- BufferedReader (java.io.BufferedReader): 1
- InputStreamReader (java.io.InputStreamReader): 1