Search in sources :

Example 1 with OutputSerialization

Use of com.amazonaws.services.s3.model.OutputSerialization (AWS SDK for Java v1) in project presto by prestodb.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Builds the S3 Select request for the CSV object located at {@code path}.
 *
 * <p>The record delimiter, field delimiter, quote character and quote-escape
 * character are applied identically to the input and the output CSV
 * serialization. The input compression type is derived from the codec resolved
 * for {@code path}; when no codec is resolved the compression type is left unset.
 *
 * @param schema properties supplying the field delimiter and the optional quote
 *               and escape characters ({@code null} when not present)
 * @param query  expression to execute, sent with {@link ExpressionType#SQL}
 * @param path   location of the object; bucket name and key are derived from it
 * @return the populated request
 * @throws PrestoException with {@code NOT_SUPPORTED} when the resolved codec is
 *                         neither gzip nor bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    // Target object and the expression to evaluate against it.
    URI objectUri = path.toUri();
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    request.setBucketName(PrestoS3FileSystem.getBucketName(objectUri));
    request.setKey(PrestoS3FileSystem.keyFromPath(path));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);

    // CSV dialect shared by the input and output sides.
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    // Input side: CSV dialect plus optional compression.
    CSVInput csvIn = new CSVInput();
    csvIn.setRecordDelimiter(lineDelimiter);
    csvIn.setFieldDelimiter(delimiter);
    csvIn.setComments(COMMENTS_CHAR_STR);
    csvIn.setQuoteCharacter(quote);
    csvIn.setQuoteEscapeCharacter(escape);

    InputSerialization input = new InputSerialization();
    CompressionCodec codec = compressionCodecFactory.getCodec(path);
    if (codec != null) {
        if (codec instanceof GzipCodec) {
            input.setCompressionType(CompressionType.GZIP);
        } else if (codec instanceof BZip2Codec) {
            input.setCompressionType(CompressionType.BZIP2);
        } else {
            throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
        }
    }
    input.setCsv(csvIn);
    request.setInputSerialization(input);

    // Output side: same dialect, no comment character.
    CSVOutput csvOut = new CSVOutput();
    csvOut.setRecordDelimiter(lineDelimiter);
    csvOut.setFieldDelimiter(delimiter);
    csvOut.setQuoteCharacter(quote);
    csvOut.setQuoteEscapeCharacter(escape);

    OutputSerialization output = new OutputSerialization();
    output.setCsv(csvOut);
    request.setOutputSerialization(output);

    return request;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(com.facebook.presto.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 2 with OutputSerialization

Use of com.amazonaws.services.s3.model.OutputSerialization (AWS SDK for Java v1) in project urban-eureka by errir503.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Creates the S3 Select request used to query a CSV object.
 *
 * <p>Bucket and key come from {@code path}. The CSV input and output
 * serializations share the same record delimiter, field delimiter, quote
 * character and quote-escape character; only the input additionally carries the
 * comment character and, when a codec is resolved for {@code path}, a
 * compression type.
 *
 * @param schema source of the field delimiter and of the optional quote and
 *               escape characters ({@code null} when a property is missing)
 * @param query  expression to run, flagged as {@link ExpressionType#SQL}
 * @param path   location of the object to query
 * @return the fully populated request
 * @throws PrestoException with {@code NOT_SUPPORTED} if a codec other than gzip
 *                         or bzip2 is resolved for {@code path}
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest selectRequest = new SelectObjectContentRequest();
    URI location = path.toUri();
    selectRequest.setBucketName(PrestoS3FileSystem.getBucketName(location));
    selectRequest.setKey(PrestoS3FileSystem.keyFromPath(path));
    selectRequest.setExpression(query);
    selectRequest.setExpressionType(ExpressionType.SQL);

    String fieldSeparator = getFieldDelimiter(schema);
    String quoteCharacter = schema.getProperty(QUOTE_CHAR, null);
    String quoteEscapeCharacter = schema.getProperty(ESCAPE_CHAR, null);

    // Resolve the compression first; stays null when the object is not compressed.
    CompressionCodec resolvedCodec = compressionCodecFactory.getCodec(path);
    CompressionType compression = null;
    if (resolvedCodec instanceof GzipCodec) {
        compression = CompressionType.GZIP;
    } else if (resolvedCodec instanceof BZip2Codec) {
        compression = CompressionType.BZIP2;
    } else if (resolvedCodec != null) {
        throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
    }

    CSVInput inputFormat = new CSVInput();
    inputFormat.setRecordDelimiter(lineDelimiter);
    inputFormat.setFieldDelimiter(fieldSeparator);
    inputFormat.setComments(COMMENTS_CHAR_STR);
    inputFormat.setQuoteCharacter(quoteCharacter);
    inputFormat.setQuoteEscapeCharacter(quoteEscapeCharacter);

    InputSerialization inputSettings = new InputSerialization();
    if (compression != null) {
        // Only touch the compression type when a supported codec was found.
        inputSettings.setCompressionType(compression);
    }
    inputSettings.setCsv(inputFormat);
    selectRequest.setInputSerialization(inputSettings);

    CSVOutput outputFormat = new CSVOutput();
    outputFormat.setRecordDelimiter(lineDelimiter);
    outputFormat.setFieldDelimiter(fieldSeparator);
    outputFormat.setQuoteCharacter(quoteCharacter);
    outputFormat.setQuoteEscapeCharacter(quoteEscapeCharacter);

    OutputSerialization outputSettings = new OutputSerialization();
    outputSettings.setCsv(outputFormat);
    selectRequest.setOutputSerialization(outputSettings);

    return selectRequest;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(com.facebook.presto.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 3 with OutputSerialization

use of software.amazon.awssdk.services.s3.model.OutputSerialization in project aws-sdk-java-v2 by aws.

the class SelectObjectContentTest method runSimpleQuery.

/**
 * Issues a select-object-content call with fixed test values for bucket, key
 * and expression, using default CSV input and output settings and no input
 * compression.
 *
 * @param s3      async client the request is sent through
 * @param handler response handler passed along with the request
 * @return the future returned by {@code s3.selectObjectContent}
 */
private static CompletableFuture<Void> runSimpleQuery(S3AsyncClient s3, SelectObjectContentResponseHandler handler) {
    CSVInput csvInput = CSVInput.builder().build();
    InputSerialization input = InputSerialization.builder()
            .csv(csvInput)
            .compressionType(CompressionType.NONE)
            .build();

    CSVOutput csvOutput = CSVOutput.builder().build();
    OutputSerialization output = OutputSerialization.builder()
            .csv(csvOutput)
            .build();

    SelectObjectContentRequest request = SelectObjectContentRequest.builder()
            .bucket("test-bucket")
            .key("test-key")
            .expression("test-query")
            .expressionType(ExpressionType.SQL)
            .inputSerialization(input)
            .outputSerialization(output)
            .build();

    return s3.selectObjectContent(request, handler);
}
Also used : SelectObjectContentRequest(software.amazon.awssdk.services.s3.model.SelectObjectContentRequest) InputSerialization(software.amazon.awssdk.services.s3.model.InputSerialization) OutputSerialization(software.amazon.awssdk.services.s3.model.OutputSerialization)

Example 4 with OutputSerialization

use of software.amazon.awssdk.services.s3.model.OutputSerialization in project aws-sdk-java-v2 by aws.

the class SelectObjectContentIntegrationTest method executeSqlQueryWithHandler.

/**
 * Runs {@code query} as an SQL expression against the object identified by
 * {@code BUCKET_NAME} and {@code KEY}, with default CSV input and output
 * settings and no input compression.
 *
 * @param query   expression to execute
 * @param handler response handler passed along with the request
 * @return the future returned by {@code s3Async.selectObjectContent}
 */
private static CompletableFuture<Void> executeSqlQueryWithHandler(String query, SelectObjectContentResponseHandler handler) {
    CSVInput defaultCsvInput = CSVInput.builder().build();
    InputSerialization inputSettings = InputSerialization.builder()
            .csv(defaultCsvInput)
            .compressionType(CompressionType.NONE)
            .build();

    CSVOutput defaultCsvOutput = CSVOutput.builder().build();
    OutputSerialization outputSettings = OutputSerialization.builder()
            .csv(defaultCsvOutput)
            .build();

    SelectObjectContentRequest selectRequest = SelectObjectContentRequest.builder()
            .bucket(BUCKET_NAME)
            .key(KEY)
            .expression(query)
            .expressionType(ExpressionType.SQL)
            .inputSerialization(inputSettings)
            .outputSerialization(outputSettings)
            .build();

    return s3Async.selectObjectContent(selectRequest, handler);
}
Also used : SelectObjectContentRequest(software.amazon.awssdk.services.s3.model.SelectObjectContentRequest) InputSerialization(software.amazon.awssdk.services.s3.model.InputSerialization) OutputSerialization(software.amazon.awssdk.services.s3.model.OutputSerialization)

Example 5 with OutputSerialization

Use of com.amazonaws.services.s3.model.OutputSerialization (AWS SDK for Java v1) in project pxf by greenplum-db.

the class S3SelectAccessor method generateBaseCSVRequest.

/**
 * Generates the {@link SelectObjectContentRequest} object from
 * the request context.
 *
 * <p>The select expression is produced by {@link S3SelectQueryBuilder}. If
 * building it fails with a {@link SQLException}, the failure is logged together
 * with its cause and the request is still returned, carrying a {@code null}
 * expression.
 *
 * @param context the request context
 * @return a {@link SelectObjectContentRequest}
 */
SelectObjectContentRequest generateBaseCSVRequest(RequestContext context) {
    InputSerialization inputSerialization = getInputSerialization(context);
    String fileHeaderInfo = context.getOption(FILE_HEADER_INFO);
    // Columns are addressed by position when the input is CSV and the
    // FILE_HEADER_INFO option is blank or anything other than FILE_HEADER_INFO_USE
    // (compared case-insensitively).
    boolean usePositionToIdentifyColumn = inputSerialization.getCsv() != null && (StringUtils.isBlank(fileHeaderInfo) || !StringUtils.equalsIgnoreCase(FILE_HEADER_INFO_USE, fileHeaderInfo));
    String query = null;
    try {
        S3SelectQueryBuilder queryBuilder = new S3SelectQueryBuilder(context, usePositionToIdentifyColumn);
        query = queryBuilder.buildSelectQuery();
    } catch (SQLException e) {
        // Pass the exception as the trailing argument so the cause and stack
        // trace are logged instead of being dropped; query stays null.
        LOG.error("Unable to build select query for filter string {}", context.getFilterString(), e);
    }
    LOG.trace("Select query: {}", query);
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    // Bucket is the host part of the resource name; the key is its path
    // without the leading slash.
    request.setBucketName(name.getHost());
    request.setKey(StringUtils.removeStart(name.getPath(), "/"));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);
    LOG.debug("With bucket name '{}'", request.getBucketName());
    LOG.debug("With key '{}'", request.getKey());
    LOG.debug("With expression query '{}'", query);
    request.setInputSerialization(inputSerialization);
    OutputSerialization outputSerialization = getOutputSerialization(context);
    request.setOutputSerialization(outputSerialization);
    return request;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) SQLException(java.sql.SQLException) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization)

Aggregations

OutputSerialization (com.amazonaws.services.s3.model.OutputSerialization): 6; CSVOutput (com.amazonaws.services.s3.model.CSVOutput): 5; InputSerialization (com.amazonaws.services.s3.model.InputSerialization): 5; SelectObjectContentRequest (com.amazonaws.services.s3.model.SelectObjectContentRequest): 5; CSVInput (com.amazonaws.services.s3.model.CSVInput): 4; URI (java.net.URI): 4; BZip2Codec (org.apache.hadoop.io.compress.BZip2Codec): 3; CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3; GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 3; PrestoException (com.facebook.presto.spi.PrestoException): 2; InputSerialization (software.amazon.awssdk.services.s3.model.InputSerialization): 2; OutputSerialization (software.amazon.awssdk.services.s3.model.OutputSerialization): 2; SelectObjectContentRequest (software.amazon.awssdk.services.s3.model.SelectObjectContentRequest): 2; PrestoException (io.prestosql.spi.PrestoException): 1; SQLException (java.sql.SQLException): 1; GreenplumCSV (org.greenplum.pxf.api.model.GreenplumCSV): 1