Search in sources :

Example 1 with CSVInput

use of com.amazonaws.services.s3.model.CSVInput in project presto by prestodb.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Assembles the S3 Select request used to run {@code query} against the CSV object at {@code path}.
 *
 * <p>The bucket and key are derived from {@code path}. The field delimiter comes from
 * {@code getFieldDelimiter(schema)}; the quote and escape characters are read from the
 * {@code QUOTE_CHAR} and {@code ESCAPE_CHAR} schema properties and are {@code null} when unset.
 * The same delimiters and quoting are applied to both the input and the output serialization.
 *
 * @param schema table properties supplying the CSV formatting options
 * @param query  expression to send, submitted with expression type SQL
 * @param path   location of the object to query
 * @return the populated request
 * @throws PrestoException with {@code NOT_SUPPORTED} when a codec is resolved for {@code path}
 *                         that is neither gzip nor bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    URI location = path.toUri();
    request.setBucketName(PrestoS3FileSystem.getBucketName(location));
    request.setKey(PrestoS3FileSystem.keyFromPath(path));
    request.setExpressionType(ExpressionType.SQL);
    request.setExpression(query);

    // CSV formatting options; quote/escape stay null when the property is absent.
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    // How S3 Select should parse the stored object.
    CSVInput csvIn = new CSVInput();
    csvIn.setRecordDelimiter(lineDelimiter);
    csvIn.setFieldDelimiter(delimiter);
    csvIn.setComments(COMMENTS_CHAR_STR);
    csvIn.setQuoteCharacter(quote);
    csvIn.setQuoteEscapeCharacter(escape);

    InputSerialization input = new InputSerialization();
    CompressionCodec codec = compressionCodecFactory.getCodec(path);
    if (codec != null) {
        // A codec was resolved for this path: only gzip and bzip2 are mapped to S3 Select.
        if (codec instanceof GzipCodec) {
            input.setCompressionType(CompressionType.GZIP);
        } else if (codec instanceof BZip2Codec) {
            input.setCompressionType(CompressionType.BZIP2);
        } else {
            throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
        }
    }
    input.setCsv(csvIn);
    request.setInputSerialization(input);

    // The result stream is formatted with the same delimiters and quoting as the input.
    CSVOutput csvOut = new CSVOutput();
    csvOut.setRecordDelimiter(lineDelimiter);
    csvOut.setFieldDelimiter(delimiter);
    csvOut.setQuoteCharacter(quote);
    csvOut.setQuoteEscapeCharacter(escape);

    OutputSerialization output = new OutputSerialization();
    output.setCsv(csvOut);
    request.setOutputSerialization(output);
    return request;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(com.facebook.presto.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 2 with CSVInput

use of com.amazonaws.services.s3.model.CSVInput in project urban-eureka by errir503.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Creates the S3 Select request that evaluates {@code query} over the CSV object at {@code path}.
 *
 * <p>Input and output CSV serialization share the record delimiter, the field delimiter
 * returned by {@code getFieldDelimiter(schema)}, and the quote/escape characters read from the
 * {@code QUOTE_CHAR} / {@code ESCAPE_CHAR} schema properties ({@code null} when not set).
 * The comment marker is configured on the input side only.
 *
 * @param schema table properties holding the CSV formatting options
 * @param query  expression to run, sent with expression type SQL
 * @param path   location of the object to scan
 * @return the fully configured request
 * @throws PrestoException with {@code NOT_SUPPORTED} if the codec resolved for {@code path}
 *                         is something other than gzip or bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest selectRequest = new SelectObjectContentRequest();
    InputSerialization inputSerialization = new InputSerialization();
    OutputSerialization outputSerialization = new OutputSerialization();
    CSVInput inputFormat = new CSVInput();
    CSVOutput outputFormat = new CSVOutput();

    // Target object and expression.
    URI objectUri = path.toUri();
    selectRequest.setBucketName(PrestoS3FileSystem.getBucketName(objectUri));
    selectRequest.setKey(PrestoS3FileSystem.keyFromPath(path));
    selectRequest.setExpression(query);
    selectRequest.setExpressionType(ExpressionType.SQL);

    // Formatting options taken from the table schema.
    String separator = getFieldDelimiter(schema);
    String quoteCharacter = schema.getProperty(QUOTE_CHAR, null);
    String quoteEscapeCharacter = schema.getProperty(ESCAPE_CHAR, null);

    inputFormat.setRecordDelimiter(lineDelimiter);
    inputFormat.setFieldDelimiter(separator);
    inputFormat.setComments(COMMENTS_CHAR_STR);
    inputFormat.setQuoteCharacter(quoteCharacter);
    inputFormat.setQuoteEscapeCharacter(quoteEscapeCharacter);

    // No compression type is set when no codec is resolved for the path.
    CompressionCodec resolvedCodec = compressionCodecFactory.getCodec(path);
    if (resolvedCodec != null) {
        if (resolvedCodec instanceof GzipCodec) {
            inputSerialization.setCompressionType(CompressionType.GZIP);
        } else if (resolvedCodec instanceof BZip2Codec) {
            inputSerialization.setCompressionType(CompressionType.BZIP2);
        } else {
            throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
        }
    }

    outputFormat.setRecordDelimiter(lineDelimiter);
    outputFormat.setFieldDelimiter(separator);
    outputFormat.setQuoteCharacter(quoteCharacter);
    outputFormat.setQuoteEscapeCharacter(quoteEscapeCharacter);

    // Wire the pieces together once every part is configured.
    inputSerialization.setCsv(inputFormat);
    outputSerialization.setCsv(outputFormat);
    selectRequest.setInputSerialization(inputSerialization);
    selectRequest.setOutputSerialization(outputSerialization);
    return selectRequest;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(com.facebook.presto.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 3 with CSVInput

use of com.amazonaws.services.s3.model.CSVInput in project pxf by greenplum-db.

the class S3SelectAccessor method getCSVInput.

/**
 * Builds a {@link com.amazonaws.services.s3.model.CSVInput} from the
 * CSV settings carried by the request context.
 *
 * <p>The file header info option, field delimiter, newline and escape
 * character are applied only when they are non-null; the quote
 * character is always applied. Each applied value is logged at debug level.
 *
 * @param context the request context
 * @return a {@link CSVInput} configured from {@code context}
 */
CSVInput getCSVInput(RequestContext context) {
    GreenplumCSV greenplumCsv = context.getGreenplumCSV();
    String headerInfo = context.getOption(FILE_HEADER_INFO);
    CSVInput result = new CSVInput();

    // Optional settings: leave the CSVInput untouched for anything not provided.
    if (headerInfo != null) {
        LOG.debug("With CSV FileHeaderInfo '{}'", headerInfo);
        result.setFileHeaderInfo(headerInfo);
    }

    if (greenplumCsv.getDelimiter() != null) {
        LOG.debug("With CSV field delimiter '{}'", greenplumCsv.getDelimiter());
        result.setFieldDelimiter(greenplumCsv.getDelimiter());
    }

    if (greenplumCsv.getNewline() != null) {
        LOG.debug("With CSV NEWLINE '{}'", greenplumCsv.getNewline());
        result.setRecordDelimiter(greenplumCsv.getNewline());
    }

    if (greenplumCsv.getEscape() != null) {
        LOG.debug("With CSV quote escape character '{}'", greenplumCsv.getEscape());
        result.setQuoteEscapeCharacter(greenplumCsv.getEscape());
    }

    // The quote character is set unconditionally.
    LOG.debug("With CSV quote character '{}'", greenplumCsv.getQuote());
    result.setQuoteCharacter(greenplumCsv.getQuote());

    return result;
}
Also used : GreenplumCSV(org.greenplum.pxf.api.model.GreenplumCSV) CSVInput(com.amazonaws.services.s3.model.CSVInput)

Example 4 with CSVInput

use of com.amazonaws.services.s3.model.CSVInput in project hetu-core by openlookeng.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Prepares the S3 Select request that applies {@code query} to the CSV object at {@code path}.
 *
 * <p>The request targets the bucket and key derived from {@code path} and uses expression
 * type SQL. CSV parsing and CSV result formatting both use the record delimiter, the value of
 * {@code getFieldDelimiter(schema)}, and the {@code QUOTE_CHAR} / {@code ESCAPE_CHAR} schema
 * properties (each {@code null} when missing).
 *
 * @param schema table properties providing the CSV formatting options
 * @param query  expression to execute
 * @param path   location of the object to read
 * @return the request, ready to be submitted
 * @throws PrestoException with {@code NOT_SUPPORTED} when the codec resolved for {@code path}
 *                         is neither gzip nor bzip2
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest req = new SelectObjectContentRequest();
    URI source = path.toUri();
    req.setBucketName(PrestoS3FileSystem.getBucketName(source));
    req.setKey(PrestoS3FileSystem.keyFromPath(path));
    req.setExpressionType(ExpressionType.SQL);
    req.setExpression(query);

    String fieldSep = getFieldDelimiter(schema);
    String quoting = schema.getProperty(QUOTE_CHAR, null);
    String escaping = schema.getProperty(ESCAPE_CHAR, null);

    // Input side: describes the layout of the stored CSV, including the comment marker.
    CSVInput csvReadFormat = new CSVInput();
    csvReadFormat.setRecordDelimiter(lineDelimiter);
    csvReadFormat.setFieldDelimiter(fieldSep);
    csvReadFormat.setComments(COMMENTS_CHAR_STR);
    csvReadFormat.setQuoteCharacter(quoting);
    csvReadFormat.setQuoteEscapeCharacter(escaping);

    InputSerialization readSerialization = new InputSerialization();
    readSerialization.setCsv(csvReadFormat);

    // Translate the codec resolved for the path into an S3 Select compression type.
    CompressionCodec pathCodec = compressionCodecFactory.getCodec(path);
    if (pathCodec != null) {
        if (pathCodec instanceof GzipCodec) {
            readSerialization.setCompressionType(CompressionType.GZIP);
        } else if (pathCodec instanceof BZip2Codec) {
            readSerialization.setCompressionType(CompressionType.BZIP2);
        } else {
            throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
        }
    }
    req.setInputSerialization(readSerialization);

    // Output side: results come back in the same CSV dialect.
    CSVOutput csvWriteFormat = new CSVOutput();
    csvWriteFormat.setRecordDelimiter(lineDelimiter);
    csvWriteFormat.setFieldDelimiter(fieldSep);
    csvWriteFormat.setQuoteCharacter(quoting);
    csvWriteFormat.setQuoteEscapeCharacter(escaping);

    OutputSerialization writeSerialization = new OutputSerialization();
    writeSerialization.setCsv(csvWriteFormat);
    req.setOutputSerialization(writeSerialization);
    return req;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) PrestoException(io.prestosql.spi.PrestoException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Example 5 with CSVInput

use of com.amazonaws.services.s3.model.CSVInput in project trino by trinodb.

the class S3SelectCsvRecordReader method buildSelectObjectRequest.

/**
 * Builds the S3 Select request that runs {@code query} against the CSV object at {@code path}.
 *
 * <p>The bucket and key are derived from {@code path}; the input compression type is whatever
 * {@code getCompressionType(path)} returns. Input and output CSV serialization use the same
 * record delimiter, field delimiter ({@code getFieldDelimiter(schema)}) and quote/escape
 * characters ({@code QUOTE_CHAR} / {@code ESCAPE_CHAR} schema properties, {@code null} when
 * unset); the comment marker is set on the input only.
 *
 * @param schema table properties supplying the CSV formatting options
 * @param query  expression to send, submitted with expression type SQL
 * @param path   location of the object to query
 * @return the populated request
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    URI location = path.toUri();
    request.setBucketName(TrinoS3FileSystem.extractBucketName(location));
    request.setKey(TrinoS3FileSystem.keyFromPath(path));
    request.setExpressionType(ExpressionType.SQL);
    request.setExpression(query);

    // Formatting options shared by the input and output definitions.
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    CSVInput csvIn = new CSVInput();
    csvIn.setRecordDelimiter(lineDelimiter);
    csvIn.setFieldDelimiter(delimiter);
    csvIn.setComments(COMMENTS_CHAR_STR);
    csvIn.setQuoteCharacter(quote);
    csvIn.setQuoteEscapeCharacter(escape);

    InputSerialization input = new InputSerialization();
    input.setCompressionType(getCompressionType(path));
    input.setCsv(csvIn);

    CSVOutput csvOut = new CSVOutput();
    csvOut.setRecordDelimiter(lineDelimiter);
    csvOut.setFieldDelimiter(delimiter);
    csvOut.setQuoteCharacter(quote);
    csvOut.setQuoteEscapeCharacter(escape);

    OutputSerialization output = new OutputSerialization();
    output.setCsv(csvOut);

    // Attach both serialization definitions once they are complete.
    request.setInputSerialization(input);
    request.setOutputSerialization(output);
    return request;
}
Also used : SelectObjectContentRequest(com.amazonaws.services.s3.model.SelectObjectContentRequest) InputSerialization(com.amazonaws.services.s3.model.InputSerialization) CSVInput(com.amazonaws.services.s3.model.CSVInput) URI(java.net.URI) OutputSerialization(com.amazonaws.services.s3.model.OutputSerialization) CSVOutput(com.amazonaws.services.s3.model.CSVOutput)

Aggregations

CSVInput (com.amazonaws.services.s3.model.CSVInput)6 CSVOutput (com.amazonaws.services.s3.model.CSVOutput)5 InputSerialization (com.amazonaws.services.s3.model.InputSerialization)5 OutputSerialization (com.amazonaws.services.s3.model.OutputSerialization)5 SelectObjectContentRequest (com.amazonaws.services.s3.model.SelectObjectContentRequest)5 URI (java.net.URI)5 BZip2Codec (org.apache.hadoop.io.compress.BZip2Codec)4 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)4 GzipCodec (org.apache.hadoop.io.compress.GzipCodec)4 PrestoException (com.facebook.presto.spi.PrestoException)2 PrestoException (io.prestosql.spi.PrestoException)2 GreenplumCSV (org.greenplum.pxf.api.model.GreenplumCSV)1