Search in sources :

Example 1 with NullableDateMilliHolder

use of org.apache.arrow.vector.holders.NullableDateMilliHolder in project aws-athena-query-federation by awslabs.

the class TimestreamRecordHandler method buildRowWriter.

/**
 * Builds a GeneratedRowWriter with one extractor registered per field of the requested schema.
 *
 * <p>Every extractor reads the scalar value found at the field's position in the Row handed in as context and
 * converts it to the field's Arrow type. When that scalar value is null the cell is written as null (isSet = 0)
 * rather than being parsed. LIST fields are delegated to buildTimeSeriesExtractor.
 *
 * @param request supplies the schema whose fields are projected and the constraints given to the writer builder.
 * @return a row writer holding an extractor for every field in the schema.
 * @throws RuntimeException if a field's type is not VARCHAR, FLOAT8, BIT, BIGINT, DATEMILLI or LIST.
 */
private GeneratedRowWriter buildRowWriter(ReadRecordsRequest request) {
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(request.getConstraints());
    int fieldNum = 0;
    for (Field nextField : request.getSchema().getFields()) {
        // Lambdas can only capture effectively final locals, so freeze this field's position.
        final int curFieldNum = fieldNum++;
        switch (Types.getMinorTypeForArrowType(nextField.getType())) {
            case VARCHAR:
                builder.withExtractor(nextField.getName(), (VarCharExtractor) (Object context, NullableVarCharHolder holder) -> {
                    String scalar = scalarValueAt(context, curFieldNum);
                    holder.isSet = scalar == null ? 0 : 1;
                    holder.value = scalar;
                });
                break;
            case FLOAT8:
                builder.withExtractor(nextField.getName(), (Float8Extractor) (Object context, NullableFloat8Holder holder) -> {
                    String scalar = scalarValueAt(context, curFieldNum);
                    holder.isSet = scalar == null ? 0 : 1;
                    if (scalar != null) {
                        holder.value = Double.parseDouble(scalar);
                    }
                });
                break;
            case BIT:
                builder.withExtractor(nextField.getName(), (BitExtractor) (Object context, NullableBitHolder holder) -> {
                    String scalar = scalarValueAt(context, curFieldNum);
                    holder.isSet = scalar == null ? 0 : 1;
                    if (scalar != null) {
                        holder.value = Boolean.parseBoolean(scalar) ? 1 : 0;
                    }
                });
                break;
            case BIGINT:
                builder.withExtractor(nextField.getName(), (BigIntExtractor) (Object context, NullableBigIntHolder holder) -> {
                    String scalar = scalarValueAt(context, curFieldNum);
                    holder.isSet = scalar == null ? 0 : 1;
                    if (scalar != null) {
                        holder.value = Long.parseLong(scalar);
                    }
                });
                break;
            case DATEMILLI:
                builder.withExtractor(nextField.getName(), (DateMilliExtractor) (Object context, NullableDateMilliHolder holder) -> {
                    String scalar = scalarValueAt(context, curFieldNum);
                    holder.isSet = scalar == null ? 0 : 1;
                    if (scalar != null) {
                        holder.value = TIMESTAMP_FORMATTER.parse(scalar).getTime();
                    }
                });
                break;
            case LIST:
                // TODO: This presently only supports TimeSeries results but it is possible that customers may
                // generate LIST type results for other reasons when using VIEWs. For now this seems like an OK
                // compromise since it enables an important capability of TimeStream even if it doesn't enable arbitrary
                // complex types.
                buildTimeSeriesExtractor(builder, nextField, curFieldNum);
                break;
            default:
                throw new RuntimeException("Unsupported field type[" + nextField.getType() + "] for field[" + nextField.getName() + "]");
        }
    }
    return builder.build();
}

/**
 * Returns the scalar value stored at the given position of the Row passed as extraction context.
 *
 * @param context the extraction context; cast to Row.
 * @param fieldNum zero-based position of the datum within the row.
 * @return the datum's scalar value, which may be null.
 */
private static String scalarValueAt(Object context, int fieldNum) {
    return ((Row) context).getData().get(fieldNum).getScalarValue();
}
Also used : NullableBitHolder(org.apache.arrow.vector.holders.NullableBitHolder) Field(org.apache.arrow.vector.types.pojo.Field) NullableVarCharHolder(com.amazonaws.athena.connector.lambda.data.writers.holders.NullableVarCharHolder) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) NullableDateMilliHolder(org.apache.arrow.vector.holders.NullableDateMilliHolder) NullableFloat8Holder(org.apache.arrow.vector.holders.NullableFloat8Holder) Row(com.amazonaws.services.timestreamquery.model.Row) TimeSeriesDataPoint(com.amazonaws.services.timestreamquery.model.TimeSeriesDataPoint) NullableBigIntHolder(org.apache.arrow.vector.holders.NullableBigIntHolder)

Example 2 with NullableDateMilliHolder

use of org.apache.arrow.vector.holders.NullableDateMilliHolder in project aws-athena-query-federation by awslabs.

the class ElasticsearchTypeUtilsTest method testField.

/**
 * Builds the extractor for every field in the mapping, runs it against the document and collects what it
 * produced. The test fails if an extractor leaves its holder unset. Fields whose extractor is of a type not
 * handled here are skipped.
 * @param mapping is the metadata definitions of the document being processed.
 * @param document contains the values to be extracted.
 * @return a map of the field names and their associated values extracted from the document.
 * @throws Exception if an extractor throws while reading the document.
 */
private Map<String, Object> testField(Schema mapping, Map<String, Object> document) throws Exception {
    Map<String, Object> extractedByName = new HashMap<>();
    for (Field field : mapping.getFields()) {
        Extractor extractor = typeUtils.makeExtractor(field);
        // Each branch runs the matching extractor and records the holder's flag and (boxed) value.
        int setFlag;
        Object extracted;
        if (extractor instanceof VarCharExtractor) {
            NullableVarCharHolder varCharHolder = new NullableVarCharHolder();
            ((VarCharExtractor) extractor).extract(document, varCharHolder);
            setFlag = varCharHolder.isSet;
            extracted = varCharHolder.value;
        } else if (extractor instanceof BigIntExtractor) {
            NullableBigIntHolder bigIntHolder = new NullableBigIntHolder();
            ((BigIntExtractor) extractor).extract(document, bigIntHolder);
            setFlag = bigIntHolder.isSet;
            extracted = bigIntHolder.value;
        } else if (extractor instanceof IntExtractor) {
            NullableIntHolder intHolder = new NullableIntHolder();
            ((IntExtractor) extractor).extract(document, intHolder);
            setFlag = intHolder.isSet;
            extracted = intHolder.value;
        } else if (extractor instanceof SmallIntExtractor) {
            NullableSmallIntHolder smallIntHolder = new NullableSmallIntHolder();
            ((SmallIntExtractor) extractor).extract(document, smallIntHolder);
            setFlag = smallIntHolder.isSet;
            extracted = smallIntHolder.value;
        } else if (extractor instanceof TinyIntExtractor) {
            NullableTinyIntHolder tinyIntHolder = new NullableTinyIntHolder();
            ((TinyIntExtractor) extractor).extract(document, tinyIntHolder);
            setFlag = tinyIntHolder.isSet;
            extracted = tinyIntHolder.value;
        } else if (extractor instanceof Float8Extractor) {
            NullableFloat8Holder float8Holder = new NullableFloat8Holder();
            ((Float8Extractor) extractor).extract(document, float8Holder);
            setFlag = float8Holder.isSet;
            extracted = float8Holder.value;
        } else if (extractor instanceof Float4Extractor) {
            NullableFloat4Holder float4Holder = new NullableFloat4Holder();
            ((Float4Extractor) extractor).extract(document, float4Holder);
            setFlag = float4Holder.isSet;
            extracted = float4Holder.value;
        } else if (extractor instanceof DateMilliExtractor) {
            NullableDateMilliHolder dateMilliHolder = new NullableDateMilliHolder();
            ((DateMilliExtractor) extractor).extract(document, dateMilliHolder);
            setFlag = dateMilliHolder.isSet;
            extracted = dateMilliHolder.value;
        } else if (extractor instanceof BitExtractor) {
            NullableBitHolder bitHolder = new NullableBitHolder();
            ((BitExtractor) extractor).extract(document, bitHolder);
            setFlag = bitHolder.isSet;
            extracted = bitHolder.value;
        } else {
            // No known extractor type: nothing to verify or record for this field.
            continue;
        }
        String fieldName = field.getName();
        assertEquals("Could not extract value for: " + fieldName, 1, setFlag);
        extractedByName.put(fieldName, extracted);
    }
    return extractedByName;
}
Also used : BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) IntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) NullableDateMilliHolder(org.apache.arrow.vector.holders.NullableDateMilliHolder) HashMap(java.util.HashMap) NullableSmallIntHolder(org.apache.arrow.vector.holders.NullableSmallIntHolder) NullableFloat8Holder(org.apache.arrow.vector.holders.NullableFloat8Holder) NullableTinyIntHolder(org.apache.arrow.vector.holders.NullableTinyIntHolder) Float8Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor) NullableBigIntHolder(org.apache.arrow.vector.holders.NullableBigIntHolder) NullableBitHolder(org.apache.arrow.vector.holders.NullableBitHolder) Field(org.apache.arrow.vector.types.pojo.Field) NullableVarCharHolder(com.amazonaws.athena.connector.lambda.data.writers.holders.NullableVarCharHolder) NullableFloat4Holder(org.apache.arrow.vector.holders.NullableFloat4Holder) BitExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor) NullableIntHolder(org.apache.arrow.vector.holders.NullableIntHolder) Float4Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) 
BitExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor) Float8Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) VarCharExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor) IntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor) Float4Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor) VarCharExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor)

Example 3 with NullableDateMilliHolder

use of org.apache.arrow.vector.holders.NullableDateMilliHolder in project aws-athena-query-federation by awslabs.

the class ExampleRecordHandler method makeExtractor.

/**
 * Creates an Extractor for the given field. In this example the extractors only fabricate data: the year, month
 * and day fields echo the supplied rowContext, and every other supported type derives its value from the seed
 * (and, for most types, the negative flag) of the RowContext passed in at extraction time.
 *
 * @return the extractor for the field, or null when the field's type is not handled here.
 */
private Extractor makeExtractor(Field field, RowContext rowContext) {
    Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
    // year, month and day are served from the supplied rowContext: they need to match the split, otherwise
    // filtering will break in unexpected ways.
    switch (field.getName()) {
        case "year":
            return (IntExtractor) (Object ctx, NullableIntHolder holder) -> {
                holder.isSet = 1;
                holder.value = rowContext.getYear();
            };
        case "month":
            return (IntExtractor) (Object ctx, NullableIntHolder holder) -> {
                holder.isSet = 1;
                holder.value = rowContext.getMonth();
            };
        case "day":
            return (IntExtractor) (Object ctx, NullableIntHolder holder) -> {
                holder.isSet = 1;
                holder.value = rowContext.getDay();
            };
        default:
            break;
    }
    switch (fieldType) {
        case INT:
            return (IntExtractor) (Object ctx, NullableIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                int sign = row.negative ? -1 : 1;
                holder.value = row.seed * sign;
            };
        case DATEMILLI:
            return (DateMilliExtractor) (Object ctx, NullableDateMilliHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                int sign = row.negative ? -1 : 1;
                holder.value = row.seed * sign;
            };
        case DATEDAY:
            return (DateDayExtractor) (Object ctx, NullableDateDayHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                int sign = row.negative ? -1 : 1;
                holder.value = row.seed * sign;
            };
        case TINYINT:
            return (TinyIntExtractor) (Object ctx, NullableTinyIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                int sign = row.negative ? -1 : 1;
                holder.value = (byte) ((row.seed % 4) * sign);
            };
        case SMALLINT:
            return (SmallIntExtractor) (Object ctx, NullableSmallIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                int sign = row.negative ? -1 : 1;
                holder.value = (short) ((row.seed % 4) * sign);
            };
        case FLOAT4:
            return (Float4Extractor) (Object ctx, NullableFloat4Holder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                float sign = row.negative ? -1f : 1f;
                holder.value = ((float) row.seed) * 1.1f * sign;
            };
        case FLOAT8:
            // Unlike the other numeric types, the negative flag is not applied here.
            return (Float8Extractor) (Object ctx, NullableFloat8Holder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                holder.value = ((double) row.seed) * 1.1D;
            };
        case DECIMAL:
            return (DecimalExtractor) (Object ctx, NullableDecimalHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                double sign = row.negative ? -1d : 1d;
                double unscaled = row.seed * 1.1D * sign;
                // Round to the scale declared on the field's decimal type.
                int scale = ((ArrowType.Decimal) field.getType()).getScale();
                holder.value = new BigDecimal(unscaled).setScale(scale, RoundingMode.HALF_UP);
            };
        case BIT:
            return (BitExtractor) (Object ctx, NullableBitHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                holder.value = row.seed % 2;
            };
        case BIGINT:
            return (BigIntExtractor) (Object ctx, NullableBigIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                int sign = row.negative ? -1 : 1;
                holder.value = row.seed * 1L * sign;
            };
        case VARCHAR:
            return (VarCharExtractor) (Object ctx, NullableVarCharHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                holder.value = "VarChar" + row.seed;
            };
        case VARBINARY:
            return (VarBinaryExtractor) (Object ctx, NullableVarBinaryHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) ctx;
                String text = "VarChar" + row.seed;
                holder.value = text.getBytes(Charsets.UTF_8);
            };
        default:
            return null;
    }
}
Also used : Types(org.apache.arrow.vector.types.Types) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) IntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) NullableDateMilliHolder(org.apache.arrow.vector.holders.NullableDateMilliHolder) NullableFloat8Holder(org.apache.arrow.vector.holders.NullableFloat8Holder) NullableTinyIntHolder(org.apache.arrow.vector.holders.NullableTinyIntHolder) Float8Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor) DecimalExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DecimalExtractor) NullableVarBinaryHolder(com.amazonaws.athena.connector.lambda.data.writers.holders.NullableVarBinaryHolder) NullableVarCharHolder(com.amazonaws.athena.connector.lambda.data.writers.holders.NullableVarCharHolder) BigDecimal(java.math.BigDecimal) NullableIntHolder(org.apache.arrow.vector.holders.NullableIntHolder) DateDayExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateDayExtractor) Float4Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) NullableDateDayHolder(org.apache.arrow.vector.holders.NullableDateDayHolder) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) NullableSmallIntHolder(org.apache.arrow.vector.holders.NullableSmallIntHolder) 
NullableDecimalHolder(com.amazonaws.athena.connector.lambda.data.writers.holders.NullableDecimalHolder) BigDecimal(java.math.BigDecimal) NullableBigIntHolder(org.apache.arrow.vector.holders.NullableBigIntHolder) NullableBitHolder(org.apache.arrow.vector.holders.NullableBitHolder) NullableFloat4Holder(org.apache.arrow.vector.holders.NullableFloat4Holder) BitExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor) VarBinaryExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarBinaryExtractor) VarCharExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor)

Example 4 with NullableDateMilliHolder

use of org.apache.arrow.vector.holders.NullableDateMilliHolder in project aws-athena-query-federation by awslabs.

the class ElasticsearchTypeUtils method makeDateMilliExtractor.

/**
 * Create a DATEMILLI field extractor to extract a date value from a Document. The Document value can be returned
 * as a numeric value, a String, or a List. For the latter, extract the first element only; an empty List is
 * written as null.
 * For dates extracted as a string, the ISO_ZONED_DATE_TIME format will be attempted first, followed by the
 * ISO_LOCAL_DATE_TIME format if the previous one fails. Examples of formats that will work:
 * 1) "2020-05-18T10:15:30.123456789"
 * 2) "2020-05-15T06:50:01.123Z"
 * 3) "2020-05-15T06:49:30.123-05:00".
 * Strings matching neither format are logged and written as null.
 * Numeric dates values should be a long numeric value representing epoch milliseconds (e.g. 1589525370001)
 * Nanoseconds will be rounded to the nearest millisecond.
 * Values of any other type are written as null.
 * @param field is used to determine which extractor to generate based on the field type.
 * @return a field extractor.
 */
private Extractor makeDateMilliExtractor(Field field) {
    // The formatters do not depend on the document, so build them once per extractor instead of per extraction.
    final DateTimeFormatter localFormat = DateTimeFormatter.ISO_LOCAL_DATE_TIME.withResolverStyle(ResolverStyle.SMART);
    final DateTimeFormatter scalarZonedFormat = DateTimeFormatter.ISO_ZONED_DATE_TIME.withResolverStyle(ResolverStyle.SMART);
    // NOTE(review): list elements are parsed with a UTC override zone while plain strings are not. The difference
    // is preserved here as found; confirm whether the two paths are really meant to differ.
    final DateTimeFormatter listZonedFormat = DateTimeFormatter.ISO_ZONED_DATE_TIME.withZone(ZoneId.of("UTC")).withResolverStyle(ResolverStyle.SMART);
    return (DateMilliExtractor) (Object context, NullableDateMilliHolder dst) -> {
        Object fieldValue = ((Map<?, ?>) context).get(field.getName());
        DateTimeFormatter zonedFormat = scalarZonedFormat;
        if (fieldValue instanceof List) {
            // Only the first element is used; an empty list has nothing to extract and falls through to null.
            List<?> values = (List<?>) fieldValue;
            fieldValue = values.isEmpty() ? null : values.get(0);
            zonedFormat = listZonedFormat;
        }
        if (fieldValue instanceof String) {
            try {
                dst.value = parseEpochMillis((String) fieldValue, zonedFormat, localFormat);
                dst.isSet = 1;
            } catch (DateTimeParseException error) {
                logger.warn("Error parsing localDateTime: {}.", error.getMessage());
                dst.isSet = 0;
            }
        } else if (fieldValue instanceof Number) {
            dst.value = ((Number) fieldValue).longValue();
            dst.isSet = 1;
        } else {
            dst.isSet = 0;
        }
    };
}

/**
 * Parses a date-time string into epoch milliseconds, trying the zoned format first and the local format second.
 * A local date-time is interpreted at the UTC offset.
 * @param text is the date-time string to parse.
 * @param zonedFormat is the formatter attempted first; must produce a zoned date-time.
 * @param localFormat is the fallback formatter for strings without zone information.
 * @return epoch milliseconds, with the nanosecond part rounded to the nearest millisecond.
 * @throws DateTimeParseException if the string matches neither format.
 */
private static long parseEpochMillis(String text, DateTimeFormatter zonedFormat, DateTimeFormatter localFormat) {
    long epochSeconds;
    double nanoSeconds;
    try {
        ZonedDateTime zonedDateTime = ZonedDateTime.parse(text, zonedFormat);
        epochSeconds = zonedDateTime.toEpochSecond();
        nanoSeconds = zonedDateTime.getNano();
    } catch (DateTimeParseException error) {
        LocalDateTime localDateTime = LocalDateTime.parse(text, localFormat);
        epochSeconds = localDateTime.toEpochSecond(ZoneOffset.UTC);
        nanoSeconds = localDateTime.getNano();
    }
    return epochSeconds * 1000 + Math.round(nanoSeconds / 1000000);
}
Also used : LocalDateTime(java.time.LocalDateTime) DateTimeParseException(java.time.format.DateTimeParseException) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) NullableDateMilliHolder(org.apache.arrow.vector.holders.NullableDateMilliHolder) ZonedDateTime(java.time.ZonedDateTime) List(java.util.List) Map(java.util.Map)

Aggregations

NullableDateMilliHolder (org.apache.arrow.vector.holders.NullableDateMilliHolder)4 DateMilliExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor)3 NullableVarCharHolder (com.amazonaws.athena.connector.lambda.data.writers.holders.NullableVarCharHolder)3 NullableBigIntHolder (org.apache.arrow.vector.holders.NullableBigIntHolder)3 NullableBitHolder (org.apache.arrow.vector.holders.NullableBitHolder)3 NullableFloat8Holder (org.apache.arrow.vector.holders.NullableFloat8Holder)3 BigIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor)2 BitExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor)2 Float4Extractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor)2 Float8Extractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor)2 IntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor)2 SmallIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor)2 TinyIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor)2 VarCharExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor)2 NullableFloat4Holder (org.apache.arrow.vector.holders.NullableFloat4Holder)2 NullableIntHolder (org.apache.arrow.vector.holders.NullableIntHolder)2 NullableSmallIntHolder (org.apache.arrow.vector.holders.NullableSmallIntHolder)2 NullableTinyIntHolder (org.apache.arrow.vector.holders.NullableTinyIntHolder)2 Field (org.apache.arrow.vector.types.pojo.Field)2 GeneratedRowWriter (com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter)1