Use of org.apache.arrow.vector.holders.NullableDateMilliHolder in the aws-athena-query-federation project by awslabs.
Class TimestreamRecordHandler, method buildRowWriter.
/**
 * Builds a GeneratedRowWriter with one extractor registered per field of the request's schema.
 * Each extractor reads the scalar value found at the field's position in the Row passed as context
 * and converts it to the holder type that matches the field's Arrow minor type. LIST fields are
 * delegated to buildTimeSeriesExtractor.
 *
 * @param request supplies the constraints used to create the builder and the schema whose fields are mapped.
 * @return the writer produced by the builder once every field has been registered.
 * @throws RuntimeException if a field has a type other than VARCHAR, FLOAT8, BIT, BIGINT, DATEMILLI or LIST.
 */
private GeneratedRowWriter buildRowWriter(ReadRecordsRequest request) {
    GeneratedRowWriter.RowWriterBuilder writerBuilder = GeneratedRowWriter.newBuilder(request.getConstraints());
    int position = 0;
    for (Field column : request.getSchema().getFields()) {
        // Lambdas need an effectively final copy of the column's position within the row.
        final int columnIndex = position;
        position++;
        final String columnName = column.getName();
        switch (Types.getMinorTypeForArrowType(column.getType())) {
            case VARCHAR: {
                VarCharExtractor extractor = (Object context, NullableVarCharHolder holder) -> {
                    holder.isSet = 1;
                    holder.value = ((Row) context).getData().get(columnIndex).getScalarValue();
                };
                writerBuilder.withExtractor(columnName, extractor);
                break;
            }
            case FLOAT8: {
                Float8Extractor extractor = (Object context, NullableFloat8Holder holder) -> {
                    holder.isSet = 1;
                    holder.value = Double.parseDouble(((Row) context).getData().get(columnIndex).getScalarValue());
                };
                writerBuilder.withExtractor(columnName, extractor);
                break;
            }
            case BIT: {
                BitExtractor extractor = (Object context, NullableBitHolder holder) -> {
                    holder.isSet = 1;
                    // Anything other than (case-insensitive) "true" is written as 0.
                    holder.value = Boolean.parseBoolean(((Row) context).getData().get(columnIndex).getScalarValue()) ? 1 : 0;
                };
                writerBuilder.withExtractor(columnName, extractor);
                break;
            }
            case BIGINT: {
                BigIntExtractor extractor = (Object context, NullableBigIntHolder holder) -> {
                    holder.isSet = 1;
                    holder.value = Long.parseLong(((Row) context).getData().get(columnIndex).getScalarValue());
                };
                writerBuilder.withExtractor(columnName, extractor);
                break;
            }
            case DATEMILLI: {
                DateMilliExtractor extractor = (Object context, NullableDateMilliHolder holder) -> {
                    holder.isSet = 1;
                    holder.value = TIMESTAMP_FORMATTER.parse(((Row) context).getData().get(columnIndex).getScalarValue()).getTime();
                };
                writerBuilder.withExtractor(columnName, extractor);
                break;
            }
            case LIST:
                // TODO: Only TimeSeries results are supported at present, although customers may produce
                // LIST type results for other reasons when using VIEWs. This is an acceptable compromise for
                // now: it enables an important TimeStream capability without enabling arbitrary complex types.
                buildTimeSeriesExtractor(writerBuilder, column, columnIndex);
                break;
            default:
                throw new RuntimeException("Unsupported field type[" + column.getType() + "] for field[" + columnName + "]");
        }
    }
    return writerBuilder.build();
}
Use of org.apache.arrow.vector.holders.NullableDateMilliHolder in the aws-athena-query-federation project by awslabs.
Class ElasticsearchTypeUtilsTest, method testField.
/**
 * Extracts every mapped field from a document using the extractor that typeUtils creates for it.
 * For each supported extractor type a matching holder is populated, the holder is asserted to be
 * set, and its value is recorded. Fields whose extractor is of none of the handled types are skipped.
 * @param mapping is the metadata definitions of the document being processed.
 * @param document contains the values to be extracted.
 * @return a map of the field names and their associated values extracted from the document.
 * @throws Exception propagated from the extractor invoked for a field.
 */
private Map<String, Object> testField(Schema mapping, Map<String, Object> document) throws Exception {
    Map<String, Object> extracted = new HashMap<>();
    for (Field field : mapping.getFields()) {
        Extractor extractor = typeUtils.makeExtractor(field);
        final String name = field.getName();
        final String failureMessage = "Could not extract value for: " + name;

        // The checks below are order-sensitive: the first matching extractor type wins.
        if (extractor instanceof VarCharExtractor) {
            NullableVarCharHolder varChar = new NullableVarCharHolder();
            ((VarCharExtractor) extractor).extract(document, varChar);
            assertEquals(failureMessage, 1, varChar.isSet);
            extracted.put(name, varChar.value);
            continue;
        }
        if (extractor instanceof BigIntExtractor) {
            NullableBigIntHolder bigInt = new NullableBigIntHolder();
            ((BigIntExtractor) extractor).extract(document, bigInt);
            assertEquals(failureMessage, 1, bigInt.isSet);
            extracted.put(name, bigInt.value);
            continue;
        }
        if (extractor instanceof IntExtractor) {
            NullableIntHolder integer = new NullableIntHolder();
            ((IntExtractor) extractor).extract(document, integer);
            assertEquals(failureMessage, 1, integer.isSet);
            extracted.put(name, integer.value);
            continue;
        }
        if (extractor instanceof SmallIntExtractor) {
            NullableSmallIntHolder smallInt = new NullableSmallIntHolder();
            ((SmallIntExtractor) extractor).extract(document, smallInt);
            assertEquals(failureMessage, 1, smallInt.isSet);
            extracted.put(name, smallInt.value);
            continue;
        }
        if (extractor instanceof TinyIntExtractor) {
            NullableTinyIntHolder tinyInt = new NullableTinyIntHolder();
            ((TinyIntExtractor) extractor).extract(document, tinyInt);
            assertEquals(failureMessage, 1, tinyInt.isSet);
            extracted.put(name, tinyInt.value);
            continue;
        }
        if (extractor instanceof Float8Extractor) {
            NullableFloat8Holder float8 = new NullableFloat8Holder();
            ((Float8Extractor) extractor).extract(document, float8);
            assertEquals(failureMessage, 1, float8.isSet);
            extracted.put(name, float8.value);
            continue;
        }
        if (extractor instanceof Float4Extractor) {
            NullableFloat4Holder float4 = new NullableFloat4Holder();
            ((Float4Extractor) extractor).extract(document, float4);
            assertEquals(failureMessage, 1, float4.isSet);
            extracted.put(name, float4.value);
            continue;
        }
        if (extractor instanceof DateMilliExtractor) {
            NullableDateMilliHolder dateMilli = new NullableDateMilliHolder();
            ((DateMilliExtractor) extractor).extract(document, dateMilli);
            assertEquals(failureMessage, 1, dateMilli.isSet);
            extracted.put(name, dateMilli.value);
            continue;
        }
        if (extractor instanceof BitExtractor) {
            NullableBitHolder bit = new NullableBitHolder();
            ((BitExtractor) extractor).extract(document, bit);
            assertEquals(failureMessage, 1, bit.isSet);
            extracted.put(name, bit.value);
        }
    }
    return extracted;
}
Use of org.apache.arrow.vector.holders.NullableDateMilliHolder in the aws-athena-query-federation project by awslabs.
Class ExampleRecordHandler, method makeExtractor.
/**
 * Creates an Extractor for the given field. In this example the extractors produce synthetic data:
 * the "year", "month" and "day" fields are read from the supplied rowContext, and every other field
 * is derived from the seed (and, for most types, the negative flag) of the RowContext passed to the
 * extractor at extraction time.
 *
 * @param field the field to create an extractor for; its name and Arrow type select the extractor.
 * @param rowContext supplies the values returned for the "year", "month" and "day" fields.
 * @return an extractor matching the field, or null when the field's type is not handled here.
 */
private Extractor makeExtractor(Field field, RowContext rowContext) {
    Types.MinorType minorType = Types.getMinorTypeForArrowType(field.getType());
    // year, month and day need to match the split otherwise filtering will break in unexpected ways.
    switch (field.getName()) {
        case "year":
            return (IntExtractor) (Object context, NullableIntHolder holder) -> {
                holder.isSet = 1;
                holder.value = rowContext.getYear();
            };
        case "month":
            return (IntExtractor) (Object context, NullableIntHolder holder) -> {
                holder.isSet = 1;
                holder.value = rowContext.getMonth();
            };
        case "day":
            return (IntExtractor) (Object context, NullableIntHolder holder) -> {
                holder.isSet = 1;
                holder.value = rowContext.getDay();
            };
        default:
            break;
    }

    switch (minorType) {
        case INT:
            return (IntExtractor) (Object context, NullableIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = row.seed * (row.negative ? -1 : 1);
            };
        case DATEMILLI:
            return (DateMilliExtractor) (Object context, NullableDateMilliHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = row.seed * (row.negative ? -1 : 1);
            };
        case DATEDAY:
            return (DateDayExtractor) (Object context, NullableDateDayHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = row.seed * (row.negative ? -1 : 1);
            };
        case TINYINT:
            return (TinyIntExtractor) (Object context, NullableTinyIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = (byte) ((row.seed % 4) * (row.negative ? -1 : 1));
            };
        case SMALLINT:
            return (SmallIntExtractor) (Object context, NullableSmallIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = (short) ((row.seed % 4) * (row.negative ? -1 : 1));
            };
        case FLOAT4:
            return (Float4Extractor) (Object context, NullableFloat4Holder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = ((float) row.seed) * 1.1f * (row.negative ? -1f : 1f);
            };
        case FLOAT8:
            return (Float8Extractor) (Object context, NullableFloat8Holder holder) -> {
                holder.isSet = 1;
                // Unlike the other numeric types, the negative flag is not applied here.
                holder.value = ((double) ((RowContext) context).seed) * 1.1D;
            };
        case DECIMAL:
            return (DecimalExtractor) (Object context, NullableDecimalHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                double scaledSeed = row.seed * 1.1D * (row.negative ? -1d : 1d);
                BigDecimal unscaled = new BigDecimal(scaledSeed);
                // Round to the scale declared by the field's decimal type.
                int scale = ((ArrowType.Decimal) field.getType()).getScale();
                holder.value = unscaled.setScale(scale, RoundingMode.HALF_UP);
            };
        case BIT:
            return (BitExtractor) (Object context, NullableBitHolder holder) -> {
                holder.isSet = 1;
                holder.value = ((RowContext) context).seed % 2;
            };
        case BIGINT:
            return (BigIntExtractor) (Object context, NullableBigIntHolder holder) -> {
                holder.isSet = 1;
                RowContext row = (RowContext) context;
                holder.value = row.seed * 1L * (row.negative ? -1 : 1);
            };
        case VARCHAR:
            return (VarCharExtractor) (Object context, NullableVarCharHolder holder) -> {
                holder.isSet = 1;
                holder.value = "VarChar" + ((RowContext) context).seed;
            };
        case VARBINARY:
            return (VarBinaryExtractor) (Object context, NullableVarBinaryHolder holder) -> {
                holder.isSet = 1;
                String text = "VarChar" + ((RowContext) context).seed;
                holder.value = text.getBytes(Charsets.UTF_8);
            };
        default:
            return null;
    }
}
Use of org.apache.arrow.vector.holders.NullableDateMilliHolder in the aws-athena-query-federation project by awslabs.
Class ElasticsearchTypeUtils, method makeDateMilliExtractor.
/**
 * Create a DATEMILLI field extractor to extract a date value from a Document. The Document value can be returned
 * as a numeric value, a String, or a List. For the latter, only the first element is extracted; an empty List is
 * treated as a missing value (the holder is marked as not set).
 * For dates extracted as a string, the ISO_ZONED_DATE_TIME format will be attempted first, followed by the
 * ISO_LOCAL_DATE_TIME format if the previous one fails. Examples of formats that will work:
 * 1) "2020-05-18T10:15:30.123456789"
 * 2) "2020-05-15T06:50:01.123Z"
 * 3) "2020-05-15T06:49:30.123-05:00".
 * Strings are parsed the same way whether they appear directly or as the first element of a List. A string that
 * matches neither format is logged as a warning and the holder is marked as not set.
 * Numeric dates values should be a long numeric value representing epoch milliseconds (e.g. 1589525370001)
 * Nanoseconds will be rounded to the nearest millisecond.
 * Any other value type (including a missing field) marks the holder as not set.
 * @param field is used to determine which extractor to generate based on the field type.
 * @return a field extractor.
 */
private Extractor makeDateMilliExtractor(Field field) {
    return (DateMilliExtractor) (Object context, NullableDateMilliHolder dst) -> {
        Object fieldValue = ((Map<?, ?>) context).get(field.getName());
        if (fieldValue instanceof List) {
            // Unwrap exactly one level: a nested List as first element is still treated as unsupported below.
            List<?> values = (List<?>) fieldValue;
            fieldValue = values.isEmpty() ? null : values.get(0);
        }

        if (fieldValue instanceof String) {
            try {
                dst.value = parseIsoDateTimeToEpochMillis((String) fieldValue);
                dst.isSet = 1;
            } catch (DateTimeParseException error) {
                logger.warn("Error parsing localDateTime: {}.", error.getMessage());
                dst.isSet = 0;
            }
        } else if (fieldValue instanceof Number) {
            dst.value = ((Number) fieldValue).longValue();
            dst.isSet = 1;
        } else {
            dst.isSet = 0;
        }
    };
}

/**
 * Converts an ISO date-time string to epoch milliseconds. The zoned/offset form is tried first; if that fails the
 * text is parsed as a local date-time and interpreted as UTC. No override zone is applied to the zoned formatter
 * so that an offset present in the text always determines the instant.
 * @param text the date-time text to parse.
 * @return epoch milliseconds, with the nanosecond part rounded to the nearest millisecond.
 * @throws DateTimeParseException if the text matches neither format.
 */
private long parseIsoDateTimeToEpochMillis(String text) {
    long epochSeconds;
    int nanos;
    try {
        ZonedDateTime zonedDateTime = ZonedDateTime.parse(text, DateTimeFormatter.ISO_ZONED_DATE_TIME.withResolverStyle(ResolverStyle.SMART));
        epochSeconds = zonedDateTime.toEpochSecond();
        nanos = zonedDateTime.getNano();
    } catch (DateTimeParseException notZoned) {
        LocalDateTime localDateTime = LocalDateTime.parse(text, DateTimeFormatter.ISO_LOCAL_DATE_TIME.withResolverStyle(ResolverStyle.SMART));
        epochSeconds = localDateTime.toEpochSecond(ZoneOffset.UTC);
        nanos = localDateTime.getNano();
    }
    return epochSeconds * 1000 + Math.round(nanos / 1_000_000.0);
}
Aggregations