Use of org.apache.arrow.util.VisibleForTesting in project aws-athena-query-federation by awslabs: class ElasticsearchSchemaUtils, method mappingsEqual.
/**
 * Checks that two Schema objects are equal using the following criteria:
 * 1) The Schemas must have the same number of fields.
 * 2) The corresponding fields in the two Schema objects must also be the same irrespective of ordering within
 * the Schema object using the following criteria:
 * a) The fields' names must match.
 * b) The fields' Arrow types must match.
 * c) The fields' children lists (used for complex fields, e.g. LIST and STRUCT) must match irrespective of
 * field ordering within the lists.
 * d) The fields' metadata maps must match. Currently that's only applicable for scaled_float data types that
 * use the field's metadata map to store the scaling factor associated with the data type.
 * @param mapping1 is a mapping to be compared.
 * @param mapping2 is a mapping to be compared.
 * @return true if the mappings are equal by the criteria above, false otherwise.
 */
@VisibleForTesting
protected static final boolean mappingsEqual(Schema mapping1, Schema mapping2) {
    logger.info("mappingsEqual - Enter - Mapping1: {}, Mapping2: {}", mapping1, mapping2);

    // Schemas must have the same number of elements.
    if (mapping1.getFields().size() != mapping2.getFields().size()) {
        logger.warn("Mappings are different sizes - Mapping1: {}, Mapping2: {}",
                mapping1.getFields().size(), mapping2.getFields().size());
        return false;
    }

    // Mappings must have the same fields (irrespective of internal ordering).
    for (Field field1 : mapping1.getFields()) {
        // Look up the counterpart by name; findField returns null when absent.
        Field field2 = mapping2.findField(field1.getName());

        // Corresponding fields must have the same Arrow types or the Schemas are deemed not equal.
        // BUG FIX: the original used reference identity (!=) to compare ArrowTypes. Parameterized
        // types (e.g. Decimal with precision/scale, Timestamp with a zone) are distinct object
        // instances even when logically equal, so identity comparison produced false negatives.
        // ArrowType implements value-based equals(), which is the correct comparison here.
        if (field2 == null || !field1.getType().equals(field2.getType())) {
            logger.warn("Fields' types do not match - Field1: {}, Field2: {}",
                    field1.getType(), field2 == null ? "null" : field2.getType());
            return false;
        }

        logger.info("Field1 Name: {}, Field1 Type: {}, Field1 Metadata: {}",
                field1.getName(), field1.getType(), field1.getMetadata());
        logger.info("Field2 Name: {}, Field2 Type: {}, Field2 Metadata: {}",
                field2.getName(), field2.getType(), field2.getMetadata());

        // The corresponding fields' children and metadata maps must also match or the Schemas
        // are deemed not equal.
        if (!childrenEqual(field1.getChildren(), field2.getChildren())
                || !field1.getMetadata().equals(field2.getMetadata())) {
            return false;
        }
    }

    return true;
}
Use of org.apache.arrow.util.VisibleForTesting in project aws-athena-query-federation by awslabs: class BlockUtils, method writeStruct.
/**
 * Writes a single Struct value into an Apache Arrow batch.
 *
 * @param allocator The BlockAllocator which can be used to generate Apache Arrow Buffers for types
 * which require conversion to an Arrow Buffer before they can be written using the FieldWriter.
 * @param writer The FieldWriter for the Struct field we'd like to write into.
 * @param field The Schema details of the Struct Field we are writing into.
 * @param pos The position (row) in the Apache Arrow batch we are writing to.
 * @param value The value we'd like to write as a struct.
 * @param resolver The field resolver that can be used to extract individual Struct fields from the value.
 */
@VisibleForTesting
protected static void writeStruct(BufferAllocator allocator, StructWriter writer, Field field, int pos, Object value, FieldResolver resolver) {
    // Null cells are expected to have been short-circuited by the caller, so there is nothing to do.
    if (value == null) {
        return;
    }

    // start()/end() delimit the variable-length struct value in the Arrow representation.
    writer.start();
    for (Field child : field.getChildren()) {
        // Pull the raw value for this child out of the composite object, then dispatch on the
        // child's minor type: complex children recurse, scalars go through writeStructValue.
        Object extracted = resolver.getFieldValue(child, value);
        switch (Types.getMinorTypeForArrowType(child.getType())) {
            case LIST:
                writeList(allocator, (FieldWriter) writer.list(child.getName()), child, pos, ((List) extracted), resolver);
                break;
            case STRUCT:
                writeStruct(allocator, writer.struct(child.getName()), child, pos, extracted, resolver);
                break;
            default:
                writeStructValue(writer, child, allocator, extracted);
                break;
        }
    }
    writer.end();
}
Use of org.apache.arrow.util.VisibleForTesting in project aws-athena-query-federation by awslabs: class BlockUtils, method writeMap.
/**
 * Writes a single Map value into an Apache Arrow batch.
 *
 * @param allocator The BlockAllocator which can be used to generate Apache Arrow Buffers for types
 * which require conversion to an Arrow Buffer before they can be written using the FieldWriter.
 * @param writer The FieldWriter for the Map field we'd like to write into.
 * @param field The Schema details of the Map Field we are writing into.
 * @param pos The position (row) in the Apache Arrow batch we are writing to.
 * @param value The value we'd like to write as a Map.
 * @param resolver The field resolver that can be used to extract individual entries from the value.
 * @throws IllegalStateException if the field's children do not form the canonical Arrow Map layout
 * (a single "entries" struct child holding exactly a "key" field and a "value" field).
 */
@VisibleForTesting
protected static void writeMap(BufferAllocator allocator, UnionMapWriter writer, Field field, int pos, Object value, FieldResolver resolver) {
    // Null cells are expected to have been short-circuited by the caller, so there is nothing to do.
    if (value == null) {
        return;
    }

    // startMap() marks the beginning of the variable-length map value.
    // NOTE(review): this method opens with startMap() but closes with endEntry() — confirm this
    // pairing against the UnionMapWriter contract before relying on it for multi-entry maps.
    writer.startMap();

    // Validate the canonical Arrow Map shape: one struct child named ENTRIES...
    List<Field> children = field.getChildren();
    if (children.size() != 1) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + field);
    }
    Field keyValueStructField = children.get(0);
    if (!ENTRIES.equals(keyValueStructField.getName()) || !(keyValueStructField.getType() instanceof ArrowType.Struct)) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + field);
    }

    // ...which itself holds exactly two children named KEY and VALUE, in that order.
    List<Field> keyValueChildren = keyValueStructField.getChildren();
    if (keyValueChildren.size() != 2) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + field);
    }
    Field keyField = keyValueChildren.get(0);
    Field valueField = keyValueChildren.get(1);
    if (!KEY.equals(keyField.getName()) || !VALUE.equals(valueField.getName())) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + field);
    }

    // Write the key and the value for this entry, dispatching on each side's minor type:
    // complex children recurse, scalars go through writeMapValue.
    for (Field entryChild : keyValueChildren) {
        Object extracted = resolver.getFieldValue(entryChild, value);
        switch (Types.getMinorTypeForArrowType(entryChild.getType())) {
            case LIST:
                writeList(allocator, (FieldWriter) writer.list(entryChild.getName()), entryChild, pos, ((List) extracted), resolver);
                break;
            case STRUCT:
                writeStruct(allocator, writer.struct(entryChild.getName()), entryChild, pos, extracted, resolver);
                break;
            default:
                writeMapValue(writer, entryChild, allocator, extracted);
                break;
        }
    }
    writer.endEntry();
}
Use of org.apache.arrow.util.VisibleForTesting in project aws-athena-query-federation by awslabs: class BlockUtils, method writeList.
/**
 * Used to write a List value.
 *
 * @param allocator The BlockAllocator which can be used to generate Apache Arrow Buffers for types
 * which require conversion to an Arrow Buffer before they can be written using the FieldWriter.
 * @param writer The FieldWriter for the List field we'd like to write into.
 * @param field The Schema details of the List Field we are writing into.
 * @param pos The position (row) in the Apache Arrow batch we are writing to.
 * @param value An iterator to the collection of values we want to write into the row.
 * @param resolver The field resolver that can be used to extract individual values from the value iterator.
 */
@VisibleForTesting
protected static void writeList(BufferAllocator allocator, FieldWriter writer, Field field, int pos, Iterable value, FieldResolver resolver) {
    // Null cells are expected to have been handled earlier so this is a no-op.
    if (value == null) {
        return;
    }

    // Apache Arrow List types have a single 'special' child field which gives us the concrete type
    // of the values stored in the list.
    Field child = null;
    if (field.getChildren() != null && !field.getChildren().isEmpty()) {
        child = field.getChildren().get(0);
    }
    // NOTE(review): if 'child' is still null here and 'value' contains a non-null element,
    // child.getType() below throws NullPointerException. That matches the original behavior;
    // a schema with a value-bearing list but no child type is malformed upstream.

    // Mark the beginning of the list, this is essentially how Apache Arrow handles the variable
    // length nature of lists.
    writer.startList();
    for (Object val : value) {
        // Null elements are skipped; Arrow records them implicitly via the validity buffer.
        if (val == null) {
            continue;
        }
        // Dispatch on the child's minor type: nested complex values recurse, scalars go
        // through writeListValue.
        switch (Types.getMinorTypeForArrowType(child.getType())) {
            case LIST:
                // FIX: the original wrapped this call in try { ... } catch (Exception ex) { throw ex; },
                // a no-op catch-and-rethrow that only obscured the call. Removed; behavior is identical.
                writeList(allocator, (FieldWriter) writer.list(), child, pos, ((List) val), resolver);
                break;
            case STRUCT:
                writeStruct(allocator, writer.struct(), child, pos, val, resolver);
                break;
            default:
                writeListValue(writer, child.getType(), allocator, val);
                break;
        }
    }
    writer.endList();
}
Use of org.apache.arrow.util.VisibleForTesting in project aws-athena-query-federation by awslabs: class BlockUtils, method writeStructValue.
/**
 * Used to write a value into a specific child field within a Struct. Multiple calls to this method per-cell are
 * expected in order to write to all N fields of a Struct.
 *
 * @param writer The FieldWriter (already positioned at the row and list entry number) that we want to write into.
 * @param field The child field we are attempting to write into.
 * @param allocator The BlockAllocator that can be used for allocating Arrow Buffers for fields which require conversion
 * to Arrow Buff before being written.
 * @param value The value to write.
 * @throws RuntimeException wrapping any failure, with the field name and offending value in the message.
 * @note This method and its List complement violate the DRY mantra because ListWriter and StructWriter don't share
 * a meaningful ancestor despite having identical methods. This requires us to either further wrap and abstract the writer
 * or duplicate some code. In a future release we hope to have contributed a better option to Apache Arrow which allows
 * us to simplify this method.
 */
@VisibleForTesting
protected static void writeStructValue(StructWriter writer, Field field, BufferAllocator allocator, Object value) {
    // Null cells are recorded implicitly via the validity buffer; nothing to write.
    if (value == null) {
        return;
    }
    ArrowType type = field.getType();
    try {
        switch (Types.getMinorTypeForArrowType(type)) {
            case TIMESTAMPMILLITZ:
                // Accept the common temporal representations and normalize them to Arrow's
                // packed (epoch-millis + zone) long encoding.
                long dateTimeWithZone;
                if (value instanceof ZonedDateTime) {
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone((ZonedDateTime) value);
                } else if (value instanceof LocalDateTime) {
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(
                            ((LocalDateTime) value).atZone(UTC_ZONE_ID).toInstant().toEpochMilli(), UTC_ZONE_ID.getId());
                } else if (value instanceof Date) {
                    long ldtInLong = Instant.ofEpochMilli(((Date) value).getTime())
                            .atZone(UTC_ZONE_ID).toInstant().toEpochMilli();
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(ldtInLong, UTC_ZONE_ID.getId());
                } else {
                    // Assume the caller already packed the value.
                    dateTimeWithZone = (long) value;
                }
                writer.timeStampMilliTZ(field.getName()).writeTimeStampMilliTZ(dateTimeWithZone);
                // BUG FIX: the original was missing this break, so TIMESTAMPMILLITZ fell through
                // into DATEMILLI — writing the cell a second time and throwing ClassCastException
                // for ZonedDateTime/LocalDateTime inputs when DATEMILLI cast them to long.
                break;
            case DATEMILLI:
                if (value instanceof Date) {
                    writer.dateMilli(field.getName()).writeDateMilli(((Date) value).getTime());
                } else {
                    writer.dateMilli(field.getName()).writeDateMilli((long) value);
                }
                break;
            case DATEDAY:
                if (value instanceof Date) {
                    // Days since epoch, computed via joda to match the project's existing date math.
                    org.joda.time.Days days = org.joda.time.Days.daysBetween(EPOCH,
                            new org.joda.time.DateTime(((Date) value).getTime()));
                    writer.dateDay(field.getName()).writeDateDay(days.getDays());
                } else if (value instanceof LocalDate) {
                    int days = (int) ((LocalDate) value).toEpochDay();
                    writer.dateDay(field.getName()).writeDateDay(days);
                } else if (value instanceof Long) {
                    writer.dateDay(field.getName()).writeDateDay(((Long) value).intValue());
                } else {
                    writer.dateDay(field.getName()).writeDateDay((int) value);
                }
                break;
            case FLOAT8:
                writer.float8(field.getName()).writeFloat8((double) value);
                break;
            case FLOAT4:
                writer.float4(field.getName()).writeFloat4((float) value);
                break;
            case INT:
                // (Redundant 'value != null &&' removed: instanceof is false for null, and null
                // was already handled at the top of the method.)
                if (value instanceof Long) {
                    // This may seem odd at first but many frameworks (like Presto) use long as the preferred
                    // native java type for representing integers. We do this to keep type conversions simple.
                    writer.integer(field.getName()).writeInt(((Long) value).intValue());
                } else {
                    writer.integer(field.getName()).writeInt((int) value);
                }
                break;
            case TINYINT:
                writer.tinyInt(field.getName()).writeTinyInt((byte) value);
                break;
            case SMALLINT:
                writer.smallInt(field.getName()).writeSmallInt((short) value);
                break;
            case UINT1:
                writer.uInt1(field.getName()).writeUInt1((byte) value);
                break;
            case UINT2:
                writer.uInt2(field.getName()).writeUInt2((char) value);
                break;
            case UINT4:
                writer.uInt4(field.getName()).writeUInt4((int) value);
                break;
            case UINT8:
                writer.uInt8(field.getName()).writeUInt8((long) value);
                break;
            case BIGINT:
                writer.bigInt(field.getName()).writeBigInt((long) value);
                break;
            case VARBINARY:
                if (value instanceof ArrowBuf) {
                    ArrowBuf buf = (ArrowBuf) value;
                    writer.varBinary(field.getName()).writeVarBinary(0, (int) (buf.capacity()), buf);
                } else if (value instanceof byte[]) {
                    byte[] bytes = (byte[]) value;
                    // The temporary buffer is released once the bytes are copied into the vector.
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varBinary(field.getName()).writeVarBinary(0, (int) (buf.readableBytes()), buf);
                    }
                }
                break;
            case DECIMAL:
                int scale = ((ArrowType.Decimal) type).getScale();
                int precision = ((ArrowType.Decimal) type).getPrecision();
                if (value instanceof Double) {
                    BigDecimal bdVal = new BigDecimal((double) value);
                    bdVal = bdVal.setScale(scale, RoundingMode.HALF_UP);
                    writer.decimal(field.getName(), scale, precision).writeDecimal(bdVal);
                } else {
                    BigDecimal scaledValue = ((BigDecimal) value).setScale(scale, RoundingMode.HALF_UP);
                    writer.decimal(field.getName(), scale, precision).writeDecimal(scaledValue);
                }
                break;
            case VARCHAR:
                if (value instanceof String) {
                    byte[] bytes = ((String) value).getBytes(Charsets.UTF_8);
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
                    }
                } else if (value instanceof ArrowBuf) {
                    ArrowBuf buf = (ArrowBuf) value;
                    writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
                } else if (value instanceof byte[]) {
                    byte[] bytes = (byte[]) value;
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
                    }
                }
                break;
            case BIT:
                // Truthiness: positive Integer or Boolean.TRUE writes 1; everything else writes 0.
                if (value instanceof Integer && (int) value > 0) {
                    writer.bit(field.getName()).writeBit(1);
                } else if (value instanceof Boolean && (boolean) value) {
                    writer.bit(field.getName()).writeBit(1);
                } else {
                    writer.bit(field.getName()).writeBit(0);
                }
                break;
            default:
                throw new IllegalArgumentException("Unknown type " + type);
        }
    } catch (RuntimeException ex) {
        // Re-wrap with field/value context so failures deep in a struct are diagnosable.
        throw new RuntimeException("Unable to write value for field " + field.getName() + " using value " + value, ex);
    }
}
Aggregations of VisibleForTesting usages end here.