Use of org.apache.avro.generic.GenericFixed in project parquet-mr by apache.
From the class TestReadWriteOldListBehavior, method testAllUsingDefaultAvroSchema.
@Test
public void testAllUsingDefaultAvroSchema() throws Exception {
File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
tmp.deleteOnExit();
tmp.delete();
Path file = new Path(tmp.getPath());
// write file using Parquet APIs
ParquetWriter<Map<String, Object>> parquetWriter = new ParquetWriter<Map<String, Object>>(file, new WriteSupport<Map<String, Object>>() {
private RecordConsumer recordConsumer;
@Override
public WriteContext init(Configuration configuration) {
return new WriteContext(MessageTypeParser.parseMessageType(TestAvroSchemaConverter.ALL_PARQUET_SCHEMA), new HashMap<String, String>());
}
@Override
public void prepareForWrite(RecordConsumer recordConsumer) {
this.recordConsumer = recordConsumer;
}
@Override
public void write(Map<String, Object> record) {
recordConsumer.startMessage();
int index = 0;
recordConsumer.startField("myboolean", index);
recordConsumer.addBoolean((Boolean) record.get("myboolean"));
recordConsumer.endField("myboolean", index++);
recordConsumer.startField("myint", index);
recordConsumer.addInteger((Integer) record.get("myint"));
recordConsumer.endField("myint", index++);
recordConsumer.startField("mylong", index);
recordConsumer.addLong((Long) record.get("mylong"));
recordConsumer.endField("mylong", index++);
recordConsumer.startField("myfloat", index);
recordConsumer.addFloat((Float) record.get("myfloat"));
recordConsumer.endField("myfloat", index++);
recordConsumer.startField("mydouble", index);
recordConsumer.addDouble((Double) record.get("mydouble"));
recordConsumer.endField("mydouble", index++);
recordConsumer.startField("mybytes", index);
recordConsumer.addBinary(Binary.fromReusedByteBuffer((ByteBuffer) record.get("mybytes")));
recordConsumer.endField("mybytes", index++);
recordConsumer.startField("mystring", index);
recordConsumer.addBinary(Binary.fromString((String) record.get("mystring")));
recordConsumer.endField("mystring", index++);
recordConsumer.startField("mynestedrecord", index);
recordConsumer.startGroup();
recordConsumer.startField("mynestedint", 0);
recordConsumer.addInteger((Integer) record.get("mynestedint"));
recordConsumer.endField("mynestedint", 0);
recordConsumer.endGroup();
recordConsumer.endField("mynestedrecord", index++);
recordConsumer.startField("myenum", index);
recordConsumer.addBinary(Binary.fromString((String) record.get("myenum")));
recordConsumer.endField("myenum", index++);
recordConsumer.startField("myarray", index);
recordConsumer.startGroup();
recordConsumer.startField("array", 0);
for (int val : (int[]) record.get("myarray")) {
recordConsumer.addInteger(val);
}
recordConsumer.endField("array", 0);
recordConsumer.endGroup();
recordConsumer.endField("myarray", index++);
recordConsumer.startField("myoptionalarray", index);
recordConsumer.startGroup();
recordConsumer.startField("array", 0);
for (int val : (int[]) record.get("myoptionalarray")) {
recordConsumer.addInteger(val);
}
recordConsumer.endField("array", 0);
recordConsumer.endGroup();
recordConsumer.endField("myoptionalarray", index++);
recordConsumer.startField("myarrayofoptional", index);
recordConsumer.startGroup();
recordConsumer.startField("list", 0);
for (Integer val : (Integer[]) record.get("myarrayofoptional")) {
recordConsumer.startGroup();
if (val != null) {
recordConsumer.startField("element", 0);
recordConsumer.addInteger(val);
recordConsumer.endField("element", 0);
}
recordConsumer.endGroup();
}
recordConsumer.endField("list", 0);
recordConsumer.endGroup();
recordConsumer.endField("myarrayofoptional", index++);
recordConsumer.startField("myrecordarray", index);
recordConsumer.startGroup();
recordConsumer.startField("array", 0);
recordConsumer.startGroup();
recordConsumer.startField("a", 0);
for (int val : (int[]) record.get("myrecordarraya")) {
recordConsumer.addInteger(val);
}
recordConsumer.endField("a", 0);
recordConsumer.startField("b", 1);
for (int val : (int[]) record.get("myrecordarrayb")) {
recordConsumer.addInteger(val);
}
recordConsumer.endField("b", 1);
recordConsumer.endGroup();
recordConsumer.endField("array", 0);
recordConsumer.endGroup();
recordConsumer.endField("myrecordarray", index++);
recordConsumer.startField("mymap", index);
recordConsumer.startGroup();
recordConsumer.startField("map", 0);
recordConsumer.startGroup();
Map<String, Integer> mymap = (Map<String, Integer>) record.get("mymap");
recordConsumer.startField("key", 0);
for (String key : mymap.keySet()) {
recordConsumer.addBinary(Binary.fromString(key));
}
recordConsumer.endField("key", 0);
recordConsumer.startField("value", 1);
for (int val : mymap.values()) {
recordConsumer.addInteger(val);
}
recordConsumer.endField("value", 1);
recordConsumer.endGroup();
recordConsumer.endField("map", 0);
recordConsumer.endGroup();
recordConsumer.endField("mymap", index++);
recordConsumer.startField("myfixed", index);
recordConsumer.addBinary(Binary.fromReusedByteArray((byte[]) record.get("myfixed")));
recordConsumer.endField("myfixed", index++);
recordConsumer.endMessage();
}
});
Map<String, Object> record = new HashMap<String, Object>();
record.put("myboolean", true);
record.put("myint", 1);
record.put("mylong", 2L);
record.put("myfloat", 3.1f);
record.put("mydouble", 4.1);
record.put("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)));
record.put("mystring", "hello");
record.put("myenum", "a");
record.put("mynestedint", 1);
record.put("myarray", new int[] { 1, 2, 3 });
record.put("myoptionalarray", new int[] { 1, 2, 3 });
record.put("myarrayofoptional", new Integer[] { 1, null, 2, null, 3 });
record.put("myrecordarraya", new int[] { 1, 2, 3 });
record.put("myrecordarrayb", new int[] { 4, 5, 6 });
record.put("mymap", ImmutableMap.of("a", 1, "b", 2));
record.put("myfixed", new byte[] { (byte) 65 });
parquetWriter.write(record);
parquetWriter.close();
Schema nestedRecordSchema = Schema.createRecord("mynestedrecord", null, null, false);
nestedRecordSchema.setFields(Arrays.asList(new Schema.Field("mynestedint", Schema.create(Schema.Type.INT), null, null)));
GenericData.Record nestedRecord = new GenericRecordBuilder(nestedRecordSchema).set("mynestedint", 1).build();
List<Integer> integerArray = Arrays.asList(1, 2, 3);
Schema recordArraySchema = Schema.createRecord("array", null, null, false);
recordArraySchema.setFields(Arrays.asList(new Schema.Field("a", Schema.create(Schema.Type.INT), null, null), new Schema.Field("b", Schema.create(Schema.Type.INT), null, null)));
GenericRecordBuilder builder = new GenericRecordBuilder(recordArraySchema);
List<GenericData.Record> recordArray = new ArrayList<GenericData.Record>();
recordArray.add(builder.set("a", 1).set("b", 4).build());
recordArray.add(builder.set("a", 2).set("b", 5).build());
recordArray.add(builder.set("a", 3).set("b", 6).build());
GenericData.Array<GenericData.Record> genericRecordArray = new GenericData.Array<GenericData.Record>(Schema.createArray(recordArraySchema), recordArray);
GenericFixed genericFixed = new GenericData.Fixed(Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });
// 3-level lists are deserialized with the extra layer present
Schema elementSchema = record("list", optionalField("element", primitive(Schema.Type.INT)));
GenericRecordBuilder elementBuilder = new GenericRecordBuilder(elementSchema);
GenericData.Array<GenericData.Record> genericRecordArrayWithNullIntegers = new GenericData.Array<GenericData.Record>(array(elementSchema),
    Arrays.asList(
        elementBuilder.set("element", 1).build(),
        elementBuilder.set("element", null).build(),
        elementBuilder.set("element", 2).build(),
        elementBuilder.set("element", null).build(),
        elementBuilder.set("element", 3).build()));
AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file);
GenericRecord nextRecord = reader.read();
assertNotNull(nextRecord);
assertEquals(true, nextRecord.get("myboolean"));
assertEquals(1, nextRecord.get("myint"));
assertEquals(2L, nextRecord.get("mylong"));
assertEquals(3.1f, nextRecord.get("myfloat"));
assertEquals(4.1, nextRecord.get("mydouble"));
assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
assertEquals(str("hello"), nextRecord.get("mystring"));
assertEquals(str("a"), nextRecord.get("myenum"));
assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
assertEquals(integerArray, nextRecord.get("myarray"));
assertEquals(integerArray, nextRecord.get("myoptionalarray"));
assertEquals(genericRecordArrayWithNullIntegers, nextRecord.get("myarrayofoptional"));
assertEquals(genericRecordArray, nextRecord.get("myrecordarray"));
assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
assertEquals(genericFixed, nextRecord.get("myfixed"));
}
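The schema helpers record, optionalField, primitive, array, optional, and str used in this test are not part of Avro; they come from parquet-mr's test utilities. A minimal sketch of what such helpers could look like (shapes inferred from the calls above; assumed, not the project's exact code):
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.avro.Schema;
import org.apache.avro.util.Utf8;
public class AvroSchemaHelpers {
    // Plain primitive schema, e.g. primitive(Schema.Type.INT).
    public static Schema primitive(Schema.Type type) {
        return Schema.create(type);
    }
    // ["null", schema] union so a field may hold null values.
    public static Schema optional(Schema schema) {
        return Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), schema));
    }
    public static Schema.Field optionalField(String name, Schema schema) {
        return new Schema.Field(name, optional(schema), null, (Object) null);
    }
    // Record schema built from the given fields.
    public static Schema record(String name, Schema.Field... fields) {
        Schema schema = Schema.createRecord(name, null, null, false);
        schema.setFields(new ArrayList<Schema.Field>(Arrays.asList(fields)));
        return schema;
    }
    public static Schema array(Schema elementSchema) {
        return Schema.createArray(elementSchema);
    }
    // Avro's generic reader returns strings as Utf8, which is why the assertions compare against str(...).
    public static Utf8 str(String value) {
        return new Utf8(value);
    }
}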
Use of org.apache.avro.generic.GenericFixed in project parquet-mr by apache.
From the class TestReadWriteOldListBehavior, method testArrayWithNullValues.
@Test
public void testArrayWithNullValues() throws Exception {
Schema schema = new Schema.Parser().parse(Resources.getResource("all.avsc").openStream());
File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
tmp.deleteOnExit();
tmp.delete();
Path file = new Path(tmp.getPath());
GenericData.Record nestedRecord = new GenericRecordBuilder(schema.getField("mynestedrecord").schema()).set("mynestedint", 1).build();
List<Integer> integerArray = Arrays.asList(1, 2, 3);
GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);
GenericFixed genericFixed = new GenericData.Fixed(Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });
List<Integer> emptyArray = new ArrayList<Integer>();
ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build();
Schema arrayOfOptionalIntegers = Schema.createArray(optional(Schema.create(Schema.Type.INT)));
GenericData.Array<Integer> genericIntegerArrayWithNulls = new GenericData.Array<Integer>(arrayOfOptionalIntegers, Arrays.asList(1, null, 2, null, 3));
GenericData.Record record = new GenericRecordBuilder(schema)
    .set("mynull", null)
    .set("myboolean", true)
    .set("myint", 1)
    .set("mylong", 2L)
    .set("myfloat", 3.1f)
    .set("mydouble", 4.1)
    .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
    .set("mystring", "hello")
    .set("mynestedrecord", nestedRecord)
    .set("myenum", "a")
    .set("myarray", genericIntegerArray)
    .set("myemptyarray", emptyArray)
    .set("myoptionalarray", genericIntegerArray)
    .set("myarrayofoptional", genericIntegerArrayWithNulls)
    .set("mymap", ImmutableMap.of("a", 1, "b", 2))
    .set("myemptymap", emptyMap)
    .set("myfixed", genericFixed)
    .build();
final AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema);
try {
writer.write(record);
fail("Should not succeed writing an array with null values");
} catch (Exception e) {
Assert.assertTrue("Error message should provide context and help", e.getMessage().contains("parquet.avro.write-old-list-structure"));
} finally {
writer.close();
}
}
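The failure above comes from the default two-level ("old") list layout, which cannot encode null elements. A minimal sketch of the remedy hinted at by the error message, reusing file, schema, and record from this test and assuming the AvroParquetWriter builder API of current parquet-avro releases:
// Switching off parquet.avro.write-old-list-structure makes the writer emit the
// three-level list structure, which can represent null elements.
Configuration conf = new Configuration();
conf.setBoolean("parquet.avro.write-old-list-structure", false);
ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(file)
    .withSchema(schema)
    .withConf(conf)
    .build();
writer.write(record);   // the nulls in "myarrayofoptional" are now written as absent optional elements
writer.close();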
Use of org.apache.avro.generic.GenericFixed in project streamline by hortonworks.
From the class AvroStreamsSnapshotDeserializer, method convertValue.
private Object convertValue(Object deserializedObj) {
Object value;
// check for specific-record type and build a map from that
if (deserializedObj instanceof IndexedRecord) {
// record
IndexedRecord indexedRecord = (IndexedRecord) deserializedObj;
List<Schema.Field> fields = indexedRecord.getSchema().getFields();
ImmutableMap.Builder<String, Object> keyValues = ImmutableMap.builder();
for (Schema.Field field : fields) {
Object currentValue = convertValue(indexedRecord.get(field.pos()));
if (currentValue != null) {
keyValues.put(field.name(), currentValue);
}
}
value = keyValues.build();
} else if (deserializedObj instanceof ByteBuffer) {
// byte array representation
ByteBuffer byteBuffer = (ByteBuffer) deserializedObj;
byte[] bytes = new byte[byteBuffer.remaining()];
byteBuffer.get(bytes);
value = bytes;
} else if (deserializedObj instanceof GenericEnumSymbol) {
// enums
GenericEnumSymbol symbol = (GenericEnumSymbol) deserializedObj;
value = symbol.toString();
} else if (deserializedObj instanceof CharSequence) {
// strings (e.g. Utf8 / other CharSequence values)
value = deserializedObj.toString();
} else if (deserializedObj instanceof Map) {
// type of map
Map<Object, Object> map = (Map<Object, Object>) deserializedObj;
ImmutableMap.Builder<String, Object> keyValues = ImmutableMap.builder();
for (Map.Entry entry : map.entrySet()) {
Object currentValue = convertValue(entry.getValue());
if (currentValue != null) {
keyValues.put(entry.getKey().toString(), currentValue);
}
}
value = keyValues.build();
} else if (deserializedObj instanceof Collection) {
// type of array
Collection<Object> collection = (Collection<Object>) deserializedObj;
ImmutableList.Builder<Object> values = ImmutableList.builder();
for (Object obj : collection) {
Object currentValue = convertValue(obj);
if (currentValue != null) {
values.add(currentValue);
}
}
value = values.build();
} else if (deserializedObj instanceof GenericFixed) {
// fixed type
GenericFixed genericFixed = (GenericFixed) deserializedObj;
value = genericFixed.bytes();
} else {
// other primitive types
value = deserializedObj;
}
return value;
}
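The GenericFixed branch at the end simply unwraps the value's backing byte array. A small illustrative sketch of that step in isolation (hypothetical schema and names, since convertValue itself is private):
// A 16-byte fixed value, e.g. an MD5 digest, converts to its raw bytes.
Schema md5Schema = Schema.createFixed("md5", null, "example.avro", 16);
GenericFixed fixed = new GenericData.Fixed(md5Schema, new byte[16]);
byte[] unwrapped = fixed.bytes();   // what convertValue returns for FIXED-typed values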
Use of org.apache.avro.generic.GenericFixed in project flink by apache.
From the class AvroRowDeserializationSchema, method convertAvroType.
private Object convertAvroType(Schema schema, TypeInformation<?> info, Object object) {
if (object == null) {
return null;
}
switch(schema.getType()) {
case RECORD:
if (object instanceof IndexedRecord) {
return convertAvroRecordToRow(schema, (RowTypeInfo) info, (IndexedRecord) object);
}
throw new IllegalStateException("IndexedRecord expected but was: " + object.getClass());
case ENUM:
case STRING:
return object.toString();
case ARRAY:
if (info instanceof BasicArrayTypeInfo) {
final TypeInformation<?> elementInfo = ((BasicArrayTypeInfo<?, ?>) info).getComponentInfo();
return convertToObjectArray(schema.getElementType(), elementInfo, object);
} else {
final TypeInformation<?> elementInfo = ((ObjectArrayTypeInfo<?, ?>) info).getComponentInfo();
return convertToObjectArray(schema.getElementType(), elementInfo, object);
}
case MAP:
final MapTypeInfo<?, ?> mapTypeInfo = (MapTypeInfo<?, ?>) info;
final Map<String, Object> convertedMap = new HashMap<>();
final Map<?, ?> map = (Map<?, ?>) object;
for (Map.Entry<?, ?> entry : map.entrySet()) {
convertedMap.put(entry.getKey().toString(), convertAvroType(schema.getValueType(), mapTypeInfo.getValueTypeInfo(), entry.getValue()));
}
return convertedMap;
case UNION:
final List<Schema> types = schema.getTypes();
final int size = types.size();
final Schema actualSchema;
if (size == 2 && types.get(0).getType() == Schema.Type.NULL) {
actualSchema = types.get(1);
} else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) {
actualSchema = types.get(0);
} else if (size == 1) {
actualSchema = types.get(0);
} else {
// generic type
return object;
}
return convertAvroType(actualSchema, info, object);
case FIXED:
final byte[] fixedBytes = ((GenericFixed) object).bytes();
if (info == Types.BIG_DEC) {
return convertToDecimal(schema, fixedBytes);
}
return fixedBytes;
case BYTES:
final ByteBuffer byteBuffer = (ByteBuffer) object;
final byte[] bytes = new byte[byteBuffer.remaining()];
byteBuffer.get(bytes);
if (info == Types.BIG_DEC) {
return convertToDecimal(schema, bytes);
}
return bytes;
case INT:
if (info == Types.SQL_DATE) {
return convertToDate(object);
} else if (info == Types.SQL_TIME) {
return convertToTime(object);
}
return object;
case LONG:
if (info == Types.SQL_TIMESTAMP) {
return convertToTimestamp(object, schema.getLogicalType() == LogicalTypes.timestampMicros());
} else if (info == Types.SQL_TIME) {
return convertToTime(object);
}
return object;
case FLOAT:
case DOUBLE:
case BOOLEAN:
return object;
}
throw new RuntimeException("Unsupported Avro type:" + schema);
}
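For FIXED (and BYTES) values carrying a decimal logical type, the extracted bytes are the two's-complement unscaled value, and convertToDecimal turns them into a BigDecimal using the scale attached to the schema. A minimal sketch of what that step generally looks like (assumed, not Flink's exact code):
import java.math.BigDecimal;
import java.math.BigInteger;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
final class DecimalDecodeSketch {
    // The scale lives on the Avro decimal logical type attached to the fixed/bytes schema.
    static BigDecimal decodeDecimal(Schema schema, byte[] unscaledBytes) {
        LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) schema.getLogicalType();
        return new BigDecimal(new BigInteger(unscaledBytes), decimalType.getScale());
    }
}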
Use of org.apache.avro.generic.GenericFixed in project flink by apache.
From the class AvroToRowDataConverters, method createDecimalConverter.
private static AvroToRowDataConverter createDecimalConverter(DecimalType decimalType) {
final int precision = decimalType.getPrecision();
final int scale = decimalType.getScale();
return avroObject -> {
final byte[] bytes;
if (avroObject instanceof GenericFixed) {
bytes = ((GenericFixed) avroObject).bytes();
} else if (avroObject instanceof ByteBuffer) {
ByteBuffer byteBuffer = (ByteBuffer) avroObject;
bytes = new byte[byteBuffer.remaining()];
byteBuffer.get(bytes);
} else {
bytes = (byte[]) avroObject;
}
return DecimalData.fromUnscaledBytes(bytes, precision, scale);
};
}
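A GenericFixed, a ByteBuffer, or a raw byte[] all end up on the same DecimalData.fromUnscaledBytes path. An illustrative use of such a converter (hypothetical wiring, since createDecimalConverter is private to AvroToRowDataConverters):
// DECIMAL(10, 2): the fixed payload 12345 is read back as 123.45.
DecimalType decimalType = new DecimalType(10, 2);
AvroToRowDataConverter converter = createDecimalConverter(decimalType);
Schema fixedSchema = Schema.createFixed("amount", null, null, 2);
byte[] unscaled = new BigInteger("12345").toByteArray();   // two's-complement unscaled value, 2 bytes
GenericFixed fixed = new GenericData.Fixed(fixedSchema, unscaled);
DecimalData amount = (DecimalData) converter.convert(fixed);   // 123.45 at precision 10, scale 2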