use of org.apache.avro.generic.GenericData.Record in project parquet-mr by apache.
the class TestCircularReferences method test.
/**
 * Writes a parent record and a child record that point at each other, reads the
 * data back, and checks that the parent/child links survive the round trip.
 */
@Test
public void test() throws IOException {
  // Data model with the tracker and handler conversions of one ReferenceManager registered.
  ReferenceManager refManager = new ReferenceManager();
  GenericData model = new GenericData();
  model.addLogicalTypeConversion(refManager.getTracker());
  model.addLogicalTypeConversion(refManager.getHandler());

  // The parent schema is created empty; its fields are attached once the child schema exists.
  Schema parentSchema = Schema.createRecord("Parent", null, null, false);

  // Placeholder record used as the record branch of the child's "parent" union.
  Schema placeholderSchema = Schema.createRecord("Placeholder", null, null, false);
  List<Schema.Field> placeholderFieldList = new ArrayList<Schema.Field>();
  // at least one field is needed to be a valid schema
  Schema.Field placeholderId = new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null);
  placeholderFieldList.add(placeholderId);
  placeholderSchema.setFields(placeholderFieldList);

  // NOTE(review): presumably marks records as referenceable through their "id" field - confirm.
  Referenceable idRef = new Referenceable("id");
  Schema nullBranch = Schema.create(Schema.Type.NULL);
  Schema longBranch = Schema.create(Schema.Type.LONG);
  Schema placeholderBranch = idRef.addToSchema(placeholderSchema);
  Schema parentRefSchema = Schema.createUnion(nullBranch, longBranch, placeholderBranch);

  // Child record: a string payload plus the union-typed "parent" field.
  Reference parentRef = new Reference("parent");
  List<Schema.Field> childFieldList = new ArrayList<Schema.Field>();
  childFieldList.add(new Schema.Field("c", Schema.create(Schema.Type.STRING), null, null));
  childFieldList.add(new Schema.Field("parent", parentRefSchema, null, null));
  Schema childRecordSchema = Schema.createRecord("Child", null, null, false, childFieldList);
  Schema childSchema = parentRef.addToSchema(childRecordSchema);

  // Parent record: id, string payload, and the embedded child.
  List<Schema.Field> parentFieldList = new ArrayList<Schema.Field>();
  parentFieldList.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
  parentFieldList.add(new Schema.Field("p", Schema.create(Schema.Type.STRING), null, null));
  parentFieldList.add(new Schema.Field("child", childSchema, null, null));
  parentSchema.setFields(parentFieldList);

  Schema schema = idRef.addToSchema(parentSchema);
  System.out.println("Schema: " + schema.toString(true));

  // Build the circular object graph: parent -> child -> parent.
  Record parent = new Record(schema);
  Record child = new Record(childSchema);
  parent.put("id", 1L);
  parent.put("p", "parent data!");
  child.put("c", "child data!");
  child.put("parent", parent);
  parent.put("child", child);

  // serialization round trip
  File data = AvroTestUtil.write(temp, model, schema, parent);
  List<Record> readBack = AvroTestUtil.read(model, schema, data);
  Record actual = readBack.get(0);

  // because the record is a recursive structure, equals won't work
  Utf8 expectedParentData = new Utf8("parent data!");
  Assert.assertEquals("Should correctly read back the parent id", 1L, actual.get("id"));
  Assert.assertEquals("Should correctly read back the parent data", expectedParentData, actual.get("p"));

  Record actualChild = (Record) actual.get("child");
  Assert.assertEquals("Should correctly read back the child data", new Utf8("child data!"), actualChild.get("c"));

  Object childParent = actualChild.get("parent");
  Assert.assertTrue("Should have a parent Record object", childParent instanceof Record);

  Record childParentRecord = (Record) childParent;
  Assert.assertEquals("Should have the right parent id", 1L, childParentRecord.get("id"));
  Assert.assertEquals("Should have the right parent data", new Utf8("parent data!"), childParentRecord.get("p"));
}
use of org.apache.avro.generic.GenericData.Record in project components by Talend.
the class KeyValueUtils method getField.
/**
 * Retrieve a field from an indexed record, wrapped in a new single-field record.
 *
 * The path is split on '.'; every element except the last must resolve to a nested
 * {@code Record} value, and the last element names the field to extract.
 *
 * @param fieldPath the field name. Can be a dot-separated path for a hierarchical element
 * @param record an Indexed record
 * @return a record named "temp" holding the target field (its schema wrapped as nullable)
 *         and its value, or null if any element of the path cannot be resolved
 */
public static Record getField(String fieldPath, IndexedRecord record) {
  // TODO current implementation will only extract one element, but
  // further implementation may
  String[] path = fieldPath.split("\\.");
  if (path.length == 0) {
    // A path made only of separators (e.g. ".") splits to nothing: there is no field to look up.
    return null;
  }
  Schema schema = record.getSchema();
  // Descend through every intermediate element; the last one is handled after the loop.
  for (int i = 0; i < path.length - 1; i++) {
    Field step = schema.getField(path[i]);
    if (step == null) {
      return null;
    }
    Object inputValue = record.get(step.pos());
    if (!(inputValue instanceof Record)) {
      // An intermediate element that is not a record cannot be descended into.
      return null;
    }
    record = (IndexedRecord) inputValue;
    // The declared schema may be the record itself or a union containing it,
    // so we need to get the true sub-schema.
    Schema stepSchema = step.schema();
    if (stepSchema.getType().equals(Type.RECORD)) {
      schema = stepSchema;
    } else if (stepSchema.getType().equals(Type.UNION)) {
      for (Schema childSchema : stepSchema.getTypes()) {
        if (childSchema.getType().equals(Type.RECORD)) {
          schema = childSchema;
          break;
        }
      }
    }
  }
  Field field = schema.getField(path[path.length - 1]);
  if (field == null) {
    // The leaf field does not exist: honour the documented "null otherwise" contract
    // instead of failing with a NullPointerException below.
    return null;
  }
  Field nullableCopy = new Field(field.name(), AvroUtils.wrapAsNullable(field.schema()), field.doc(), field.defaultVal());
  Schema fieldRecordSchema = Schema.createRecord("temp", null, null, false, Arrays.asList(nullableCopy));
  Record fieldRecord = new Record(fieldRecordSchema);
  fieldRecord.put(0, record.get(field.pos()));
  return fieldRecord;
}
use of org.apache.avro.generic.GenericData.Record in project components by Talend.
the class KeyValueUtils method getFieldValue.
// TODO externalize the following method
/**
 * Retrieve the value of a field from an indexed record.
 *
 * The path is split on '.' and followed element by element through nested
 * {@code Record} values. The value is returned only when the last element of the
 * path holds something other than a {@code Record}.
 *
 * @param fieldPath the field name. Can be a dot-separated path for a hierarchical element
 * @param record an Indexed record
 * @return the value matching the field path if it was found, null otherwise (including
 *         when the last path element itself holds a {@code Record})
 */
public static Object getFieldValue(String fieldPath, IndexedRecord record) {
  // TODO current implementation will only extract one element, but
  // further implementation may
  String[] path = fieldPath.split("\\.");
  Schema schema = record.getSchema();
  for (int i = 0; i < path.length; i++) {
    Field step = schema.getField(path[i]);
    if (step == null) {
      return null;
    }
    Object inputValue = record.get(step.pos());
    if (!(inputValue instanceof Record)) {
      // A non-record value is either the expected result (last path element)
      // or a dead end (the path continues but there is nothing to descend into).
      return (i == path.length - 1) ? inputValue : null;
    }
    // We are on a record: keep walking from it.
    record = (IndexedRecord) inputValue;
    // The declared schema may be the record itself or a union containing it,
    // so we need to get the true sub-schema.
    Schema stepSchema = step.schema();
    if (stepSchema.getType().equals(Type.RECORD)) {
      schema = stepSchema;
    } else if (stepSchema.getType().equals(Type.UNION)) {
      for (Schema childSchema : stepSchema.getTypes()) {
        if (childSchema.getType().equals(Type.RECORD)) {
          schema = childSchema;
          break;
        }
      }
    }
  }
  // The path ended on a record rather than on a plain value: return an empty element.
  return null;
}
use of org.apache.avro.generic.GenericData.Record in project components by Talend.
the class KeyValueUtils method extractIndexedRecord.
/**
 * Build a new indexed record shaped by {@code outputSchema} from the content of the input record.
 *
 * For every field of the output schema: a field that also exists in the input is copied
 * (nested records are rebuilt recursively against the output sub-schema), and a field that
 * is missing from the input is filled with the result of {@code generateEmptyRecord}.
 * The user can therefore remove columns, add empty columns, or reorder columns within the
 * same hierarchical level.
 *
 * @param inputRecord the record to read values from
 * @param outputSchema the schema of the record to produce
 * @return the new record
 */
public static IndexedRecord extractIndexedRecord(IndexedRecord inputRecord, Schema outputSchema) {
  GenericRecordBuilder builder = new GenericRecordBuilder(outputSchema);
  Schema inputSchema = getUnwrappedSchema(inputRecord);
  for (Field target : outputSchema.getFields()) {
    String name = target.name();
    Field source = inputSchema.getField(name);

    if (source == null) {
      // element not found => set the value and its hierarchy to null
      builder.set(name, KeyValueUtils.generateEmptyRecord(outputSchema, name));
      continue;
    }

    // The column exists on the input record, so it is forwarded to the output record.
    Object inputValue = inputRecord.get(source.pos());
    if (!(inputValue instanceof Record)) {
      // Plain value: copy it as is.
      builder.set(name, inputValue);
      continue;
    }

    // The sub-schema at this level may be a union of "empty" and a record,
    // so we need to get the true sub-schema on both sides.
    Schema inputChildSchema = getUnwrappedSchema(source);
    Schema outputChildSchema = getUnwrappedSchema(outputSchema.getField(name));
    boolean bothRecords = inputChildSchema.getType() == Type.RECORD
        && outputChildSchema.getType() == Type.RECORD;
    if (bothRecords) {
      Object childRecord = extractIndexedRecord((IndexedRecord) inputValue, outputChildSchema);
      builder.set(name, childRecord);
    }
    // Otherwise the field is deliberately left unset on the builder, as before.
  }
  return builder.build();
}
use of org.apache.avro.generic.GenericData.Record in project components by Talend.
the class AzureStorageQueueWriter method sendParallelMessages.
/**
 * Sends every buffered message to the queue using a parallel stream, records the outcome
 * of each send, and then empties the buffer.
 *
 * The stream's worker threads do not hold this method's monitor, so they must not touch
 * {@code result} or {@code successfulWrites} directly: unsynchronized {@code ++} on a shared
 * field can lose updates. Outcomes are therefore gathered in thread-safe local accumulators
 * and published once, on the calling thread, after the stream has finished.
 */
private synchronized void sendParallelMessages() {
  // Fully-qualified names are used so this method does not depend on additional imports.
  final java.util.concurrent.atomic.AtomicInteger succeeded = new java.util.concurrent.atomic.AtomicInteger();
  final java.util.concurrent.atomic.AtomicInteger rejected = new java.util.concurrent.atomic.AtomicInteger();
  final java.util.Queue<IndexedRecord> written = new java.util.concurrent.ConcurrentLinkedQueue<IndexedRecord>();
  try {
    messagesBuffer.parallelStream().forEach(new Consumer<QueueMessage>() {
      @Override
      public void accept(QueueMessage queueMessage) {
        try {
          queue.addMessage(queueMessage.getMsg(), queueMessage.getTimeToLiveInSeconds(), queueMessage.getInitialVisibilityDelayInSeconds(), null, null);
          // Counted as soon as the message is accepted, before the output record is built.
          succeeded.incrementAndGet();
          IndexedRecord record = new Record(writeSchema);
          record.put(0, queueMessage.getMsg().getMessageContentAsString());
          written.add(record);
        } catch (StorageException e) {
          rejected.incrementAndGet();
          LOGGER.error(e.getLocalizedMessage());
        }
      }
    });
  } finally {
    // Publish whatever was processed, even if an unchecked exception aborted the stream.
    result.successCount += succeeded.get();
    result.rejectCount += rejected.get();
    successfulWrites.addAll(written);
  }
  messagesBuffer.clear();
}
Aggregations