Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.
The class GoraStorage, method putNext.
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple pigTuple) throws IOException {
  PersistentBase persistentObj = this.dataStore.newPersistent();
  if (LOG.isTraceEnabled())
    LOG.trace("key: {}", pigTuple.get(pigFieldKeyIndex));
  for (String fieldName : this.loadQueryFields) {
    if (LOG.isTraceEnabled()) {
      LOG.trace(" Put fieldname: {}", fieldName);
      LOG.trace(" resourcefield schema: {}", this.writeResourceFieldSchemaMap.get(fieldName).getResourceFieldSchema());
      LOG.trace(" value: {} - {}", this.writeResourceFieldSchemaMap.get(fieldName).getIndex(),
          pigTuple.get(this.writeResourceFieldSchemaMap.get(fieldName).getIndex()));
    }
    ResourceFieldSchemaWithIndex writeResourceFieldSchemaWithIndex = this.writeResourceFieldSchemaMap.get(fieldName);
    if (writeResourceFieldSchemaWithIndex == null) {
      if (LOG.isTraceEnabled())
        LOG.trace("Field {} defined in constructor not found in the tuple to persist, skipping field", fieldName);
      continue;
    }
    Field persistentField = persistentSchema.getField(fieldName);
    if (persistentField == null) {
      throw new IOException("Field " + fieldName + " does not exist in Gora's Avro schema.");
    }
    ResourceFieldSchema pigFieldSchema = writeResourceFieldSchemaWithIndex.getResourceFieldSchema();
    if (pigFieldSchema == null) {
      throw new IOException("The field " + fieldName + " does not have a Pig schema when writing.");
    }
    // TODO Move this put to PersistentUtils
    // TODO The resourceFieldSchema and the index are used here. Think about optimizing if possible
    // TODO Find a better name for this.writeField, like 'tupleToPersistent'
    int persistentFieldIndex = persistentObj.getSchema().getField(fieldName).pos();
    persistentObj.put(persistentFieldIndex,
        this.writeField(persistentField.schema(), pigFieldSchema, pigTuple.get(writeResourceFieldSchemaWithIndex.getIndex())));
    persistentObj.setDirty(persistentFieldIndex);
  }
  try {
    ((GoraRecordWriter<Object, PersistentBase>) this.writer).write(pigTuple.get(pigFieldKeyIndex), (PersistentBase) persistentObj);
  } catch (InterruptedException e) {
    throw new IOException("Error writing the tuple.", e);
  }
}
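putNext relies on a small helper, ResourceFieldSchemaWithIndex, that pairs a Pig ResourceFieldSchema with the position of that field in the write tuple. A minimal sketch of what such a helper could look like, inferred only from the calls above (the actual class in Gora may differ):

import org.apache.pig.ResourceSchema.ResourceFieldSchema;

// Hypothetical sketch: pairs a Pig field schema with the index of that field in the tuple.
public class ResourceFieldSchemaWithIndex {

  private final ResourceFieldSchema resourceFieldSchema;
  private final int index;

  public ResourceFieldSchemaWithIndex(ResourceFieldSchema resourceFieldSchema, int index) {
    this.resourceFieldSchema = resourceFieldSchema;
    this.index = index;
  }

  public ResourceFieldSchema getResourceFieldSchema() {
    return resourceFieldSchema;
  }

  public int getIndex() {
    return index;
  }
}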
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.
The class GoraStorage, method writeField.
/**
 * Converts one Pig field's data to PersistentBase data.
 *
 * @param avroSchema PersistentBase schema used to create new nested records
 * @param pigField Pig schema of the field being converted
 * @param pigData Pig data conforming to that schema
 * @return PersistentBase data
 * @throws IOException
 */
private Object writeField(Schema avroSchema, ResourceFieldSchema pigField, Object pigData) throws IOException {
  // If data is null, return null (checking that the Avro schema allows it)
  if (pigData == null) {
    if (avroSchema.getType() != Type.UNION && avroSchema.getType() != Type.NULL) {
      throw new IOException("Tuple field " + pigField.getName() + " is null, but the Avro schema is neither union nor null");
    } else {
      return null;
    }
  }
  // ONLY 2-ELEMENT UNIONS ARE SUPPORTED!
  if (avroSchema.getType() == Type.UNION) {
    if (avroSchema.getTypes().get(0).getType() == Schema.Type.NULL) {
      avroSchema = avroSchema.getTypes().get(1);
    } else {
      avroSchema = avroSchema.getTypes().get(0);
    }
  }
  switch (pigField.getType()) {
    case DataType.DOUBLE:
    case DataType.FLOAT:
    case DataType.LONG:
    case DataType.BOOLEAN:
    case DataType.NULL:
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing double, float, long, boolean or null.");
      return (Object) pigData;
    case DataType.CHARARRAY:
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing chararray.");
      return pigData.toString();
    case DataType.INTEGER:
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing integer/enum.");
      if (avroSchema.getType() == Type.ENUM) {
        return AvroUtils.getEnumValue(avroSchema, ((Number) pigData).intValue());
      } else {
        return ((Number) pigData).intValue();
      }
    case DataType.BYTEARRAY:
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing bytearray.");
      return ByteBuffer.wrap(((DataByteArray) pigData).get());
    case DataType.MAP: // Pig Map -> Avro Map
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing map.");
      @SuppressWarnings("unchecked")
      Map<String, Object> pigMap = (Map<String, Object>) pigData;
      Map<String, Object> goraMap = new HashMap<String, Object>(pigMap.size());
      if (pigField.getSchema() == null) {
        throw new IOException("The map being written does not have a schema.");
      }
      for (Entry<String, Object> pigEntry : pigMap.entrySet()) {
        goraMap.put(pigEntry.getKey(), this.writeField(avroSchema.getValueType(), pigField.getSchema().getFields()[0], pigEntry.getValue()));
      }
      return goraMap;
    case DataType.BAG: // Pig Bag -> Avro Array
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing bag.");
      Array<Object> persistentArray = new Array<Object>((int) ((DataBag) pigData).size(), avroSchema);
      for (Object pigArrayElement : (DataBag) pigData) {
        if (avroSchema.getElementType().getType() == Type.RECORD) {
          // If the element type is a record, the Persistent -> Pig mapping drops one level of tuple nesting:
          // we want the bag as ((a1,a2,a3), (b1,b2,b3), ...) instead of (((a1,a2,a3)), ((b1,b2,b3)), ...)
          persistentArray.add(this.writeField(avroSchema.getElementType(), pigField.getSchema().getFields()[0], pigArrayElement));
        } else {
          // Every bag has a tuple as its element type. Since this is not a record, that "tuple" container must be ignored
          persistentArray.add(this.writeField(avroSchema.getElementType(), pigField.getSchema().getFields()[0].getSchema().getFields()[0], ((Tuple) pigArrayElement).get(0)));
        }
      }
      return persistentArray;
    case DataType.TUPLE: // Pig Tuple -> Avro Record
      if (LOG.isTraceEnabled())
        LOG.trace(" Writing tuple.");
      try {
        PersistentBase persistentRecord = (PersistentBase) ClassLoadingUtils.loadClass(avroSchema.getFullName()).newInstance();
        ResourceFieldSchema[] tupleFieldSchemas = pigField.getSchema().getFields();
        for (int i = 0; i < tupleFieldSchemas.length; i++) {
          persistentRecord.put(tupleFieldSchemas[i].getName(),
              this.writeField(avroSchema.getField(tupleFieldSchemas[i].getName()).schema(), tupleFieldSchemas[i], ((Tuple) pigData).get(i)));
        }
        return persistentRecord;
      } catch (InstantiationException e) {
        throw new IOException(e);
      } catch (IllegalAccessException e) {
        throw new IOException(e);
      } catch (ClassNotFoundException e) {
        throw new IOException(e);
      }
    default:
      throw new IOException("Unexpected field " + pigField.getName() + " with Pig type " + DataType.genTypeToNameMap().get(pigField.getType()));
  }
}
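The union handling above assumes the common Avro idiom of a two-element ["null", T] union and simply picks the non-null branch. A standalone sketch of that resolution step, using the plain Avro API (class and method names here are illustrative, not part of Gora):

import org.apache.avro.Schema;

public class UnionResolutionSketch {

  // Mirrors the union branch of writeField: a two-element union such as
  // ["null", "string"] resolves to its non-null member. Larger unions are not handled.
  static Schema resolveNullableUnion(Schema schema) {
    if (schema.getType() != Schema.Type.UNION) {
      return schema;
    }
    if (schema.getTypes().get(0).getType() == Schema.Type.NULL) {
      return schema.getTypes().get(1);
    }
    return schema.getTypes().get(0);
  }

  public static void main(String[] args) {
    Schema nullableString = new Schema.Parser().parse("[\"null\", \"string\"]");
    System.out.println(resolveNullableUnion(nullableString)); // prints "string"
  }
}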
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.
The class GoraStorage, method prepareToWrite.
@Override
@SuppressWarnings({ "rawtypes", "unchecked" })
public void prepareToWrite(RecordWriter writer) throws IOException {
  // Get the schema of the data to write from the UDFContext (set on the frontend by checkSchema())
  String strSchema = this.getUDFProperties().getProperty(GoraStorage.GORA_STORE_PIG_SCHEMA);
  this.writer = (GoraRecordWriter<?, ? extends PersistentBase>) writer;
  // Parse the schema from the string stored in the properties object
  this.writeResourceSchema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
  this.writeResourceFieldSchemaMap = new HashMap<String, ResourceFieldSchemaWithIndex>();
  int index = 0;
  for (ResourceFieldSchema fieldSchema : this.writeResourceSchema.getFields()) {
    this.writeResourceFieldSchemaMap.put(fieldSchema.getName(), new ResourceFieldSchemaWithIndex(fieldSchema, index++));
  }
  this.pigFieldKeyIndex = this.writeResourceFieldSchemaMap.get("key").getIndex();
}
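prepareToWrite rebuilds the write schema from the string that the frontend stored in the UDF properties, using Pig's Utils.getSchemaFromString. A small round-trip sketch of that mechanism outside of any StoreFunc (the schema string below is illustrative):

import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.impl.util.Utils;

public class SchemaRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // A Pig schema in string form, as it would be stored in the UDF properties by checkSchema()
    String strSchema = "key:chararray,name:chararray,visits:long";
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
    int index = 0;
    for (ResourceFieldSchema fieldSchema : schema.getFields()) {
      // Prints the same name -> index mapping that writeResourceFieldSchemaMap would hold
      System.out.println(fieldSchema.getName() + " -> " + index++);
    }
  }
}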
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.
The class GoraDeleteStorage, method checkSchema.
/**
 * Checks that the Pig schema has at least the field "key" with schema chararray.
 *
 * Sets the UDFContext property GORA_STORE_PIG_SCHEMA with the schema, to send it to the backend.
 */
@Override
public void checkSchema(ResourceSchema pigSchema) throws IOException {
  List<String> pigFieldSchemasNames = new ArrayList<String>(Arrays.asList(pigSchema.fieldNames()));
  if (!pigFieldSchemasNames.contains("key")) {
    throw new IOException("Expected a field called \"key\" but not found.");
  }
  for (ResourceFieldSchema fieldSchema : pigSchema.getFields()) {
    if (fieldSchema.getName().equals("key") && fieldSchema.getType() != DataType.CHARARRAY) {
      throw new IOException("Expected field \"key\" with schema chararray, but found schema " + DataType.findTypeName(fieldSchema.getType()) + ".");
    }
  }
  // Save the schema to the UDFContext to use it on the backend when writing data
  this.getUDFProperties().setProperty(GoraStorage.GORA_STORE_PIG_SCHEMA, pigSchema.toString());
}
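The same validation can be exercised in isolation; a hedged sketch that re-implements the check on schemas parsed from strings (the schemas and class name below are illustrative):

import java.io.IOException;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.util.Utils;

public class KeyFieldCheckSketch {

  // Stand-alone version of the check above: require a "key" field of type chararray.
  static void requireChararrayKey(ResourceSchema pigSchema) throws IOException {
    boolean keyFound = false;
    for (ResourceFieldSchema fieldSchema : pigSchema.getFields()) {
      if ("key".equals(fieldSchema.getName())) {
        keyFound = true;
        if (fieldSchema.getType() != DataType.CHARARRAY) {
          throw new IOException("Expected field \"key\" with schema chararray, but found "
              + DataType.findTypeName(fieldSchema.getType()) + ".");
        }
      }
    }
    if (!keyFound) {
      throw new IOException("Expected a field called \"key\" but not found.");
    }
  }

  public static void main(String[] args) throws Exception {
    requireChararrayKey(new ResourceSchema(Utils.getSchemaFromString("key:chararray,name:chararray"))); // passes
    requireChararrayKey(new ResourceSchema(Utils.getSchemaFromString("key:int,name:chararray")));       // throws IOException
  }
}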
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project gora by apache.
The class SchemaUtils, method generatePigSchema.
/**
 * Generates a Pig schema from a Persistent's Avro schema.
 *
 * @param persistentSchema The Persistent's Avro schema from which to generate the Pig schema.
 * @param queryFields Query fields declared in the Storage, WITHOUT the 'key' field.
 * @param keyClass Key class of the Persistent.
 * @return The generated Pig ResourceSchema.
 * @throws IOException
 */
public static ResourceSchema generatePigSchema(Schema persistentSchema, List<String> queryFields, Class<?> keyClass) throws IOException {
  ResourceFieldSchema[] resourceFieldSchemas = null;
  // We count the 'key' field here
  int numFields = queryFields.size() + 1;
  resourceFieldSchemas = new ResourceFieldSchema[numFields];
  resourceFieldSchemas[0] = new ResourceFieldSchema().setType(DataType.findType(keyClass)).setName("key");
  int fieldIndex = 1;
  for (String fieldName : queryFields) {
    // Reset the recursive schema checker for each field
    recursiveRecordSchema.clear();
    Field field = persistentSchema.getField(fieldName);
    if (field == null) {
      throw new IOException("Field \"" + fieldName + "\" not found in the entity " + persistentSchema.getFullName());
    }
    resourceFieldSchemas[fieldIndex++] = avro2ResouceFieldSchema(field.schema()).setName(field.name());
  }
  ResourceSchema resourceSchema = new ResourceSchema().setFields(resourceFieldSchemas);
  return resourceSchema;
}
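For a simple entity, the schema this method generates amounts to a 'key' field plus one field per query field; a hedged, hand-built equivalent for a single chararray query field (field names and types here are illustrative):

import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;

public class GeneratedSchemaSketch {
  public static void main(String[] args) throws Exception {
    // Hand-built equivalent of generatePigSchema for an entity with one chararray
    // query field "name" and a String key: index 0 is always the "key" field.
    ResourceFieldSchema[] fields = new ResourceFieldSchema[2];
    fields[0] = new ResourceFieldSchema().setType(DataType.CHARARRAY).setName("key");
    fields[1] = new ResourceFieldSchema().setType(DataType.CHARARRAY).setName("name");
    ResourceSchema pigSchema = new ResourceSchema().setFields(fields);
    System.out.println(pigSchema); // e.g. key:chararray,name:chararray
  }
}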