Use of org.apache.pig.ResourceSchema in project hive by apache.
Class PigHCatUtil, method getBagSubSchema:
protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
  // There are two cases - array<Type> and array<struct<...>>.
  // In either case the element type of the array is represented by a
  // tuple field schema in the bag's field schema. The second case (struct)
  // translates naturally to the tuple; in the first case (array<Type>)
  // we simulate the tuple by putting the single field in a tuple.
  Properties props = UDFContext.getUDFContext().getClientSystemProps();
  String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
  if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
    innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)
        .replaceAll("FIELDNAME", hfs.getName());
  }
  String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
  if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
    innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
        .replaceAll("FIELDNAME", hfs.getName());
  }
  ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
  bagSubFieldSchemas[0] = new ResourceFieldSchema()
      .setName(innerTupleName)
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
  HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
  if (arrayElementFieldSchema.getType() == Type.STRUCT) {
    bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
  } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
    ResourceSchema s = new ResourceSchema();
    List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
    s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
    bagSubFieldSchemas[0].setSchema(s);
  } else {
    // The element type is not a tuple - so no subschema.
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema()
        .setName(innerFieldName)
        .setDescription("The inner field in the tuple in the bag")
        .setType(getPigType(arrayElementFieldSchema))
        .setSchema(null);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
  }
  return new ResourceSchema().setFields(bagSubFieldSchemas);
}
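For reference, a minimal sketch (not from the Hive source) of how this method might be exercised for a plain array<string> column, mirroring the HCatFieldSchema and HCatSchema constructors used in the test below. The column and field names ("tags", "tag") are made up for illustration, and since getBagSubSchema is protected, the call assumes test or same-package context:

// Build an HCat field equivalent to a Hive column `tags array<string>`.
HCatSchema elementSchema = new HCatSchema(
    Lists.newArrayList(new HCatFieldSchema("tag", HCatFieldSchema.Type.STRING, null)));
HCatFieldSchema arrayColumn =
    new HCatFieldSchema("tags", HCatFieldSchema.Type.ARRAY, elementSchema, null);
ResourceSchema bagSchema = PigHCatUtil.getBagSubSchema(arrayColumn);
// bagSchema describes a single TUPLE field wrapping one CHARARRAY field;
// the tuple and inner field names come from the HCAT_PIG_INNER_* defaults
// unless overridden through the client system properties shown above.
System.out.println(bagSchema);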
Use of org.apache.pig.ResourceSchema in project hive by apache.
Class TestPigHCatUtil, method testGetBagSubSchemaConfigured:
@Test
public void testGetBagSubSchemaConfigured() throws Exception {
  // NOTE: pig-0.8 sets client system properties by actually getting the client
  // system properties. Starting in pig-0.9 you must pass the properties in.
  // When updating our pig dependency this will need to be updated.
  System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
  System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
  UDFContext.getUDFContext().setClientSystemProps(System.getProperties());

  // Define the expected schema.
  ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
  bagSubFieldSchemas[0] = new ResourceFieldSchema()
      .setName("t")
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
  ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
  innerTupleFieldSchemas[0] = new ResourceFieldSchema()
      .setName("llama_tuple")
      .setType(DataType.CHARARRAY);
  bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
  ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);

  // Get the actual converted schema.
  HCatSchema actualHCatSchema = new HCatSchema(
      Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
  HCatFieldSchema actualHCatFieldSchema =
      new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
  ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);

  Assert.assertEquals(expected.toString(), actual.toString());

  // Clean up the System properties that were set by this test.
  System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
  System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
}
Use of org.apache.pig.ResourceSchema in project gora by apache.
Class SchemaUtils, method avro2ResouceFieldSchema:
private static ResourceFieldSchema avro2ResouceFieldSchema(Schema schema) throws IOException {
  Type schemaType = schema.getType();
  switch (schemaType) {
    case NULL:
      return new ResourceFieldSchema().setType(DataType.NULL);
    case BOOLEAN:
      return new ResourceFieldSchema().setType(DataType.BOOLEAN);
    case ENUM:
      return new ResourceFieldSchema().setType(DataType.INTEGER);
    case BYTES:
      return new ResourceFieldSchema().setType(DataType.BYTEARRAY);
    case STRING:
      return new ResourceFieldSchema().setType(DataType.CHARARRAY);
    case FLOAT:
      return new ResourceFieldSchema().setType(DataType.FLOAT);
    case DOUBLE:
      return new ResourceFieldSchema().setType(DataType.DOUBLE);
    case INT:
      return new ResourceFieldSchema().setType(DataType.INTEGER);
    case LONG:
      return new ResourceFieldSchema().setType(DataType.LONG);
    case UNION:
      // Return the first non-null type.
      if (schema.getTypes().size() != 2) {
        LOG.warn("Field UNION {} must be ['null','othertype']. Maybe wrong definition?", schema.getName());
      }
      for (Schema s : schema.getTypes()) {
        if (s.getType() != Type.NULL) {
          return avro2ResouceFieldSchema(s);
        }
      }
      LOG.error("Union with only ['null']?");
      throw new RuntimeException("Union with only ['null']?");
    case RECORD:
      // A record in Gora is a Tuple in Pig.
      if (recursiveRecordSchema.incSchema(schema.getName()) > 1) {
        // Recursion detected (we are already two levels below the desired depth),
        // so back out one level and return a tuple schema with no fields.
        recursiveRecordSchema.decSchema(schema.getName());
        return new ResourceFieldSchema().setType(DataType.TUPLE);
      }
      int numRecordFields = schema.getFields().size();
      Iterator<Field> recordFields = schema.getFields().iterator();
      ResourceFieldSchema returnRecordResourceFieldSchema = new ResourceFieldSchema().setType(DataType.TUPLE);
      ResourceFieldSchema[] recordFieldSchemas = new ResourceFieldSchema[numRecordFields];
      for (int fieldIndex = 0; recordFields.hasNext(); fieldIndex++) {
        Field schemaField = recordFields.next();
        recordFieldSchemas[fieldIndex] = avro2ResouceFieldSchema(schemaField.schema()).setName(schemaField.name());
      }
      returnRecordResourceFieldSchema.setSchema(new ResourceSchema().setFields(recordFieldSchemas));
      return returnRecordResourceFieldSchema;
    case ARRAY:
      // An array in Gora is a Bag in Pig.
      // It could arguably be a Map with a string (numeric) index to preserve order,
      // but the Avro and Pig data models are different.
      ResourceFieldSchema returnArrayResourceFieldSchema = new ResourceFieldSchema().setType(DataType.BAG);
      Schema arrayElementType = schema.getElementType();
      returnArrayResourceFieldSchema.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] {
          new ResourceFieldSchema().setType(DataType.TUPLE).setName("t").setSchema(
              new ResourceSchema().setFields(new ResourceFieldSchema[] {
                  avro2ResouceFieldSchema(arrayElementType) })) }));
      return returnArrayResourceFieldSchema;
    case MAP:
      // A map in Gora is a Map in Pig, but in Pig the key is always a chararray.
      ResourceFieldSchema returnMapResourceFieldSchema = new ResourceFieldSchema().setType(DataType.MAP);
      Schema mapValueType = schema.getValueType();
      returnMapResourceFieldSchema.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] {
          avro2ResouceFieldSchema(mapValueType) }));
      return returnMapResourceFieldSchema;
    case FIXED:
      // TODO Implement the FIXED data type.
      throw new RuntimeException("Fixed type not implemented");
    default:
      throw new RuntimeException("Unexpected schema type " + schemaType);
  }
}
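As an illustration (not part of the Gora source) of the mapping implied by the switch above, consider a simple Avro record. The record and field names here are hypothetical, and since avro2ResouceFieldSchema is private the sketch only shows the Avro input and, in comments, the Pig-side shape it would produce; it needs org.apache.avro.Schema on the classpath:

// Hypothetical Avro schema: a record with a string, an int, and an array of strings.
Schema personSchema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Person\",\"fields\":["
        + "{\"name\":\"name\",\"type\":\"string\"},"
        + "{\"name\":\"age\",\"type\":\"int\"},"
        + "{\"name\":\"emails\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
// Per the cases above, the RECORD becomes a Pig TUPLE roughly of the form
//   (name: chararray, age: int, emails: {t: (chararray)})
// i.e. STRING -> CHARARRAY, INT -> INTEGER, and the ARRAY becomes a BAG
// of single-field tuples named "t".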
Use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.
Class JSONPigReplace, method substitute:
/*
 * Returns the result of substituting the Pig objects in Tuple t into initStr.
 *
 * @param t  Pig tuple containing Pig objects
 * @param s  schema representing Tuple t (a String, Schema, or ResourceSchema)
 * @param un string used to name un-named schema fields
 *
 * @return array of BasicBSONObjects that contain all replacements for "marked" strings
 */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
  unnamedStr = un;
  final ResourceFieldSchema[] fields;
  try {
    final ResourceSchema schema;
    if (s instanceof String) {
      schema = new ResourceSchema(Utils.getSchemaFromString((String) s));
    } else if (s instanceof Schema) {
      schema = new ResourceSchema((Schema) s);
    } else if (s instanceof ResourceSchema) {
      schema = (ResourceSchema) s;
    } else {
      throw new IllegalArgumentException(
          "Schema must be represented either by a string or a Schema object, not " + s);
    }
    fields = schema.getFields();
  } catch (Exception e) {
    throw new IllegalArgumentException("Invalid Schema Format", e);
  }

  // Turn Tuple t into a BSONObject using the schema provided and store the result in pObj.
  final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
  for (int i = 0; i < fields.length; i++) {
    writeField(builder, fields[i], t.get(i));
  }
  // BSONObject that represents the Pig Tuple input using the Pig schema.
  BasicBSONObject pObj = (BasicBSONObject) builder.get();

  // Fill the map from replacement strings to the objects that will replace them.
  fillReplacementMap(pObj);

  // Replace the replacement strings (of the form $elem) with the corresponding objects in pObj.
  return replaceAll(initBSONs, reps);
}
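For context, a sketch (not from the project) of the three schema representations the `s` argument accepts; Utils is org.apache.pig.impl.util.Utils, the same helper used inside the method, and the field names are made up:

// A Pig schema string for a tuple (name: chararray, age: int)...
String schemaString = "name:chararray, age:int";
// ...parsed into a logical Schema...
Schema logicalSchema = Utils.getSchemaFromString(schemaString);
// ...or wrapped in a ResourceSchema. Any of the three forms may be passed
// as the `s` argument of substitute(t, s, un).
ResourceSchema resourceSchema = new ResourceSchema(logicalSchema);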
Use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.
Class MongoInsertStorage, method prepareToWrite:
public void prepareToWrite(final RecordWriter writer) throws IOException {
  out = writer;
  if (out == null) {
    throw new IOException("Invalid Record Writer");
  }
  UDFContext udfc = UDFContext.getUDFContext();
  Properties p = udfc.getUDFProperties(getClass(), new String[]{udfcSignature});
  String strSchema = p.getProperty(SCHEMA_SIGNATURE);
  if (strSchema == null) {
    LOG.warn("Could not find schema in UDF context. Interpreting each tuple as containing a single map.");
  } else {
    try {
      // Parse the schema from the string stored in the properties object.
      schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
    } catch (Exception e) {
      schema = null;
      LOG.warn(e.getMessage());
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("GOT A SCHEMA " + schema + " " + strSchema);
    }
  }
}
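The SCHEMA_SIGNATURE property read here is normally written on the front end before the job is launched. A minimal sketch of what that front-end side typically looks like in a Pig StoreFunc, assumed for illustration rather than copied from mongo-hadoop:

@Override
public void checkSchema(final ResourceSchema schema) throws IOException {
  // Runs on the front end: stash the declared store schema in the UDF context
  // so prepareToWrite() can recover it on the back end via the same signature.
  UDFContext udfc = UDFContext.getUDFContext();
  Properties p = udfc.getUDFProperties(getClass(), new String[]{udfcSignature});
  p.setProperty(SCHEMA_SIGNATURE, schema.toString());
}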