Example usage of org.apache.pig.ResourceSchema in the Apache Hive project: the getResourceSchema method of the PigHCatUtil class.
/**
 * Converts an HCatalog schema into the equivalent Pig {@link ResourceSchema}.
 *
 * Each HCat field is translated one-for-one via
 * {@code getResourceSchemaFromFieldSchema}, preserving field order.
 *
 * @param hcatSchema the HCatalog schema to convert
 * @return a ResourceSchema whose fields mirror {@code hcatSchema.getFields()}
 * @throws IOException if any field cannot be converted
 */
public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {
    List<HCatFieldSchema> hcatFields = hcatSchema.getFields();
    ResourceFieldSchema[] fields = new ResourceFieldSchema[hcatFields.size()];
    for (int i = 0; i < fields.length; i++) {
        fields[i] = getResourceSchemaFromFieldSchema(hcatFields.get(i));
    }
    ResourceSchema rSchema = new ResourceSchema();
    rSchema.setFields(fields);
    return rSchema;
}
Example usage of org.apache.pig.ResourceSchema in the Apache Hive project: the getBagSubSchema method of the PigHCatUtil class.
/**
 * Builds the Pig schema for the bag that represents an HCat {@code array} field.
 *
 * There are two cases - {@code array<Type>} and {@code array<struct<...>>}. In
 * either case the element type of the array is represented in a tuple field
 * schema in the bag's field schema - the second case (struct) more naturally
 * translates to the tuple - in the first case ({@code array<Type>}) we simulate
 * the tuple by putting the single field in a tuple.
 *
 * The inner tuple/field names default to the HCat constants but may be
 * overridden by client system properties, with the literal token "FIELDNAME"
 * substituted by the field's name.
 *
 * @param hfs the HCat array field to convert
 * @return a single-field ResourceSchema describing the bag's inner tuple
 * @throws IOException if the element schema cannot be converted
 */
protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
    Properties props = UDFContext.getUDFContext().getClientSystemProps();
    String innerTupleName = resolveInnerName(props, HCatConstants.HCAT_PIG_INNER_TUPLE_NAME,
        HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT, hfs.getName());
    String innerFieldName = resolveInnerName(props, HCatConstants.HCAT_PIG_INNER_FIELD_NAME,
        HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT, hfs.getName());
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema()
        .setName(innerTupleName)
        .setDescription("The tuple in the bag")
        .setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
        // struct element: translates directly to the inner tuple's schema
        bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
        // nested array element: wrap the recursively converted field schema
        ResourceSchema s = new ResourceSchema();
        s.setFields(new ResourceFieldSchema[] { getResourceSchemaFromFieldSchema(arrayElementFieldSchema) });
        bagSubFieldSchemas[0].setSchema(s);
    } else {
        // scalar element: simulate the tuple with a single inner field
        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        innerTupleFieldSchemas[0] = new ResourceFieldSchema()
            .setName(innerFieldName)
            .setDescription("The inner field in the tuple in the bag")
            .setType(getPigType(arrayElementFieldSchema))
            // the element type is not a tuple - so no subschema
            .setSchema(null);
        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
    return new ResourceSchema().setFields(bagSubFieldSchemas);
}

/**
 * Resolves a configurable inner name: the client-supplied property value with
 * the literal token "FIELDNAME" substituted by {@code fieldName}, or
 * {@code defaultName} when the property is absent.
 */
private static String resolveInnerName(Properties props, String propKey, String defaultName, String fieldName) {
    if (props != null && props.containsKey(propKey)) {
        // replace(), not replaceAll(): both the token and the field name are literal
        // text. replaceAll() would interpret '$' or '\' in the field name as regex
        // replacement metacharacters and misbehave or throw.
        return props.getProperty(propKey).replace("FIELDNAME", fieldName);
    }
    return defaultName;
}
Example usage of org.apache.pig.ResourceSchema in the Apache Hive project: the testGetBagSubSchemaConfigured method of the TestPigHCatUtil class.
@Test
public void testGetBagSubSchemaConfigured() throws Exception {
    // NOTE: pig-0.8 sets client system properties by actually getting the client
    // system properties. Starting in pig-0.9 you must pass the properties in.
    // When updating our pig dependency this will need updated.
    System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
    System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
    try {
        UDFContext.getUDFContext().setClientSystemProps(System.getProperties());
        // Define the expected schema: a bag holding tuple "t" whose single
        // field is the chararray "llama_tuple" (FIELDNAME substituted).
        ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
        bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t")
            .setDescription("The tuple in the bag").setType(DataType.TUPLE);
        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        innerTupleFieldSchemas[0] =
            new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
        ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
        // Get the actual converted schema.
        HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(
            new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
        HCatFieldSchema actualHCatFieldSchema =
            new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
        ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
        Assert.assertEquals(expected.toString(), actual.toString());
    } finally {
        // Clean up in a finally block so a failed assertion cannot leak the
        // system properties into subsequent tests.
        System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
        System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
    }
}
Example usage of org.apache.pig.ResourceSchema in the mongo-hadoop project (MongoDB): the prepareToWrite method of the MongoUpdateStorage class.
@Override
public void prepareToWrite(final RecordWriter writer) throws IOException {
    //noinspection unchecked
    recordWriter = (MongoRecordWriter<?, MongoUpdateWritable>) writer;
    LOG.info("Preparing to write to " + recordWriter);
    if (recordWriter == null) {
        throw new IOException("Invalid Record Writer");
    }
    /*
     * In determining the schema to use, the user-defined schema should take
     * precedence over the "inferred" schema
     */
    if (schemaStr != null) {
        try {
            schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            // Fail fast with the original cause instead of leaving `schema` null,
            // which would only surface later as an NPE far from this parse error.
            throw new IOException("Could not parse user-defined schema: " + schemaStr, e);
        }
    } else {
        // No user-defined schema: fall back to the schema stashed in the UDF
        // context (keyed by this store's signature) during front-end planning.
        UDFContext context = UDFContext.getUDFContext();
        Properties p = context.getUDFProperties(getClass(), new String[] { signature });
        String s = p.getProperty(SCHEMA_SIGNATURE);
        if (s == null) {
            throw new IOException("Could not find schema in UDF context. You'd have to explicitly specify a Schema.");
        }
        schema = new ResourceSchema(Utils.getSchemaFromString(s));
    }
}
Example usage of org.apache.pig.ResourceSchema in the mongo-hadoop project (MongoDB): the testNullMap method of the BSONStorageTest class.
@Test
public void testNullMap() throws Exception {
    // A null value for a map-typed field should convert to a null BSON value.
    ResourceSchema mapSchema = new ResourceSchema(Utils.getSchemaFromString("m:map[]"));
    assertNull(BSONStorage.getTypeForBSON(null, mapSchema.getFields()[0], null));
}
Aggregations