
Example 11 with ResourceSchema

use of org.apache.pig.ResourceSchema in project hive by apache.

In the class PigHCatUtil, the method getResourceSchema:

public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {
    List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
    for (HCatFieldSchema hfs : hcatSchema.getFields()) {
        ResourceFieldSchema rfSchema;
        rfSchema = getResourceSchemaFromFieldSchema(hfs);
        rfSchemaList.add(rfSchema);
    }
    ResourceSchema rSchema = new ResourceSchema();
    rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[rfSchemaList.size()]));
    return rSchema;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
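
A minimal caller sketch (not part of the project source; the method name and the two column names are illustrative) showing how a simple HCatSchema could be assembled and converted, using the same HCatFieldSchema constructors that appear in the test example further below:

public static void printConvertedSchema() throws IOException {
    // Hypothetical two-column Hive schema: an int "id" and a string "name".
    HCatSchema hcatSchema = new HCatSchema(Arrays.asList(
        new HCatFieldSchema("id", HCatFieldSchema.Type.INT, null),
        new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, null)));
    ResourceSchema pigSchema = PigHCatUtil.getResourceSchema(hcatSchema);
    // The int column maps to Pig's int and the string column to chararray.
    System.out.println(pigSchema);
}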

Example 12 with ResourceSchema

use of org.apache.pig.ResourceSchema in project hive by apache.

In the class PigHCatUtil, the method getBagSubSchema:

protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
    // there are two cases - array<Type> and array<struct<...>>
    // in either case the element type of the array is represented in a
    // tuple field schema in the bag's field schema - the second case (struct)
    // more naturally translates to the tuple - in the first case (array<Type>)
    // we simulate the tuple by putting the single field in a tuple
    Properties props = UDFContext.getUDFContext().getClientSystemProps();
    String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
        innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME).replaceAll("FIELDNAME", hfs.getName());
    }
    String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
        innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME).replaceAll("FIELDNAME", hfs.getName());
    }
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName).setDescription("The tuple in the bag").setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
        bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
        ResourceSchema s = new ResourceSchema();
        List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
        s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
        bagSubFieldSchemas[0].setSchema(s);
    } else {
        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        // the element type is not a tuple - so no subschema
        innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
            .setDescription("The inner field in the tuple in the bag")
            .setType(getPigType(arrayElementFieldSchema))
            .setSchema(null);
        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
    return new ResourceSchema().setFields(bagSubFieldSchemas);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) Properties(java.util.Properties) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
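
A hedged sketch of the struct branch (array<struct<...>>), not taken from the project; the column and field names are illustrative, and the call goes through the public getResourceSchema so no protected access is needed:

public static void printStructBagSchema() throws IOException {
    // Hypothetical column declared as points:array<struct<a:int, b:string>>.
    HCatSchema structSchema = new HCatSchema(Arrays.asList(
        new HCatFieldSchema("a", HCatFieldSchema.Type.INT, null),
        new HCatFieldSchema("b", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema structElement = new HCatFieldSchema("elem", HCatFieldSchema.Type.STRUCT, structSchema, null);
    HCatFieldSchema arrayColumn = new HCatFieldSchema("points", HCatFieldSchema.Type.ARRAY,
        new HCatSchema(Arrays.asList(structElement)), null);
    // In the struct case the bag's tuple takes its sub-schema from the struct's own
    // fields instead of a single synthetic inner field.
    System.out.println(PigHCatUtil.getResourceSchema(new HCatSchema(Arrays.asList(arrayColumn))));
}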

Example 13 with ResourceSchema

use of org.apache.pig.ResourceSchema in project hive by apache.

In the class TestPigHCatUtil, the method testGetBagSubSchemaConfigured:

@Test
public void testGetBagSubSchemaConfigured() throws Exception {
    // NOTE: pig-0.8 sets client system properties by actually getting the client
    // system properties. Starting in pig-0.9 you must pass the properties in.
    // When updating our pig dependency this will need to be updated.
    System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
    System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
    UDFContext.getUDFContext().setClientSystemProps(System.getProperties());
    // Define the expected schema.
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t").setDescription("The tuple in the bag").setType(DataType.TUPLE);
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
    // Get the actual converted schema.
    HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema actualHCatFieldSchema = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
    Assert.assertEquals(expected.toString(), actual.toString());
    // Clean up System properties that were set by this test
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)
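
A hedged companion check (not the project's actual test) for the unconfigured case: with neither property set, getBagSubSchema should fall back to the default inner names declared in HCatConstants, so the assertions compare against those constants rather than hard-coded strings:

@Test
public void sketchGetBagSubSchemaDefaults() throws Exception {
    // Make sure the two override properties from other tests are not set.
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
    UDFContext.getUDFContext().setClientSystemProps(System.getProperties());
    HCatSchema elementSchema = new HCatSchema(Lists.newArrayList(
        new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema bagColumn = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, elementSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(bagColumn);
    // The tuple and the inner field keep the library defaults when nothing is configured.
    Assert.assertEquals(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT, actual.getFields()[0].getName());
    Assert.assertEquals(HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT,
        actual.getFields()[0].getSchema().getFields()[0].getName());
}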

Example 14 with ResourceSchema

use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.

In the class MongoUpdateStorage, the method prepareToWrite:

@Override
public void prepareToWrite(final RecordWriter writer) throws IOException {
    //noinspection unchecked
    recordWriter = (MongoRecordWriter<?, MongoUpdateWritable>) writer;
    LOG.info("Preparing to write to " + recordWriter);
    if (recordWriter == null) {
        throw new IOException("Invalid Record Writer");
    }
    UDFContext context = UDFContext.getUDFContext();
    Properties p = context.getUDFProperties(getClass(), new String[] { signature });
    /*
     * In determining the schema to use, the user-defined schema should take
     * precedence over the "inferred" schema
     */
    if (schemaStr != null) {
        try {
            schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
    } else {
        String s = p.getProperty(SCHEMA_SIGNATURE);
        if (s == null) {
            throw new IOException("Could not find schema in UDF context. You'd have to explicitly specify a Schema.");
        }
        schema = new ResourceSchema(Utils.getSchemaFromString(s));
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) UDFContext(org.apache.pig.impl.util.UDFContext) IOException(java.io.IOException) Properties(java.util.Properties)
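
The schema-string path can be exercised on its own. A minimal sketch (not from mongo-hadoop; the schema string is illustrative) of the same conversion prepareToWrite() performs when an explicit schema is supplied:

ResourceSchema schema = new ResourceSchema(
    Utils.getSchemaFromString("first:chararray, last:chararray, age:int"));
// A Pig schema string parses into a Schema, which ResourceSchema wraps for use by the StoreFunc.
System.out.println(schema);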

Example 15 with ResourceSchema

use of org.apache.pig.ResourceSchema in project mongo-hadoop by mongodb.

In the class BSONStorageTest, the method testNullMap:

@Test
public void testNullMap() throws Exception {
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString("m:map[]"));
    assertNull(BSONStorage.getTypeForBSON(null, schema.getFields()[0], null));
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) Test(org.junit.Test)
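
A hedged companion sketch (illustrative, not the project's test; it assumes the same three-argument getTypeForBSON and the usual static JUnit imports) for the populated case, where the Pig map value should come back as a Map ready to embed in a BSON document:

@Test
public void sketchPopulatedMap() throws Exception {
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString("m:map[]"));
    Map<String, Object> pigMap = new HashMap<String, Object>();
    pigMap.put("name", "llama");
    Object converted = BSONStorage.getTypeForBSON(pigMap, schema.getFields()[0], null);
    // The map itself survives the conversion and its entries stay addressable by key.
    assertTrue(converted instanceof Map);
    assertEquals("llama", ((Map<?, ?>) converted).get("name"));
}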

Aggregations

ResourceSchema (org.apache.pig.ResourceSchema): 18
ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 11
IOException (java.io.IOException): 8
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6
ArrayList (java.util.ArrayList): 5
Properties (java.util.Properties): 5
Test (org.junit.Test): 5
UDFContext (org.apache.pig.impl.util.UDFContext): 4
HashMap (java.util.HashMap): 3
List (java.util.List): 3
Map (java.util.Map): 3
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 3
Tuple (org.apache.pig.data.Tuple): 3
BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 2
SQLException (java.sql.SQLException): 2
LinkedHashMap (java.util.LinkedHashMap): 2
DataBag (org.apache.pig.data.DataBag): 2
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 2
BasicBSONObject (org.bson.BasicBSONObject): 2
DateTime (org.joda.time.DateTime): 2