
Example 16 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project hive by apache.

the class PigHCatUtil method getBagSubSchema.

protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
    // there are two cases - array<Type> and array<struct<...>>
    // in either case the element type of the array is represented in a
    // tuple field schema in the bag's field schema - the second case (struct)
    // more naturally translates to the tuple - in the first case (array<Type>)
    // we simulate the tuple by putting the single field in a tuple
    Properties props = UDFContext.getUDFContext().getClientSystemProps();
    String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
        innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME).replaceAll("FIELDNAME", hfs.getName());
    }
    String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
        innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME).replaceAll("FIELDNAME", hfs.getName());
    }
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName).setDescription("The tuple in the bag").setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
        bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
        ResourceSchema s = new ResourceSchema();
        List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
        s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
        bagSubFieldSchemas[0].setSchema(s);
    } else {
        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName).setDescription("The inner field in the tuple in the bag").setType(getPigType(arrayElementFieldSchema)).setSchema(null); // the element type is not a tuple - so no subschema
        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
    return new ResourceSchema().setFields(bagSubFieldSchemas);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) Properties(java.util.Properties) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
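A hedged usage sketch, not from the aggregated sources: the primitive branch above can be exercised with an array<string> column and no property overrides, in which case the inner tuple and field names fall back to the HCatConstants defaults. Like the test in the next example, this assumes same-package access to the protected method (org.apache.hive.hcatalog.pig); the method and field names in the sketch are hypothetical.

static void printDefaultBagSubSchema() throws Exception {
    // array<string> column named "tags"; the element schema holds the single string field
    HCatSchema elementSchema = new HCatSchema(Lists.newArrayList(
        new HCatFieldSchema("tag", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema arrayColumn = new HCatFieldSchema("tags", HCatFieldSchema.Type.ARRAY, elementSchema, null);
    // With no HCAT_PIG_INNER_* properties set, the default inner names are used
    ResourceSchema bagSchema = PigHCatUtil.getBagSubSchema(arrayColumn);
    // Expected shape: {<default inner tuple>: (<default inner field>: chararray)}
    System.out.println(bagSchema);
}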

Example 17 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project hive by apache.

the class TestPigHCatUtil method testGetBagSubSchemaConfigured.

@Test
public void testGetBagSubSchemaConfigured() throws Exception {
    // NOTE: pig-0.8 sets client system properties by actually getting the client
    // system properties. Starting in pig-0.9 you must pass the properties in.
    // When updating our pig dependency this will need to be updated.
    System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
    System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
    UDFContext.getUDFContext().setClientSystemProps(System.getProperties());
    // Define the expected schema.
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t").setDescription("The tuple in the bag").setType(DataType.TUPLE);
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
    // Get the actual converted schema.
    HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema actualHCatFieldSchema = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
    Assert.assertEquals(expected.toString(), actual.toString());
    // Clean up System properties that were set by this test
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)
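A hedged follow-up sketch, not from the aggregated sources: the test above substitutes FIELDNAME only in the inner field name. When the tuple-name property also carries the placeholder, getBagSubSchema replaces it with the column name as well, per the replaceAll calls in Example 16. The test method below is hypothetical.

@Test
public void testGetBagSubSchemaFieldNamePlaceholders() throws Exception {
    // Both properties carry the FIELDNAME placeholder, so for a column named "llama"
    // the inner tuple should become "llama_bag" and its field "llama_tuple".
    System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "FIELDNAME_bag");
    System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
    UDFContext.getUDFContext().setClientSystemProps(System.getProperties());
    try {
        HCatSchema inner = new HCatSchema(Lists.newArrayList(
            new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
        HCatFieldSchema llama = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, inner, null);
        ResourceSchema actual = PigHCatUtil.getBagSubSchema(llama);
        System.out.println(actual);
    } finally {
        // Clean up the System properties, as the test above does
        System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
        System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
    }
}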

Example 18 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class JSONPigReplace method substitute.

/*
     * Returns the result of substituting the Pig objects in Tuple t into
     * initStr.
     *
     * @param t  Pig tuple containing the Pig objects to substitute
     * @param s  schema describing Tuple t; may be a String, a Schema, or a ResourceSchema
     * @param un name used for unnamed schema fields
     *
     * @return array of BasicBSONObjects that contain all replacements for "marked" strings
     */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
    unnamedStr = un;
    final ResourceFieldSchema[] fields;
    try {
        final ResourceSchema schema;
        if (s instanceof String) {
            schema = new ResourceSchema(Utils.getSchemaFromString((String) s));
        } else if (s instanceof Schema) {
            schema = new ResourceSchema((Schema) s);
        } else if (s instanceof ResourceSchema) {
            schema = (ResourceSchema) s;
        } else {
            throw new IllegalArgumentException("Schema must be represented by a String, Schema, or ResourceSchema object, not " + s);
        }
        fields = schema.getFields();
    } catch (Exception e) {
        throw new IllegalArgumentException("Invalid Schema Format", e);
    }
    // Make Tuple t into BSONObject using schema provided and store result in pObj
    final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
    for (int i = 0; i < fields.length; i++) {
        writeField(builder, fields[i], t.get(i));
    }
    // BSONObject that represents Pig Tuple input using Pig Schema
    BasicBSONObject pObj = (BasicBSONObject) builder.get();
    // fill map of replacement strings to corresponding objects to replace these strings with
    fillReplacementMap(pObj);
    // Now, replace replacement strings (of form $elem) with corresponding objects in pObj      
    return replaceAll(initBSONs, reps);
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) ResourceSchema(org.apache.pig.ResourceSchema) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) IOException(java.io.IOException)
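A hedged usage sketch, not from the aggregated sources, showing the three schema representations substitute() accepts. Here replacer stands in for an already-constructed JSONPigReplace instance (its constructor is not shown on this page), and the field names and values are hypothetical.

// Sketch only: the same schema expressed in each of the accepted forms.
String schemaString = "first:chararray, second:int";               // String form
Schema schema = Utils.getSchemaFromString(schemaString);            // Schema form
ResourceSchema resourceSchema = new ResourceSchema(schema);         // ResourceSchema form

// A matching two-field tuple
Tuple t = TupleFactory.getInstance().newTuple(2);
t.set(0, "hello");
t.set(1, 42);

// Any of the three forms is accepted; anything else throws IllegalArgumentException
BasicBSONObject[] replaced = replacer.substitute(t, resourceSchema, "unnamed");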

Example 19 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project eiger by wlloyd.

the class CassandraStorage method getSchema.

public ResourceSchema getSchema(String location, Job job) throws IOException {
    setLocation(location, job);
    CfDef cfDef = getCfDef(loadSignature);
    if (cfDef.column_type.equals("Super"))
        return null;
    // top-level schema, no type
    ResourceSchema schema = new ResourceSchema();
    // get default marshallers and validators
    List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
    Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef);
    // add key
    ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
    keyFieldSchema.setName("key");
    keyFieldSchema.setType(getPigType(marshallers.get(2)));
    // will become the bag of tuples
    ResourceFieldSchema bagFieldSchema = new ResourceFieldSchema();
    bagFieldSchema.setName("columns");
    bagFieldSchema.setType(DataType.BAG);
    ResourceSchema bagSchema = new ResourceSchema();
    List<ResourceFieldSchema> tupleFields = new ArrayList<ResourceFieldSchema>();
    // default comparator/validator
    ResourceSchema innerTupleSchema = new ResourceSchema();
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(innerTupleSchema);
    ResourceFieldSchema colSchema = new ResourceFieldSchema();
    colSchema.setName("name");
    colSchema.setType(getPigType(marshallers.get(0)));
    tupleFields.add(colSchema);
    ResourceFieldSchema valSchema = new ResourceFieldSchema();
    AbstractType validator = marshallers.get(1);
    valSchema.setName("value");
    valSchema.setType(getPigType(validator));
    tupleFields.add(valSchema);
    // defined validators/indexes
    for (ColumnDef cdef : cfDef.column_metadata) {
        colSchema = new ResourceFieldSchema();
        colSchema.setName(new String(cdef.getName()));
        colSchema.setType(getPigType(marshallers.get(0)));
        tupleFields.add(colSchema);
        valSchema = new ResourceFieldSchema();
        validator = validators.get(cdef.getName());
        if (validator == null)
            validator = marshallers.get(1);
        valSchema.setName("value");
        valSchema.setType(getPigType(validator));
        tupleFields.add(valSchema);
    }
    innerTupleSchema.setFields(tupleFields.toArray(new ResourceFieldSchema[tupleFields.size()]));
    // a bag can contain only one tuple, but that tuple can contain anything
    bagSchema.setFields(new ResourceFieldSchema[] { tupleField });
    bagFieldSchema.setSchema(bagSchema);
    // top level schema contains everything
    schema.setFields(new ResourceFieldSchema[] { keyFieldSchema, bagFieldSchema });
    return schema;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ByteBuffer(java.nio.ByteBuffer)
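A hedged standalone sketch, not from the aggregated sources, of the bag-of-tuples shape that getSchema() builds above, using only plain Pig classes; chararray stands in for whatever types the Cassandra marshallers and validators actually map to, and the method name is hypothetical.

static ResourceSchema cassandraLikeSchemaShape() throws IOException {
    // (key: chararray, columns: {(name: chararray, value: chararray)})
    ResourceSchema innerTupleSchema = new ResourceSchema().setFields(new ResourceFieldSchema[] {
        new ResourceFieldSchema().setName("name").setType(DataType.CHARARRAY),
        new ResourceFieldSchema().setName("value").setType(DataType.CHARARRAY) });
    ResourceFieldSchema tupleField = new ResourceFieldSchema().setType(DataType.TUPLE).setSchema(innerTupleSchema);
    ResourceFieldSchema bagField = new ResourceFieldSchema().setName("columns").setType(DataType.BAG)
        .setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { tupleField }));
    ResourceFieldSchema keyField = new ResourceFieldSchema().setName("key").setType(DataType.CHARARRAY);
    // top-level schema: the key plus the bag of column tuples
    return new ResourceSchema().setFields(new ResourceFieldSchema[] { keyField, bagField });
}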

Example 20 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project phoenix by apache.

the class PhoenixPigSchemaUtil method getResourceSchema.

public static ResourceSchema getResourceSchema(final Configuration configuration, Dependencies dependencies) throws IOException {
    final ResourceSchema schema = new ResourceSchema();
    try {
        List<ColumnInfo> columns = null;
        final SchemaType schemaType = PhoenixConfigurationUtil.getSchemaType(configuration);
        if (SchemaType.QUERY.equals(schemaType)) {
            final String sqlQuery = PhoenixConfigurationUtil.getSelectStatement(configuration);
            Preconditions.checkNotNull(sqlQuery, "No Sql Query exists within the configuration");
            final SqlQueryToColumnInfoFunction function = new SqlQueryToColumnInfoFunction(configuration);
            columns = function.apply(sqlQuery);
        } else {
            columns = dependencies.getSelectColumnMetadataList(configuration);
        }
        ResourceFieldSchema[] fields = new ResourceFieldSchema[columns.size()];
        int i = 0;
        for (ColumnInfo cinfo : columns) {
            int sqlType = cinfo.getSqlType();
            PDataType phoenixDataType = PDataType.fromTypeId(sqlType);
            byte pigType = TypeUtil.getPigDataTypeForPhoenixType(phoenixDataType);
            ResourceFieldSchema field = new ResourceFieldSchema();
            field.setType(pigType).setName(cinfo.getDisplayName());
            fields[i++] = field;
        }
        schema.setFields(fields);
    } catch (SQLException sqle) {
        LOG.error(String.format("Error: SQLException [%s] ", sqle.getMessage()));
        throw new IOException(sqle);
    }
    return schema;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) SQLException(java.sql.SQLException) ColumnInfo(org.apache.phoenix.util.ColumnInfo) IOException(java.io.IOException) SchemaType(org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.SchemaType) PDataType(org.apache.phoenix.schema.types.PDataType) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)
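A hedged, Phoenix-free sketch, not from the aggregated sources, of the field-building loop above: each column contributes one ResourceFieldSchema carrying a display name and a Pig type byte. The column names and types below are hypothetical stand-ins for the ColumnInfo metadata and the TypeUtil lookup.

// Sketch only: hypothetical columns in place of ColumnInfo / TypeUtil lookups.
String[] names = { "ID", "NAME", "CREATED_DATE" };
byte[] pigTypes = { DataType.LONG, DataType.CHARARRAY, DataType.DATETIME };

ResourceFieldSchema[] fields = new ResourceFieldSchema[names.length];
for (int i = 0; i < names.length; i++) {
    fields[i] = new ResourceFieldSchema().setType(pigTypes[i]).setName(names[i]);
}
ResourceSchema schema = new ResourceSchema().setFields(fields);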

Aggregations

ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 20
ResourceSchema (org.apache.pig.ResourceSchema): 11
IOException (java.io.IOException): 10
Map (java.util.Map): 6
Tuple (org.apache.pig.data.Tuple): 6
ArrayList (java.util.ArrayList): 5
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 5
BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 4
SQLException (java.sql.SQLException): 4
BasicBSONObject (org.bson.BasicBSONObject): 4
Test (org.junit.Test): 4
HashMap (java.util.HashMap): 3
List (java.util.List): 3
BSONObject (org.bson.BSONObject): 3
DateTime (org.joda.time.DateTime): 3
LinkedHashMap (java.util.LinkedHashMap): 2
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 2
PhoenixRecordWritable (org.apache.phoenix.mapreduce.PhoenixRecordWritable): 2
PDataType (org.apache.phoenix.schema.types.PDataType): 2
PhoenixArray (org.apache.phoenix.schema.types.PhoenixArray): 2