Search in sources :

Example 41 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class JSONPigReplace method substitute.

/**
 * Substitutes the Pig objects carried by a tuple into the stored BSON templates.
 *
 * <p>The tuple is first converted, field by field according to the supplied schema, into a
 * single BSON object. That object is used to fill the replacement map, and the replacements
 * are then applied to {@code initBSONs}.
 *
 * @param t  Pig tuple containing the Pig objects
 * @param s  schema describing {@code t}; accepted forms are a schema {@code String}, a
 *           {@code Schema} or a {@code ResourceSchema}
 * @param un string used to represent un-named schema fields
 * @return array of BasicBSONObjects that contain all replacements for "marked" strings
 * @throws IllegalArgumentException if {@code s} has any other type, or if resolving it to a
 *                                  list of fields fails (the original failure is kept as cause)
 * @throws Exception declared by the signature; presumably raised while reading the tuple or
 *                   by the helper methods called here
 */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
    unnamedStr = un;

    // Resolve the schema argument to its field descriptors, whatever form it was supplied in.
    final ResourceFieldSchema[] fields;
    try {
        final ResourceSchema resolved;
        if (s instanceof ResourceSchema) {
            resolved = (ResourceSchema) s;
        } else if (s instanceof Schema) {
            resolved = new ResourceSchema((Schema) s);
        } else if (s instanceof String) {
            resolved = new ResourceSchema(Utils.getSchemaFromString((String) s));
        } else {
            throw new IllegalArgumentException("Schema must be represented either by a string or a Schema " + "object, not " + s);
        }
        fields = resolved.getFields();
    } catch (Exception e) {
        // Any failure above, including the unsupported-type case, is reported uniformly.
        throw new IllegalArgumentException("Invalid Schema Format", e);
    }

    // Render the tuple as one BSON object: the i-th schema field describes the i-th tuple value.
    final BasicDBObjectBuilder tupleDoc = BasicDBObjectBuilder.start();
    int position = 0;
    for (final ResourceFieldSchema field : fields) {
        writeField(tupleDoc, field, t.get(position++));
    }

    // Record, for each replacement string, the object from the tuple that should take its place.
    fillReplacementMap((BasicBSONObject) tupleDoc.get());

    // Swap the replacement strings (of form $elem) for the recorded objects.
    return replaceAll(initBSONs, reps);
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) ResourceSchema(org.apache.pig.ResourceSchema) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) IOException(java.io.IOException)

Example 42 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project eiger by wlloyd.

the class CassandraStorage method getSchema.

/**
 * Builds the Pig schema advertised for the column family at {@code location}.
 *
 * <p>The returned schema has two top-level fields: {@code key}, and {@code columns}, a bag
 * whose single tuple starts with a default {@code name}/{@code value} pair followed by one
 * name/value pair per entry of the column family's {@code column_metadata}.
 *
 * <p>As used here, the default marshaller list supplies the column-name type at index 0, the
 * default value type at index 1 and the key type at index 2.
 *
 * @param location location string, forwarded to {@code setLocation}
 * @param job      job, forwarded to {@code setLocation}
 * @return the schema, or {@code null} when the column family's type is {@code "Super"}
 * @throws IOException declared by the signature; presumably raised by the helpers called here
 */
public ResourceSchema getSchema(String location, Job job) throws IOException {
    setLocation(location, job);
    CfDef cfDef = getCfDef(loadSignature);
    // Constant-first comparison so a missing column_type cannot cause a NullPointerException.
    if ("Super".equals(cfDef.column_type)) {
        return null;
    }
    // top-level schema, no type
    ResourceSchema schema = new ResourceSchema();
    // get default marshallers and validators
    List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
    Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef);
    // add key
    ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
    keyFieldSchema.setName("key");
    keyFieldSchema.setType(getPigType(marshallers.get(2)));
    // will become the bag of tuples
    ResourceFieldSchema bagFieldSchema = new ResourceFieldSchema();
    bagFieldSchema.setName("columns");
    bagFieldSchema.setType(DataType.BAG);
    ResourceSchema bagSchema = new ResourceSchema();
    List<ResourceFieldSchema> tupleFields = new ArrayList<ResourceFieldSchema>();
    // default comparator/validator
    ResourceSchema innerTupleSchema = new ResourceSchema();
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(innerTupleSchema);
    ResourceFieldSchema colSchema = new ResourceFieldSchema();
    colSchema.setName("name");
    colSchema.setType(getPigType(marshallers.get(0)));
    tupleFields.add(colSchema);
    ResourceFieldSchema valSchema = new ResourceFieldSchema();
    AbstractType validator = marshallers.get(1);
    valSchema.setName("value");
    valSchema.setType(getPigType(validator));
    tupleFields.add(valSchema);
    // defined validators/indexes
    for (ColumnDef cdef : cfDef.column_metadata) {
        colSchema = new ResourceFieldSchema();
        // NOTE(review): decodes the name bytes with the platform default charset.
        colSchema.setName(new String(cdef.getName()));
        colSchema.setType(getPigType(marshallers.get(0)));
        tupleFields.add(colSchema);
        valSchema = new ResourceFieldSchema();
        // The validator map is keyed by ByteBuffer, while getName() yields the raw name bytes.
        // Looking up with the bytes directly can never match a key, so wrap them first;
        // otherwise every declared column would silently fall back to the default validator.
        validator = validators.get(ByteBuffer.wrap(cdef.getName()));
        if (validator == null) {
            // no validator declared for this column: use the default value type
            validator = marshallers.get(1);
        }
        valSchema.setName("value");
        valSchema.setType(getPigType(validator));
        tupleFields.add(valSchema);
    }
    innerTupleSchema.setFields(tupleFields.toArray(new ResourceFieldSchema[tupleFields.size()]));
    // a bag can contain only one tuple, but that tuple can contain anything
    bagSchema.setFields(new ResourceFieldSchema[] { tupleField });
    bagFieldSchema.setSchema(bagSchema);
    // top level schema contains everything
    schema.setFields(new ResourceFieldSchema[] { keyFieldSchema, bagFieldSchema });
    return schema;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ByteBuffer(java.nio.ByteBuffer)

Aggregations

ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 42; ResourceSchema (org.apache.pig.ResourceSchema): 22; IOException (java.io.IOException): 16; ArrayList (java.util.ArrayList): 7; Map (java.util.Map): 7; Tuple (org.apache.pig.data.Tuple): 7; HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 5; BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 4; SQLException (java.sql.SQLException): 4; HashMap (java.util.HashMap): 4; Field (org.apache.avro.Schema.Field): 4; Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 4; BasicBSONObject (org.bson.BasicBSONObject): 4; Test (org.junit.Test): 4; List (java.util.List): 3; DataBag (org.apache.pig.data.DataBag): 3; FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 3; BSONObject (org.bson.BSONObject): 3; DateTime (org.joda.time.DateTime): 3; LinkedHashMap (java.util.LinkedHashMap): 2