Search in sources :

Example 6 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class MongoLoader method getNext.

/**
 * Reads the next document from the underlying reader and converts it to a Pig tuple.
 *
 * <p>Without a schema ({@code fields == null}) the result is a one-element tuple
 * holding the whole document converted by {@code BSONLoader.convertBSONtoPigType}.
 * With a schema, one tuple slot is produced per projected field when a projection
 * is set, otherwise per schema field.
 *
 * @return the next tuple, or {@code null} when the reader has no more key/value pairs
 * @throws IOException wrapping any exception raised while advancing or reading the reader
 */
@Override
public Tuple getNext() throws IOException {
    BSONObject document;
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        document = (BSONObject) in.getCurrentValue();
    } catch (Exception cause) {
        throw new IOException(cause);
    }

    if (fields == null) {
        // Dynamic schema mode: no schema means no projection is possible, so the
        // whole document is emitted as the single element of the tuple.
        Tuple wholeDocument = tupleFactory.newTuple(1);
        wholeDocument.set(0, BSONLoader.convertBSONtoPigType(document));
        return wholeDocument;
    }

    // A schema was provided; size the tuple by the projection when there is one.
    Tuple row = tupleFactory.newTuple(projectedFields != null ? projectedFields.size() : fields.length);
    for (int pos = 0; pos < row.size(); pos++) {
        String documentKey;
        ResourceFieldSchema slotSchema;
        if (projectedFields != null) {
            documentKey = projectedFields.get(pos);
            // The type info for "_id" is registered under the id alias, when one is set.
            boolean lookupByAlias = idAlias != null && "_id".equals(documentKey);
            slotSchema = schemaMapping.get(lookupByAlias ? idAlias : documentKey);
        } else {
            slotSchema = fields[pos];
            documentKey = slotSchema.getName();
            // The alias names the "_id" key of the document.
            if (idAlias != null && idAlias.equals(documentKey)) {
                documentKey = "_id";
            }
        }
        row.set(pos, BSONLoader.readField(document.get(documentKey), slotSchema));
    }
    return row;
}
Also used : ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject) IOException(java.io.IOException) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException) IOException(java.io.IOException) Tuple(org.apache.pig.data.Tuple)

Example 7 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class MongoStorage method putNext.

/**
 * Converts a tuple to a document, one entry per schema field, and hands it to the record writer.
 *
 * <p>The i-th schema field is paired with the i-th tuple element.
 *
 * @param tuple the tuple to store
 * @throws IOException if a tuple element cannot be read or a field cannot be written
 */
public void putNext(final Tuple tuple) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("writing " + tuple.toString());
    }

    final BasicDBObjectBuilder document = BasicDBObjectBuilder.start();
    int column = 0;
    for (ResourceFieldSchema fieldSchema : this.schema.getFields()) {
        writeField(document, fieldSchema, tuple.get(column));
        column++;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("writing out:" + document.get().toString());
    }
    //noinspection unchecked
    recordWriter.write(null, document.get());
}
Also used : BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 8 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class BSONStorage method putNext.

/**
 * Converts a tuple to a BSON document and writes it to the output.
 *
 * <p>When the schema yields a field array, each schema field is paired with the
 * tuple element at the same index. Otherwise every tuple element is written
 * with a {@code null} field schema.
 *
 * @param tuple the tuple to store
 * @throws IOException wrapping any exception raised while converting or writing the tuple
 */
@Override
public void putNext(final Tuple tuple) throws IOException {
    try {
        final BasicDBObjectBuilder document = BasicDBObjectBuilder.start();
        final ResourceFieldSchema[] declared = (schema == null) ? null : schema.getFields();
        int column = 0;
        if (declared == null) {
            // No usable schema: write every element of the tuple without type info.
            while (column < tuple.size()) {
                writeField(document, null, tuple.get(column));
                column++;
            }
        } else {
            for (ResourceFieldSchema fieldSchema : declared) {
                writeField(document, fieldSchema, tuple.get(column));
                column++;
            }
        }
        out.write(null, document.get());
    } catch (Exception e) {
        throw new IOException("Couldn't convert tuple to bson: ", e);
    }
}
Also used : BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) IOException(java.io.IOException) IOException(java.io.IOException)

Example 9 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class BSONStorage method getTypeForBSON.

/**
 * Returns an object better suited for BSON storage. Object o corresponds to a field value in pig.
 *
 * <p>The Pig type is taken from {@code field} when it is non-null and its type is not
 * {@code DataType.UNKNOWN}; otherwise it is inferred from {@code o} with
 * {@code DataType.findType}. A value typed as bytearray that is actually a {@link Map}
 * is converted as a map.
 *
 * <p>Tuples and bags that carry no inner schema are returned unchanged. A tuple whose
 * inner schema has exactly one field named {@code toIgnore} is replaced by the converted
 * value of that field; any other tuple with an inner schema becomes a map keyed by field
 * name, in schema order. A bag with an inner schema becomes a list of its converted
 * tuples. Map values are converted recursively without schema information.
 *
 * @param o        object representing pig type to convert to BSON-like object; may be null
 * @param field    schema of the field to place o in; may be null
 * @param toIgnore name of a sole tuple field to unwrap instead of nesting; may be null
 * @return an Object that can be stored as BSON, or null if {@code o} is null
 * @throws IOException if an element cannot be read from a tuple while converting nested values
 */
public static Object getTypeForBSON(final Object o, final ResourceFieldSchema field, final String toIgnore) throws IOException {
    if (null == o) {
        return null;
    }
    byte dataType;
    ResourceSchema fieldInnerSchema = null;
    if (null == field || DataType.UNKNOWN == field.getType()) {
        dataType = DataType.findType(o);
    } else {
        dataType = field.getType();
        fieldInnerSchema = field.getSchema();
    }
    // A map value may be declared as a bytearray; convert it as the map it really is.
    if (dataType == DataType.BYTEARRAY && o instanceof Map) {
        dataType = DataType.MAP;
    }
    switch(dataType) {
        case DataType.NULL:
            return null;
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.CHARARRAY:
            return o;
        case DataType.BYTEARRAY:
            if (o instanceof PigBoxedBSONValue) {
                return ((PigBoxedBSONValue) o).getObject();
            }
            return o.toString();
        case DataType.DATETIME:
            return ((DateTime) o).toDate();
        // Given a TUPLE, create a Map so BSONEncoder will eat it.
        case DataType.TUPLE: {
            // BasicBSONEncoder will consume it as an Iterable.
            if (fieldInnerSchema == null) {
                return o;
            }
            // If there was an inner schema, create a Map from the Tuple.
            final ResourceFieldSchema[] tupleFields = fieldInnerSchema.getFields();
            // A tuple that only wraps the field to ignore is replaced by that field's value.
            // The comparison is made from toIgnore's side so an unnamed field does not throw.
            if (1 == tupleFields.length && toIgnore != null && toIgnore.equals(tupleFields[0].getName())) {
                return getTypeForBSON(((Tuple) o).get(0), tupleFields[0], toIgnore);
            }
            // If there is more than one field in the tuple or no fields
            // to ignore, treat the Tuple as a Map.
            final Map<String, Object> m = new LinkedHashMap<String, Object>();
            for (int j = 0; j < tupleFields.length; j++) {
                m.put(tupleFields[j].getName(), getTypeForBSON(((Tuple) o).get(j), tupleFields[j], toIgnore));
            }
            return m;
        }
        // Given a BAG, create an Array so BSONEncoder will eat it.
        case DataType.BAG: {
            // BasicBSONEncoder will consume it as an Iterable.
            if (null == fieldInnerSchema) {
                return o;
            }
            // The first field of the bag's inner schema describes every tuple in the bag.
            final ResourceFieldSchema[] bagFields = fieldInnerSchema.getFields();
            final ArrayList<Object> bagList = new ArrayList<Object>();
            for (Tuple t : (DataBag) o) {
                bagList.add(getTypeForBSON(t, bagFields[0], toIgnore));
            }
            return bagList;
        }
        case DataType.MAP: {
            // o is known to be non-null here; it was checked on entry.
            final Map<?, ?> map = (Map<?, ?>) o;
            final Map<String, Object> converted = new HashMap<String, Object>(map.size());
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                converted.put(entry.getKey().toString(), getTypeForBSON(entry.getValue(), null, toIgnore));
            }
            return converted;
        }
        default:
            return o;
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) DataBag(org.apache.pig.data.DataBag) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap) PigBoxedBSONValue(com.mongodb.hadoop.pig.udf.types.PigBoxedBSONValue) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Tuple(org.apache.pig.data.Tuple)

Example 10 with ResourceFieldSchema

use of org.apache.pig.ResourceSchema.ResourceFieldSchema in project mongo-hadoop by mongodb.

the class MongoStorage method writeField.

/**
 * Writes one Pig field value into the document being built, based on the field's declared type.
 *
 * <ul>
 *   <li>A null value is written as null under the field's name.</li>
 *   <li>Integer, long, float, double and chararray values are written as-is.</li>
 *   <li>Bytearray values are written as their string form.</li>
 *   <li>Tuples become a map keyed by the inner schema's field names.</li>
 *   <li>Bags become a list of such maps, one per tuple.</li>
 *   <li>Map entries are added directly to the builder under their own keys, not under the field's name.</li>
 *   <li>Any other type writes nothing.</li>
 * </ul>
 *
 * @param builder the document builder to add to
 * @param field   schema of the field being written; dereferenced unconditionally
 * @param d       the value to write; may be null
 * @throws IOException if a tuple or bag field lacks the schema needed to name its members,
 *                     if a bag's schema is not a single tuple, or if a tuple element cannot be read
 */
protected void writeField(final BasicDBObjectBuilder builder, final ResourceSchema.ResourceFieldSchema field, final Object d) throws IOException {
    // If the field is missing or the value is null, write a null
    if (null == d) {
        builder.add(field.getName(), null);
        return;
    }

    ResourceSchema nested = field.getSchema();
    // Based on the field's type, write it out
    switch (field.getType()) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.CHARARRAY:
            builder.add(field.getName(), d);
            break;

        case DataType.BYTEARRAY:
            builder.add(field.getName(), d.toString());
            break;

        case DataType.TUPLE: {
            // Given a TUPLE, create a Map so BSONEncoder will eat it
            if (null == nested) {
                throw new IOException("Schemas must be fully specified to use this storage function.  No schema found for field " + field.getName());
            }
            Map<String, Object> tupleAsMap = new LinkedHashMap<String, Object>();
            int pos = 0;
            for (ResourceFieldSchema member : nested.getFields()) {
                tupleAsMap.put(member.getName(), ((Tuple) d).get(pos));
                pos++;
            }
            builder.add(field.getName(), tupleAsMap);
            break;
        }

        case DataType.BAG: {
            // Given a BAG, create an Array so BSONEncoder will eat it.
            if (null == nested) {
                throw new IOException("Schemas must be fully specified to use this storage function.  No schema found for field " + field.getName());
            }
            ResourceFieldSchema[] bagMembers = nested.getFields();
            if (!(bagMembers.length == 1 && bagMembers[0].getType() == DataType.TUPLE)) {
                throw new IOException("Found a bag without a tuple " + "inside!");
            }
            // Drill down the next level to the tuple's schema.
            ResourceSchema tupleSchema = bagMembers[0].getSchema();
            if (null == tupleSchema) {
                throw new IOException("Schemas must be fully specified to use this storage function.  No schema found for field " + field.getName());
            }
            ResourceFieldSchema[] tupleMembers = tupleSchema.getFields();
            List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
            for (Tuple element : (DataBag) d) {
                Map<String, Object> row = new LinkedHashMap<String, Object>();
                int pos = 0;
                for (ResourceFieldSchema member : tupleMembers) {
                    row.put(member.getName(), element.get(pos));
                    pos++;
                }
                rows.add(row);
            }
            builder.add(field.getName(), rows);
            break;
        }

        case DataType.MAP: {
            // Entries go straight into the builder under their own keys.
            Map<?, ?> entries = (Map<?, ?>) d;
            for (Object key : entries.keySet()) {
                builder.add(key.toString(), entries.get(key));
            }
            break;
        }

        default:
            // Other types are not written.
            break;
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) IOException(java.io.IOException) LinkedHashMap(java.util.LinkedHashMap) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Tuple(org.apache.pig.data.Tuple)

Aggregations

ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)20 ResourceSchema (org.apache.pig.ResourceSchema)11 IOException (java.io.IOException)10 Map (java.util.Map)6 Tuple (org.apache.pig.data.Tuple)6 ArrayList (java.util.ArrayList)5 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)5 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)4 SQLException (java.sql.SQLException)4 BasicBSONObject (org.bson.BasicBSONObject)4 Test (org.junit.Test)4 HashMap (java.util.HashMap)3 List (java.util.List)3 BSONObject (org.bson.BSONObject)3 DateTime (org.joda.time.DateTime)3 LinkedHashMap (java.util.LinkedHashMap)2 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)2 PhoenixRecordWritable (org.apache.phoenix.mapreduce.PhoenixRecordWritable)2 PDataType (org.apache.phoenix.schema.types.PDataType)2 PhoenixArray (org.apache.phoenix.schema.types.PhoenixArray)2