Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in the project mongo-hadoop by mongodb.
The following is the method getNext of the class MongoLoader.
@Override
public Tuple getNext() throws IOException {
    // Advance the underlying record reader and fetch the next BSON document.
    final BSONObject document;
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        document = (BSONObject) in.getCurrentValue();
    } catch (Exception e) {
        throw new IOException(e);
    }

    if (fields == null) {
        // Dynamic-schema mode: emit a one-element tuple whose single slot is a
        // map of the document's keys/values. With no schema there is no
        // projection to honor, so the whole document is passed through.
        final Tuple result = tupleFactory.newTuple(1);
        result.set(0, BSONLoader.convertBSONtoPigType(document));
        return result;
    }

    // A schema was provided; honor the pushed-down projection when present.
    final int width = (projectedFields == null) ? fields.length : projectedFields.size();
    final Tuple result = tupleFactory.newTuple(width);
    for (int i = 0; i < width; i++) {
        String fieldName;
        ResourceFieldSchema fieldSchema;
        if (projectedFields == null) {
            fieldName = fields[i].getName();
            fieldSchema = fields[i];
            // The user-visible alias is stored under "_id" in the document.
            if (idAlias != null && idAlias.equals(fieldName)) {
                fieldName = "_id";
            }
        } else {
            fieldName = projectedFields.get(i);
            // When "_id" is projected, its type info lives under the alias.
            if (idAlias != null && "_id".equals(fieldName)) {
                fieldSchema = schemaMapping.get(idAlias);
            } else {
                fieldSchema = schemaMapping.get(fieldName);
            }
        }
        result.set(i, BSONLoader.readField(document.get(fieldName), fieldSchema));
    }
    return result;
}
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in the project mongo-hadoop by mongodb.
The following is the method putNext of the class MongoStorage.
/**
 * Writes one Pig tuple as a MongoDB document, mapping each schema field
 * positionally onto the corresponding tuple slot.
 */
public void putNext(final Tuple tuple) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("writing " + tuple.toString());
    }
    final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
    final ResourceFieldSchema[] schemaFields = this.schema.getFields();
    int slot = 0;
    for (final ResourceFieldSchema schemaField : schemaFields) {
        writeField(builder, schemaField, tuple.get(slot));
        slot++;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("writing out:" + builder.get().toString());
    }
    //noinspection unchecked
    recordWriter.write(null, builder.get());
}
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in the project mongo-hadoop by mongodb.
The following is the method putNext of the class BSONStorage.
@Override
public void putNext(final Tuple tuple) throws IOException {
    try {
        final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
        // When no schema is available, fall back to writing every tuple slot
        // with a null field schema so types are inferred downstream.
        final ResourceFieldSchema[] schemaFields =
            (schema == null) ? null : schema.getFields();
        if (schemaFields == null) {
            for (int i = 0; i < tuple.size(); i++) {
                writeField(builder, null, tuple.get(i));
            }
        } else {
            for (int i = 0; i < schemaFields.length; i++) {
                writeField(builder, schemaFields[i], tuple.get(i));
            }
        }
        out.write(null, builder.get());
    } catch (Exception e) {
        // Wrap with context while preserving the original cause.
        throw new IOException("Couldn't convert tuple to bson: ", e);
    }
}
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in the project mongo-hadoop by mongodb.
The following is the method getTypeForBSON of the class BSONStorage.
/**
 * Returns object more suited for BSON storage. Object o corresponds to a field value in pig.
 *
 * @param o object representing pig type to convert to BSON-like object
 * @param field field to place o in
 * @param toIgnore name of field in Object o to ignore
 * @return an Object that can be stored as BSON.
 * @throws IOException if no schema is available from the field
 */
public static Object getTypeForBSON(final Object o, final ResourceFieldSchema field, final String toIgnore) throws IOException {
    byte dataType;
    ResourceSchema fieldInnerSchema = null;
    // Null values map straight to BSON null; nothing else to decide.
    if (null == o) {
        return null;
    }
    // Without a usable field schema, infer the Pig type from the value itself.
    if (null == field || DataType.UNKNOWN == field.getType()) {
        dataType = DataType.findType(o);
    } else {
        dataType = field.getType();
        fieldInnerSchema = field.getSchema();
    }
    // A BYTEARRAY that is actually a Map (e.g. from an untyped script) is
    // treated as a MAP so its entries are converted recursively.
    if (dataType == DataType.BYTEARRAY && o instanceof Map) {
        dataType = DataType.MAP;
    }
    switch(dataType) {
        case DataType.NULL:
            return null;
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            // Numeric types are stored as-is.
            return o;
        case DataType.BYTEARRAY:
            // Boxed BSON values pass through untouched; anything else is
            // stringified for lack of type information.
            if (o instanceof PigBoxedBSONValue) {
                return ((PigBoxedBSONValue) o).getObject();
            }
            return o.toString();
        case DataType.CHARARRAY:
            return o;
        case DataType.DATETIME:
            return ((DateTime) o).toDate();
        //Given a TUPLE, create a Map so BSONEncoder will eat it
        case DataType.TUPLE:
            // BasicBSONEncoder will consume it as an Iterable.
            if (fieldInnerSchema == null) {
                return o;
            }
            // If there was an inner schema, create a Map from the Tuple.
            ResourceFieldSchema[] fs = fieldInnerSchema.getFields();
            // A single-field tuple whose field name matches toIgnore is a
            // bag wrapper; unwrap it rather than emitting a one-entry map.
            if (1 == fs.length && fs[0].getName().equals(toIgnore)) {
                return getTypeForBSON(((Tuple) o).get(0), fs[0], toIgnore);
            }
            // If there is more than one field in the tuple or no fields
            // to ignore, treat the Tuple as a Map.
            Map<String, Object> m = new LinkedHashMap<String, Object>();
            for (int j = 0; j < fs.length; j++) {
                m.put(fs[j].getName(), getTypeForBSON(((Tuple) o).get(j), fs[j], toIgnore));
            }
            return m;
        // Given a BAG, create an Array so BSONEncoder will eat it.
        case DataType.BAG:
            // BasicBSONEncoder will consume it as an Iterable.
            if (null == fieldInnerSchema) {
                return o;
            }
            fs = fieldInnerSchema.getFields();
            ArrayList<Object> bagList = new ArrayList<Object>();
            for (Tuple t : (DataBag) o) {
                bagList.add(getTypeForBSON(t, fs[0], toIgnore));
            }
            return bagList;
        case DataType.MAP:
            // Note: o cannot be null here — the method returns early on null
            // input above, so no redundant null check is needed.
            Map<?, ?> map = (Map<?, ?>) o;
            Map<String, Object> out = new HashMap<String, Object>(map.size());
            for (Object key : map.keySet()) {
                // Map values carry no schema; recurse with a null field so the
                // type is inferred from each value.
                out.put(key.toString(), getTypeForBSON(map.get(key), null, toIgnore));
            }
            return out;
        default:
            return o;
    }
}
Use of org.apache.pig.ResourceSchema.ResourceFieldSchema in the project mongo-hadoop by mongodb.
The following is the method writeField of the class MongoStorage.
/**
 * Appends one Pig field value to the document builder, dispatching on the
 * field's declared Pig type. Values of unrecognized types are silently
 * skipped, and a null value is written as an explicit BSON null.
 */
protected void writeField(final BasicDBObjectBuilder builder, final ResourceSchema.ResourceFieldSchema field, final Object d) throws IOException {
    // If the field is missing or the value is null, write a null
    if (d == null) {
        builder.add(field.getName(), null);
        return;
    }
    ResourceSchema innerSchema = field.getSchema();
    switch (field.getType()) {
        // Scalar types that the BSON encoder accepts directly.
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.CHARARRAY:
            builder.add(field.getName(), d);
            break;
        case DataType.BYTEARRAY:
            // No type information available: store the string form.
            builder.add(field.getName(), d.toString());
            break;
        case DataType.TUPLE: {
            // Given a TUPLE, create a Map so BSONEncoder will eat it
            if (innerSchema == null) {
                throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field " + field.getName());
            }
            final ResourceFieldSchema[] tupleFields = innerSchema.getFields();
            final Map<String, Object> asMap = new LinkedHashMap<String, Object>();
            for (int j = 0; j < tupleFields.length; j++) {
                asMap.put(tupleFields[j].getName(), ((Tuple) d).get(j));
            }
            builder.add(field.getName(), (Map) asMap);
            break;
        }
        case DataType.BAG: {
            // Given a BAG, create an Array so BSONEncoder will eat it.
            if (innerSchema == null) {
                throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field " + field.getName());
            }
            ResourceFieldSchema[] bagFields = innerSchema.getFields();
            if (bagFields.length != 1 || bagFields[0].getType() != DataType.TUPLE) {
                throw new IOException("Found a bag without a tuple inside!");
            }
            // Drill down the next level to the tuple's schema.
            final ResourceSchema tupleSchema = bagFields[0].getSchema();
            if (tupleSchema == null) {
                throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field " + field.getName());
            }
            final ResourceFieldSchema[] tupleFields = tupleSchema.getFields();
            final List<Map<String, Object>> docs = new ArrayList<Map<String, Object>>();
            for (final Tuple t : (DataBag) d) {
                final Map<String, Object> doc = new LinkedHashMap<String, Object>();
                for (int j = 0; j < tupleFields.length; j++) {
                    doc.put(tupleFields[j].getName(), t.get(j));
                }
                docs.add(doc);
            }
            builder.add(field.getName(), docs);
            break;
        }
        case DataType.MAP: {
            // Map entries are flattened directly into the builder under their
            // own keys rather than nested under the field's name.
            final Map flattened = (Map) d;
            for (final Object key : flattened.keySet()) {
                builder.add(key.toString(), flattened.get(key));
            }
            break;
        }
        default:
            // Unhandled Pig types are dropped, matching the original if-chain.
            break;
    }
}
Aggregations