Use of org.apache.pig.data.NonSpillableDataBag in project elephant-bird by twitter.
From the class Fixtures, method buildPersonTuple.
/**
 * Builds a sample four-field tuple: a name, a numeric id, an email address and a bag
 * holding three phone-number tuples (the first one created with a null second argument).
 *
 * @return the populated four-field tuple.
 * @throws ExecException if a tuple field cannot be set.
 */
public static Tuple buildPersonTuple() throws ExecException {
  // The bag is created first, wrapping the list of phone-number tuples directly.
  DataBag phones = new NonSpillableDataBag(
      Lists.newArrayList(
          makePhoneNumberTuple("415-999-9999", null),
          makePhoneNumberTuple("415-666-6666", "MOBILE"),
          makePhoneNumberTuple("415-333-3333", "WORK")));

  Tuple person = tf_.newTuple(4);
  // Fill the fields in positional order using a running index.
  int field = 0;
  person.set(field++, "Elephant Bird");
  person.set(field++, 123);
  person.set(field++, "elephant@bird.com");
  person.set(field, phones);
  return person;
}
Use of org.apache.pig.data.NonSpillableDataBag in project elephant-bird by twitter.
From the class ProtobufToPig, method messageToTuple.
/**
 * Translate a nested message to a tuple. If the field is repeated, it walks the list and adds each
 * message to a bag. Otherwise, it just wraps the given one.
 * @param fieldDescriptor the descriptor object for the given field; asserted to be of type MESSAGE.
 * @param fieldValue the object representing the value of this field, possibly null.
 * @return the object representing fieldValue in Pig -- either a bag or a tuple -- or null when
 *         fieldValue is null.
 */
@SuppressWarnings("unchecked")
protected Object messageToTuple(FieldDescriptor fieldDescriptor, Object fieldValue) {
  if (fieldValue == null) {
    // protobufs unofficially ensures values are not null. just in case:
    return null;
  }
  assert fieldDescriptor.getType() == FieldDescriptor.Type.MESSAGE : "messageToTuple called with field of type " + fieldDescriptor.getType();

  if (!fieldDescriptor.isRepeated()) {
    return new ProtobufTuple((Message) fieldValue);
  }

  // The protobuf contract is that if the field is repeated, then the object returned is actually a List
  // of the underlying datatype, which in this case is a nested message.
  // fieldValue cannot be null here (see the guard at the top), so no fallback list is needed.
  List<Message> messageList = (List<Message>) fieldValue;
  DataBag bag = new NonSpillableDataBag(messageList.size());
  for (Message m : messageList) {
    bag.add(new ProtobufTuple(m));
  }
  return bag;
}
Use of org.apache.pig.data.NonSpillableDataBag in project parquet-mr by apache.
From the class TupleConsumerPerfTest, method tuple.
/**
 * Builds the nested sample tuple used by this test.
 *
 * The result has TOP_LEVEL_COLS fields. Each field is a 10-field inner tuple laid out as:
 * fields 0-3 are strings of the form "foo" + i + "," + j, fields 4-7 are the longs 0..3,
 * and fields 8-9 are bags that each hold ten single-field tuples with the integers 0..9.
 *
 * @return the populated top-level tuple.
 * @throws ExecException if a tuple field cannot be set.
 */
private static Tuple tuple() throws ExecException {
  TupleFactory tf = TupleFactory.getInstance();
  Tuple t = tf.newTuple(TOP_LEVEL_COLS);
  for (int i = 0; i < TOP_LEVEL_COLS; i++) {
    Tuple ti = tf.newTuple(10);
    for (int j = 0; j < 4; j++) {
      ti.set(j, "foo" + i + "," + j);
    }
    for (int k = 0; k < 4; k++) {
      ti.set(4 + k, (long) k);
    }
    for (int l = 0; l < 2; l++) {
      DataBag bag = new NonSpillableDataBag();
      for (int m = 0; m < 10; m++) {
        // Integer.valueOf replaces the deprecated Integer(int) constructor; the cast to Object
        // keeps the call bound to the single-element newTuple(Object) overload.
        bag.add(tf.newTuple((Object) Integer.valueOf(m)));
      }
      ti.set(8 + l, bag);
    }
    t.set(i, ti);
  }
  return t;
}
Use of org.apache.pig.data.NonSpillableDataBag in project elephant-bird by twitter.
From the class ProtobufToPig, method singleFieldToTuple.
/**
 * Translate a single (non-message) field to its Pig representation. If the field is repeated, it walks
 * the list and adds each value, wrapped in a one-field tuple, to a bag. Otherwise, it just converts the
 * given value.
 * @param fieldDescriptor the descriptor object for the given field; asserted not to be of type MESSAGE.
 * @param fieldValue the object representing the value of this field, possibly null. A null value for a
 *        repeated field yields an empty bag.
 * @return the object representing fieldValue in Pig -- either a bag or a single field.
 * @throws RuntimeException wrapping an ExecException if setting the inner tuple's field fails. Not
 *         expected, because each inner tuple is created with exactly one field.
 */
@SuppressWarnings("unchecked")
protected Object singleFieldToTuple(FieldDescriptor fieldDescriptor, Object fieldValue) {
  assert fieldDescriptor.getType() != FieldDescriptor.Type.MESSAGE : "singleFieldToTuple called with field of type " + fieldDescriptor.getType();

  if (!fieldDescriptor.isRepeated()) {
    return coerceToPigTypes(fieldDescriptor, fieldValue);
  }

  // The protobuf contract is that if the field is repeated, then the object returned is actually a List
  // of the underlying datatype, which in this case is a "primitive" like int, float, String, etc.
  // We have to make a single-item tuple out of it to put it in the bag.
  List<Object> fieldValueList = (List<Object>) (fieldValue != null ? fieldValue : Collections.emptyList());
  DataBag bag = new NonSpillableDataBag(fieldValueList.size());
  for (Object singleFieldValue : fieldValueList) {
    Object nonEnumFieldValue = coerceToPigTypes(fieldDescriptor, singleFieldValue);
    Tuple innerTuple = tupleFactory_.newTuple(1);
    try {
      innerTuple.set(0, nonEnumFieldValue);
    } catch (ExecException e) {
      // not expected
      throw new RuntimeException(e);
    }
    bag.add(innerTuple);
  }
  return bag;
}
Aggregations