use of org.apache.pig.data.DataBag in project elephant-bird by twitter.
the class Fixtures method buildPersonTuple.
/**
 * Builds a four-field person tuple used as a test fixture.
 *
 * <p>Field layout, by position: 0 = "Elephant Bird", 1 = 123,
 * 2 = "elephant@bird.com", 3 = a bag holding three phone-number tuples
 * (the first of which is created with a {@code null} second argument).
 *
 * @return the populated tuple
 * @throws ExecException if building a phone-number tuple or setting a field fails
 */
public static Tuple buildPersonTuple() throws ExecException {
  // Phone-number tuples are created in the same order they appear in the bag.
  Tuple untypedPhone = makePhoneNumberTuple("415-999-9999", null);
  Tuple mobilePhone = makePhoneNumberTuple("415-666-6666", "MOBILE");
  Tuple workPhone = makePhoneNumberTuple("415-333-3333", "WORK");
  DataBag phones =
      new NonSpillableDataBag(Lists.newArrayList(untypedPhone, mobilePhone, workPhone));

  Tuple person = tf_.newTuple(4);
  person.set(0, "Elephant Bird");
  person.set(1, 123);
  person.set(2, "elephant@bird.com");
  person.set(3, phones);
  return person;
}
use of org.apache.pig.data.DataBag in project elephant-bird by twitter.
the class TestInvoker method testArrayConversion.
/**
 * Checks that a bag argument is handed to the invoked static method as an array:
 * a bag of doubles for the "double[]" signature and a bag of strings for the
 * "string[]" signature.
 */
@Test
public void testArrayConversion() throws SecurityException, ClassNotFoundException, NoSuchMethodException, IOException {
  // Both target methods are looked up by fully-qualified name on this test class.
  String methodPrefix = TestInvoker.class.getName() + ".";

  InvokeForInt avgInvoker = new InvokeForInt(methodPrefix + "avg", "double[]");
  DataBag doubleBag = newSimpleBag(1.0, 2.0, 3.0);
  assertEquals(Integer.valueOf(2), avgInvoker.exec(tf_.newTuple(doubleBag)));

  InvokeForString concatInvoker = new InvokeForString(methodPrefix + "concatStringArray", "string[]");
  DataBag stringBag = newSimpleBag("foo", "bar", "baz");
  assertEquals("foobarbaz", concatInvoker.exec(tf_.newTuple(stringBag)));
}
use of org.apache.pig.data.DataBag in project elephant-bird by twitter.
the class ProtobufToPig method messageToTuple.
/**
 * Translate a nested message to a tuple. If the field is repeated, it walks the list and adds each to a bag.
 * Otherwise, it just adds the given one.
 * @param fieldDescriptor the descriptor object for the given field.
 * @param fieldValue the object representing the value of this field, possibly null.
 * @return the object representing fieldValue in Pig -- either a bag or a tuple,
 *         or null when fieldValue is null.
 */
protected Object messageToTuple(FieldDescriptor fieldDescriptor, Object fieldValue) {
  if (fieldValue == null) {
    // protobufs unofficially ensures values are not null. just in case:
    return null;
  }
  assert fieldDescriptor.getType() == FieldDescriptor.Type.MESSAGE : "messageToTuple called with field of type " + fieldDescriptor.getType();

  if (!fieldDescriptor.isRepeated()) {
    return new ProtobufTuple((Message) fieldValue);
  }

  // The protobuf contract is that if the field is repeated, then the object returned is actually a List
  // of the underlying datatype, which in this case is a nested message.
  // fieldValue is known to be non-null here (see the guard above), so no fallback list is needed.
  @SuppressWarnings("unchecked")
  List<Message> messageList = (List<Message>) fieldValue;
  DataBag bag = new NonSpillableDataBag(messageList.size());
  for (Message m : messageList) {
    bag.add(new ProtobufTuple(m));
  }
  return bag;
}
use of org.apache.pig.data.DataBag in project elephant-bird by twitter.
the class TestProtoToPig method testLazyProtoToPig.
/**
 * Compares a lazily-backed {@code ProtobufTuple} and a projected tuple against the
 * hand-built fixture tuple, field by field, skipping bag-valued fields.
 */
@Test
public void testLazyProtoToPig() throws ExecException {
  Person personProto = Fixtures.buildPersonProto();
  Tuple protoTuple = new ProtobufTuple(personProto);
  Tuple normalTuple = Fixtures.buildPersonTuple();

  List<FieldDescriptor> fieldDescs = personProto.getDescriptorForType().getFields();
  TypeRef<Person> typeRef = PigUtil.getProtobufTypeRef(Person.class.getName());
  ProjectedProtobufTupleFactory<Person> projectedFactory =
      new ProjectedProtobufTupleFactory<Person>(typeRef, evenFields(fieldDescs));
  Tuple projectedTuple = projectedFactory.newTuple(personProto);

  // Counts only the fields that were actually compared (bags do not advance it).
  int comparedCount = 0;
  for (FieldDescriptor fd : fieldDescs) {
    int fieldIndex = fd.getIndex();
    Object expected = normalTuple.get(fieldIndex);

    // Bag-valued fields are not compared: per the original note, this gives us
    // non-null fields, which are not equal to the null fields.
    if (expected instanceof DataBag) {
      continue;
    }

    assertEquals(protoTuple.get(fieldIndex), expected);

    // Every other compared field is also checked against the projected tuple,
    // at half its original index.
    boolean checkProjection = comparedCount % 2 == 0;
    if (checkProjection) {
      assertEquals(projectedTuple.get(fieldIndex / 2), expected);
    }
    comparedCount++;
  }
}
use of org.apache.pig.data.DataBag in project elephant-bird by twitter.
the class VectorWritableConverter method convertSparseVectorDataToVector.
/**
 * Converts sparse vector data held in a tuple into a {@code Vector}.
 *
 * <p>A two-field tuple is read as (size, entries); any other tuple is read as
 * (entries) and requires the instance {@code cardinality} to be set. When
 * {@code cardinality} is set it always determines the vector size. Entries whose
 * index falls outside {@code [0, size)} are counted and skipped.
 *
 * @param value tuple carrying the entry bag, optionally preceded by the size
 * @return a dense or random-access sparse vector, re-wrapped as a sequential-access
 *         sparse vector when {@code sequential} is set
 * @throws IOException if reading from the tuple or validating an entry fails
 */
private Vector convertSparseVectorDataToVector(Tuple value) throws IOException {
  // Work out the output size and locate the bag of entries.
  final int dimension;
  final DataBag entryBag;
  if (value.size() != 2) {
    Preconditions.checkNotNull(cardinality, "Cardinality is undefined");
    dimension = cardinality;
    entryBag = (DataBag) value.get(0);
  } else {
    // The input-declared size is always read first, then overridden by the
    // converter's own cardinality when one is configured.
    int declaredSize = (Integer) value.get(0);
    if (cardinality != null) {
      dimension = cardinality;
    } else {
      dimension = declaredSize;
    }
    entryBag = (DataBag) value.get(1);
  }

  // Sparse input may be materialised densely on request.
  // TODO(Andy Schlaikjer): Test for OOM before it happens
  // Otherwise use the random-access impl, which is more efficient to build.
  Vector result = dense ? new DenseVector(dimension) : new RandomAccessSparseVector(dimension);

  // Copy every in-range entry into the vector.
  for (Tuple item : entryBag) {
    validateSparseVectorEntryData(item);
    int position = (Integer) item.get(0);
    boolean inBounds = position >= 0 && position < dimension;
    if (inBounds) {
      double weight = ((Number) item.get(1)).doubleValue();
      result.setQuick(position, weight);
    } else {
      counterHelper.incrCounter(Counter.INDEX_OUT_OF_BOUNDS, 1);
    }
  }

  // Re-wrap as a (sparse) sequential-access vector if requested.
  return sequential ? new SequentialAccessSparseVector(result) : result;
}
Aggregations