use of org.apache.pig.data.DataBag in project common-crawl by matpalm.
the class StrJoin method exec.
/**
 * Joins the first field of every tuple in a bag into one space-separated string.
 *
 * <p>The bag is read from position 0 of {@code input}. If anything goes wrong while
 * reading it, the exception class name is printed to stderr, a counter in the
 * {@code StrJoin_Exception} group is incremented when a {@code PigStatusReporter}
 * instance is available, and {@code null} is returned instead of propagating the
 * failure.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the joined string, or {@code null} when {@code input} is null or empty,
 *         or when any exception is raised during the join
 * @throws IOException declared by the signature; the body catches every {@code Exception}
 */
public String exec(Tuple input) throws IOException {
    final boolean nothingToJoin = (input == null) || (input.size() == 0);
    if (nothingToJoin) {
        return null;
    }
    try {
        final DataBag values = (DataBag) input.get(0);
        final StringBuilder joined = new StringBuilder();
        final Iterator<Tuple> cursor = values.iterator();
        while (cursor.hasNext()) {
            final Object field = cursor.next().get(0);
            joined.append(field.toString());
            // Separator goes between values only, never after the last one.
            if (cursor.hasNext()) {
                joined.append(" ");
            }
        }
        return joined.toString();
    } catch (Exception e) {
        // Best effort: record the failure and yield null rather than failing the job.
        final String failureType = e.getClass().getName();
        System.err.println("StrJoin_Exception " + failureType);
        final PigStatusReporter reporter = PigStatusReporter.getInstance();
        if (reporter != null) {
            reporter.getCounter("StrJoin_Exception", failureType).increment(1);
        }
        return null;
    }
}
use of org.apache.pig.data.DataBag in project varaha by thedatachef.
the class StanfordTokenize method exec.
/**
 * Splits the text in the first field of {@code input} into tokens using a
 * {@code PTBTokenizer} and returns them as a bag of single-field tuples.
 *
 * @param input tuple whose first field is converted to text via {@code toString()}
 * @return a bag holding one tuple per token (each tuple wraps the token's string
 *         form), or {@code null} when {@code input} is null, has no fields, or its
 *         first field is null
 * @throws IOException declared by the signature
 */
public DataBag exec(Tuple input) throws IOException {
    final boolean hasText = input != null && input.size() >= 1 && !input.isNull(0);
    if (!hasText) {
        return null;
    }

    // Result bag, filled with one tuple per token.
    DataBag tokens = bagFactory.newDefaultBag();

    String text = input.get(0).toString();
    PTBTokenizer tokenizer = new PTBTokenizer(new StringReader(text), new CoreLabelTokenFactory(), "");
    while (tokenizer.hasNext()) {
        CoreLabel token = (CoreLabel) tokenizer.next();
        String tokenText = token.toString();
        tokens.add(tupleFactory.newTuple(tokenText));
    }
    return tokens;
}
use of org.apache.pig.data.DataBag in project mongo-hadoop by mongodb.
the class JSONPigReplaceTest method testUnnamedArrayReplace.
/**
 * Checks that a {@code $cars} placeholder is replaced by the contents of a bag of
 * single-field tuples when the inner tuple is flagged as unnamed ("t").
 */
@Test
public void testUnnamedArrayReplace() throws Exception {
    // create tuple ({("a"), ("b"), ("c")})
    // with schema 'cars:{f:(t:chararray)}'
    DataBag b = bagFactory.newDefaultBag();
    b.add(tupleFactory.newTuple("a"));
    b.add(tupleFactory.newTuple("b"));
    b.add(tupleFactory.newTuple("c"));
    JSONPigReplace j = new JSONPigReplace(new String[] { "{days : [1,2,3], age : 19, cars : '$cars'}" });
    BasicBSONObject[] bs = j.substitute(tupleFactory.newTuple(b), "cars : {f:(t:chararray)}", "t");
    assertNotNull(bs);
    // One template in, one substituted document out.
    assertEquals(1, bs.length);
    // Per the template above, the result should have the shape
    // { "days" : [ 1 , 2 , 3] , "age" : 19 , "cars" : [ "a" , "b" , "c"]}
    BasicBSONObject res = bs[0];
    ArrayList<?> cars = (ArrayList<?>) res.get("cars");
    // JUnit convention: expected value first, actual value second.
    assertEquals(3, cars.size());
    assertEquals("a", cars.get(0));
}
use of org.apache.pig.data.DataBag in project mongo-hadoop by mongodb.
the class JSONPigReplaceTest method testSimpleNestedReplace.
/**
 * Checks that placeholders are resolved both from fields nested inside a bag's
 * tuple ({@code $f}, {@code $l}) and from a top-level field ({@code $s}).
 */
@Test
public void testSimpleNestedReplace() throws Exception {
    // create tuple ({("Daniel", "Alabi")}, "Carleton College")
    // with schema 'b:{t:(f:chararray,l:chararray)}, s:chararray'
    Tuple t1 = tupleFactory.newTuple(2);
    t1.set(0, "Daniel");
    t1.set(1, "Alabi");
    DataBag b = bagFactory.newDefaultBag();
    b.add(t1);
    Tuple t = tupleFactory.newTuple(2);
    t.set(0, b);
    t.set(1, "Carleton College");
    JSONPigReplace j = new JSONPigReplace(new String[] { "{first:'$f', last:'$l', school:'$s'}" });
    BasicBSONObject[] bs = j.substitute(t, "b:{t:(f:chararray,l:chararray)}, s:chararray", null);
    assertNotNull(bs);
    // One template in, one substituted document out.
    assertEquals(1, bs.length);
    // should produce
    // { "first" : "Daniel" , "last" : "Alabi" , "school" : "Carleton College"}
    BasicBSONObject res = bs[0];
    // JUnit convention: expected value first, actual value second.
    assertEquals("Daniel", res.get("first"));
    assertEquals("Alabi", res.get("last"));
    assertEquals("Carleton College", res.get("school"));
}
use of org.apache.pig.data.DataBag in project mongo-hadoop by mongodb.
the class JSONPigReplaceTest method testSampleQueryUpdateReplace.
/**
 * Checks substitution across two templates at once: a query document built from
 * {@code $f}/{@code $l} and an update document built from {@code $age}/{@code $cars}.
 */
@Test
public void testSampleQueryUpdateReplace() throws Exception {
    // create tuple ("Daniel", "Alabi", 19, {("a"), ("b"), ("c")})
    // with schema 'f:chararray,l:chararray,age:int,cars:{t:(t:chararray)}'
    DataBag b = bagFactory.newDefaultBag();
    b.add(tupleFactory.newTuple("a"));
    b.add(tupleFactory.newTuple("b"));
    b.add(tupleFactory.newTuple("c"));
    Tuple t = tupleFactory.newTuple(4);
    t.set(0, "Daniel");
    t.set(1, "Alabi");
    t.set(2, 19);
    t.set(3, b);
    JSONPigReplace j = new JSONPigReplace(new String[] { "{first:'$f', last:'$l'}", "{$set: {age: '$age'}, $pushAll : {cars: '$cars'}}" });
    BasicBSONObject[] bs = j.substitute(t, "f:chararray,l:chararray,age:int,cars:{t:(t:chararray)}", "t");
    // Fail with a clear message instead of an NPE on bs.length below.
    assertNotNull(bs);
    // Two templates in, two substituted documents out.
    assertEquals(2, bs.length);
    // should produce
    // { "first" : "Daniel" , "last" : "Alabi"}
    // { "$set" : { "age" : 19} , "$pushAll" : { "cars" : [ "a" , "b" , "c"]}}
    BasicBSONObject res1 = bs[0];
    BasicBSONObject res2 = bs[1];
    // JUnit convention: expected value first, actual value second.
    assertEquals("Daniel", res1.get("first"));
    assertEquals(19, ((BasicBSONObject) res2.get("$set")).get("age"));
}
Aggregations