Example usage of org.apache.pig.PigServer from the elephant-bird project by Twitter: class TestJsonLoader, method testPigScript.
/**
 * Verifies that JsonLoader reads simple one-field JSON records and that the
 * extracted integer field can be summed by a Pig aggregation script.
 *
 * @throws IOException if the temp input file cannot be written or Pig I/O fails
 */
@Test
public void testPigScript() throws IOException {
  File tempFile = File.createTempFile("json", null);
  tempFile.deleteOnExit();
  // try-with-resources guarantees the writer is closed even if a write throws,
  // so the temp file is never left open/unflushed.
  try (FileWriter writer = new FileWriter(tempFile)) {
    writer.write("{\"score\": 10}\n");
    writer.write("{\"score\": 20}\n");
    writer.write("{\"score\": 30}\n");
  }
  PigServer pigServer = PigTestUtil.makePigServer();
  logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath()
      + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
  logAndRegisterQuery(pigServer, "a = foreach data generate (int) json#'score' as score;");
  logAndRegisterQuery(pigServer, "b = group a all;");
  logAndRegisterQuery(pigServer, "c = foreach b generate SUM(a.score) as total_score;");
  Iterator<Tuple> tuples = pigServer.openIterator("c");
  int count = 0;
  while (tuples.hasNext()) {
    Tuple t = tuples.next();
    // expected sum of scores; Long.valueOf avoids the deprecated Long(long) constructor
    Assert.assertEquals(Long.valueOf(60), t.get(0));
    count++;
  }
  // the ALL-grouped SUM must produce exactly one tuple
  Assert.assertEquals(1, count);
}
Example usage of org.apache.pig.PigServer from the elephant-bird project by Twitter: class TestJsonLoader, method testNestedLoad.
/**
 * Verifies that JsonLoader, with nested loading enabled, exposes nested JSON
 * structures (Twitter Streaming style entities) so hashtag texts can be
 * extracted via chained map lookups and flatten.
 *
 * @throws IOException if the temp input file cannot be written or Pig I/O fails
 */
@Test
public void testNestedLoad() throws IOException {
  File tempFile = File.createTempFile("json", null);
  tempFile.deleteOnExit();
  // try-with-resources guarantees the writer is closed even if the write throws
  try (FileWriter writer = new FileWriter(tempFile)) {
    // json structure as in Twitter Streaming
    writer.write("{"
        + " \"entities\": {"
        + " \"hashtags\": ["
        + " {\"indices\": [0,0], \"text\": \"test1\"},"
        + " {\"indices\": [0,0], \"text\": \"test2\"}"
        + " ],"
        + " \"user_mentions\": [],"
        + " \"urls\": []"
        + " }"
        + "}");
  }
  // extract hashtags from it
  PigServer pigServer = PigTestUtil.makePigServer();
  // enable nested load
  pigServer.getPigContext().getProperties().setProperty(JsonLoader.NESTED_LOAD_KEY, "true");
  logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath()
      + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
  logAndRegisterQuery(pigServer, "a = foreach data generate json#'entities'#'hashtags' as h;");
  logAndRegisterQuery(pigServer, "b = foreach a generate flatten(h) as h;");
  logAndRegisterQuery(pigServer, "c = foreach b generate h#'text' as h;");
  Iterator<Tuple> tuples = pigServer.openIterator("c");
  int count = 0;
  String[] hashtags = { "test1", "test2" };
  while (tuples.hasNext()) {
    Tuple t = tuples.next();
    Assert.assertEquals(hashtags[count], t.get(0).toString());
    count++;
  }
  // expect two tuples, one per hashtag
  Assert.assertEquals(2, count);
}
Example usage of org.apache.pig.PigServer from the elephant-bird project by Twitter: class PigTestUtil, method makePigServer.
/**
 * Creates a new PigServer in local mode.
 * Sets pig properties for lzo codec and temp directory.
 *
 * @return a local-mode {@link PigServer} configured for tests
 * @throws ExecException if the PigServer cannot be created
 */
public static PigServer makePigServer() throws ExecException {
  PigServer pigServer = new PigServer(ExecType.LOCAL);
  // set lzo codec:
  pigServer.getPigContext().getProperties()
      .setProperty("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
  // Use a default base dir so the temp dir never becomes the literal
  // "null/pig-temp" when the harness does not define test.build.data.
  String baseDir = System.getProperty("test.build.data", "build/test/data");
  pigServer.getPigContext().getProperties().setProperty("pig.temp.dir", baseDir + "/pig-temp");
  return pigServer;
}
Example usage of org.apache.pig.PigServer from the incubator-rya project by Apache: class SparqlQueryPigEngine, method init.
/**
 * Initializes the engine: ensures a PigServer exists, and, when inference or
 * statistics are enabled, opens an Accumulo connection (via ZooKeeper) and
 * initializes the inference engine and/or the evaluation-statistics DAO.
 *
 * <p>Requires sparqlToPigTransformVisitor to be set before calling.
 *
 * @throws Exception if the Accumulo connection or any DAO/engine init fails
 */
public void init() throws Exception {
Preconditions.checkNotNull(sparqlToPigTransformVisitor, "Sparql To Pig Transform Visitor must not be null");
logger.info("Initializing Sparql Query Pig Engine");
if (hadoopDir != null) {
// set hadoop dir property
System.setProperty("HADOOPDIR", hadoopDir);
}
// lazily create the server only if one was not injected
if (pigServer == null) {
pigServer = new PigServer(execType);
}
// Accumulo connection is only needed when either feature is on
if (inference || stats) {
final String instance = sparqlToPigTransformVisitor.getInstance();
final String zoo = sparqlToPigTransformVisitor.getZk();
final String user = sparqlToPigTransformVisitor.getUser();
final String pass = sparqlToPigTransformVisitor.getPassword();
final Connector connector = new ZooKeeperInstance(instance, zoo).getConnector(user, new PasswordToken(pass.getBytes(StandardCharsets.UTF_8)));
final String tablePrefix = sparqlToPigTransformVisitor.getTablePrefix();
conf.setTablePrefix(tablePrefix);
if (inference) {
logger.info("Using inference");
// order matters: DAO must be configured, connected, and init'ed before
// it is handed to the inference engine
inferenceEngine = new InferenceEngine();
ryaDAO = new AccumuloRyaDAO();
ryaDAO.setConf(conf);
ryaDAO.setConnector(connector);
ryaDAO.init();
inferenceEngine.setRyaDAO(ryaDAO);
inferenceEngine.setConf(conf);
// scheduled refresh disabled; presumably refreshed on demand — TODO confirm
inferenceEngine.setSchedule(false);
inferenceEngine.init();
}
if (stats) {
logger.info("Using stats");
rdfEvalStatsDAO = new AccumuloRdfEvalStatsDAO();
rdfEvalStatsDAO.setConf(conf);
rdfEvalStatsDAO.setConnector(connector);
// rdfEvalStatsDAO.setEvalTable(tablePrefix + RdfCloudTripleStoreConstants.TBL_EVAL_SUFFIX);
rdfEvalStatsDAO.init();
rdfCloudTripleStoreEvaluationStatistics = new RdfCloudTripleStoreEvaluationStatistics<AccumuloRdfConfiguration>(conf, rdfEvalStatsDAO);
}
}
}
Example usage of org.apache.pig.PigServer from the pigeon project by aseldawy: class TestArea, method testShouldWorkWithWKT.
/**
 * Runs the Area UDF over a WKT polygon loaded through Pig and checks the
 * computed area against the expected value.
 *
 * @throws Exception if Pig setup, query registration, or iteration fails
 */
public void testShouldWorkWithWKT() throws Exception {
  ArrayList<String[]> data = new ArrayList<String[]>();
  data.add(new String[] { "1", "POLYGON ((0 0, 0 3, 5 5, 10 0, 0 0))" });
  String datafile = TestHelper.createTempFile(data, "\t");
  // escape backslashes so Windows paths survive embedding in the Pig script
  datafile = datafile.replace("\\", "\\\\");
  PigServer pig = new PigServer(LOCAL);
  String query = "A = LOAD 'file:" + datafile + "' as (id, geom);\n"
      + "B = FOREACH A GENERATE " + Area.class.getName() + "(geom);";
  pig.registerQuery(query);
  Iterator<?> it = pig.openIterator("B");
  ArrayList<Double> correct_result = new ArrayList<Double>();
  correct_result.add(15 + 5 + 12.5);
  Iterator<Double> areas = correct_result.iterator();
  int matched = 0;
  while (it.hasNext() && areas.hasNext()) {
    Tuple tuple = (Tuple) it.next();
    if (tuple == null)
      break;
    Double area = (Double) tuple.get(0);
    assertEquals(areas.next(), area);
    matched++;
  }
  // Without this check the test passes vacuously when Pig emits no tuples:
  // every expected value must have been matched by an output tuple.
  assertEquals(correct_result.size(), matched);
}
Aggregations