Search in sources :

Example 31 with PigServer

use of org.apache.pig.PigServer in project elephant-bird by twitter.

The class TestJsonLoader, method testPigScript.

@Test
public void testPigScript() throws IOException {
    // Write three single-field JSON records to a temp file, sum the
    // "score" field through Pig's JsonLoader, and verify the total.
    File tempFile = File.createTempFile("json", null);
    tempFile.deleteOnExit();
    // try-with-resources guarantees the writer is closed (and the data
    // flushed) even if a write throws, unlike a bare close() at the end.
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write("{\"score\": 10}\n");
        writer.write("{\"score\": 20}\n");
        writer.write("{\"score\": 30}\n");
    }
    PigServer pigServer = PigTestUtil.makePigServer();
    logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath() + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
    logAndRegisterQuery(pigServer, "a = foreach data generate (int) json#'score' as score;");
    logAndRegisterQuery(pigServer, "b = group a all;");
    logAndRegisterQuery(pigServer, "c = foreach b generate SUM(a.score) as total_score;");
    Iterator<Tuple> tuples = pigServer.openIterator("c");
    int count = 0;
    while (tuples.hasNext()) {
        Tuple t = tuples.next();
        // Expected sum of scores (10 + 20 + 30). Long.valueOf avoids the
        // deprecated Long(long) constructor and may reuse cached instances.
        Assert.assertEquals(Long.valueOf(60), t.get(0));
        count++;
    }
    // GROUP ... ALL produces exactly one group, so expect just one tuple.
    Assert.assertEquals(1, count);
}
Also used : PigServer(org.apache.pig.PigServer) FileWriter(java.io.FileWriter) File(java.io.File) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 32 with PigServer

use of org.apache.pig.PigServer in project elephant-bird by twitter.

The class TestJsonLoader, method testNestedLoad.

@Test
public void testNestedLoad() throws IOException {
    // Verify that nested JSON (Twitter Streaming-style "entities") can be
    // traversed with chained map dereferences when nested load is enabled.
    File tempFile = File.createTempFile("json", null);
    tempFile.deleteOnExit();
    // try-with-resources guarantees the writer is closed (and the data
    // flushed) even if a write throws, unlike a bare close() at the end.
    try (FileWriter writer = new FileWriter(tempFile)) {
        // json structure as in Twitter Streaming
        writer.write("{" + "  \"entities\": {" + "    \"hashtags\": [" + "      {\"indices\": [0,0], \"text\": \"test1\"}," + "      {\"indices\": [0,0], \"text\": \"test2\"}" + "    ]," + "    \"user_mentions\": []," + "    \"urls\": []" + "  }" + "}");
    }
    // extract hashtags from it
    PigServer pigServer = PigTestUtil.makePigServer();
    // enable nested load
    pigServer.getPigContext().getProperties().setProperty(JsonLoader.NESTED_LOAD_KEY, "true");
    logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath() + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
    logAndRegisterQuery(pigServer, "a = foreach data generate json#'entities'#'hashtags' as h;");
    logAndRegisterQuery(pigServer, "b = foreach a generate flatten(h) as h;");
    logAndRegisterQuery(pigServer, "c = foreach b generate h#'text' as h;");
    Iterator<Tuple> tuples = pigServer.openIterator("c");
    int count = 0;
    String[] hashtags = { "test1", "test2" };
    while (tuples.hasNext()) {
        Tuple t = tuples.next();
        // Fail with a clear assertion (rather than ArrayIndexOutOfBounds)
        // if Pig unexpectedly emits more tuples than we have expectations.
        Assert.assertTrue("more tuples than expected hashtags", count < hashtags.length);
        Assert.assertEquals(hashtags[count], t.get(0).toString());
        count++;
    }
    // expect two tuples, one per hashtag
    Assert.assertEquals(2, count);
}
Also used : PigServer(org.apache.pig.PigServer) FileWriter(java.io.FileWriter) File(java.io.File) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 33 with PigServer

use of org.apache.pig.PigServer in project elephant-bird by twitter.

The class PigTestUtil, method makePigServer.

/**
 * Creates a new PigServer in local mode.
 * Sets pig properties for lzo codec and temp directory.
 *
 * @return a local-mode {@code PigServer} configured for tests
 * @throws ExecException if the PigServer cannot be created
 */
public static PigServer makePigServer() throws ExecException {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    // set lzo codec:
    pigServer.getPigContext().getProperties().setProperty("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
    // Supply a default so an unset "test.build.data" system property does not
    // produce the literal path "null/pig-temp" (getProperty would return null).
    String buildData = System.getProperty("test.build.data", "build/test/data");
    pigServer.getPigContext().getProperties().setProperty("pig.temp.dir", buildData + "/pig-temp");
    return pigServer;
}
Also used : PigServer(org.apache.pig.PigServer)

Example 34 with PigServer

use of org.apache.pig.PigServer in project incubator-rya by apache.

The class SparqlQueryPigEngine, method init.

/**
 * Initializes the engine: creates the {@code PigServer} if one was not
 * injected, and — when inference and/or statistics are enabled — opens an
 * Accumulo connection (via ZooKeeper) and initializes the corresponding
 * DAOs. Requires {@code sparqlToPigTransformVisitor} to be set beforehand.
 *
 * NOTE(review): initialization order matters here (conf/connector must be
 * set on each DAO before its init()) — do not reorder the setter calls.
 *
 * @throws Exception if the Accumulo connection or any DAO/engine init fails
 */
public void init() throws Exception {
    Preconditions.checkNotNull(sparqlToPigTransformVisitor, "Sparql To Pig Transform Visitor must not be null");
    logger.info("Initializing Sparql Query Pig Engine");
    if (hadoopDir != null) {
        // set hadoop dir property
        System.setProperty("HADOOPDIR", hadoopDir);
    }
    if (pigServer == null) {
        // Lazily create the server; an externally injected one is kept as-is.
        pigServer = new PigServer(execType);
    }
    if (inference || stats) {
        // Both features need a live Accumulo connector; connection details
        // are read from the transform visitor's configuration.
        final String instance = sparqlToPigTransformVisitor.getInstance();
        final String zoo = sparqlToPigTransformVisitor.getZk();
        final String user = sparqlToPigTransformVisitor.getUser();
        final String pass = sparqlToPigTransformVisitor.getPassword();
        final Connector connector = new ZooKeeperInstance(instance, zoo).getConnector(user, new PasswordToken(pass.getBytes(StandardCharsets.UTF_8)));
        final String tablePrefix = sparqlToPigTransformVisitor.getTablePrefix();
        conf.setTablePrefix(tablePrefix);
        if (inference) {
            logger.info("Using inference");
            inferenceEngine = new InferenceEngine();
            ryaDAO = new AccumuloRyaDAO();
            ryaDAO.setConf(conf);
            ryaDAO.setConnector(connector);
            ryaDAO.init();
            inferenceEngine.setRyaDAO(ryaDAO);
            inferenceEngine.setConf(conf);
            // presumably disables the engine's periodic refresh schedule for
            // this one-shot engine — TODO confirm against InferenceEngine docs
            inferenceEngine.setSchedule(false);
            inferenceEngine.init();
        }
        if (stats) {
            logger.info("Using stats");
            rdfEvalStatsDAO = new AccumuloRdfEvalStatsDAO();
            rdfEvalStatsDAO.setConf(conf);
            rdfEvalStatsDAO.setConnector(connector);
            // rdfEvalStatsDAO.setEvalTable(tablePrefix + RdfCloudTripleStoreConstants.TBL_EVAL_SUFFIX);
            rdfEvalStatsDAO.init();
            rdfCloudTripleStoreEvaluationStatistics = new RdfCloudTripleStoreEvaluationStatistics<AccumuloRdfConfiguration>(conf, rdfEvalStatsDAO);
        }
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) AccumuloRyaDAO(org.apache.rya.accumulo.AccumuloRyaDAO) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) InferenceEngine(org.apache.rya.rdftriplestore.inference.InferenceEngine) PigServer(org.apache.pig.PigServer) AccumuloRdfEvalStatsDAO(org.apache.rya.accumulo.AccumuloRdfEvalStatsDAO) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance)

Example 35 with PigServer

use of org.apache.pig.PigServer in project pigeon by aseldawy.

The class TestArea, method testShouldWorkWithWKT.

/**
 * Loads one WKT polygon through Pig and checks that the Area UDF
 * computes the expected area.
 */
public void testShouldWorkWithWKT() throws Exception {
    ArrayList<String[]> data = new ArrayList<String[]>();
    data.add(new String[] { "1", "POLYGON ((0 0, 0 3, 5 5, 10 0, 0 0))" });
    String datafile = TestHelper.createTempFile(data, "\t");
    // Escape backslashes so Windows-style paths survive Pig's string parsing.
    datafile = datafile.replace("\\", "\\\\");
    PigServer pig = new PigServer(LOCAL);
    String query = "A = LOAD 'file:" + datafile + "' as (id, geom);\n" + "B = FOREACH A GENERATE " + Area.class.getName() + "(geom);";
    pig.registerQuery(query);
    Iterator<?> it = pig.openIterator("B");
    // Expected polygon area by the shoelace formula: 15 + 5 + 12.5 = 32.5.
    ArrayList<Double> correct_result = new ArrayList<Double>();
    correct_result.add(15 + 5 + 12.5);
    Iterator<Double> areas = correct_result.iterator();
    while (it.hasNext() && areas.hasNext()) {
        Tuple tuple = (Tuple) it.next();
        if (tuple == null)
            break;
        Double area = (Double) tuple.get(0);
        // Compare with a tolerance rather than exact Double.equals(), which is
        // brittle for computed floating-point results.
        assertEquals(areas.next(), area, 1e-9);
    }
    // Ensure every expected value was consumed; without this the test would
    // pass vacuously if Pig returned no tuples at all.
    assertFalse("expected area was never produced", areas.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Tuple(org.apache.pig.data.Tuple)

Aggregations

PigServer (org.apache.pig.PigServer)114 Tuple (org.apache.pig.data.Tuple)74 ArrayList (java.util.ArrayList)68 Test (org.junit.Test)57 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)28 File (java.io.File)16 Data (org.apache.pig.builtin.mock.Storage.Data)15 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)14 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)9 Properties (java.util.Properties)8 Vector (java.util.Vector)8 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)6 CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse)5 FileWriter (java.io.FileWriter)3 IOException (java.io.IOException)3 List (java.util.List)3 Map (java.util.Map)3 Path (org.apache.hadoop.fs.Path)3 Admin (org.apache.hadoop.hbase.client.Admin)3 Connection (org.apache.hadoop.hbase.client.Connection)3