
Example 51 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class TestHCatHiveCompatibility method testUnpartedReadWrite.

@Test
public void testUnpartedReadWrite() throws Exception {
    driver.run("drop table if exists junit_unparted_noisd");
    String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE";
    driver.run(createTable);
    // assert that the table created has no hcat instrumentation, and that we're still able to read it.
    Table table = client.getTable("default", "junit_unparted_noisd");
    Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS));
    PigServer server = createPigServer(false);
    logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);");
    logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer();");
    logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Iterator<Tuple> itr = server.openIterator("B");
    int i = 0;
    while (itr.hasNext()) {
        Tuple t = itr.next();
        Assert.assertEquals(1, t.size());
        Assert.assertEquals(t.get(0), i);
        i++;
    }
    Assert.assertFalse(itr.hasNext());
    Assert.assertEquals(11, i);
    // assert that the table created still has no hcat instrumentation
    Table table2 = client.getTable("default", "junit_unparted_noisd");
    Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS));
    driver.run("drop table junit_unparted_noisd");
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)
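
For context, createPigServer and logAndRegister in this test come from the HCatBaseTest harness. A minimal sketch of the plain org.apache.pig.PigServer calls such helpers presumably wrap, assuming local execution mode and an illustrative input path:

import java.util.Iterator;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;

// Create a local-mode server, register a query, and iterate over the result alias.
PigServer server = new PigServer(ExecType.LOCAL);
server.registerQuery("A = load 'input.data' as (a:int);");
Iterator<Tuple> it = server.openIterator("A");
while (it.hasNext()) {
    System.out.println(it.next());
}
server.shutdown();

Both registerQuery and openIterator throw IOException, so a real caller declares or handles it, as the test method above does via its throws clause.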

Example 52 with PigServer

use of org.apache.pig.PigServer in project shifu by ShifuML.

the class PigExecutor method submitJob.

/**
 * Run the Pig script; local or MapReduce mode is decided by the parameter @sourceType
 *
 * @param modelConfig
 *            - model configuration
 * @param pigScriptPath
 *            - path of pig script
 * @param paramsMap
 *            - additional parameters for pig script
 * @param sourceType
 *            - the mode to run Pig in: pig-local/pig-hdfs
 * @param confMap
 *            the configuration map instance
 * @param pathFinder
 *            the path finder
 * @throws IOException
 *             thrown when loading parameters from @ModelConfig fails
 */
public void submitJob(ModelConfig modelConfig, String pigScriptPath, Map<String, String> paramsMap, SourceType sourceType, Map<String, String> confMap, PathFinder pathFinder) throws IOException {
    // Run Pig Scripts
    final PigServer pigServer = createPigServer(sourceType);
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            pigServer.getPigContext().getProperties().put(key, value);
        }
    });
    if (confMap != null) {
        for (Map.Entry<String, String> entry : confMap.entrySet()) {
            pigServer.getPigContext().getProperties().put(entry.getKey(), entry.getValue());
        }
    }
    Map<String, String> pigParamsMap = CommonUtils.getPigParamMap(modelConfig, sourceType, pathFinder);
    if (paramsMap != null) {
        pigParamsMap.putAll(paramsMap);
    }
    log.debug("Pig submit parameters: {}", pigParamsMap);
    if (new File(pigScriptPath).isAbsolute()) {
        log.info("Pig script absolute path is {}", pigScriptPath);
        pigServer.registerScript(pigScriptPath, pigParamsMap);
    } else {
        log.info("Pig script relative path is {}", pigScriptPath);
        pigServer.registerScript(PigExecutor.class.getClassLoader().getResourceAsStream(pigScriptPath), pigParamsMap);
    }
}
Also used : ValueVisitor(ml.shifu.shifu.util.ValueVisitor) PigServer(org.apache.pig.PigServer) Map(java.util.Map) File(java.io.File)
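
Everything in submitJob ultimately funnels into two PigServer calls: pushing configuration into the PigContext properties and registering a parameterized script. A hedged, stripped-down sketch of that core, in which the property, the parameter names, and the script path are purely illustrative:

import java.util.HashMap;
import java.util.Map;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;

PigServer pigServer = new PigServer(ExecType.MAPREDUCE);
// Configuration entries go into the PigContext properties, as the ValueVisitor and confMap loop do above.
pigServer.getPigContext().getProperties().put("mapreduce.job.queuename", "default");
// Parameters referenced as $input / $output inside the Pig script.
Map<String, String> params = new HashMap<String, String>();
params.put("input", "hdfs:///tmp/shifu/input");
params.put("output", "hdfs:///tmp/shifu/output");
pigServer.registerScript("scripts/Normalize.pig", params);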

Example 53 with PigServer

use of org.apache.pig.PigServer in project shifu by ShifuML.

the class PigExecutor method submitJob.

public void submitJob(SourceType sourceType, String pigScripts) throws IOException {
    PigServer pigServer = createPigServer(sourceType);
    pigServer.registerScript(new ByteArrayInputStream(pigScripts.getBytes()));
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) PigServer(org.apache.pig.PigServer)
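
The same pattern with the stock PigServer API, passing the Pig statements as an in-memory string (alias names and paths are illustrative). Calling getBytes() with an explicit charset, as in the sketch, avoids depending on the platform default encoding:

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;

String script =
    "raw = load '/tmp/data.csv' using PigStorage(',') as (name:chararray, score:int);\n" +
    "filtered = filter raw by score > 0;\n" +
    "store filtered into '/tmp/out' using PigStorage(',');\n";
PigServer pigServer = new PigServer(ExecType.LOCAL);
// Register (and, because of the store statement, execute) the inline script.
pigServer.registerScript(new ByteArrayInputStream(script.getBytes(StandardCharsets.UTF_8)));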

Example 54 with PigServer

use of org.apache.pig.PigServer in project shifu by ShifuML.

the class PigExecutor method createPigServer.

private PigServer createPigServer(SourceType sourceType) throws IOException {
    PigServer pigServer = null;
    if (SourceType.HDFS.equals(sourceType)) {
        if (Environment.getProperty("shifu.pig.exectype", "MAPREDUCE").toLowerCase().equals("tez")) {
            if (isTezRunnable()) {
                try {
                    Class<?> tezClazz = Class.forName("org.apache.pig.backend.hadoop.executionengine.tez.TezExecType");
                    log.info("Pig ExecType: TEZ");
                    pigServer = new ShifuPigServer((ExecType) tezClazz.newInstance());
                } catch (Throwable t) {
                    log.info("Pig ExecType: MAPREDUCE");
                    pigServer = new ShifuPigServer(ExecType.MAPREDUCE);
                }
            } else {
                // fall back to mapreduce
                log.info("Pig ExecType: MAPREDUCE");
                pigServer = new ShifuPigServer(ExecType.MAPREDUCE);
            }
        } else {
            log.info("Pig ExecType: MAPREDUCE");
            pigServer = new ShifuPigServer(ExecType.MAPREDUCE);
        }
        String hdpVersion = HDPUtils.getHdpVersionForHDP224();
        if (StringUtils.isNotBlank(hdpVersion)) {
            // for HDP 2.2.4, hdp.version should be set and the configuration files should be added to the container classpath
            pigServer.getPigContext().getProperties().put("hdp.version", hdpVersion);
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("hdfs-site.xml"));
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("core-site.xml"));
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("mapred-site.xml"));
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("yarn-site.xml"));
        }
    } else {
        log.info("ExecType: LOCAL");
        pigServer = new ShifuPigServer(ExecType.LOCAL);
    }
    return pigServer;
}
Also used : PigServer(org.apache.pig.PigServer) ExecType(org.apache.pig.ExecType)
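
For comparison, the stock PigServer constructor also accepts a Properties object, so a setting such as hdp.version could be supplied up front rather than pushed into the PigContext afterwards. A brief sketch, with an illustrative version string in place of the value HDPUtils.getHdpVersionForHDP224() returns above:

import java.util.Properties;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;

Properties props = new Properties();
props.setProperty("hdp.version", "2.2.4.2-2");  // illustrative value
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, props);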

Example 55 with PigServer

use of org.apache.pig.PigServer in project elephant-bird by twitter.

the class TestJsonLoader method testPigScript.

@Test
public void testPigScript() throws IOException {
    File tempFile = File.createTempFile("json", null);
    tempFile.deleteOnExit();
    FileWriter writer = new FileWriter(tempFile);
    writer.write("{\"score\": 10}\n");
    writer.write("{\"score\": 20}\n");
    writer.write("{\"score\": 30}\n");
    writer.close();
    PigServer pigServer = PigTestUtil.makePigServer();
    logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath() + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
    logAndRegisterQuery(pigServer, "a = foreach data generate (int) json#'score' as score;");
    logAndRegisterQuery(pigServer, "b = group a all;");
    logAndRegisterQuery(pigServer, "c = foreach b generate SUM(a.score) as total_score;");
    Iterator<Tuple> tuples = pigServer.openIterator("c");
    int count = 0;
    while (tuples.hasNext()) {
        Tuple t = tuples.next();
        // expected sum of scores
        Assert.assertEquals(new Long(60), t.get(0));
        count++;
    }
    // expect just one tuple
    Assert.assertEquals(1, count);
}
Also used : PigServer(org.apache.pig.PigServer) FileWriter(java.io.FileWriter) File(java.io.File) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)
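
The same end-to-end style of test can avoid temporary files entirely by using Pig's built-in mock storage (org.apache.pig.builtin.mock.Storage, whose Data class also appears in the aggregation list below). A hedged sketch of that pattern, reproducing the score-summing pipeline in memory:

import static org.apache.pig.builtin.mock.Storage.resetData;
import static org.apache.pig.builtin.mock.Storage.tuple;

import java.util.List;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.builtin.mock.Storage.Data;
import org.apache.pig.data.Tuple;

PigServer pigServer = new PigServer(ExecType.LOCAL);
Data data = resetData(pigServer);
// Provide the input tuples in memory instead of writing a temp file.
data.set("in", tuple(10), tuple(20), tuple(30));
pigServer.registerQuery("a = load 'in' using mock.Storage() as (score:int);");
pigServer.registerQuery("b = group a all;");
pigServer.registerQuery("c = foreach b generate SUM(a.score) as total_score;");
pigServer.registerQuery("store c into 'out' using mock.Storage();");
// Expect a single tuple whose first field is the long value 60.
List<Tuple> out = data.get("out");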

Aggregations

PigServer (org.apache.pig.PigServer): 115
Tuple (org.apache.pig.data.Tuple): 74
ArrayList (java.util.ArrayList): 70
Test (org.junit.Test): 59
HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 37
Data (org.apache.pig.builtin.mock.Storage.Data): 15
File (java.io.File): 14
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14
FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9
Properties (java.util.Properties): 8
Vector (java.util.Vector): 8
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6
Path (org.apache.hadoop.fs.Path): 4
FileWriter (java.io.FileWriter): 3
List (java.util.List): 3
Map (java.util.Map): 3
Admin (org.apache.hadoop.hbase.client.Admin): 3
Connection (org.apache.hadoop.hbase.client.Connection): 3
Pair (org.apache.hive.hcatalog.data.Pair): 3
ExecJob (org.apache.pig.backend.executionengine.ExecJob): 3