Usage example of org.apache.pig.PigServer in the Apache Hive project:
the testUnpartedReadWrite method of the TestHCatHiveCompatibility class.
@Test
public void testUnpartedReadWrite() throws Exception {
  driver.run("drop table if exists junit_unparted_noisd");
  String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE";
  driver.run(createTable);
  // Assert that the freshly created table carries no HCat instrumentation:
  // its input format must still be the plain Hive RCFile input format.
  // assertEquals (instead of assertTrue on equals) reports both values on failure.
  Table table = client.getTable("default", "junit_unparted_noisd");
  Assert.assertEquals(HCatConstants.HIVE_RCFILE_IF_CLASS, table.getSd().getInputFormat());
  PigServer server = createPigServer(false);
  logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);");
  logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer();");
  logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();");
  Iterator<Tuple> itr = server.openIterator("B");
  int i = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    Assert.assertEquals(1, t.size());
    // Expected value first, actual second (JUnit convention); the original had them reversed.
    Assert.assertEquals(Integer.valueOf(i), t.get(0));
    i++;
  }
  // The row-count check below implies the iterator is exhausted, so the
  // original's redundant assertFalse(itr.hasNext()) after the loop is dropped.
  // NOTE(review): assumes INPUT_FILE_NAME holds the values 0..10 (11 rows) — confirm against test setup.
  Assert.assertEquals(11, i);
  // Assert that the round trip through HCatStorer/HCatLoader left the table uninstrumented.
  Table table2 = client.getTable("default", "junit_unparted_noisd");
  Assert.assertEquals(HCatConstants.HIVE_RCFILE_IF_CLASS, table2.getSd().getInputFormat());
  driver.run("drop table junit_unparted_noisd");
}
Usage example of org.apache.pig.PigServer in the ShifuML shifu project:
the submitJob method of the PigExecutor class.
/**
 * Runs the given pig script; whether it executes locally or on MapReduce is
 * decided by the {@code sourceType} parameter.
 *
 * @param modelConfig
 *            - model configuration
 * @param pigScriptPath
 *            - path of pig script
 * @param paramsMap
 *            - additional parameters for pig script
 * @param sourceType
 *            - the mode to run pig: pig-local/pig-hdfs
 * @param confMap
 *            the configuration map instance
 * @param pathFinder
 *            the path finder
 * @throws IOException
 *             throw IOException when loading the parameter from {@link ModelConfig}
 */
public void submitJob(ModelConfig modelConfig, String pigScriptPath, Map<String, String> paramsMap, SourceType sourceType, Map<String, String> confMap, PathFinder pathFinder) throws IOException {
    final PigServer pigServer = createPigServer(sourceType);
    // Propagate shifu hadoop environment settings into the pig context.
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {
        @Override
        public void inject(Object key, Object value) {
            pigServer.getPigContext().getProperties().put(key, value);
        }
    });
    // Explicit configuration entries are applied afterwards, so they win over
    // the injected environment defaults.
    if (confMap != null) {
        for (Map.Entry<String, String> confEntry : confMap.entrySet()) {
            pigServer.getPigContext().getProperties().put(confEntry.getKey(), confEntry.getValue());
        }
    }
    // Script parameters: model-derived values first, caller overrides second.
    Map<String, String> scriptParams = CommonUtils.getPigParamMap(modelConfig, sourceType, pathFinder);
    if (paramsMap != null) {
        scriptParams.putAll(paramsMap);
    }
    log.debug("Pig submit parameters: {}", scriptParams);
    // An absolute path is read from the file system; anything else is looked
    // up on the classpath.
    boolean isAbsolutePath = new File(pigScriptPath).isAbsolute();
    if (!isAbsolutePath) {
        log.info("Pig script relative path is {}", pigScriptPath);
        pigServer.registerScript(PigExecutor.class.getClassLoader().getResourceAsStream(pigScriptPath), scriptParams);
    } else {
        log.info("Pig script absolute path is {}", pigScriptPath);
        pigServer.registerScript(pigScriptPath, scriptParams);
    }
}
Usage example of org.apache.pig.PigServer in the ShifuML shifu project:
the overloaded submitJob method of the PigExecutor class.
/**
 * Submits an in-memory pig script for execution.
 *
 * @param sourceType
 *            the mode to run pig: local or HDFS
 * @param pigScripts
 *            the pig script text to register and run
 * @throws IOException
 *             if the script cannot be registered
 */
public void submitJob(SourceType sourceType, String pigScripts) throws IOException {
    PigServer pigServer = createPigServer(sourceType);
    // Encode with an explicit charset: the no-arg getBytes() uses the JVM's
    // platform default, which corrupts non-ASCII script text on machines
    // whose default encoding is not UTF-8.
    pigServer.registerScript(
            new ByteArrayInputStream(pigScripts.getBytes(java.nio.charset.StandardCharsets.UTF_8)));
}
Usage example of org.apache.pig.PigServer in the ShifuML shifu project:
the createPigServer method of the PigExecutor class.
/**
 * Creates a PigServer for the given source type: LOCAL for non-HDFS sources,
 * otherwise TEZ (when configured and available on the classpath) or MAPREDUCE.
 *
 * @param sourceType
 *            the data source type that selects the execution mode
 * @return a configured {@link PigServer}, never null
 * @throws IOException
 *             if the pig server cannot be created
 */
private PigServer createPigServer(SourceType sourceType) throws IOException {
    // Non-HDFS sources always run locally; early return flattens the nesting.
    if (!SourceType.HDFS.equals(sourceType)) {
        log.info("ExecType: LOCAL");
        return new ShifuPigServer(ExecType.LOCAL);
    }
    PigServer pigServer = null;
    // equalsIgnoreCase avoids the locale-sensitive toLowerCase() comparison
    // (the "Turkish-I" problem) the original relied on.
    if ("tez".equalsIgnoreCase(Environment.getProperty("shifu.pig.exectype", "MAPREDUCE")) && isTezRunnable()) {
        try {
            // Tez is loaded reflectively so shifu has no hard dependency on it.
            Class<?> tezClazz = Class.forName("org.apache.pig.backend.hadoop.executionengine.tez.TezExecType");
            log.info("Pig ExecType: TEZ");
            // getDeclaredConstructor().newInstance() replaces the deprecated
            // Class.newInstance(), which wrapped no checked exceptions.
            pigServer = new ShifuPigServer((ExecType) tezClazz.getDeclaredConstructor().newInstance());
        } catch (Throwable t) {
            // Tez classes missing or not instantiable — fall back to MapReduce below.
            pigServer = null;
        }
    }
    if (pigServer == null) {
        log.info("Pig ExecType: MAPREDUCE");
        pigServer = new ShifuPigServer(ExecType.MAPREDUCE);
    }
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // For HDP 2.2.4, hdp.version should be set and configuration files
        // should be added to the container classpath.
        pigServer.getPigContext().getProperties().put("hdp.version", hdpVersion);
        pigServer.getPigContext().addJar(HDPUtils.findContainingFile("hdfs-site.xml"));
        pigServer.getPigContext().addJar(HDPUtils.findContainingFile("core-site.xml"));
        pigServer.getPigContext().addJar(HDPUtils.findContainingFile("mapred-site.xml"));
        pigServer.getPigContext().addJar(HDPUtils.findContainingFile("yarn-site.xml"));
    }
    return pigServer;
}
Usage example of org.apache.pig.PigServer in the Twitter elephant-bird project:
the testPigScript method of the TestJsonLoader class.
@Test
public void testPigScript() throws IOException {
  File tempFile = File.createTempFile("json", null);
  tempFile.deleteOnExit();
  // try-with-resources guarantees the writer is closed (and data flushed)
  // even if a write throws; the original leaked the handle on exception.
  try (FileWriter writer = new FileWriter(tempFile)) {
    writer.write("{\"score\": 10}\n");
    writer.write("{\"score\": 20}\n");
    writer.write("{\"score\": 30}\n");
  }
  PigServer pigServer = PigTestUtil.makePigServer();
  logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath() + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
  logAndRegisterQuery(pigServer, "a = foreach data generate (int) json#'score' as score;");
  logAndRegisterQuery(pigServer, "b = group a all;");
  logAndRegisterQuery(pigServer, "c = foreach b generate SUM(a.score) as total_score;");
  Iterator<Tuple> tuples = pigServer.openIterator("c");
  int count = 0;
  while (tuples.hasNext()) {
    Tuple t = tuples.next();
    // Expected sum of scores: 10 + 20 + 30. Long.valueOf replaces the
    // deprecated boxing constructor new Long(60).
    Assert.assertEquals(Long.valueOf(60), t.get(0));
    count++;
  }
  // Grouping by ALL yields exactly one tuple.
  Assert.assertEquals(1, count);
}
End of aggregated org.apache.pig.PigServer usage examples.