use of org.apache.pig.PigServer in project druid by druid-io.
the class AvroHadoopInputRowParserTest method buildPigAvro.
private static GenericRecord buildPigAvro(GenericRecord datum, String inputStorage, String outputStorage) throws IOException {
  final File tmpDir = Files.createTempDir();
  FileReader<GenericRecord> reader = null;
  PigServer pigServer = null;
  try {
    // 0. write avro object into temp file.
    File someAvroDatumFile = new File(tmpDir, "someAvroDatum.avro");
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
    dataFileWriter.create(SomeAvroDatum.getClassSchema(), someAvroDatumFile);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    // 1. read avro files into Pig
    pigServer = new PigServer(ExecType.LOCAL);
    pigServer.registerQuery(String.format("A = LOAD '%s' USING %s;", someAvroDatumFile, inputStorage));
    // 2. write new avro file using AvroStorage
    File outputDir = new File(tmpDir, "output");
    pigServer.store("A", String.valueOf(outputDir), outputStorage);
    // 3. read avro object from AvroStorage
    reader = DataFileReader.openReader(new File(outputDir, "part-m-00000.avro"), new GenericDatumReader<GenericRecord>());
    return reader.next();
  } finally {
    if (pigServer != null) {
      pigServer.shutdown();
    }
    Closeables.close(reader, true);
    FileUtils.deleteDirectory(tmpDir);
  }
}
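A hedged sketch of how this helper might be called; the storage function spec and the datum construction are assumptions, not taken from the druid test itself (Pig ships a builtin org.apache.pig.builtin.AvroStorage that fits the LOAD ... USING pattern above):

  // Hypothetical caller: round-trip a record through Pig using the builtin AvroStorage.
  GenericRecord datum = buildSomeAvroDatum();  // assumed helper that builds a SomeAvroDatum record
  GenericRecord fromPig = buildPigAvro(
      datum,
      "org.apache.pig.builtin.AvroStorage()",   // inputStorage, substituted into "USING %s;"
      "org.apache.pig.builtin.AvroStorage()");  // outputStorage, passed to PigServer.store(...)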
use of org.apache.pig.PigServer in project hive by apache.
the class HCatBaseTest method createPigServer.
/**
 * Creates a PigServer in LOCAL mode.
 * http://pig.apache.org/docs/r0.12.0/perf.html#error-handling
 * @param stopOnFailure equivalent of the "-stop_on_failure" command line arg; setting it to 'true' makes
 *                      debugging easier
 */
public static PigServer createPigServer(boolean stopOnFailure) throws ExecException {
  if (stopOnFailure) {
    Properties p = new Properties();
    p.put("stop.on.failure", Boolean.TRUE.toString());
    return new PigServer(ExecType.LOCAL, p);
  }
  return new PigServer(ExecType.LOCAL);
}
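A minimal usage sketch, assuming a test that wants the first failing statement to abort the script; the query text and input path are illustrative only:

  PigServer pig = createPigServer(true);  // stop_on_failure: the first failing statement aborts the run
  pig.registerQuery("A = LOAD 'target/test-data/input.txt' AS (line:chararray);");  // hypothetical path
  pig.registerQuery("B = FILTER A BY line IS NOT NULL;");
  Iterator<Tuple> rows = pig.openIterator("B");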
use of org.apache.pig.PigServer in project Resource by lovelifeming.
the class PigOperator method excutePig.
public static void excutePig(String execTypeString, String jarPath, String input, String output) throws IOException {
  PigServer pigServer = new PigServer(execTypeString);
  pigServer.registerJar(jarPath);
  // String input = "/opt/sf/input.txt";
  // String output = "/opt/sf/output.txt";
  pigServer.registerQuery("A = load '" + input + "' using TextLoader();");
  pigServer.registerQuery("B = foreach A generate flatten(TOKENIZE($0));");
  // group the single token field ($0) and count occurrences per token
  pigServer.registerQuery("C = group B by $0;");
  pigServer.registerQuery("D = foreach C generate flatten(group), COUNT(B.$0);");
  pigServer.store("D", output);
}
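A hedged example of calling this word-count helper; PigServer(String) understands exec type names such as "local" or "mapreduce", the input and output locations follow the commented-out defaults above, and the jar path is a placeholder, not part of the original class:

  // Run the word count in local mode; the registered jar is only a placeholder here.
  PigOperator.excutePig("local", "/opt/sf/udfs.jar", "/opt/sf/input.txt", "/opt/sf/output");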
use of org.apache.pig.PigServer in project elephant-bird by twitter.
the class TestJsonLoader method testPigScript.
@Test
public void testPigScript() throws IOException {
  File tempFile = File.createTempFile("json", null);
  tempFile.deleteOnExit();
  FileWriter writer = new FileWriter(tempFile);
  writer.write("{\"score\": 10}\n");
  writer.write("{\"score\": 20}\n");
  writer.write("{\"score\": 30}\n");
  writer.close();
  PigServer pigServer = PigTestUtil.makePigServer();
  logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath() + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
  logAndRegisterQuery(pigServer, "a = foreach data generate (int) json#'score' as score;");
  logAndRegisterQuery(pigServer, "b = group a all;");
  logAndRegisterQuery(pigServer, "c = foreach b generate SUM(a.score) as total_score;");
  Iterator<Tuple> tuples = pigServer.openIterator("c");
  int count = 0;
  while (tuples.hasNext()) {
    Tuple t = tuples.next();
    // expected sum of scores
    Assert.assertEquals(new Long(60), t.get(0));
    count++;
  }
  // expect just one tuple
  Assert.assertEquals(1, count);
}
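Both JsonLoader tests rely on a logAndRegisterQuery helper that is not reproduced on this page; its real implementation lives in elephant-bird's test code. A plausible minimal version, assuming it only logs the statement before registering it, would be:

  // Assumed behaviour only: print the Pig Latin statement, then register it with the server.
  private static void logAndRegisterQuery(PigServer pigServer, String query) throws IOException {
    System.out.println("Registering query: " + query);  // the real helper likely uses a logger
    pigServer.registerQuery(query);
  }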
use of org.apache.pig.PigServer in project elephant-bird by twitter.
the class TestJsonLoader method testNestedLoad.
@Test
public void testNestedLoad() throws IOException {
  File tempFile = File.createTempFile("json", null);
  tempFile.deleteOnExit();
  FileWriter writer = new FileWriter(tempFile);
  // json structure as in Twitter Streaming
  writer.write("{" + " \"entities\": {" + " \"hashtags\": [" + " {\"indices\": [0,0], \"text\": \"test1\"}," + " {\"indices\": [0,0], \"text\": \"test2\"}" + " ]," + " \"user_mentions\": []," + " \"urls\": []" + " }" + "}");
  writer.close();
  // extract hashtags from it
  PigServer pigServer = PigTestUtil.makePigServer();
  // enable nested load
  pigServer.getPigContext().getProperties().setProperty(JsonLoader.NESTED_LOAD_KEY, "true");
  logAndRegisterQuery(pigServer, "data = load '" + tempFile.getAbsolutePath() + "' using com.twitter.elephantbird.pig.load.JsonLoader() as (json: map[]);");
  logAndRegisterQuery(pigServer, "a = foreach data generate json#'entities'#'hashtags' as h;");
  logAndRegisterQuery(pigServer, "b = foreach a generate flatten(h) as h;");
  logAndRegisterQuery(pigServer, "c = foreach b generate h#'text' as h;");
  Iterator<Tuple> tuples = pigServer.openIterator("c");
  int count = 0;
  String[] hashtags = { "test1", "test2" };
  while (tuples.hasNext()) {
    Tuple t = tuples.next();
    Assert.assertEquals(hashtags[count], t.get(0).toString());
    count++;
  }
  // expect two tuples
  Assert.assertEquals(2, count);
}