use of org.apache.pig.PigServer in project parquet-mr by apache.
the class TestParquetLoader method testColumnIndexAccess.
/**
 * Writes ten four-column rows with {@code ParquetStorer}, reads them back through
 * {@code ParquetLoader} using a schema whose field names (n1..n4) differ from the
 * stored ones (c1..c4), and checks that every value comes back in its original position.
 *
 * @throws Exception if any Pig statement fails to register or execute
 */
@Test
public void testColumnIndexAccess() throws Exception {
  final String parquetDir = "target/out";
  final int rowCount = 10;

  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  // Input rows: (i, i as double, 2*i as long, "v" + i).
  List<Tuple> input = new ArrayList<Tuple>();
  int n = 0;
  while (n < rowCount) {
    input.add(Storage.tuple(n, n * 1.0, n * 2L, "v" + n));
    n++;
  }
  mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // First batch: mock storage -> parquet file.
  String storeParquet = "Store A into '" + parquetDir + "' using " + ParquetStorer.class.getName() + "();";
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetDir);
  pig.registerQuery(storeParquet);
  pig.executeBatch();

  // Second batch: parquet file -> mock storage, loading with renamed fields.
  // NOTE(review): the trailing 'true' argument presumably switches the loader to
  // column-index (positional) access — confirm against ParquetLoader's constructor.
  String loadParquet = "B = LOAD '" + parquetDir + "' using " + ParquetLoader.class.getName()
      + "('n1:int, n2:double, n3:long, n4:chararray', 'true');";
  pig.registerQuery(loadParquet);
  pig.registerQuery("STORE B into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> loaded = mockData.get("out");
  assertEquals(rowCount, loaded.size());

  int idx = 0;
  for (Tuple row : loaded) {
    assertEquals(4, row.size());
    assertEquals(idx, row.get(0));
    assertEquals(idx * 1.0, row.get(1));
    assertEquals(idx * 2L, row.get(2));
    assertEquals("v" + idx, row.get(3));
    idx++;
  }
}
use of org.apache.pig.PigServer in project parquet-mr by apache.
the class TestParquetStorer method testStorerCompressed.
/**
 * Stores 1000 single-column rows with {@code ParquetStorer} on a {@code PigServer}
 * configured with {@code parquet.compression=gzip} and {@code parquet.page.size=1000},
 * loads them back with {@code ParquetLoader}, and verifies row count, order and values.
 *
 * @throws ExecException declared by the original signature; kept for compatibility
 * @throws Exception if a Pig statement cannot be registered or executed
 * @throws RuntimeException if either batch job does not finish with status COMPLETED;
 *         the job's own exception is attached as the cause
 */
@Test
public void testStorerCompressed() throws ExecException, Exception {
  String out = "target/out";
  int rows = 1000;
  Properties props = new Properties();
  props.setProperty("parquet.compression", "gzip");
  props.setProperty("parquet.page.size", "1000");
  PigServer pigServer = new PigServer(ExecType.LOCAL, props);
  Data data = Storage.resetData(pigServer);
  Collection<Tuple> list = new ArrayList<Tuple>();
  for (int i = 0; i < rows; i++) {
    list.add(Storage.tuple("a" + i));
  }
  data.set("in", "a:chararray", list);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pigServer.deleteFile(out);
  pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");

  // Execute the batch exactly once and keep the job handle: calling executeBatch()
  // again just to fetch the exception would not inspect the job that failed.
  org.apache.pig.backend.executionengine.ExecJob storeJob = pigServer.executeBatch().get(0);
  if (storeJob.getStatus() != JOB_STATUS.COMPLETED) {
    throw new RuntimeException("Job failed", storeJob.getException());
  }

  pigServer.registerQuery("B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "();");
  pigServer.registerQuery("Store B into 'out' using mock.Storage();");

  // Same single-execution rule for the read-back batch.
  org.apache.pig.backend.executionengine.ExecJob loadJob = pigServer.executeBatch().get(0);
  if (loadJob.getStatus() != JOB_STATUS.COMPLETED) {
    throw new RuntimeException("Job failed", loadJob.getException());
  }

  List<Tuple> result = data.get("out");
  assertEquals(rows, result.size());
  // Rows are expected back in insertion order: "a0", "a1", ...
  int i = 0;
  for (Tuple tuple : result) {
    assertEquals("a" + i, tuple.get(0));
    ++i;
  }
}
use of org.apache.pig.PigServer in project parquet-mr by apache.
the class TestSummary method testPigScript.
/**
 * Runs the {@code Summary} UDF over 1002 nested rows grouped into a single bag,
 * prints the raw JSON result, parses it into a {@code TupleSummaryData} and prints
 * that too. The test makes no assertions; it only checks that the pipeline and the
 * JSON parsing complete without throwing.
 *
 * @throws Exception if a Pig statement fails or the JSON cannot be parsed
 */
@Test
public void testPigScript() throws Exception {
  PigServer pig = new PigServer(ExecType.LOCAL);
  Data mockData = Storage.resetData(pig);

  // Each row: ("a", "b<n>", 1L, {("a", [foo#bar])}).
  List<Tuple> input = new ArrayList<Tuple>();
  int n = 0;
  while (n < 1002) {
    input.add(t("a", "b" + n, 1L, b(t("a", m("foo", "bar")))));
    n++;
  }
  mockData.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", input);

  String summarize = "B = FOREACH (GROUP A ALL) GENERATE " + Summary.class.getName() + "(A);";
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.registerQuery(summarize);
  pig.registerQuery("STORE B INTO 'out' USING mock.Storage();");

  // First field of the first output tuple holds the summary as JSON text.
  Object rawSummary = mockData.get("out").get(0).get(0);
  System.out.println(rawSummary);

  TupleSummaryData parsed = SummaryData.fromJSON((String) rawSummary, TupleSummaryData.class);
  System.out.println(parsed);
}
use of org.apache.pig.PigServer in project parquet-mr by apache.
the class TestSummary method testMaxIsZero.
/**
 * Feeds the {@code Summary} UDF ten rows whose integer column runs from -9 up to 0
 * and asserts that the reported maximum for that column is exactly 0.
 *
 * @throws Exception if a Pig statement fails or the JSON cannot be parsed
 */
@Test
public void testMaxIsZero() throws Exception {
  PigServer pig = new PigServer(ExecType.LOCAL);
  Data mockData = Storage.resetData(pig);

  // Column b takes the values -9, -8, ..., 0, so its largest value is 0.
  List<Tuple> input = new ArrayList<Tuple>();
  int n = 0;
  while (n < 10) {
    input.add(t("a", n - 9));
    n++;
  }
  mockData.set("in", "a:chararray, b:int", input);

  String summarize = "B = FOREACH (GROUP A ALL) GENERATE " + Summary.class.getName() + "(A);";
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.registerQuery(summarize);
  pig.registerQuery("STORE B INTO 'out' USING mock.Storage();");

  String json = (String) mockData.get("out").get(0).get(0);
  TupleSummaryData summary = SummaryData.fromJSON(json, TupleSummaryData.class);
  System.out.println(summary);

  // Field index 1 is column b; compare with a zero tolerance.
  assertEquals(0, summary.getFields().get(1).getNumber().getValue().getMax(), 0);
}
use of org.apache.pig.PigServer in project vespa by vespa-engine.
the class VespaStorageTest method setup.
/**
 * Builds a local-mode {@code PigServer} in batch mode with the given script registered.
 *
 * <p>If no configuration is supplied a new {@code HdfsConfiguration} is used. The
 * dry-run and endpoint settings are filled in only when not already set, so a
 * caller-provided configuration is modified in place but its explicit values win.
 *
 * @param script path of the Pig script to register
 * @param conf configuration to use, or {@code null} for a fresh one
 * @return the configured server; the batch has been registered but not executed
 * @throws Exception if the server cannot be created or the script cannot be registered
 */
private PigServer setup(String script, Configuration conf) throws Exception {
  Configuration effectiveConf = (conf != null) ? conf : new HdfsConfiguration();
  effectiveConf.setIfUnset(VespaConfiguration.DRYRUN, "true");
  effectiveConf.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint");

  // Script parameter substitutions; the same values could come from the configuration.
  Map<String, String> substitutions = new HashMap<>();
  substitutions.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter");

  PigServer server = new PigServer(ExecType.LOCAL, effectiveConf);
  server.setBatchOn();
  server.registerScript(script, substitutions);
  return server;
}
Aggregations