Search in sources :

Example 61 with PigServer

use of org.apache.pig.PigServer in project parquet-mr by apache.

the class TestParquetLoader method testColumnIndexAccess.

/**
 * Writes ten rows (int, double, long, chararray) through ParquetStorer, reads them
 * back through ParquetLoader using a schema whose field names (n1..n4) differ from
 * the names the data was stored under (c1..c4), and checks every value of every row.
 *
 * NOTE(review): the loader's second argument 'true' presumably switches on
 * column-index access, going by the test name -- confirm against ParquetLoader.
 */
@Test
public void testColumnIndexAccess() throws Exception {
    final int rows = 10;
    final String out = "target/out";

    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setValidateEachStatement(true);
    Data data = Storage.resetData(pigServer);

    // Row n is (n, n as double, 2n as long, "v" + n).
    List<Tuple> input = new ArrayList<Tuple>();
    int n = 0;
    while (n < rows) {
        input.add(Storage.tuple(n, n * 1.0, n * 2L, "v" + n));
        n++;
    }
    data.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

    // First batch: mock storage -> Parquet files under 'out'.
    String storeQuery = "Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();";
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.deleteFile(out);
    pigServer.registerQuery(storeQuery);
    pigServer.executeBatch();

    // Second batch: Parquet files -> mock storage, loaded under the renamed schema.
    String loadQuery = "B = LOAD '" + out + "' using " + ParquetLoader.class.getName()
            + "('n1:int, n2:double, n3:long, n4:chararray', 'true');";
    pigServer.registerQuery(loadQuery);
    pigServer.registerQuery("STORE B into 'out' using mock.Storage();");
    pigServer.executeBatch();

    List<Tuple> actualList = data.get("out");
    assertEquals(rows, actualList.size());

    int row = 0;
    for (Tuple actual : actualList) {
        assertEquals(4, actual.size());
        assertEquals(row, actual.get(0));
        assertEquals(row * 1.0, actual.get(1));
        assertEquals(row * 2L, actual.get(2));
        assertEquals("v" + row, actual.get(3));
        row++;
    }
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Data(org.apache.pig.builtin.mock.Storage.Data) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 62 with PigServer

use of org.apache.pig.PigServer in project parquet-mr by apache.

the class TestParquetStorer method testStorerCompressed.

/**
 * Stores 1000 single-column rows with ParquetStorer using gzip compression and a
 * 1000-byte page size, loads them back with ParquetLoader, and checks that the rows
 * come back complete and in order.
 *
 * @throws ExecException declared by the original signature; kept for compatibility
 * @throws Exception if Pig fails to register or run a query, or a job does not complete
 */
@Test
public void testStorerCompressed() throws ExecException, Exception {
    String out = "target/out";
    int rows = 1000;
    Properties props = new Properties();
    props.setProperty("parquet.compression", "gzip");
    props.setProperty("parquet.page.size", "1000");
    PigServer pigServer = new PigServer(ExecType.LOCAL, props);
    Data data = Storage.resetData(pigServer);
    Collection<Tuple> list = new ArrayList<Tuple>();
    for (int i = 0; i < rows; i++) {
        list.add(Storage.tuple("a" + i));
    }
    data.set("in", "a:chararray", list);

    // First batch: mock storage -> compressed Parquet files under 'out'.
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.deleteFile(out);
    pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
    executeBatchOrFail(pigServer);

    // Second batch: Parquet files -> mock storage.
    pigServer.registerQuery("B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "();");
    pigServer.registerQuery("Store B into 'out' using mock.Storage();");
    executeBatchOrFail(pigServer);

    List<Tuple> result = data.get("out");
    assertEquals(rows, result.size());
    int i = 0;
    for (Tuple tuple : result) {
        assertEquals("a" + i, tuple.get(0));
        ++i;
    }
}

/**
 * Runs the pending batch once and fails if its first job did not complete.
 *
 * The job is captured from a single executeBatch() call so that the exception
 * attached to the failure belongs to the job whose status was checked; calling
 * executeBatch() again to fetch the exception would not inspect that same job.
 *
 * @param pigServer server whose queued batch should be executed
 * @throws RuntimeException wrapping the job's exception when the status is not COMPLETED
 * @throws Exception if executing the batch itself fails
 */
private static void executeBatchOrFail(PigServer pigServer) throws Exception {
    // Fully qualified: ExecJob is not among the imports this snippet is known to have.
    org.apache.pig.backend.executionengine.ExecJob job = pigServer.executeBatch().get(0);
    if (job.getStatus() != JOB_STATUS.COMPLETED) {
        throw new RuntimeException("Job failed", job.getException());
    }
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Data(org.apache.pig.builtin.mock.Storage.Data) Properties(java.util.Properties) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 63 with PigServer

use of org.apache.pig.PigServer in project parquet-mr by apache.

the class TestSummary method testPigScript.

/**
 * Runs the Summary UDF over 1002 identical-shaped rows from a Pig script, prints the
 * raw value it produced, parses that value as JSON into a TupleSummaryData, and
 * prints the parsed object. The test makes no assertions; it only fails if one of
 * these steps throws.
 */
@Test
public void testPigScript() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pigServer);

    // Each row: ("a", "b<row>", 1L, bag holding one tuple ("a", {foo=bar})).
    List<Tuple> input = new ArrayList<Tuple>();
    for (int row = 0; row != 1002; ++row) {
        input.add(t("a", "b" + row, 1L, b(t("a", m("foo", "bar")))));
    }
    data.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", input);

    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.registerQuery("B = FOREACH (GROUP A ALL) GENERATE " + Summary.class.getName() + "(A);");
    pigServer.registerQuery("STORE B INTO 'out' USING mock.Storage();");

    // First field of the first output tuple holds the summary.
    Object rawSummary = data.get("out").get(0).get(0);
    System.out.println(rawSummary);

    TupleSummaryData s = SummaryData.fromJSON((String) rawSummary, TupleSummaryData.class);
    System.out.println(s);
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Data(org.apache.pig.builtin.mock.Storage.Data) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 64 with PigServer

use of org.apache.pig.PigServer in project parquet-mr by apache.

the class TestSummary method testMaxIsZero.

/**
 * Feeds the Summary UDF ten rows whose second column runs from -9 up to 0 and
 * checks that the maximum reported for that column (field index 1) is exactly 0.
 */
@Test
public void testMaxIsZero() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pigServer);

    // Values -9, -8, ..., 0: the largest value in the column is zero.
    List<Tuple> input = new ArrayList<Tuple>();
    for (int value = -9; value <= 0; value++) {
        input.add(t("a", value));
    }
    data.set("in", "a:chararray, b:int", input);

    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.registerQuery("B = FOREACH (GROUP A ALL) GENERATE " + Summary.class.getName() + "(A);");
    pigServer.registerQuery("STORE B INTO 'out' USING mock.Storage();");

    String json = (String) data.get("out").get(0).get(0);
    TupleSummaryData s = SummaryData.fromJSON(json, TupleSummaryData.class);
    System.out.println(s);
    assertEquals(0, s.getFields().get(1).getNumber().getValue().getMax(), 0);
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Data(org.apache.pig.builtin.mock.Storage.Data) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 65 with PigServer

use of org.apache.pig.PigServer in project vespa by vespa-engine.

the class VespaStorageTest method setup.

/**
 * Builds a local-mode PigServer in batch mode with the given script registered.
 *
 * The DRYRUN and ENDPOINT settings are filled in only when the supplied
 * configuration does not already define them.
 *
 * @param script path of the Pig script to register
 * @param conf   configuration to use; when {@code null} a fresh HdfsConfiguration is created
 * @return the PigServer with the script registered; the batch has not been executed
 * @throws Exception if the server cannot be created or the script cannot be registered
 */
private PigServer setup(String script, Configuration conf) throws Exception {
    Configuration effectiveConf = (conf != null) ? conf : new HdfsConfiguration();
    effectiveConf.setIfUnset(VespaConfiguration.DRYRUN, "true");
    effectiveConf.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint");

    // Script parameter substitutions; the same values could come from the configuration.
    Map<String, String> substitutions = new HashMap<>();
    substitutions.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter");

    PigServer server = new PigServer(ExecType.LOCAL, effectiveConf);
    server.setBatchOn();
    server.registerScript(script, substitutions);
    return server;
}
Also used : HashMap(java.util.HashMap) PigServer(org.apache.pig.PigServer) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration)

Aggregations

PigServer (org.apache.pig.PigServer): 115, Tuple (org.apache.pig.data.Tuple): 74, ArrayList (java.util.ArrayList): 70, Test (org.junit.Test): 59, HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 37, Data (org.apache.pig.builtin.mock.Storage.Data): 15, File (java.io.File): 14, Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14, FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9, Properties (java.util.Properties): 8, Vector (java.util.Vector): 8, HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6, Path (org.apache.hadoop.fs.Path): 4, FileWriter (java.io.FileWriter): 3, List (java.util.List): 3, Map (java.util.Map): 3, Admin (org.apache.hadoop.hbase.client.Admin): 3, Connection (org.apache.hadoop.hbase.client.Connection): 3, Pair (org.apache.hive.hcatalog.data.Pair): 3, ExecJob (org.apache.pig.backend.executionengine.ExecJob): 3