Search in sources:

Example 1 with ExecJob

use of org.apache.pig.backend.executionengine.ExecJob in project vespa by vespa-engine.

From the class VespaStorageTest, the method assertAllDocumentsOk:

/**
 * Runs the given Pig script as a batch and verifies, via the Hadoop counters of
 * every job in the resulting job graph, that exactly 10 documents were sent to
 * Vespa, all 10 were accepted, and none failed.
 *
 * @param script the Pig script to execute
 * @param conf   Hadoop configuration used to set up the PigServer
 * @throws Exception if setting up or executing the batch fails
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer pigServer = setup(script, conf);
    List<ExecJob> batchJobs = pigServer.executeBatch();
    PigStats statistics = batchJobs.get(0).getStatistics();
    for (JobStats jobStats : statistics.getJobGraph()) {
        // Counters are only available on MapReduce job stats; cast is expected to hold here.
        Counters rawCounters = ((MRJobStats) jobStats).getHadoopCounters();
        assertNotNull(rawCounters);
        VespaCounters vespaCounters = VespaCounters.get(rawCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
Also used : PigServer(org.apache.pig.PigServer) PigStats(org.apache.pig.tools.pigstats.PigStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats) VespaCounters(com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters) VespaCounters(com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters) Counters(org.apache.hadoop.mapred.Counters) ExecJob(org.apache.pig.backend.executionengine.ExecJob) JobStats(org.apache.pig.tools.pigstats.JobStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats)

Example 2 with ExecJob

use of org.apache.pig.backend.executionengine.ExecJob in project parquet-mr by apache.

From the class TestParquetLoader, the method testPredicatePushdown:

/**
 * Verifies that predicate pushdown is applied by ParquetLoader: 10 rows are
 * written out as Parquet, then re-loaded with a filter (c1 == 1 or c1 == 5)
 * while pushdown is enabled. Only the 2 matching records should be counted as
 * read from the input, proving the filter ran inside the loader.
 */
@Test
public void testPredicatePushdown() throws Exception {
    Configuration hadoopConf = new Configuration();
    hadoopConf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);
    PigServer server = new PigServer(ExecType.LOCAL, hadoopConf);
    server.setValidateEachStatement(true);
    String firstOutput = "target/out";
    String secondOutput = "target/out2";
    int totalRows = 10;
    Data mockData = Storage.resetData(server);
    List<Tuple> tuples = new ArrayList<Tuple>();
    for (int row = 0; row < totalRows; row++) {
        tuples.add(Storage.tuple(row, row * 1.0, row * 2L, "v" + row));
    }
    mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", tuples);
    server.setBatchOn();
    server.registerQuery("A = LOAD 'in' USING mock.Storage();");
    server.deleteFile(firstOutput);
    server.registerQuery("Store A into '" + firstOutput + "' using " + ParquetStorer.class.getName() + "();");
    server.executeBatch();
    server.deleteFile(secondOutput);
    // Re-load the Parquet output with an explicit schema so the filter columns are typed.
    server.registerQuery("B = LOAD '" + firstOutput + "' using " + ParquetLoader.class.getName() + "('c1:int, c2:double, c3:long, c4:chararray');");
    server.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
    server.registerQuery("STORE C into '" + secondOutput + "' using mock.Storage();");
    List<ExecJob> batch = server.executeBatch();
    // Input-record count reflects what the loader actually read, not what the filter kept.
    long recordsRead = batch.get(0).getStatistics().getInputStats().get(0).getNumberRecords();
    assertEquals(2, recordsRead);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Data(org.apache.pig.builtin.mock.Storage.Data) PigServer(org.apache.pig.PigServer) ExecJob(org.apache.pig.backend.executionengine.ExecJob) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)

Example 3 with ExecJob

use of org.apache.pig.backend.executionengine.ExecJob in project hive by apache.

From the class TestHCatLoaderComplexSchema, the method verifyWriteRead:

/**
 * Round-trips data through HCatalog: stores {@code data} into a freshly created
 * table via HCatStorer, reads it back via HCatLoader, and verifies both the
 * tuples and the schema survive the trip. The table is dropped afterwards even
 * if verification fails.
 *
 * @param tablename             name of the temporary table (also used as mock-loader key)
 * @param pigSchema             Pig schema string for the loaded relation
 * @param tableSchema           HCatalog column definition used to create the table
 * @param data                  tuples to write
 * @param result                tuples expected when reading back, in order
 * @param provideSchemaToStorer whether to pass {@code pigSchema} explicitly to HCatStorer
 * @throws Exception if table creation, the Pig batch, or verification fails
 */
private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List<Tuple> data, List<Tuple> result, boolean provideSchemaToStorer) throws Exception {
    MockLoader.setData(tablename + "Input", data);
    try {
        createTable(tablename, tableSchema);
        PigServer server = HCatBaseTest.createPigServer(false);
        server.setBatchOn();
        server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");");
        Schema dumpedASchema = server.dumpSchema("A");
        server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer(" + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") + ");");
        ExecJob execJob = server.executeBatch().get(0);
        if (!execJob.getStatistics().isSuccessful()) {
            throw new RuntimeException("Import failed", execJob.getException());
        }
        // test that schema was loaded correctly
        server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
        server.dumpSchema("X");
        Iterator<Tuple> it = server.openIterator("X");
        int i = 0;
        while (it.hasNext()) {
            Tuple input = result.get(i++);
            Tuple output = it.next();
            compareTuples(input, output);
            LOG.info("tuple : {} ", output);
        }
        // Bug fix: previously the test passed vacuously if the loader returned
        // fewer rows than expected — verify every expected tuple was consumed.
        assertEquals("unexpected number of rows read back from " + tablename, result.size(), i);
        Schema dumpedXSchema = server.dumpSchema("X");
        assertEquals("expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", "", compareIgnoreFiledNames(dumpedASchema, dumpedXSchema));
    } finally {
        dropTable(tablename);
    }
}
Also used : PigServer(org.apache.pig.PigServer) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ExecJob(org.apache.pig.backend.executionengine.ExecJob) Tuple(org.apache.pig.data.Tuple)

Example 4 with ExecJob

use of org.apache.pig.backend.executionengine.ExecJob in project hive by apache.

From the class TestHCatLoaderStorer, the method smallTinyIntBoundsCheckHelper:

/**
 * Loads the given data file into an HCatalog table with smallint/tinyint
 * columns, using "-onOutOfRangeValue Throw", and asserts that the batch job
 * completes with the expected status (e.g. FAILED for out-of-range input).
 *
 * @param data           path to the tab-separated input file
 * @param expectedStatus job status the store operation is expected to end with
 * @throws Exception if the Hive DDL or the Pig batch cannot be run
 */
private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) throws Exception {
    driver.run("drop table if exists test_tbl");
    driver.run("create table test_tbl (my_small_int smallint, my_tiny_int tinyint) stored as rcfile");
    PigServer pigServer = HCatBaseTest.createPigServer(false);
    pigServer.setBatchOn();
    pigServer.registerQuery("data = load '" + data + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);");
    pigServer.registerQuery("store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer('','','-onOutOfRangeValue Throw');");
    List<ExecJob> completedJobs = pigServer.executeBatch();
    Assert.assertEquals(expectedStatus, completedJobs.get(0).getStatus());
}
Also used : PigServer(org.apache.pig.PigServer) ExecJob(org.apache.pig.backend.executionengine.ExecJob)

Aggregations

PigServer (org.apache.pig.PigServer)4 ExecJob (org.apache.pig.backend.executionengine.ExecJob)4 Tuple (org.apache.pig.data.Tuple)2 VespaCounters (com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters)1 ArrayList (java.util.ArrayList)1 Configuration (org.apache.hadoop.conf.Configuration)1 Counters (org.apache.hadoop.mapred.Counters)1 Data (org.apache.pig.builtin.mock.Storage.Data)1 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)1 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)1 JobStats (org.apache.pig.tools.pigstats.JobStats)1 PigStats (org.apache.pig.tools.pigstats.PigStats)1 MRJobStats (org.apache.pig.tools.pigstats.mapreduce.MRJobStats)1 Test (org.junit.Test)1