Use of org.apache.pig.backend.executionengine.ExecJob in project vespa by vespa-engine.
From the class VespaStorageTest, method assertAllDocumentsOk:
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer ps = setup(script, conf);
    List<ExecJob> jobs = ps.executeBatch();
    PigStats stats = jobs.get(0).getStatistics();
    for (JobStats js : stats.getJobGraph()) {
        // MapReduce-specific job stats expose the underlying Hadoop counters.
        Counters hadoopCounters = ((MRJobStats) js).getHadoopCounters();
        assertNotNull(hadoopCounters);
        VespaCounters counters = VespaCounters.get(hadoopCounters);
        assertEquals(10, counters.getDocumentsSent());
        assertEquals(0, counters.getDocumentsFailed());
        assertEquals(10, counters.getDocumentsOk());
    }
}
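The setup helper is not shown in this excerpt. A minimal sketch of one way such a helper could look, assuming a local execution engine and a Pig Latin script file on disk (the real vespa test helper may register jars and parameters as well):

private PigServer setup(String script, Configuration conf) throws Exception {
    // Assumption: run locally with the supplied Configuration and queue
    // statements in batch mode so executeBatch() returns the ExecJobs.
    PigServer ps = new PigServer(ExecType.LOCAL, conf);
    ps.setBatchOn();
    ps.registerScript(script);
    return ps;
}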
Use of org.apache.pig.backend.executionengine.ExecJob in project parquet-mr by apache.
From the class TestParquetLoader, method testPredicatePushdown:
@Test
public void testPredicatePushdown() throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);
    PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
    pigServer.setValidateEachStatement(true);
    String out = "target/out";
    String out2 = "target/out2";
    int rows = 10;
    Data data = Storage.resetData(pigServer);
    List<Tuple> list = new ArrayList<Tuple>();
    for (int i = 0; i < rows; i++) {
        list.add(Storage.tuple(i, i * 1.0, i * 2L, "v" + i));
    }
    data.set("in", "c1:int, c2:double, c3:long, c4:chararray", list);
    // First batch: write the mock input out as Parquet.
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.deleteFile(out);
    pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
    pigServer.executeBatch();
    // Second batch: read it back with a filter eligible for pushdown into the reader.
    pigServer.deleteFile(out2);
    pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('c1:int, c2:double, c3:long, c4:chararray');");
    pigServer.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
    pigServer.registerQuery("STORE C into '" + out2 + "' using mock.Storage();");
    List<ExecJob> jobs = pigServer.executeBatch();
    // With pushdown enabled, only the 2 matching records are counted as read.
    long recordsRead = jobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords();
    assertEquals(2, recordsRead);
}
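To confirm that the count of 2 really comes from pushdown rather than from Pig's FILTER operator, a sketch of the same read with pushdown disabled; it assumes the Parquet output at target/out from the test above already exists, and the target/out3 path is an assumption. The input stats should then report all 10 records, since every row is read and Pig applies the filter afterwards:

Configuration noPushdown = new Configuration();
noPushdown.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, false);
PigServer plain = new PigServer(ExecType.LOCAL, noPushdown);
Storage.resetData(plain); // mock.Storage needs its Data bound to this server
plain.setBatchOn();
plain.registerQuery("B = LOAD 'target/out' using " + ParquetLoader.class.getName()
    + "('c1:int, c2:double, c3:long, c4:chararray');");
plain.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
plain.registerQuery("STORE C into 'target/out3' using mock.Storage();");
List<ExecJob> plainJobs = plain.executeBatch();
// Without pushdown all 10 records are read; the filter drops 8 of them later.
assertEquals(10, plainJobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords());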
Use of org.apache.pig.backend.executionengine.ExecJob in project hive by apache.
From the class TestHCatLoaderComplexSchema, method verifyWriteRead:
private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List<Tuple> data, List<Tuple> result, boolean provideSchemaToStorer) throws Exception {
    MockLoader.setData(tablename + "Input", data);
    try {
        createTable(tablename, tableSchema);
        PigServer server = HCatBaseTest.createPigServer(false);
        server.setBatchOn();
        server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");");
        Schema dumpedASchema = server.dumpSchema("A");
        server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer(" + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") + ");");
        ExecJob execJob = server.executeBatch().get(0);
        if (!execJob.getStatistics().isSuccessful()) {
            throw new RuntimeException("Import failed", execJob.getException());
        }
        // Test that the schema was loaded correctly.
        server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
        server.dumpSchema("X");
        Iterator<Tuple> it = server.openIterator("X");
        int i = 0;
        while (it.hasNext()) {
            Tuple input = result.get(i++);
            Tuple output = it.next();
            compareTuples(input, output);
            LOG.info("tuple : {} ", output);
        }
        Schema dumpedXSchema = server.dumpSchema("X");
        assertEquals("expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", "", compareIgnoreFiledNames(dumpedASchema, dumpedXSchema));
    } finally {
        dropTable(tablename);
    }
}
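The helpers compareTuples and compareIgnoreFiledNames (the misspelling is the test's own identifier) live elsewhere in the test class and are not shown. A rough, hypothetical sketch of a field-name-insensitive schema comparison with the same contract, returning an empty string on a match and a description of the first mismatch otherwise:

// Sketch only; requires org.apache.pig.data.DataType and
// org.apache.pig.impl.logicalLayer.schema.Schema.
private String compareIgnoreFiledNames(Schema expected, Schema actual) throws FrontendException {
    if (expected == null || actual == null) {
        return expected == actual ? "" : "one schema is null";
    }
    if (expected.size() != actual.size()) {
        return "sizes differ: " + expected.size() + " vs " + actual.size();
    }
    for (int i = 0; i < expected.size(); i++) {
        Schema.FieldSchema e = expected.getField(i);
        Schema.FieldSchema a = actual.getField(i);
        if (e.type != a.type) {
            return "type mismatch at field " + i + ": "
                + DataType.findTypeName(e.type) + " vs " + DataType.findTypeName(a.type);
        }
        // Recurse into the nested schemas of tuples, bags and maps; aliases are ignored.
        String nested = compareIgnoreFiledNames(e.schema, a.schema);
        if (!nested.isEmpty()) {
            return nested;
        }
    }
    return "";
}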
Use of org.apache.pig.backend.executionengine.ExecJob in project hive by apache.
From the class TestHCatLoaderStorer, method smallTinyIntBoundsCheckHelper:
private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) throws Exception {
    driver.run("drop table if exists test_tbl");
    driver.run("create table test_tbl (my_small_int smallint, my_tiny_int tinyint) stored as rcfile");
    PigServer server = HCatBaseTest.createPigServer(false);
    server.setBatchOn();
    server.registerQuery("data = load '" + data + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);");
    server.registerQuery("store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer('','','-onOutOfRangeValue Throw');");
    List<ExecJob> jobs = server.executeBatch();
    // The job's terminal status reveals whether out-of-range values caused a failure.
    Assert.assertEquals(expectedStatus, jobs.get(0).getStatus());
}
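A caller passes a tab-separated input file and the terminal status it expects. With '-onOutOfRangeValue Throw', data within smallint/tinyint bounds should complete while out-of-range values should fail the job; the file paths below are assumptions for illustration:

// Hypothetical call sites; the data file paths are assumptions.
smallTinyIntBoundsCheckHelper("data/in_range.txt", ExecJob.JOB_STATUS.COMPLETED);
smallTinyIntBoundsCheckHelper("data/out_of_range.txt", ExecJob.JOB_STATUS.FAILED);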