Use of org.apache.pig.PigServer in project hive by apache.
The class TestHCatLoaderComplexSchema, method verifyWriteRead.
private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List<Tuple> data,
    List<Tuple> result, boolean provideSchemaToStorer) throws Exception {
  MockLoader.setData(tablename + "Input", data);
  try {
    createTable(tablename, tableSchema);
    PigServer server = HCatBaseTest.createPigServer(false);
    server.setBatchOn();
    server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS ("
        + pigSchema + ");");
    Schema dumpedASchema = server.dumpSchema("A");
    server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer("
        + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") + ");");
    ExecJob execJob = server.executeBatch().get(0);
    if (!execJob.getStatistics().isSuccessful()) {
      throw new RuntimeException("Import failed", execJob.getException());
    }
    // Test that the schema was loaded correctly.
    server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.dumpSchema("X");
    Iterator<Tuple> it = server.openIterator("X");
    int i = 0;
    while (it.hasNext()) {
      Tuple input = result.get(i++);
      Tuple output = it.next();
      compareTuples(input, output);
      LOG.info("tuple : {} ", output);
    }
    Schema dumpedXSchema = server.dumpSchema("X");
    assertEquals("expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", "",
        compareIgnoreFiledNames(dumpedASchema, dumpedXSchema));
  } finally {
    dropTable(tablename);
  }
}
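For context, a hypothetical invocation of this helper. The table name, schema strings, and tuple values below are illustrative assumptions, not values taken from the original test suite; in this trivial case the same list can serve as both the input data and the expected result.

// Hypothetical call to verifyWriteRead with a simple two-column schema.
List<Tuple> data = new ArrayList<Tuple>();
Tuple t = TupleFactory.getInstance().newTuple(2);
t.set(0, "hello");
t.set(1, 1);
data.add(t);
// Store through HCatStorer, reload through HCatLoader, and expect the same tuples back.
verifyWriteRead("simple_table", "a:chararray, b:int", "a string, b int", data, data, true);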
Use of org.apache.pig.PigServer in project hive by apache.
The class TestHCatLoaderStorer, method testReadWrite.
/**
 * Test a round trip of smallint/tinyint data: Hive -> Pig -> Hive. This exercises the general HCatalog
 * use case: read some data from Hive, process it in Pig, and write the result back to a Hive table.
 */
@Test
public void testReadWrite() throws Exception {
  final String tblName = "small_ints_table";
  final String tblName2 = "pig_hcatalog_1";
  File dataDir = new File(TEST_DATA_DIR + File.separator + "testReadWrite");
  // Might not exist.
  FileUtil.fullyDelete(dataDir);
  Assert.assertTrue(dataDir.mkdir());
  final String INPUT_FILE_NAME = dataDir + "/inputtrw.data";
  AbstractHCatLoaderTest.dropTable(tblName, driver);
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { "40\t1" });
  AbstractHCatLoaderTest.executeStatementOnDriver("create external table " + tblName
      + " (my_small_int smallint, my_tiny_int tinyint)"
      + " row format delimited fields terminated by '\t' stored as textfile location '"
      + dataDir.toURI().getPath() + "'", driver);
  AbstractHCatLoaderTest.dropTable(tblName2, driver);
  AbstractHCatLoaderTest.createTableDefaultDB(tblName2, "my_small_int smallint, my_tiny_int tinyint",
      null, driver, "textfile");
  LOG.debug("File=" + INPUT_FILE_NAME);
  TestHCatStorer.dumpFile(INPUT_FILE_NAME);
  PigServer server = createPigServer(true);
  try {
    int queryNumber = 1;
    logAndRegister(server, "A = load '" + tblName
        + "' using org.apache.hive.hcatalog.pig.HCatLoader() as (my_small_int:int, my_tiny_int:int);",
        queryNumber++);
    logAndRegister(server, "b = foreach A generate my_small_int + my_tiny_int as my_small_int, my_tiny_int;",
        queryNumber++);
    logAndRegister(server, "store b into '" + tblName2 + "' using org.apache.hive.hcatalog.pig.HCatStorer();",
        queryNumber);
    // Perform a simple checksum here; make sure nothing got turned into NULL.
    AbstractHCatLoaderTest.executeStatementOnDriver("select my_small_int from " + tblName2, driver);
    ArrayList l = new ArrayList();
    driver.getResults(l);
    for (Object t : l) {
      LOG.debug("t=" + t);
    }
    Assert.assertEquals("Expected '1' rows; got '" + l.size() + "'", 1, l.size());
    int result = Integer.parseInt((String) l.get(0));
    Assert.assertEquals("Expected value '41'; got '" + result + "'", 41, result);
  } finally {
    server.shutdown();
  }
}
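The logAndRegister helper used above is defined elsewhere in these test classes and is not shown on this page. A minimal sketch of what it presumably does, matching the three-argument calls above, is below; the log message wording is an assumption.

private void logAndRegister(PigServer server, String query, int queryNumber) throws IOException {
  // Assumed behavior: log the Pig Latin statement for debugging, then register it with the PigServer.
  LOG.info("registering pig query #{}: {}", queryNumber, query);
  server.registerQuery(query);
}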
Use of org.apache.pig.PigServer in project hive by apache.
The class TestHCatLoaderStorer, method testSmallTinyInt.
/**
* Ensure Pig can read/write tinyint/smallint columns.
*/
@Test
public void testSmallTinyInt() throws Exception {
  String readTblName = "test_small_tiny_int";
  File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData");
  File dataFile = new File(dataDir, "testSmallTinyInt.tsv");
  String writeTblName = "test_small_tiny_int_write";
  File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv");
  // Might not exist.
  FileUtil.fullyDelete(dataDir);
  Assert.assertTrue(dataDir.mkdir());
  HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[] {
      String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE),
      String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) });
  // Create a table with smallint/tinyint columns, load data, and query from Hive.
  driver.run("drop table if exists " + readTblName);
  driver.run("create external table " + readTblName + " (my_small_int smallint, my_tiny_int tinyint)"
      + " row format delimited fields terminated by '\t' stored as textfile");
  driver.run("load data local inpath '" + dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName);
  PigServer server = HCatBaseTest.createPigServer(false);
  server.registerQuery("data = load '" + readTblName + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  // Ensure the Pig schema is correct.
  Schema schema = server.dumpSchema("data");
  Assert.assertEquals(2, schema.getFields().size());
  Assert.assertEquals("my_small_int", schema.getField(0).alias);
  Assert.assertEquals(DataType.INTEGER, schema.getField(0).type);
  Assert.assertEquals("my_tiny_int", schema.getField(1).alias);
  Assert.assertEquals(DataType.INTEGER, schema.getField(1).type);
  // Ensure Pig can read the data correctly.
  Iterator<Tuple> it = server.openIterator("data");
  Tuple t = it.next();
  Assert.assertEquals(Integer.valueOf(Short.MIN_VALUE), t.get(0));
  Assert.assertEquals(Integer.valueOf(Byte.MIN_VALUE), t.get(1));
  t = it.next();
  Assert.assertEquals(Integer.valueOf(Short.MAX_VALUE), t.get(0));
  Assert.assertEquals(Integer.valueOf(Byte.MAX_VALUE), t.get(1));
  Assert.assertFalse(it.hasNext());
  // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the
  // bounds of the column type are written, and values outside the bounds throw an exception.
  driver.run("drop table if exists " + writeTblName);
  driver.run("create table " + writeTblName + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile");
  // Values within the column type bounds.
  HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[] {
      String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE),
      String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) });
  smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED);
  // Values outside the column type bounds will fail at runtime.
  HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv",
      new String[] { String.format("%d\t%d", Short.MIN_VALUE - 1, 0) });
  smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);
  HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv",
      new String[] { String.format("%d\t%d", Short.MAX_VALUE + 1, 0) });
  smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED);
  HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv",
      new String[] { String.format("%d\t%d", 0, Byte.MIN_VALUE - 1) });
  smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);
  HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv",
      new String[] { String.format("%d\t%d", 0, Byte.MAX_VALUE + 1) });
  smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED);
}
Use of org.apache.pig.PigServer in project hive by apache.
The class TestHCatLoaderStorer, method smallTinyIntBoundsCheckHelper.
private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) throws Exception {
  driver.run("drop table if exists test_tbl");
  driver.run("create table test_tbl (my_small_int smallint, my_tiny_int tinyint) stored as rcfile");
  PigServer server = HCatBaseTest.createPigServer(false);
  server.setBatchOn();
  server.registerQuery("data = load '" + data + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);");
  server.registerQuery(
      "store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer('','','-onOutOfRangeValue Throw');");
  List<ExecJob> jobs = server.executeBatch();
  Assert.assertEquals(expectedStatus, jobs.get(0).getStatus());
}
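When a FAILED status is expected, it can also be useful to see why the store failed. A small sketch of such an extra check, which is not part of the original helper, is shown below; jobs is the list returned by executeBatch() above, and the log message is illustrative.

// Hypothetical extra check that could sit next to the assertion in the helper above.
ExecJob job = jobs.get(0);
if (job.getStatus() == ExecJob.JOB_STATUS.FAILED && job.getException() != null) {
  LOG.info("store failed as expected: {}", job.getException().getMessage());
}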
Use of org.apache.pig.PigServer in project hive by apache.
The class TestHCatHiveCompatibility, method testPartedRead.
@Test
public void testPartedRead() throws Exception {
  driver.run("drop table if exists junit_parted_noisd");
  String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE";
  driver.run(createTable);
  // Assert that the table created has no HCat instrumentation, and that we're still able to read it.
  Table table = client.getTable("default", "junit_parted_noisd");
  Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS));
  PigServer server = createPigServer(false);
  logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);");
  logAndRegister(server,
      "store A into 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer('b=42');");
  logAndRegister(server,
      "B = load 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();");
  Iterator<Tuple> itr = server.openIterator("B");
  int i = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    // Each tuple contains the explicit field "a" and the partition column "b".
    Assert.assertEquals(2, t.size());
    Assert.assertEquals(t.get(0), i);
    Assert.assertEquals(t.get(1), "42");
    i++;
  }
  Assert.assertFalse(itr.hasNext());
  Assert.assertEquals(11, i);
  // Assert that the table still has no HCat instrumentation.
  Table table2 = client.getTable("default", "junit_parted_noisd");
  Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS));
  // Assert that there is one partition present, and that it had HCat instrumentation inserted when it was created.
  Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42"));
  Assert.assertNotNull(ptn);
  Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS));
  // Drop the table this test created.
  driver.run("drop table junit_parted_noisd");
}
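INPUT_FILE_NAME is prepared elsewhere in the test class and is not shown on this page. The assertions above imply it holds eleven single-column rows with the values 0 through 10; a hedged sketch of how such a file could be produced with the same utility used in the other tests on this page follows (the exact values and setup location are assumptions).

// Hypothetical setup for INPUT_FILE_NAME: eleven rows, values 0..10, matching the loop assertions above.
String[] rows = new String[11];
for (int j = 0; j < rows.length; j++) {
  rows[j] = Integer.toString(j);
}
HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows);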