Use of org.apache.pig.PigServer in project hive by apache.
Class TestHCatStorerWrapper, method testStoreExternalTableWithExternalDir.
@Test
public void testStoreExternalTableWithExternalDir() throws Exception {
  File tmpExternalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString());
  tmpExternalDir.deleteOnExit();
  String part_val = "100";
  driver.run("drop table junit_external");
  String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE";
  driver.run(createTable);
  int LOOP_SIZE = 3;
  String[] inputData = new String[LOOP_SIZE * LOOP_SIZE];
  int k = 0;
  for (int i = 1; i <= LOOP_SIZE; i++) {
    String si = i + "";
    for (int j = 1; j <= LOOP_SIZE; j++) {
      inputData[k++] = si + "\t" + j;
    }
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
  PigServer server = HCatBaseTest.createPigServer(false);
  server.setBatchOn();
  logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
  logAndRegister(server, "store A into 'default.junit_external' using "
      + HCatStorerWrapper.class.getName() + "('c=" + part_val + "','"
      + tmpExternalDir.getPath().replaceAll("\\\\", "/") + "');");
  server.executeBatch();
  Assert.assertTrue(tmpExternalDir.exists());
  boolean found = false;
  File[] f = tmpExternalDir.listFiles();
  if (f != null) {
    for (File fin : f) {
      if (fin.getPath().contains("part-m-00000")) {
        found = true;
      }
    }
  }
  Assert.assertTrue(found);
  driver.run("select * from junit_external");
  ArrayList<String> res = new ArrayList<String>();
  driver.getResults(res);
  driver.run("drop table junit_external");
  Iterator<String> itr = res.iterator();
  for (int i = 1; i <= LOOP_SIZE; i++) {
    String si = i + "";
    for (int j = 1; j <= LOOP_SIZE; j++) {
      Assert.assertEquals(si + "\t" + j + "\t" + part_val, itr.next());
    }
  }
  Assert.assertFalse(itr.hasNext());
}
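The directory scan in the middle of this test checks that the store through HCatStorerWrapper produced at least one map output file under the external partition directory. A minimal sketch of that same check as a helper method follows (not part of the original test; the name containsMapOutputFile is made up here):

private static boolean containsMapOutputFile(File dir) {
  // listFiles() returns null if dir does not exist or is not a directory
  File[] files = dir.listFiles();
  if (files == null) {
    return false;
  }
  for (File f : files) {
    // "part-m-00000" is the first map-task output file the test above looks for
    if (f.getPath().contains("part-m-00000")) {
      return true;
    }
  }
  return false;
}

With such a helper, the assertion would read Assert.assertTrue(containsMapOutputFile(tmpExternalDir)).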
Use of org.apache.pig.PigServer in project hive by apache.
Class AbstractHCatStorerTest, method testStaticPartitioningMultiPartColsNoData.
@Test
public void testStaticPartitioningMultiPartColsNoData() throws Exception {
  AbstractHCatLoaderTest.dropTable("employee", driver);
  AbstractHCatLoaderTest.createTableDefaultDB("employee",
      "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING",
      "emp_country STRING , emp_state STRING", driver, storageFormat);
  String[] inputData = {};
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
  PigServer pig = createPigServer(false);
  pig.setBatchOn();
  pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME
      + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray,"
      + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);");
  pig.registerQuery("IN = FILTER A BY emp_country == 'IN' AND emp_state== 'KA';");
  pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName()
      + "('emp_country=IN, emp_state=KA');");
  pig.executeBatch();
  driver.run("select * from employee");
  ArrayList<String> results = new ArrayList<String>();
  driver.getResults(results);
  assertEquals(0, results.size());
  driver.run("drop table employee");
}
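For context, createTableDefaultDB is called here with the regular columns, the partition columns, the driver and the storage format. A hedged sketch of the DDL this corresponds to (the helper's exact statement may differ) is:

// Sketch only: table definition implied by the createTableDefaultDB arguments above.
String ddl = "create table employee ("
    + "emp_id INT, emp_name STRING, emp_start_date STRING, emp_gender STRING) "
    + "partitioned by (emp_country STRING, emp_state STRING) "
    + "stored as " + storageFormat;
driver.run(ddl);

Because the input file is empty, the FILTER produces no rows, and the final select confirms that the static-partition store wrote no data.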
Use of org.apache.pig.PigServer in project hive by apache.
Class AbstractHCatStorerTest, method testDynamicPartitioningMultiPartColsInDataNoSpec.
@Test
public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws Exception {
  AbstractHCatLoaderTest.dropTable("employee", driver);
  AbstractHCatLoaderTest.createTableDefaultDB("employee",
      "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING",
      "emp_country STRING , emp_state STRING", driver, storageFormat);
  String[] inputData = {
      "111237\tKrishna\t01/01/1990\tM\tIN\tTN",
      "111238\tKalpana\t01/01/2000\tF\tIN\tKA",
      "111239\tSatya\t01/01/2001\tM\tIN\tKL",
      "111240\tKavya\t01/01/2002\tF\tIN\tAP" };
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
  PigServer pig = createPigServer(false);
  pig.setBatchOn();
  pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME
      + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray,"
      + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);");
  pig.registerQuery("IN = FILTER A BY emp_country == 'IN';");
  pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();");
  pig.executeBatch();
  driver.run("select * from employee");
  ArrayList<String> results = new ArrayList<String>();
  driver.getResults(results);
  assertEquals(4, results.size());
  Collections.sort(results);
  assertEquals(inputData[0], results.get(0));
  assertEquals(inputData[1], results.get(1));
  assertEquals(inputData[2], results.get(2));
  assertEquals(inputData[3], results.get(3));
  driver.run("drop table employee");
}
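No partition spec is passed to HCatStorer in this test, so the emp_country and emp_state fields of the Pig relation supply the partition values dynamically, one partition per distinct pair in the data. A hedged follow-up (not part of the original test) that reads one of the resulting partitions back through the same driver would look like this:

// Illustrative only: read back the row that landed in emp_state=KA.
driver.run("select * from employee where emp_country='IN' and emp_state='KA'");
ArrayList<String> ka = new ArrayList<String>();
driver.getResults(ka);
// inputData contains exactly one KA row (Kalpana), so one result is expected
assertEquals(1, ka.size());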
Use of org.apache.pig.PigServer in project hive by apache.
Class AbstractHCatStorerTest, method testEmptyStore.
@Test
public void testEmptyStore() throws Exception {
  AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
  AbstractHCatLoaderTest.createTableDefaultDB("junit_unparted", "a int, b string", null,
      driver, storageFormat);
  int LOOP_SIZE = 3;
  String[] input = new String[LOOP_SIZE * LOOP_SIZE];
  int k = 0;
  for (int i = 1; i <= LOOP_SIZE; i++) {
    String si = i + "";
    for (int j = 1; j <= LOOP_SIZE; j++) {
      input[k++] = si + "\t" + j;
    }
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
  PigServer server = createPigServer(false);
  server.setBatchOn();
  server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
  server.registerQuery("B = filter A by a > 100;");
  server.registerQuery("store B into 'default.junit_unparted' using "
      + HCatStorer.class.getName() + "('','a:int,b:chararray');");
  server.executeBatch();
  driver.run("select * from junit_unparted");
  ArrayList<String> res = new ArrayList<String>();
  driver.getResults(res);
  driver.run("drop table junit_unparted");
  Iterator<String> itr = res.iterator();
  assertFalse(itr.hasNext());
}
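Here HCatStorer is given an empty partition spec (junit_unparted is unpartitioned) and an explicit Pig schema as its second constructor argument. As a side check, the filtered relation can also be confirmed empty on the Pig side; this is a sketch, not part of the original test, and openIterator triggers its own execution of the plan for alias B:

// Illustrative only: openIterator returns the tuples of alias B.
Iterator<org.apache.pig.data.Tuple> tuples = server.openIterator("B");
assertFalse(tuples.hasNext());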
Use of org.apache.pig.PigServer in project hive by apache.
Class AbstractHCatStorerTest, method testMultiPartColsInData.
@Test
public void testMultiPartColsInData() throws Exception {
  AbstractHCatLoaderTest.dropTable("employee", driver);
  AbstractHCatLoaderTest.createTableDefaultDB("employee",
      "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING",
      "emp_country STRING , emp_state STRING", driver, storageFormat);
  String[] inputData = {
      "111237\tKrishna\t01/01/1990\tM\tIN\tTN",
      "111238\tKalpana\t01/01/2000\tF\tIN\tKA",
      "111239\tSatya\t01/01/2001\tM\tIN\tKL",
      "111240\tKavya\t01/01/2002\tF\tIN\tAP" };
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
  PigServer pig = createPigServer(false);
  pig.setBatchOn();
  pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME
      + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray,"
      + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);");
  pig.registerQuery("TN = FILTER A BY emp_state == 'TN';");
  pig.registerQuery("KA = FILTER A BY emp_state == 'KA';");
  pig.registerQuery("KL = FILTER A BY emp_state == 'KL';");
  pig.registerQuery("AP = FILTER A BY emp_state == 'AP';");
  pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName()
      + "('emp_country=IN,emp_state=TN');");
  pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName()
      + "('emp_country=IN,emp_state=KA');");
  pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName()
      + "('emp_country=IN,emp_state=KL');");
  pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName()
      + "('emp_country=IN,emp_state=AP');");
  pig.executeBatch();
  driver.run("select * from employee");
  ArrayList<String> results = new ArrayList<String>();
  driver.getResults(results);
  assertEquals(4, results.size());
  Collections.sort(results);
  assertEquals(inputData[0], results.get(0));
  assertEquals(inputData[1], results.get(1));
  assertEquals(inputData[2], results.get(2));
  assertEquals(inputData[3], results.get(3));
  // verify the directories in table location
  Path path = new Path(client.getTable("default", "employee").getSd().getLocation());
  FileSystem fs = path.getFileSystem(hiveConf);
  assertEquals(1, fs.listStatus(path).length);
  assertEquals(4, fs.listStatus(new Path(client.getTable("default", "employee").getSd().getLocation()
      + File.separator + "emp_country=IN")).length);
  driver.run("drop table employee");
}
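The two fs.listStatus assertions above verify that the table location contains a single emp_country=IN directory and that it in turn contains four emp_state subdirectories, one per static store. A hedged sketch (reusing the client and fs handles from the test above) that lists those partition directories would be:

// Illustrative only: print the per-state partition directories under emp_country=IN.
Path countryDir = new Path(
    client.getTable("default", "employee").getSd().getLocation(), "emp_country=IN");
for (org.apache.hadoop.fs.FileStatus status : fs.listStatus(countryDir)) {
  // expected names: emp_state=AP, emp_state=KA, emp_state=KL, emp_state=TN
  System.out.println(status.getPath().getName());
}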