use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testDynamicPartitioningMultiPartColsNoDataInDataNoSpec.
/**
 * Runs a Pig LOAD / FILTER / STORE pipeline over an empty input file, storing into the
 * {@code employee} table through HCatStorer constructed without arguments, and checks that a
 * subsequent {@code select *} returns no rows.
 */
@Test
public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws Exception {
  final String tableName = "employee";
  final String dataColumns = "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING";
  // Third argument to createTableDefaultDB; presumably the partition columns - confirm against
  // AbstractHCatLoaderTest.
  final String partitionColumns = "emp_country STRING , emp_state STRING";
  final String pigSchema = "emp_id:int,emp_name:chararray,emp_start_date:chararray,"
      + "emp_gender:chararray,emp_country:chararray,emp_state:chararray";

  // Start from a freshly created table.
  AbstractHCatLoaderTest.dropTable(tableName, driver);
  AbstractHCatLoaderTest.createTableDefaultDB(tableName, dataColumns, partitionColumns, driver, storageFormat);

  // The input file is deliberately written with zero rows.
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[0]);

  PigServer pigServer = createPigServer(false);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (" + pigSchema + ");");
  pigServer.registerQuery("IN = FILTER A BY emp_country == 'IN';");
  pigServer.registerQuery("STORE IN INTO '" + tableName + "' USING " + HCatStorer.class.getName() + "();");
  pigServer.executeBatch();

  // Nothing was stored, so the table must come back empty.
  driver.run("select * from " + tableName);
  ArrayList<String> fetchedRows = new ArrayList<>();
  driver.getResults(fetchedRows);
  assertEquals(0, fetchedRows.size());

  driver.run("drop table " + tableName);
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testStoreWithNoCtorArgs.
/**
 * Stores a 3 x 3 grid of tab-separated {@code (a, b)} rows into {@code junit_unparted} through
 * HCatStorer constructed without arguments, then checks that {@code select *} returns exactly
 * those rows, in the same order.
 */
@Test
public void testStoreWithNoCtorArgs() throws Exception {
  final String tableName = "junit_unparted";
  AbstractHCatLoaderTest.dropTable(tableName, driver);
  AbstractHCatLoaderTest.createTableDefaultDB(tableName, "a int, b string", null, driver, storageFormat);

  // Rows are "a<TAB>b" with a and b each running 1..side; a varies slowest.
  final int side = 3;
  final String[] expectedRows = new String[side * side];
  for (int idx = 0; idx < expectedRows.length; idx++) {
    int a = idx / side + 1;
    int b = idx % side + 1;
    expectedRows[idx] = a + "\t" + b;
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, expectedRows);

  PigServer pigServer = createPigServer(false);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
  pigServer.registerQuery("store A into '" + tableName + "' using " + HCatStorer.class.getName() + "();");
  pigServer.executeBatch();

  // Fetch the stored rows, then drop the table before any assertion runs.
  driver.run("select * from " + tableName);
  ArrayList<String> fetchedRows = new ArrayList<>();
  driver.getResults(fetchedRows);
  driver.run("drop table " + tableName);

  Iterator<String> actual = fetchedRows.iterator();
  for (String expected : expectedRows) {
    assertEquals(expected, actual.next());
  }
  // No rows beyond the ones that were written.
  assertFalse(actual.hasNext());
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testDateCharTypes.
/**
 * Create a data file with datatypes added in 0.13. Read it with Pig and use Pig + HCatStorer to
 * write to a Hive table. Then read the table back with Pig (through HCatLoader) and check every
 * row against the raw input. The rows are also fetched through Hive, but those are only logged
 * at debug level and not asserted on.
 */
@Test
public void testDateCharTypes() throws Exception {
  final String tblName = "junit_date_char";
  AbstractHCatLoaderTest.dropTable(tblName, driver);
  AbstractHCatLoaderTest.createTableDefaultDB(tblName, "id int, char5 char(5), varchar10 varchar(10), dec52 decimal(5,2)", null, driver, storageFormat);
  final int NUM_ROWS = 5;
  String[] rows = new String[NUM_ROWS];
  for (int i = 0; i < NUM_ROWS; i++) {
    // since the file is read by Pig, we need to make sure the values are in format that Pig
    // understands
    // otherwise it will turn the value to NULL on read
    rows[i] = i + "\txxxxx\tyyy\t" + 5.2;
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows);
  LOG.debug("File=" + INPUT_FILE_NAME);

  PigServer server = createPigServer(true);
  int queryNumber = 1;
  logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (id:int, char5:chararray, varchar10:chararray, dec52:bigdecimal);", queryNumber++);
  logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++);
  logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber);

  // The Hive-side read is a diagnostic dump only, so a failing query is logged rather than
  // failing the test; the assertions below go through Pig.
  try {
    driver.run("select * from " + tblName);
  } catch (CommandProcessorException e) {
    LOG.debug("cpr.respCode=" + e.getResponseCode() + " cpr.errMsg=" + e.getMessage());
  }
  List<String> rowsFromHive = new ArrayList<>();
  driver.getResults(rowsFromHive);
  LOG.debug("Dumping rows via SQL from " + tblName);
  /*
   * Unfortunately Timestamp.toString() adjusts the value for local TZ and the row is a String,
   * thus a timestamp in the row doesn't match rawData.
   * NOTE(review): the table created above has no timestamp column; this remark may have been
   * carried over from a sibling test - confirm.
   */
  // The loop variable is Object so that no cast to String is applied to the fetched entries.
  for (Object row : rowsFromHive) {
    LOG.debug(row == null ? null : row.toString());
  }

  Iterator<Tuple> itr = server.openIterator("B");
  int numRowsRead = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException on rows[...].
    assertFalse("Pig returned more than " + NUM_ROWS + " rows; file=" + INPUT_FILE_NAME, numRowsRead >= NUM_ROWS);
    // Re-create the tab-separated line; the separator goes between fields so that a tuple
    // with no fields yields an empty string rather than an exception.
    StringBuilder rowFromPig = new StringBuilder();
    for (int i = 0; i < t.size(); i++) {
      if (i > 0) {
        rowFromPig.append("\t");
      }
      rowFromPig.append(t.get(i));
    }
    assertEquals("Comparing Pig to Raw data", rows[numRowsRead], rowFromPig.toString());
    // Pig rows are not compared with rowsFromHive; see the comment at "Dumping rows via SQL..."
    // for why that doesn't work (for all types).
    numRowsRead++;
  }
  assertEquals("Expected " + NUM_ROWS + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME, NUM_ROWS, numRowsRead);
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testStaticPartitioningMultiPartCols.
/**
 * Stores four employee rows into the {@code employee} table through HCatStorer configured with
 * the spec {@code 'emp_country=IN, emp_state=KA'}, then checks that {@code select *} returns the
 * same four rows once sorted.
 */
@Test
public void testStaticPartitioningMultiPartCols() throws Exception {
  final String tableName = "employee";
  final String dataColumns = "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING";
  // Third argument to createTableDefaultDB; presumably the partition columns - confirm against
  // AbstractHCatLoaderTest.
  final String partitionColumns = "emp_country STRING , emp_state STRING";
  final String pigSchema = "emp_id:int,emp_name:chararray,emp_start_date:chararray,"
      + "emp_gender:chararray,emp_country:chararray,emp_state:chararray";

  AbstractHCatLoaderTest.dropTable(tableName, driver);
  AbstractHCatLoaderTest.createTableDefaultDB(tableName, dataColumns, partitionColumns, driver, storageFormat);

  // Already in ascending string order, which the sorted comparison below relies on.
  final String[] inputData = {
    "111237\tKrishna\t01/01/1990\tM\tIN\tKA",
    "111238\tKalpana\t01/01/2000\tF\tIN\tKA",
    "111239\tSatya\t01/01/2001\tM\tIN\tKA",
    "111240\tKavya\t01/01/2002\tF\tIN\tKA"
  };
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);

  PigServer pigServer = createPigServer(false);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (" + pigSchema + ");");
  pigServer.registerQuery("IN = FILTER A BY emp_country == 'IN' AND emp_state== 'KA';");
  pigServer.registerQuery("STORE IN INTO '" + tableName + "' USING " + HCatStorer.class.getName() + "('emp_country=IN, emp_state=KA');");
  pigServer.executeBatch();

  driver.run("select * from " + tableName);
  ArrayList<String> fetchedRows = new ArrayList<>();
  driver.getResults(fetchedRows);
  assertEquals(4, fetchedRows.size());

  // Sort first so the comparison does not depend on the order Hive returns rows in.
  Collections.sort(fetchedRows);
  for (int row = 0; row < inputData.length; row++) {
    assertEquals(inputData[row], fetchedRows.get(row));
  }

  driver.run("drop table " + tableName);
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testStoreInPartiitonedTbl.
/**
 * Stores the integers 0..10 into {@code default.junit_unparted} through HCatStorer configured
 * with the spec {@code 'b=1'}, reads the table back through HCatLoader and checks that every
 * tuple is {@code (i, "1")} in input order. Finally checks that the table location contains
 * exactly one entry, i.e. that no scratch directories were left behind.
 */
@Test
public void testStoreInPartiitonedTbl() throws Exception {
  AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
  AbstractHCatLoaderTest.createTableDefaultDB("junit_unparted", "a int", "b string", driver, storageFormat);
  final int LOOP_SIZE = 11;
  String[] input = new String[LOOP_SIZE];
  for (int i = 0; i < LOOP_SIZE; i++) {
    input[i] = i + "";
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);

  PigServer server = createPigServer(false);
  server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);");
  server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');");
  server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();");

  Iterator<Tuple> itr = server.openIterator("B");
  int i = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    assertEquals(2, t.size());
    // Expected value first, so a failure message reports expected/actual the right way round.
    assertEquals(Integer.valueOf(i), t.get(0));
    assertEquals("1", t.get(1));
    i++;
  }
  // The loop above only ends once the iterator is exhausted, so the row count is the full check.
  assertEquals(LOOP_SIZE, i);

  // verify the scratch directories has been cleaned up
  Path path = new Path(client.getTable("default", "junit_unparted").getSd().getLocation());
  FileSystem fs = path.getFileSystem(hiveConf);
  // Exactly one entry is expected under the table location (presumably the b=1 partition
  // directory - confirm).
  assertEquals(1, fs.listStatus(path).length);
}
Aggregations