use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testStoreFuncSimple.
/**
 * Stores a small two-column data set into an unpartitioned Hive table through
 * {@code HCatStorer} (with an explicit schema argument) and verifies, via a plain
 * {@code select *}, that every row comes back unchanged and in the same order.
 */
@Test
public void testStoreFuncSimple() throws Exception {
  // Start from a freshly created target table.
  AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
  AbstractHCatLoaderTest.createTable("junit_unparted", "a int, b string", null, driver, storageFormat);

  // Rows are "i<TAB>j" for every i, j in 1..LOOP_SIZE, in row-major order. The same
  // list serves as both the input file content and the expected query result.
  final int LOOP_SIZE = 3;
  final ArrayList<String> expectedRows = new ArrayList<String>(LOOP_SIZE * LOOP_SIZE);
  for (int outer = 1; outer <= LOOP_SIZE; outer++) {
    for (int inner = 1; inner <= LOOP_SIZE; inner++) {
      expectedRows.add(outer + "\t" + inner);
    }
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, expectedRows.toArray(new String[expectedRows.size()]));

  // Load the file with Pig and store it into the Hive table in a single batch.
  final PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setBatchOn();
  pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
  pig.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');");
  pig.executeBatch();

  // Read the table back through the Hive driver, then drop it before asserting.
  driver.run("select * from junit_unparted");
  final ArrayList<String> fetchedRows = new ArrayList<String>();
  driver.getResults(fetchedRows);
  driver.run("drop table junit_unparted");

  // Compare row by row, then make sure there are no extra rows.
  final Iterator<String> actual = fetchedRows.iterator();
  for (String expected : expectedRows) {
    assertEquals(expected, actual.next());
  }
  assertFalse(actual.hasNext());
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testPartColsInData.
/**
 * Stores data whose second column duplicates the partition column of the target table
 * ({@code b} is both in the data and given as the static partition {@code b=1}), then
 * reads the table back through {@code HCatLoader} and checks every tuple.
 */
@Test
public void testPartColsInData() throws Exception {
  // Table with one regular column (a) and one partition column (b).
  AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
  AbstractHCatLoaderTest.createTable("junit_unparted", "a int", "b string", driver, storageFormat);

  // Input rows "0<TAB>1" .. "10<TAB>1": the partition value is the same in every row.
  final int LOOP_SIZE = 11;
  String[] input = new String[LOOP_SIZE];
  for (int i = 0; i < LOOP_SIZE; i++) {
    input[i] = i + "\t1";
  }
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);

  PigServer server = new PigServer(ExecType.LOCAL);
  server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
  server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');");
  server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();");

  Iterator<Tuple> itr = server.openIterator("B");
  int i = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    assertEquals(2, t.size());
    // Expected value goes first so that a failure message reads "expected:<i> but was:<...>".
    assertEquals(Integer.valueOf(i), t.get(0));
    assertEquals("1", t.get(1));
    i++;
  }
  // The loop above only exits once the iterator is exhausted, so the row count is the
  // remaining thing to verify.
  assertEquals(LOOP_SIZE, i);
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method testNoAlias.
/**
 * Verifies that {@code HCatStorer} rejects relations whose schema it cannot map onto the
 * Hive table: first a projection with an unnamed (computed) column, then a projection
 * containing an upper-case column name. Both attempts must fail with a
 * {@code FrontendException} carrying {@code PigHCatUtil.PIG_EXCEPTION_CODE}.
 */
@Test
public void testNoAlias() throws Exception {
  // Recreate the partitioned target table used by both store attempts.
  AbstractHCatLoaderTest.dropTable("junit_parted", driver);
  AbstractHCatLoaderTest.createTable("junit_parted", "a int, b string", "ds string", driver, storageFormat);

  final PigServer pig = new PigServer(ExecType.LOCAL);
  // Both scenarios store relation B into the same static partition.
  final String storeIntoParted = "store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');";

  // Scenario 1: "a+10" has no alias, so the storer cannot determine a column name.
  boolean unnamedColumnRejected = false;
  try {
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    pig.registerQuery("B = foreach A generate a+10, b;");
    pig.registerQuery(storeIntoParted);
    pig.executeBatch();
  } catch (PigException thrown) {
    final PigException cause = LogUtils.getPigException(thrown);
    assertTrue(cause instanceof FrontendException);
    assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, cause.getErrorCode());
    assertTrue(cause.getMessage().contains("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer."));
    unnamedColumnRejected = true;
  }
  assertTrue(unnamedColumnRejected);

  // Scenario 2: column "B" is upper case, which the storer must refuse.
  boolean upperCaseColumnRejected = false;
  try {
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);");
    pig.registerQuery("B = foreach A generate a, B;");
    pig.registerQuery(storeIntoParted);
    pig.executeBatch();
  } catch (PigException thrown) {
    final PigException cause = LogUtils.getPigException(thrown);
    assertTrue(cause instanceof FrontendException);
    assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, cause.getErrorCode());
    assertTrue(cause.getMessage().contains("Column names should all be in lowercase. Invalid name found: B"));
    upperCaseColumnRejected = true;
  }

  // Drop the table before the final assertion, as the test did originally.
  driver.run("drop table junit_parted");
  assertTrue(upperCaseColumnRejected);
}
use of org.apache.pig.PigServer in project hive by apache.
the class AbstractHCatStorerTest method pigValueRangeTest.
/**
 * This is used to test how Pig values of various data types which are out of range for the
 * Hive target column are handled. Currently the options are to raise an error or write NULL.
 * <ol>
 * <li>create a data file with 1 column, 1 row</li>
 * <li>load it into Pig</li>
 * <li>use Pig to store it into the Hive table</li>
 * <li>read from the Hive table using Pig</li>
 * <li>check that the value read is what is expected</li>
 * </ol>
 * When {@code goal} is {@code Throw}, the method returns right after verifying that the store
 * failed with the expected {@code FrontendException}; steps 4 and 5 are skipped.
 *
 * @param tblName Hive table name to create
 * @param hiveType datatype to use for the single column in table
 * @param pigType corresponding Pig type when loading file into Pig
 * @param goal how out-of-range values from Pig are handled by HCat, may be {@code null}, in
 *        which case {@code HCatStorer} is invoked without any arguments
 * @param inputValue written to file which is read by Pig, thus must be something Pig can read
 *        (e.g. DateTime.toString(), rather than java.sql.Date)
 * @param expectedValue what Pig should see when reading Hive table
 * @param format date format to use for comparison of values since default DateTime.toString()
 *        includes TZ which is meaningless for Hive DATE type; must be non-null when
 *        {@code hiveType} is {@code "date"}
 * @throws Exception if table setup, the Pig queries or the Hive query fail unexpectedly
 */
void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) throws Exception {
  // Fresh single-column table and a one-row input file.
  AbstractHCatLoaderTest.dropTable(tblName, driver);
  final String field = "f1";
  AbstractHCatLoaderTest.createTable(tblName, field + " " + hiveType, null, driver, storageFormat);
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { inputValue });
  LOG.debug("File=" + INPUT_FILE_NAME);
  dumpFile(INPUT_FILE_NAME);

  PigServer server = createPigServer(true);
  int queryNumber = 1;
  logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++);
  // The returned iterator is not consumed; the call is kept because the original test made
  // it at this point (NOTE(review): presumably to force evaluation of A - confirm).
  server.openIterator("A");

  if (goal == null) {
    logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++);
  } else {
    FrontendException fe = null;
    try {
      logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "('','','-" + HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", queryNumber++);
    } catch (FrontendException e) {
      // Remembered rather than rethrown: whether this is a failure depends on the goal.
      fe = e;
    }
    switch (goal) {
      case Null:
        // do nothing, fall through and verify the data
        break;
      case Throw:
        assertTrue("Expected a FrontendException", fe != null);
        // Expected value first, so a mismatch is reported the right way round.
        assertEquals("Expected a different FrontendException.", "Unable to store alias A", fe.getMessage());
        // this test is done
        return;
      default:
        // A goal this test does not know about: fail unconditionally.
        throw new AssertionError("Unexpected goal: " + goal);
    }
  }

  logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber);

  // Dump the rows as seen through SQL, for debugging only. These values are not compared
  // against the Pig result; see the note on local-TZ adjustment at the end of this method.
  CommandProcessorResponse cpr = driver.run("select * from " + tblName);
  LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage() + " for table " + tblName);
  List l = new ArrayList();
  driver.getResults(l);
  LOG.debug("Dumping rows via SQL from " + tblName);
  for (Object t : l) {
    LOG.debug(t == null ? null : t.toString() + " t.class=" + t.getClass());
  }

  // Read the table back through Pig and compare the single value with the expectation.
  Iterator<Tuple> itr = server.openIterator("B");
  int numRowsRead = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    if ("date".equals(hiveType)) {
      // DATE values are compared through the supplied format, not DateTime.toString().
      DateTime dateTime = (DateTime) t.get(0);
      assertTrue(format != null);
      assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, dateTime == null ? null : dateTime.toString(format));
    } else {
      assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, t.isNull(0) ? null : t.get(0).toString());
    }
    // Not compared against the SQL dump above, for the reason given in the note below:
    // assertEquals("Comparing Pig to Hive", t.get(0), l.get(0));
    numRowsRead++;
  }
  assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + "; table " + tblName, 1, numRowsRead);
  /*
   * Misc notes: Unfortunately Timestamp.toString() adjusts the value for local TZ and each
   * row fetched via SQL is a String, thus a timestamp fetched that way doesn't match the raw
   * input data.
   */
}
use of org.apache.pig.PigServer in project Resource by lovelifeming.
the class PigOperator method excutePig.
/**
 * Runs a small embedded Pig job: loads {@code input} line by line with {@code TextLoader},
 * applies {@code tokenize} to each line, groups the flattened result and stores the
 * per-group counts at {@code output}.
 *
 * <p>NOTE(review): {@code tokenize} is not defined here; it is presumably a UDF supplied by
 * the jar at {@code jarPath} - confirm against the jar's contents.
 *
 * @param execTypeString execution type name handed to the {@code PigServer} constructor
 * @param jarPath path of the jar to register with the Pig server before running the queries
 * @param input location of the text input to load
 * @param output location the final relation is stored to
 * @throws IOException if the server cannot be created, or registering the jar, registering
 *         a query or storing the result fails
 */
public static void excutePig(String execTypeString, String jarPath, String input, String output) throws IOException {
  PigServer pigServer = new PigServer(execTypeString);
  try {
    pigServer.registerJar(jarPath);
    // Every Pig Latin statement is terminated with ';'.
    pigServer.registerQuery("A = load '" + input + "' using TextLoader();");
    pigServer.registerQuery("B = foreach A generate flatten(tokenize($0));");
    pigServer.registerQuery("C = group B by $1;");
    pigServer.registerQuery("D = foreach C generate flatten(group),COUNT(B.$0);");
    pigServer.store("D", output);
  } finally {
    // Release the server's resources even when a query or the store fails.
    pigServer.shutdown();
  }
}
Aggregations