Search in sources :

Example 26 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testStoreFuncSimple.

/**
 * Stores a 3x3 grid of tab-separated {@code (a, b)} rows into an unpartitioned table through
 * HCatStorer and checks that a {@code select *} returns exactly those rows, in order.
 */
@Test
public void testStoreFuncSimple() throws Exception {
    // Start from a freshly created, unpartitioned two-column table.
    AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted", "a int, b string", null, driver, storageFormat);

    // The rows written to the input file are also the rows expected back, in the same order.
    final int side = 3;
    ArrayList<String> expectedRows = new ArrayList<String>();
    for (int row = 1; row <= side; row++) {
        for (int col = 1; col <= side; col++) {
            expectedRows.add(row + "\t" + col);
        }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, expectedRows.toArray(new String[side * side]));

    // Load the file and store it through HCatStorer in a single batch.
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    pig.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');");
    pig.executeBatch();

    // Fetch the stored rows, then drop the table before verifying.
    driver.run("select * from junit_unparted");
    ArrayList<String> storedRows = new ArrayList<String>();
    driver.getResults(storedRows);
    driver.run("drop table junit_unparted");

    Iterator<String> stored = storedRows.iterator();
    for (String expected : expectedRows) {
        assertEquals(expected, stored.next());
    }
    assertFalse(stored.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 27 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testPartColsInData.

/**
 * Stores rows whose data also contains the partition column {@code b}, using the static
 * partition spec {@code b=1}, then reads the table back through HCatLoader and checks that every
 * row has two fields: the running row index and the partition value {@code "1"}.
 */
@Test
public void testPartColsInData() throws Exception {
    AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted", "a int", "b string", driver, storageFormat);
    int LOOP_SIZE = 11;
    String[] input = new String[LOOP_SIZE];
    for (int i = 0; i < LOOP_SIZE; i++) {
        // Every row carries the same partition value "1" in its second field.
        input[i] = i + "\t1";
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');");
    server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();");
    Iterator<Tuple> itr = server.openIterator("B");
    int i = 0;
    while (itr.hasNext()) {
        Tuple t = itr.next();
        assertEquals(2, t.size());
        // Expected value goes first so that a failure message reports expected/actual correctly.
        assertEquals(Integer.valueOf(i), t.get(0));
        assertEquals("1", t.get(1));
        i++;
    }
    // The loop above only exits once the iterator is exhausted, so only the row count is checked.
    assertEquals(LOOP_SIZE, i);
}
Also used : PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 28 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testNoAlias.

/**
 * Checks that HCatStorer rejects a relation with an unnamed (computed) field and a relation
 * with an upper-case column name, each with a FrontendException carrying the HCat error code.
 */
@Test
public void testNoAlias() throws Exception {
    AbstractHCatLoaderTest.dropTable("junit_parted", driver);
    AbstractHCatLoaderTest.createTable("junit_parted", "a int, b string", "ds string", driver, storageFormat);
    PigServer pig = new PigServer(ExecType.LOCAL);

    // Case 1: "a+10" has no alias, so the storer cannot derive a column name.
    boolean missingAliasRejected = storeIntoPartedFails(pig,
        "A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);",
        "B = foreach A generate a+10, b;",
        "Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.");
    assertTrue(missingAliasRejected);

    // Case 2: column "B" is not lower-case.
    boolean upperCaseRejected = storeIntoPartedFails(pig,
        "A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);",
        "B = foreach A generate a, B;",
        "Column names should all be in lowercase. Invalid name found: B");

    // The table is dropped before the final assertion, matching the original ordering.
    driver.run("drop table junit_parted");
    assertTrue(upperCaseRejected);
}

/**
 * Runs a load/foreach/store batch against {@code junit_parted} and reports whether it failed
 * with a PigException; when it does, the unwrapped exception is verified against the expected
 * type, error code and message fragment.
 *
 * @return {@code true} if a PigException was caught and verified, {@code false} if the batch ran
 *         without throwing one
 */
private boolean storeIntoPartedFails(PigServer pig, String loadQuery, String projectQuery, String expectedFragment) throws Exception {
    try {
        pig.setBatchOn();
        pig.registerQuery(loadQuery);
        pig.registerQuery(projectQuery);
        pig.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');");
        pig.executeBatch();
        return false;
    } catch (PigException caught) {
        PigException root = LogUtils.getPigException(caught);
        assertTrue(root instanceof FrontendException);
        assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, root.getErrorCode());
        assertTrue(root.getMessage().contains(expectedFragment));
        return true;
    }
}
Also used : PigServer(org.apache.pig.PigServer) PigException(org.apache.pig.PigException) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 29 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method pigValueRangeTest.

/**
 * This is used to test how Pig values of various data types which are out of range for Hive
 * target column are handled. Currently the options are to raise an error or write NULL.
 * <ol>
 * <li>create a data file with 1 column, 1 row</li>
 * <li>load into pig</li>
 * <li>use pig to store into Hive table</li>
 * <li>read from Hive table using Pig</li>
 * <li>check that read value is what is expected</li>
 * </ol>
 *
 * @param tblName Hive table name to create
 * @param hiveType datatype to use for the single column in table
 * @param pigType corresponding Pig type when loading file into Pig
 * @param goal how out-of-range values from Pig are handled by HCat, may be {@code null}
 * @param inputValue written to file which is read by Pig, thus must be something Pig can read
 *          (e.g. DateTime.toString(), rather than java.sql.Date)
 * @param expectedValue what Pig should see when reading Hive table
 * @param format date format to use for comparison of values since default DateTime.toString()
 *          includes TZ which is meaningless for Hive DATE type
 */
void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) throws Exception {
    AbstractHCatLoaderTest.dropTable(tblName, driver);
    final String field = "f1";
    AbstractHCatLoaderTest.createTable(tblName, field + " " + hiveType, null, driver, storageFormat);
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { inputValue });
    LOG.debug("File=" + INPUT_FILE_NAME);
    dumpFile(INPUT_FILE_NAME);
    PigServer server = createPigServer(true);
    int queryNumber = 1;
    logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++);
    // The returned iterator is not used; the call itself is kept from the original.
    // NOTE(review): presumably this forces Pig to evaluate the load up front - confirm.
    server.openIterator("A");
    if (goal == null) {
        logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++);
    } else {
        FrontendException fe = null;
        try {
            logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "('','','-" + HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", queryNumber++);
        } catch (FrontendException e) {
            // Remembered so the Throw case below can inspect it.
            fe = e;
        }
        switch(goal) {
            case Null:
                // do nothing, fall through and verify the data
                break;
            case Throw:
                assertTrue("Expected a FrontendException", fe != null);
                // Expected value first, actual second, so a failure message reads correctly.
                assertEquals("Expected a different FrontendException.", "Unable to store alias A", fe.getMessage());
                // this test is done
                return;
            default:
                // Fail explicitly rather than through a tautological assertion.
                throw new AssertionError("Unexpected goal: " + goal);
        }
    }
    logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber);
    CommandProcessorResponse cpr = driver.run("select * from " + tblName);
    LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage() + " for table " + tblName);
    List l = new ArrayList();
    driver.getResults(l);
    /*
     * Misc notes: Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a
     * String thus the timestamp in 't' doesn't match rawData
     */
    LOG.debug("Dumping rows via SQL from " + tblName);
    for (Object t : l) {
        LOG.debug(t == null ? null : t.toString() + " t.class=" + t.getClass());
    }
    Iterator<Tuple> itr = server.openIterator("B");
    int numRowsRead = 0;
    while (itr.hasNext()) {
        Tuple t = itr.next();
        if ("date".equals(hiveType)) {
            // DATE values are compared through the caller-supplied format.
            DateTime dateTime = (DateTime) t.get(0);
            assertTrue(format != null);
            assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, dateTime == null ? null : dateTime.toString(format));
        } else {
            assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, t.isNull(0) ? null : t.get(0).toString());
        }
        // see comment at "Dumping rows via SQL..." for why this doesn't work
        // assertEquals("Comparing Pig to Hive", t.get(0), l.get(0));
        numRowsRead++;
    }
    assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + "; table " + tblName, 1, numRowsRead);
}
Also used : CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) List(java.util.List) Tuple(org.apache.pig.data.Tuple) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)

Example 30 with PigServer

use of org.apache.pig.PigServer in project Resource by lovelifeming.

the class PigOperator method excutePig.

/**
 * Runs a small token-count Pig script: loads {@code input} as text, tokenizes each line, groups
 * the tokens and stores a count per group to {@code output}.
 *
 * @param execTypeString execution type string passed to the {@link PigServer} constructor
 * @param jarPath path of a jar to register with Pig before running the script
 * @param input location of the text to load, e.g. {@code /opt/sf/input.txt}
 * @param output location the result relation is stored to, e.g. {@code /opt/sf/output.txt}
 * @throws IOException if Pig fails to register the jar, parse a statement or store the result
 */
public static void excutePig(String execTypeString, String jarPath, String input, String output) throws IOException {
    PigServer pigServer = new PigServer(execTypeString);
    pigServer.registerJar(jarPath);
    pigServer.registerQuery("A = load'" + input + "' using TextLoader();");
    // NOTE(review): Pig's built-in is spelled TOKENIZE; lowercase "tokenize" presumably resolves
    // to a UDF from the registered jar - confirm.
    pigServer.registerQuery("B = foreach A generate flatten(tokenize($0));");
    // Each Pig Latin statement must end with ';' - the terminators were missing on the next two.
    // NOTE(review): B appears to have a single field, so "$1" may need to be "$0" - confirm.
    pigServer.registerQuery("C = group B by $1;");
    pigServer.registerQuery("D = foreach C generate flatten(group),COUNT(B.$0);");
    pigServer.store("D", output);
}
Also used : PigServer(org.apache.pig.PigServer)

Aggregations

PigServer (org.apache.pig.PigServer)114 Tuple (org.apache.pig.data.Tuple)74 ArrayList (java.util.ArrayList)68 Test (org.junit.Test)57 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)28 File (java.io.File)16 Data (org.apache.pig.builtin.mock.Storage.Data)15 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)14 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)9 Properties (java.util.Properties)8 Vector (java.util.Vector)8 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)6 CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse)5 FileWriter (java.io.FileWriter)3 IOException (java.io.IOException)3 List (java.util.List)3 Map (java.util.Map)3 Path (org.apache.hadoop.fs.Path)3 Admin (org.apache.hadoop.hbase.client.Admin)3 Connection (org.apache.hadoop.hbase.client.Connection)3