Search in sources:

Example 46 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testMultiPartColsInData.

/**
 * Stores four rows through Pig into a table partitioned by (emp_country, emp_state), issuing one
 * STORE per static partition spec, then verifies both the rows returned by Hive and the partition
 * directory layout under the table location.
 *
 * @throws Exception if table setup, the Pig batch, or reading the results fails
 */
@Test
public void testMultiPartColsInData() throws Exception {
    driver.run("drop table employee");
    String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
        throw new IOException("Failed to create table.");
    }
    // One row per state, listed in ascending emp_id order so that sorted query output lines up with it.
    String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", "111240\tKavya\t01/01/2002\tF\tIN\tAP" };
    String[] states = { "TN", "KA", "KL", "AP" };
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);");
    // Register every FILTER first and every STORE afterwards, keeping the original statement order.
    for (String state : states) {
        pig.registerQuery(state + " = FILTER A BY emp_state == '" + state + "';");
    }
    for (String state : states) {
        pig.registerQuery("STORE " + state + " INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=" + state + "');");
    }
    pig.executeBatch();
    driver.run("select * from employee");
    ArrayList<String> results = new ArrayList<String>();
    driver.getResults(results);
    assertEquals(inputData.length, results.size());
    Collections.sort(results);
    for (int i = 0; i < inputData.length; i++) {
        assertEquals(inputData[i], results.get(i));
    }
    // verify the directories in table location
    Path tablePath = new Path(client.getTable("default", "employee").getSd().getLocation());
    FileSystem fs = tablePath.getFileSystem(hiveConf);
    assertEquals(1, fs.listStatus(tablePath).length);
    // Resolve the child with Path(parent, child): Hadoop paths use '/' as the separator, whereas
    // File.separator depends on the local operating system.
    assertEquals(states.length, fs.listStatus(new Path(tablePath, "emp_country=IN")).length);
    driver.run("drop table employee");
}
Also used : Path(org.apache.hadoop.fs.Path) PigServer(org.apache.pig.PigServer) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 47 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testEmptyStore.

/**
 * Stores the result of a filter that rejects every input row (a is 1..3, the filter keeps a > 100)
 * into an unpartitioned table and checks that a subsequent select returns no rows.
 */
@Test
public void testEmptyStore() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String ddl = "create table junit_unparted(a int, b string) stored as " + storageFormat;
    if (driver.run(ddl).getResponseCode() != 0) {
        throw new IOException("Failed to create table.");
    }

    // Build the side x side grid of "a<TAB>b" lines, both values running from 1 to side.
    final int side = 3;
    String[] lines = new String[side * side];
    for (int idx = 0; idx < lines.length; idx++) {
        int a = idx / side + 1;
        int b = idx % side + 1;
        lines[idx] = a + "\t" + b;
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, lines);

    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setBatchOn();
    pigServer.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    pigServer.registerQuery("B = filter A by a > 100;");
    pigServer.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');");
    pigServer.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> fetched = new ArrayList<String>();
    driver.getResults(fetched);
    driver.run("drop table junit_unparted");
    assertFalse(fetched.iterator().hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 48 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testDynamicPartitioningMultiPartColsNoDataInDataNoSpec.

/**
 * Runs a STORE with no partition spec against an empty input file and checks that the partitioned
 * table contains no rows afterwards.
 */
@Test
public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException {
    driver.run("drop table if exists employee");
    String ddl = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat;
    if (driver.run(ddl).getResponseCode() != 0) {
        throw new IOException("Failed to create table.");
    }

    // The input file is created with zero lines.
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[0]);

    String loadQuery = "A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);";
    String storeQuery = "STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();";
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setBatchOn();
    pigServer.registerQuery(loadQuery);
    pigServer.registerQuery("IN = FILTER A BY emp_country == 'IN';");
    pigServer.registerQuery(storeQuery);
    pigServer.executeBatch();

    driver.run("select * from employee");
    ArrayList<String> fetched = new ArrayList<String>();
    driver.getResults(fetched);
    assertEquals(0, fetched.size());
    driver.run("drop table employee");
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 49 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testDateCharTypes.

/**
 * Create a data file with datatypes added in 0.13 (here: char, varchar and decimal columns). Read
 * it with Pig and use Pig + HCatStorer to write to a Hive table. Then read the table back with
 * Pig + HCatLoader and check every row against the original input. Rows fetched through Hive are
 * only logged, not asserted.
 *
 * @throws Exception if table setup, a Pig query, or reading the results fails
 */
@Test
public void testDateCharTypes() throws Exception {
    final String tblName = "junit_date_char";
    AbstractHCatLoaderTest.dropTable(tblName, driver);
    AbstractHCatLoaderTest.createTable(tblName, "id int, char5 char(5), varchar10 varchar(10), dec52 decimal(5,2)", null, driver, storageFormat);
    final int numRows = 5;
    String[] rows = new String[numRows];
    for (int i = 0; i < numRows; i++) {
        // since the file is read by Pig, we need to make sure the values are in format that Pig
        // understands
        // otherwise it will turn the value to NULL on read
        rows[i] = i + "\txxxxx\tyyy\t" + 5.2;
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows);
    LOG.debug("File=" + INPUT_FILE_NAME);
    PigServer server = createPigServer(true);
    int queryNumber = 1;
    logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (id:int, char5:chararray, varchar10:chararray, dec52:bigdecimal);", queryNumber++);
    logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++);
    logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber);
    CommandProcessorResponse cpr = driver.run("select * from " + tblName);
    LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage());
    List<String> hiveRows = new ArrayList<String>();
    driver.getResults(hiveRows);
    LOG.debug("Dumping rows via SQL from " + tblName);
    // NOTE(review): the previous comment here blamed Timestamp.toString() applying the local time
    // zone for the Hive rows not matching the raw data, but this table has no timestamp column.
    // Confirm whether a Pig-to-Hive comparison could be asserted for these types.
    for (String hiveRow : hiveRows) {
        LOG.debug(hiveRow);
    }
    Iterator<Tuple> itr = server.openIterator("B");
    int numRowsRead = 0;
    while (itr.hasNext()) {
        Tuple t = itr.next();
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException on extra rows.
        assertFalse("Pig returned more than " + numRows + " rows; file=" + INPUT_FILE_NAME, numRowsRead >= numRows);
        StringBuilder rowFromPig = new StringBuilder();
        for (int i = 0; i < t.size(); i++) {
            // Put the separator before every field but the first, so an empty tuple is handled too.
            if (i > 0) {
                rowFromPig.append("\t");
            }
            rowFromPig.append(t.get(i));
        }
        assertEquals("Comparing Pig to Raw data", rows[numRowsRead], rowFromPig.toString());
        numRowsRead++;
    }
    assertEquals("Expected " + numRows + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME, numRows, numRowsRead);
}
Also used : CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) ArrayList(java.util.ArrayList) PigServer(org.apache.pig.PigServer) List(java.util.List) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 50 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class TestHCatStorerWrapper method testStoreExternalTableWithExternalDir.

/**
 * Stores a 3x3 grid of rows into partition c=100 of an external table through HCatStorerWrapper,
 * passing an explicit external directory, then checks that a part-m-00000 file appears directly in
 * that directory and that Hive returns every row with the partition value appended.
 */
@Test
public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException {
    File externalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString());
    externalDir.deleteOnExit();
    final String partitionValue = "100";
    driver.run("drop table junit_external");
    int createRc = driver.run("create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE").getResponseCode();
    Assert.assertEquals(0, createRc);

    // Build the side x side grid of "a<TAB>b" lines, both values running from 1 to side.
    final int side = 3;
    String[] lines = new String[side * side];
    for (int idx = 0; idx < lines.length; idx++) {
        lines[idx] = (idx / side + 1) + "\t" + (idx % side + 1);
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, lines);

    // Backslashes in the directory path are turned into forward slashes before it is embedded in the query.
    String externalLocation = externalDir.getPath().replace('\\', '/');
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setBatchOn();
    logAndRegister(pigServer, "A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    logAndRegister(pigServer, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() + "('c=" + partitionValue + "','" + externalLocation + "');");
    pigServer.executeBatch();

    Assert.assertTrue(externalDir.exists());
    boolean sawPartFile = false;
    File[] children = externalDir.listFiles();
    if (children != null) {
        for (File child : children) {
            if (child.getPath().contains("part-m-00000")) {
                sawPartFile = true;
                break;
            }
        }
    }
    Assert.assertTrue(sawPartFile);

    driver.run("select * from junit_external");
    ArrayList<String> fetched = new ArrayList<String>();
    driver.getResults(fetched);
    driver.run("drop table junit_external");

    // Rows are expected in the same order they were written, each followed by the partition value.
    Iterator<String> fetchedRows = fetched.iterator();
    for (int idx = 0; idx < side * side; idx++) {
        String expected = (idx / side + 1) + "\t" + (idx % side + 1) + "\t" + partitionValue;
        Assert.assertEquals(expected, fetchedRows.next());
    }
    Assert.assertFalse(fetchedRows.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) File(java.io.File) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest) Test(org.junit.Test)

Aggregations

PigServer (org.apache.pig.PigServer)50 Test (org.junit.Test)38 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)28 ArrayList (java.util.ArrayList)22 Tuple (org.apache.pig.data.Tuple)20 IOException (java.io.IOException)17 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)11 File (java.io.File)10 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)9 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)6 CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse)5 Before (org.junit.Before)4 Properties (java.util.Properties)3 HBaseAdmin (org.apache.hadoop.hbase.client.HBaseAdmin)3 CliSessionState (org.apache.hadoop.hive.cli.CliSessionState)3 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 Driver (org.apache.hadoop.hive.ql.Driver)3 Pair (org.apache.hive.hcatalog.data.Pair)3 RandomAccessFile (java.io.RandomAccessFile)2 List (java.util.List)2