Search in sources :

Example 21 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testBagNStruct.

/**
 * Writes two rows containing a struct column and three bag columns through
 * {@code HCatStorer} into {@code junit_unparted}, then checks the rows returned by
 * {@code select *} against their expected textual rendering.
 *
 * @throws Exception if table setup, the Pig batch, or a driver call fails
 */
@Test
public void testBagNStruct() throws Exception {
    // Hive-side column definitions; note arr_of_struct is declared as array<string> here
    // while the Pig side declares it as a bag of single-field tuples.
    String hiveColumns = "b string,a struct<a1:int>,  arr_of_struct array<string>, " + "arr_of_struct2 array<struct<s1:string,s2:string>>,  arr_of_struct3 array<struct<s3:string>>";
    // The same Pig schema text is used for both the load and the store statement.
    String pigSchema = "b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)}";

    AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted", hiveColumns, null, driver, storageFormat);

    String[] rows = new String[2];
    rows[0] = "zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}";
    rows[1] = "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}";
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (" + pigSchema + ");");
    pig.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','" + pigSchema + "');");
    pig.executeBatch();

    // Fetch the stored rows, then drop the table before asserting on the fetched copy.
    driver.run("select * from junit_unparted");
    ArrayList<String> fetched = new ArrayList<String>();
    driver.getResults(fetched);
    driver.run("drop table junit_unparted");

    Iterator<String> actual = fetched.iterator();
    String expectedFirst = "zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]";
    String expectedSecond = "chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]";
    assertEquals(expectedFirst, actual.next());
    assertEquals(expectedSecond, actual.next());
    // Exactly two rows are expected.
    assertFalse(actual.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 22 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testStoreFuncAllSimpleTypes.

/**
 * Stores three rows of simple-typed values (some fields empty) through
 * {@code HCatStorer}, verifies the textual result of {@code select *}, and then reads
 * the table back through {@code HCatLoader} to check the two binary columns.
 *
 * @throws Exception if table setup, the Pig batch, or a driver call fails
 */
@Test
public void testStoreFuncAllSimpleTypes() throws Exception {
    // Seven fields are loaded and stored by Pig; the table has an eighth column (g).
    String pigSchema = "a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray";

    AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted", "a int, b float, c double, d bigint, e string, h boolean, f binary, g binary", null, driver, storageFormat);

    String[] rows = new String[3];
    // Row 0: every value is empty except the first column.
    rows[0] = "0\t\t\t\t\t\t\t";
    // Row 1: first column empty; numeric values are multiples of 2.
    int factor = 2;
    rows[1] = "\t" + factor * 2.1f + "\t" + factor * 1.1d + "\t" + factor * 2L + "\tlets hcat\ttrue\tbinary-data";
    // Row 2: first column populated; numeric values are multiples of 3.
    factor = 3;
    rows[2] = factor + "\t" + factor * 2.1f + "\t" + factor * 1.1d + "\t" + factor * 2L + "\tlets hcat\tfalse\tbinary-data";
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (" + pigSchema + ");");
    // Column g (binary) is not part of the stored relation, so NULL is expected there.
    pig.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','" + pigSchema + "');");
    pig.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> fetched = new ArrayList<String>();
    driver.getResults(fetched);

    Iterator<String> actual = fetched.iterator();
    assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", actual.next());
    assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", actual.next());
    assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", actual.next());
    assertFalse(actual.hasNext());

    // Read the same table back through HCatLoader and inspect fields 6 (f) and 7 (g).
    pig.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";");
    Iterator<Tuple> tuples = pig.openIterator("B");
    int total = 0;
    int rowsWithNullF = 0;
    while (tuples.hasNext()) {
        Tuple row = tuples.next();
        Object binaryField = row.get(6);
        if (binaryField != null) {
            assertTrue(binaryField instanceof DataByteArray);
        } else {
            rowsWithNullF++;
        }
        // Field 7 must be null in every row.
        assertNull(row.get(7));
        total++;
    }
    assertEquals(3, total);
    // Only one row is expected to have a null in field 6.
    assertEquals(1, rowsWithNullF);

    driver.run("drop table junit_unparted");
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 23 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testPartitionPublish.

/**
 * Runs a store into partition {@code b=math} of {@code ptn_fail} whose input is filtered
 * through {@code FailEvalFunc}, then asserts that no partition is listed for the table
 * and that no {@code b=math} directory exists under the table's warehouse directory.
 *
 * <p>NOTE(review): {@code FailEvalFunc} is presumably a UDF that makes the job fail;
 * confirm against its definition.
 *
 * @throws Exception if table setup or the Pig batch fails, or (as {@link IOException})
 *     if the {@code show partitions} query returns a non-zero response code
 */
@Test
public void testPartitionPublish() throws Exception {
    AbstractHCatLoaderTest.dropTable("ptn_fail", driver);
    AbstractHCatLoaderTest.createTable("ptn_fail", "a int, c string", "b string", driver, storageFormat);

    // Eleven input rows of the form "<index>\tmath".
    int rowCount = 11;
    String[] rows = new String[rowCount];
    int idx = 0;
    while (idx < rowCount) {
        rows[idx] = idx + "\tmath";
        idx++;
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, c:chararray);");
    pig.registerQuery("B = filter A by " + FailEvalFunc.class.getName() + "($0);");
    pig.registerQuery("store B into 'ptn_fail' using " + HCatStorer.class.getName() + "('b=math');");
    pig.executeBatch();

    String query = "show partitions ptn_fail";
    int responseCode = driver.run(query).getResponseCode();
    if (responseCode != 0) {
        throw new IOException("Error " + responseCode + " running query " + query);
    }

    // No partition may have been published.
    ArrayList<String> partitions = new ArrayList<String>();
    driver.getResults(partitions);
    assertEquals(0, partitions.size());

    // The table directory must exist, but the partition directory must not.
    File tableDir = new File(TEST_WAREHOUSE_DIR + "/ptn_fail");
    File partitionDir = new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math");
    assertTrue(tableDir.exists());
    assertFalse(partitionDir.exists());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) IOException(java.io.IOException) File(java.io.File) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 24 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testStoreWithNoSchema.

/**
 * Stores nine {@code (a, b)} rows into {@code junit_unparted} using {@code HCatStorer}
 * with an empty partition argument and no explicit schema, then checks that
 * {@code select *} returns exactly the input lines in the same order.
 *
 * @throws Exception if table setup, the Pig batch, or a driver call fails
 */
@Test
public void testStoreWithNoSchema() throws Exception {
    AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted", "a int, b string", null, driver, storageFormat);

    // Build every "<outer>\t<inner>" pair for outer, inner in 1..3.
    int side = 3;
    String[] expectedRows = new String[side * side];
    int next = 0;
    for (int outer = 1; outer <= side; outer++) {
        for (int inner = 1; inner <= side; inner++) {
            expectedRows[next] = outer + "\t" + inner;
            next++;
        }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, expectedRows);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    pig.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');");
    pig.executeBatch();

    // Fetch the stored rows, then drop the table before asserting on the fetched copy.
    driver.run("select * from junit_unparted");
    ArrayList<String> fetched = new ArrayList<String>();
    driver.getResults(fetched);
    driver.run("drop table junit_unparted");

    Iterator<String> actual = fetched.iterator();
    for (String expected : expectedRows) {
        assertEquals(expected, actual.next());
    }
    // No rows beyond the expected ones.
    assertFalse(actual.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 25 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testStoreMultiTables.

/**
 * Splits one input relation across two tables in a single Pig batch: rows with
 * {@code a < 2} go to {@code junit_unparted} (created with {@code storageFormat}) and
 * rows with {@code a >= 2} go to {@code junit_unparted2} (created as {@code RCFILE}).
 * The results of both tables, concatenated in that order, must equal the input lines.
 *
 * @throws Exception if table setup, the Pig batch, or a driver call fails
 */
@Test
public void testStoreMultiTables() throws Exception {
    AbstractHCatLoaderTest.dropTable("junit_unparted", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted", "a int, b string", null, driver, storageFormat);
    AbstractHCatLoaderTest.dropTable("junit_unparted2", driver);
    AbstractHCatLoaderTest.createTable("junit_unparted2", "a int, b string", null, driver, "RCFILE");

    // Build every "<outer>\t<inner>" pair for outer, inner in 1..3.
    int side = 3;
    String[] expectedRows = new String[side * side];
    int next = 0;
    for (int outer = 1; outer <= side; outer++) {
        for (int inner = 1; inner <= side; inner++) {
            expectedRows[next] = outer + "\t" + inner;
            next++;
        }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, expectedRows);

    String storer = HCatStorer.class.getName();
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    pig.registerQuery("B = filter A by a < 2;");
    pig.registerQuery("store B into 'junit_unparted' using " + storer + "();");
    pig.registerQuery("C = filter A by a >= 2;");
    pig.registerQuery("store C into 'junit_unparted2' using " + storer + "();");
    pig.executeBatch();

    // Collect the first table's rows, then append the second table's rows.
    driver.run("select * from junit_unparted");
    ArrayList<String> combined = new ArrayList<String>();
    driver.getResults(combined);
    driver.run("select * from junit_unparted2");
    ArrayList<String> fromSecondTable = new ArrayList<String>();
    driver.getResults(fromSecondTable);
    combined.addAll(fromSecondTable);

    driver.run("drop table junit_unparted");
    driver.run("drop table junit_unparted2");

    Iterator<String> actual = combined.iterator();
    for (String expected : expectedRows) {
        assertEquals(expected, actual.next());
    }
    // No rows beyond the expected ones.
    assertFalse(actual.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Aggregations

PigServer (org.apache.pig.PigServer): 114, Tuple (org.apache.pig.data.Tuple): 74, ArrayList (java.util.ArrayList): 68, Test (org.junit.Test): 57, HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 28, File (java.io.File): 16, Data (org.apache.pig.builtin.mock.Storage.Data): 15, Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14, FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9, Properties (java.util.Properties): 8, Vector (java.util.Vector): 8, HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6, CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse): 5, FileWriter (java.io.FileWriter): 3, IOException (java.io.IOException): 3, List (java.util.List): 3, Map (java.util.Map): 3, Path (org.apache.hadoop.fs.Path): 3, Admin (org.apache.hadoop.hbase.client.Admin): 3, Connection (org.apache.hadoop.hbase.client.Connection): 3