Search in sources :

Example 16 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testConvertBooleanToInt.

/**
 * Checks how a Hive {@code boolean} column is read through {@code HCatLoader} when
 * {@code HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER} is set to {@code "true"}.
 *
 * <p>The test writes a two-row tab-separated file, creates an external text table on
 * top of its directory, loads the table in a local-mode {@code PigServer}, and then
 * verifies the dumped schema (two fields, {@code a} and {@code b}) and that the
 * boolean values {@code true}/{@code false} come back as {@code 1}/{@code 0}.
 *
 * @throws Exception if the test data cannot be prepared or a Hive/Pig call fails
 */
@Test
public void testConvertBooleanToInt() throws Exception {
    String tbl = "test_convert_boolean_to_int";
    String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt";
    File inputDataDir = new File(inputFileName).getParentFile();
    // mkdirs() also creates missing parents. Its return value is false when the
    // directory already exists, so check the resulting state rather than the result.
    inputDataDir.mkdirs();
    if (!inputDataDir.isDirectory()) {
        throw new IllegalStateException("Could not create test data directory " + inputDataDir);
    }
    String[] lines = new String[] { "llama\ttrue", "alpaca\tfalse" };
    HcatTestUtils.createTestDataFile(inputFileName, lines);
    assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode());
    // Backslashes in the path are turned into forward slashes for the file:/// URI.
    assertEquals(0, driver.run("create external table " + tbl + " (a string, b boolean) row format delimited fields terminated by '\t'" + " stored as textfile location 'file:///" + inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode());
    Properties properties = new Properties();
    properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true");
    properties.put("stop.on.failure", Boolean.TRUE.toString());
    PigServer server = new PigServer(ExecType.LOCAL, properties);
    // Reuse tbl so the loaded table can never drift from the one created above.
    server.registerQuery("data = load '" + tbl + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema schema = server.dumpSchema("data");
    assertEquals(2, schema.getFields().size());
    assertEquals("a", schema.getField(0).alias);
    assertEquals(DataType.CHARARRAY, schema.getField(0).type);
    assertEquals("b", schema.getField(1).alias);
    // The reported schema type depends on whether this Pig version has a boolean type.
    if (PigHCatUtil.pigHasBooleanSupport()) {
        assertEquals(DataType.BOOLEAN, schema.getField(1).type);
    } else {
        assertEquals(DataType.INTEGER, schema.getField(1).type);
    }
    Iterator<Tuple> iterator = server.openIterator("data");
    Tuple t = iterator.next();
    assertEquals("llama", t.get(0));
    assertEquals(1, t.get(1));
    t = iterator.next();
    assertEquals("alpaca", t.get(0));
    assertEquals(0, t.get(1));
    assertFalse(iterator.hasNext());
}
Also used : PigServer(org.apache.pig.PigServer) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Properties(java.util.Properties) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 17 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testReadPartitionedBasic.

/**
 * Reads the partitioned table through {@code HCatLoader} and cross-checks it against
 * what the Hive driver returns for {@code select *}.
 *
 * <p>Three things are verified:
 * <ul>
 *   <li>the dumped Pig schema has the three fields {@code a} (integer), {@code b}
 *       (chararray) and the partition column {@code bkt} (chararray);</li>
 *   <li>a full scan yields as many rows as the Hive driver reported, each row fully
 *       populated, with {@code bkt} equal to "0" when {@code a < 2} and "1" otherwise;</li>
 *   <li>filtering on {@code bkt == '0'} yields 3 rows (all with {@code a == 1}) and
 *       filtering on {@code bkt == '1'} yields 6 rows (all with {@code a > 1}).</li>
 * </ul>
 *
 * @throws Exception if a Hive or Pig call fails
 */
@Test
public void testReadPartitionedBasic() throws Exception {
    PigServer server = createPigServer(false);
    // Fail fast with a clear message if the reference query itself does not succeed.
    assertEquals(0, driver.run("select * from " + PARTITIONED_TABLE).getResponseCode());
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size());

    // Schema check.
    server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedSchema = server.dumpSchema("W");
    List<FieldSchema> fields = dumpedSchema.getFields();
    assertEquals(3, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("a"));
    assertEquals(DataType.INTEGER, fields.get(0).type);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("b"));
    assertEquals(DataType.CHARARRAY, fields.get(1).type);
    assertTrue(fields.get(2).alias.equalsIgnoreCase("bkt"));
    assertEquals(DataType.CHARARRAY, fields.get(2).type);

    // Full scan: every row must be complete and carry the matching partition value.
    Iterator<Tuple> allRows = server.openIterator("W");
    Collection<Pair<Integer, String>> valuesRead = new ArrayList<Pair<Integer, String>>();
    while (allRows.hasNext()) {
        Tuple t = allRows.next();
        assertEquals(3, t.size());
        assertNotNull(t.get(0));
        assertNotNull(t.get(1));
        assertNotNull(t.get(2));
        assertEquals(Integer.class, t.get(0).getClass());
        assertEquals(String.class, t.get(1).getClass());
        assertEquals(String.class, t.get(2).getClass());
        valuesRead.add(new Pair<Integer, String>((Integer) t.get(0), (String) t.get(1)));
        if ((Integer) t.get(0) < 2) {
            assertEquals("0", t.get(2));
        } else {
            assertEquals("1", t.get(2));
        }
    }
    assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size());

    // Filter on partition bkt == '0'.
    server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("P1filter = filter P1 by bkt == '0';");
    Iterator<Tuple> bucketZeroRows = server.openIterator("P1filter");
    int count1 = 0;
    while (bucketZeroRows.hasNext()) {
        Tuple t = bucketZeroRows.next();
        assertEquals("0", t.get(2));
        assertEquals(1, t.get(0));
        count1++;
    }
    assertEquals(3, count1);

    // Filter on partition bkt == '1'.
    server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("P2filter = filter P2 by bkt == '1';");
    Iterator<Tuple> bucketOneRows = server.openIterator("P2filter");
    int count2 = 0;
    while (bucketOneRows.hasNext()) {
        Tuple t = bucketOneRows.next();
        assertEquals("1", t.get(2));
        assertTrue(((Integer) t.get(0)) > 1);
        count2++;
    }
    assertEquals(6, count2);
}
Also used : HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Pair(org.apache.hive.hcatalog.data.Pair) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 18 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testColumnarStorePushdown2.

/**
 * Regression test for the failure case caused by HIVE-10752.
 *
 * <p>Loads the complex table twice, self-joins the two relations on {@code name},
 * projects {@code studentid} from the second relation, orders ascending, and checks
 * that the first two rows are 42 and 1337.
 *
 * @throws Exception if a Pig call fails
 */
@Test
public void testColumnarStorePushdown2() throws Exception {
    final String loadSuffix = "' using org.apache.hive.hcatalog.pig.HCatLoader();";
    PigServer pigServer = createPigServer(false);

    // Same table loaded under two aliases so it can be joined with itself.
    pigServer.registerQuery("A = load '" + COMPLEX_TABLE + loadSuffix);
    pigServer.registerQuery("B = load '" + COMPLEX_TABLE + loadSuffix);
    pigServer.registerQuery("C = join A by name, B by name;");
    pigServer.registerQuery("D = foreach C generate B::studentid;");
    pigServer.registerQuery("E = ORDER D by studentid asc;");

    Iterator<Tuple> orderedIds = pigServer.openIterator("E");
    Tuple first = orderedIds.next();
    assertEquals(42, first.get(0));
    Tuple second = orderedIds.next();
    assertEquals(1337, second.get(0));
}
Also used : PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 19 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testDatePartitionPushUp.

/**
 * Tests that a date-partitioned table can be read through {@code HCatLoader} when
 * filtering on the {@code dt} column.
 *
 * <p>Filters the table on {@code dt == 2016-07-14} and expects exactly one tuple with
 * two fields.
 *
 * @throws Exception if a Pig call fails
 */
@Test
public void testDatePartitionPushUp() throws Exception {
    PigServer server = createPigServer(false);
    server.registerQuery("X = load '" + PARTITIONED_DATE_TABLE + "' using " + HCatLoader.class.getName() + "();");
    server.registerQuery("Y = filter X by dt == ToDate('2016-07-14','yyyy-MM-dd');");
    Iterator<Tuple> filteredRows = server.openIterator("Y");
    int numTuplesRead = 0;
    while (filteredRows.hasNext()) {
        Tuple t = filteredRows.next();
        // JUnit's argument order is (expected, actual); keeping it right makes the
        // failure message report the values the correct way round.
        assertEquals(2, t.size());
        numTuplesRead++;
    }
    assertEquals(1, numTuplesRead);
}
Also used : PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 20 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatStorerTest method testDynamicPartitioningMultiPartColsInDataPartialSpec.

/**
 * Stores rows into a table with two partition columns while specifying only one of
 * them ({@code emp_country=IN}) in the {@code HCatStorer} arguments.
 *
 * <p>The {@code employee} table is recreated, four tab-separated rows are written to
 * the input file, loaded with {@code PigStorage}, filtered on
 * {@code emp_country == 'IN'} and stored. The test then selects everything back via
 * the Hive driver, sorts the result and expects it to equal the input rows in order.
 * The table is dropped at the end.
 *
 * @throws Exception if a Hive or Pig call fails
 */
@Test
public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws Exception {
    AbstractHCatLoaderTest.dropTable("employee", driver);
    AbstractHCatLoaderTest.createTable("employee", "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING", "emp_country STRING , emp_state STRING", driver, storageFormat);

    String[] inputData = {
        "111237\tKrishna\t01/01/1990\tM\tIN\tTN",
        "111238\tKalpana\t01/01/2000\tF\tIN\tKA",
        "111239\tSatya\t01/01/2001\tM\tIN\tKL",
        "111240\tKavya\t01/01/2002\tF\tIN\tAP"
    };
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);

    String loadStatement = "A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray,emp_gender:chararray,emp_country:chararray,emp_state:chararray);";
    String filterStatement = "IN = FILTER A BY emp_country == 'IN';";
    String storeStatement = "STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');";

    // Batch mode: the statements are queued and only run on executeBatch().
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setBatchOn();
    pigServer.registerQuery(loadStatement);
    pigServer.registerQuery(filterStatement);
    pigServer.registerQuery(storeStatement);
    pigServer.executeBatch();

    driver.run("select * from employee");
    ArrayList<String> storedRows = new ArrayList<String>();
    driver.getResults(storedRows);
    assertEquals(4, storedRows.size());

    // The input rows are already in ascending order, so sorting the output lets the
    // two be compared position by position.
    Collections.sort(storedRows);
    for (int i = 0; i < inputData.length; i++) {
        assertEquals(inputData[i], storedRows.get(i));
    }
    driver.run("drop table employee");
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Aggregations

PigServer (org.apache.pig.PigServer): 114, Tuple (org.apache.pig.data.Tuple): 74, ArrayList (java.util.ArrayList): 68, Test (org.junit.Test): 57, HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 28, File (java.io.File): 16, Data (org.apache.pig.builtin.mock.Storage.Data): 15, Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14, FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9, Properties (java.util.Properties): 8, Vector (java.util.Vector): 8, HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6, CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse): 5, FileWriter (java.io.FileWriter): 3, IOException (java.io.IOException): 3, List (java.util.List): 3, Map (java.util.Map): 3, Path (org.apache.hadoop.fs.Path): 3, Admin (org.apache.hadoop.hbase.client.Admin): 3, Connection (org.apache.hadoop.hbase.client.Connection): 3