Example 81 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatLoaderComplexSchema, method verifyWriteRead.

private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List<Tuple> data, List<Tuple> result, boolean provideSchemaToStorer) throws Exception {
    MockLoader.setData(tablename + "Input", data);
    try {
        createTable(tablename, tableSchema);
        PigServer server = HCatBaseTest.createPigServer(false);
        server.setBatchOn();
        server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");");
        Schema dumpedASchema = server.dumpSchema("A");
        server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer(" + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") + ");");
        ExecJob execJob = server.executeBatch().get(0);
        if (!execJob.getStatistics().isSuccessful()) {
            throw new RuntimeException("Import failed", execJob.getException());
        }
        // test that schema was loaded correctly
        server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
        server.dumpSchema("X");
        Iterator<Tuple> it = server.openIterator("X");
        int i = 0;
        while (it.hasNext()) {
            Tuple input = result.get(i++);
            Tuple output = it.next();
            compareTuples(input, output);
            LOG.info("tuple : {} ", output);
        }
        Schema dumpedXSchema = server.dumpSchema("X");
        assertEquals("expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", "", compareIgnoreFiledNames(dumpedASchema, dumpedXSchema));
    } finally {
        dropTable(tablename);
    }
}
Also used: PigServer (org.apache.pig.PigServer), Schema (org.apache.pig.impl.logicalLayer.schema.Schema), FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema), ExecJob (org.apache.pig.backend.executionengine.ExecJob), Tuple (org.apache.pig.data.Tuple)
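
Note: the HCatBaseTest.createPigServer(false) helper is defined outside this excerpt. A minimal sketch of an equivalent local-mode setup, assuming the boolean maps to Pig's stop.on.failure property (an assumption, not shown in the source):

import java.util.Properties;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;

final class LocalPigServers {
    // Hypothetical stand-in for HCatBaseTest.createPigServer(boolean):
    // an in-process PigServer running against the local filesystem.
    static PigServer createLocalPigServer(boolean stopOnFailure) throws Exception {
        Properties props = new Properties();
        // Assumption: the flag controls Pig's stop.on.failure behavior.
        props.setProperty("stop.on.failure", Boolean.toString(stopOnFailure));
        return new PigServer(ExecType.LOCAL, props);
    }
}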

Example 82 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatLoaderStorer, method testReadWrite.

/**
 * Test round trip of smallint/tinyint: Hive->Pig->Hive. This is a typical use case in HCatalog:
 * 'read some data from Hive, process it in Pig, write the result back to a Hive table'.
 */
@Test
public void testReadWrite() throws Exception {
    final String tblName = "small_ints_table";
    final String tblName2 = "pig_hcatalog_1";
    File dataDir = new File(TEST_DATA_DIR + File.separator + "testReadWrite");
    // Might not exist
    FileUtil.fullyDelete(dataDir);
    Assert.assertTrue(dataDir.mkdir());
    final String INPUT_FILE_NAME = dataDir + "/inputtrw.data";
    AbstractHCatLoaderTest.dropTable(tblName, driver);
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { "40\t1" });
    AbstractHCatLoaderTest.executeStatementOnDriver("create external table " + tblName + " (my_small_int smallint, my_tiny_int tinyint)" + " row format delimited fields terminated by '\t' stored as textfile location '" + dataDir.toURI().getPath() + "'", driver);
    AbstractHCatLoaderTest.dropTable(tblName2, driver);
    AbstractHCatLoaderTest.createTableDefaultDB(tblName2, "my_small_int smallint, " + "my_tiny_int " + "tinyint", null, driver, "textfile");
    LOG.debug("File=" + INPUT_FILE_NAME);
    TestHCatStorer.dumpFile(INPUT_FILE_NAME);
    PigServer server = createPigServer(true);
    try {
        int queryNumber = 1;
        logAndRegister(server, "A = load '" + tblName + "' using org.apache.hive.hcatalog.pig.HCatLoader() as (my_small_int:int, my_tiny_int:int);", queryNumber++);
        logAndRegister(server, "b = foreach A generate my_small_int + my_tiny_int as my_small_int, my_tiny_int;", queryNumber++);
        logAndRegister(server, "store b into '" + tblName2 + "' using org.apache.hive.hcatalog.pig.HCatStorer();", queryNumber);
        // perform simple checksum here; make sure nothing got turned to NULL
        AbstractHCatLoaderTest.executeStatementOnDriver("select my_small_int from " + tblName2, driver);
        ArrayList<String> l = new ArrayList<>();
        driver.getResults(l);
        for (String t : l) {
            LOG.debug("t=" + t);
        }
        Assert.assertEquals("Expected 1 row; got '" + l.size() + "'", 1, l.size());
        int result = Integer.parseInt(l.get(0));
        Assert.assertEquals("Expected value '41'; got '" + result + "'", 41, result);
    } finally {
        server.shutdown();
    }
}
Also used: PigServer (org.apache.pig.PigServer), ArrayList (java.util.ArrayList), File (java.io.File), Test (org.junit.Test), HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)
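
The logAndRegister helper used above is also defined elsewhere in the test class. A plausible sketch (the exact logging is an assumption; PigServer.registerQuery(String, int) is a real overload that tags the statement with a line number for error reporting):

private static void logAndRegister(PigServer server, String query, int queryNumber)
        throws IOException {
    // Log the Pig Latin statement, then register it so Pig's error
    // messages can point back at the right query number.
    LOG.info("registering pig query #{}: {}", queryNumber, query);
    server.registerQuery(query, queryNumber);
}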

Example 83 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatLoaderStorer, method testSmallTinyInt.

/**
 * Ensure Pig can read/write tinyint/smallint columns.
 */
@Test
public void testSmallTinyInt() throws Exception {
    String readTblName = "test_small_tiny_int";
    File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData");
    File dataFile = new File(dataDir, "testSmallTinyInt.tsv");
    String writeTblName = "test_small_tiny_int_write";
    File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv");
    // Might not exist
    FileUtil.fullyDelete(dataDir);
    Assert.assertTrue(dataDir.mkdir());
    HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[] { String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) });
    // Create a table with smallint/tinyint columns, load data, and query from Hive.
    driver.run("drop table if exists " + readTblName);
    driver.run("create external table " + readTblName + " (my_small_int smallint, my_tiny_int tinyint)" + " row format delimited fields terminated by '\t' stored as textfile");
    driver.run("load data local inpath '" + dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName);
    PigServer server = HCatBaseTest.createPigServer(false);
    server.registerQuery("data = load '" + readTblName + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    // Ensure Pig schema is correct.
    Schema schema = server.dumpSchema("data");
    Assert.assertEquals(2, schema.getFields().size());
    Assert.assertEquals("my_small_int", schema.getField(0).alias);
    Assert.assertEquals(DataType.INTEGER, schema.getField(0).type);
    Assert.assertEquals("my_tiny_int", schema.getField(1).alias);
    Assert.assertEquals(DataType.INTEGER, schema.getField(1).type);
    // Ensure Pig can read data correctly.
    Iterator<Tuple> it = server.openIterator("data");
    Tuple t = it.next();
    Assert.assertEquals(Integer.valueOf(Short.MIN_VALUE), t.get(0));
    Assert.assertEquals(Integer.valueOf(Byte.MIN_VALUE), t.get(1));
    t = it.next();
    Assert.assertEquals(Integer.valueOf(Short.MAX_VALUE), t.get(0));
    Assert.assertEquals(Integer.valueOf(Byte.MAX_VALUE), t.get(1));
    Assert.assertFalse(it.hasNext());
    // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the
    // bounds of the column type are written, and values outside throw an exception.
    driver.run("drop table if exists " + writeTblName);
    driver.run("create table " + writeTblName + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile");
    // Values within the column type bounds.
    HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[] { String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) });
    smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED);
    // Values outside the column type bounds will fail at runtime.
    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[] { String.format("%d\t%d", Short.MIN_VALUE - 1, 0) });
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);
    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[] { String.format("%d\t%d", Short.MAX_VALUE + 1, 0) });
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED);
    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[] { String.format("%d\t%d", 0, Byte.MIN_VALUE - 1) });
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED);
    HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[] { String.format("%d\t%d", 0, Byte.MAX_VALUE + 1) });
    smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED);
}
Also used: PigServer (org.apache.pig.PigServer), Schema (org.apache.pig.impl.logicalLayer.schema.Schema), File (java.io.File), Tuple (org.apache.pig.data.Tuple), Test (org.junit.Test), HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 84 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatLoaderStorer, method smallTinyIntBoundsCheckHelper.

private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) throws Exception {
    driver.run("drop table if exists test_tbl");
    driver.run("create table test_tbl (my_small_int smallint, my_tiny_int tinyint) stored as rcfile");
    PigServer server = HCatBaseTest.createPigServer(false);
    server.setBatchOn();
    server.registerQuery("data = load '" + data + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);");
    server.registerQuery("store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer('','','-onOutOfRangeValue Throw');");
    List<ExecJob> jobs = server.executeBatch();
    Assert.assertEquals(expectedStatus, jobs.get(0).getStatus());
}
Also used: PigServer (org.apache.pig.PigServer), ExecJob (org.apache.pig.backend.executionengine.ExecJob)
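
The three HCatStorer arguments in this helper are a partition spec ('' for none), an output schema ('' to derive it from the Pig relation), and an options string. '-onOutOfRangeValue Throw' makes the job fail on out-of-range values, which is what the FAILED expectations in the previous example exercise. A sketch of the lenient alternative, which writes NULL instead (reusing the helper's relation and table for illustration):

// Assumption: same 'data' relation and 'test_tbl' table as in the helper above.
server.registerQuery(
    "store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer("
    + "'', '', '-onOutOfRangeValue Null');");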

Example 85 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatHiveCompatibility, method testPartedRead.

@Test
public void testPartedRead() throws Exception {
    driver.run("drop table if exists junit_parted_noisd");
    String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE";
    driver.run(createTable);
    // assert that the table created has no hcat instrumentation, and that we're still able to read it.
    Table table = client.getTable("default", "junit_parted_noisd");
    Assert.assertEquals(HCatConstants.HIVE_RCFILE_IF_CLASS, table.getSd().getInputFormat());
    PigServer server = createPigServer(false);
    logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);");
    logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer('b=42');");
    logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Iterator<Tuple> itr = server.openIterator("B");
    int i = 0;
    while (itr.hasNext()) {
        Tuple t = itr.next();
        // Contains explicit field "a" and partition "b".
        Assert.assertEquals(2, t.size());
        Assert.assertEquals(Integer.valueOf(i), t.get(0));
        Assert.assertEquals("42", t.get(1));
        i++;
    }
    Assert.assertFalse(itr.hasNext());
    Assert.assertEquals(11, i);
    // assert that the table created still has no hcat instrumentation
    Table table2 = client.getTable("default", "junit_parted_noisd");
    Assert.assertEquals(HCatConstants.HIVE_RCFILE_IF_CLASS, table2.getSd().getInputFormat());
    // assert that there is one partition present, and it had hcat instrumentation inserted when it was created.
    Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42"));
    Assert.assertNotNull(ptn);
    Assert.assertEquals(HCatConstants.HIVE_RCFILE_IF_CLASS, ptn.getSd().getInputFormat());
    driver.run("drop table junit_unparted_noisd");
}
Also used: Partition (org.apache.hadoop.hive.metastore.api.Partition), Table (org.apache.hadoop.hive.metastore.api.Table), PigServer (org.apache.pig.PigServer), Tuple (org.apache.pig.data.Tuple), Test (org.junit.Test)
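
Here HCatStorer('b=42') is a static partition spec naming the target partition. For a table partitioned on several keys, the spec is a comma-separated list of key=value pairs; a hypothetical sketch (the table name and second partition key are assumptions, not from the source):

logAndRegister(server, "store A into 'default.junit_parted_multi' using "
    + "org.apache.hive.hcatalog.pig.HCatStorer('b=42,c=2018');");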

Aggregations

PigServer (org.apache.pig.PigServer): 115
Tuple (org.apache.pig.data.Tuple): 74
ArrayList (java.util.ArrayList): 70
Test (org.junit.Test): 59
HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 37
Data (org.apache.pig.builtin.mock.Storage.Data): 15
File (java.io.File): 14
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14
FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9
Properties (java.util.Properties): 8
Vector (java.util.Vector): 8
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6
Path (org.apache.hadoop.fs.Path): 4
FileWriter (java.io.FileWriter): 3
List (java.util.List): 3
Map (java.util.Map): 3
Admin (org.apache.hadoop.hbase.client.Admin): 3
Connection (org.apache.hadoop.hbase.client.Connection): 3
Pair (org.apache.hive.hcatalog.data.Pair): 3
ExecJob (org.apache.pig.backend.executionengine.ExecJob): 3
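
For reference, the mock Storage that appears in 15 of the aggregated examples allows a fully in-memory PigServer round trip with no Hive or filesystem setup. A minimal self-contained sketch using only stock Pig APIs:

import static org.apache.pig.builtin.mock.Storage.resetData;
import static org.apache.pig.builtin.mock.Storage.tuple;

import java.util.List;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.builtin.mock.Storage.Data;
import org.apache.pig.data.Tuple;

public class MockStorageExample {
    public static void main(String[] args) throws Exception {
        PigServer server = new PigServer(ExecType.LOCAL);
        // The mock Storage keeps "files" in memory, keyed by location string.
        Data data = resetData(server);
        data.set("input", tuple(1, 2), tuple(3, 4));
        server.registerQuery("A = load 'input' using mock.Storage() as (x:int, y:int);");
        // Outside batch mode, a registered store statement executes immediately.
        server.registerQuery("store A into 'output' using mock.Storage();");
        List<Tuple> out = data.get("output");
        System.out.println(out);  // [(1,2), (3,4)]
        server.shutdown();
    }
}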