Example 6 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatHiveThriftCompatibility, method testDynamicCols:

/**
 *  Create a table with no explicit schema and ensure it is correctly
 *  discovered from the Thrift struct.
 */
@Test
public void testDynamicCols() throws Exception {
    Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode());
    Assert.assertEquals(0, driver.run("create external table test_thrift " + "partitioned by (year string) " + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + "with serdeproperties ( " + "  'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + "  'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + "stored as" + "  inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + "  outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'").getResponseCode());
    Assert.assertEquals(0, driver.run("alter table test_thrift add partition (year = '2012') location '" + intStringSeq.getParent() + "'").getResponseCode());
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.registerQuery("A = load 'test_thrift' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema expectedSchema = new Schema();
    expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER));
    expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY));
    expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER));
    expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY));
    Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A"));
    Iterator<Tuple> iterator = pigServer.openIterator("A");
    Tuple t = iterator.next();
    Assert.assertEquals(1, t.get(0));
    Assert.assertEquals("one", t.get(1));
    Assert.assertEquals(1, t.get(2));
    Assert.assertEquals("2012", t.get(3));
    Assert.assertFalse(iterator.hasNext());
}
Also used: PigServer(org.apache.pig.PigServer) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test)
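
As a side note, the expected schema can also be parsed from a schema string instead of being assembled field by field. A minimal sketch using Pig's Utils.getSchemaFromString helper (an alternative not used in the original test):

// Equivalent expected schema, parsed from a schema string. Illustrative
// alternative only; getSchemaFromString throws ParserException on bad input.
Schema expectedSchema = org.apache.pig.impl.util.Utils.getSchemaFromString(
    "myint: int, mystring: chararray, underscore_int: int, year: chararray");
Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A"));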

Example 7 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestSequenceFileReadWrite, method setup:

@Before
public void setup() throws Exception {
    dataDir = new File(System.getProperty("java.io.tmpdir") + File.separator + TestSequenceFileReadWrite.class.getCanonicalName() + "-" + System.currentTimeMillis());
    hiveConf = new HiveConf(this.getClass());
    warehouseDir = HCatUtil.makePathASafeFileName(dataDir + File.separator + "warehouse");
    inputFileName = HCatUtil.makePathASafeFileName(dataDir + File.separator + "input.data");
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehouseDir);
    hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    driver = DriverFactory.newDriver(hiveConf);
    SessionState.start(new CliSessionState(hiveConf));
    if (!(new File(warehouseDir).mkdirs())) {
        throw new RuntimeException("Could not create " + warehouseDir);
    }
    int numRows = 3;
    input = new String[numRows];
    for (int i = 0; i < numRows; i++) {
        String col1 = "a" + i;
        String col2 = "b" + i;
        input[i] = i + "," + col1 + "," + col2;
    }
    HcatTestUtils.createTestDataFile(inputFileName, input);
    server = new PigServer(ExecType.LOCAL);
}
Also used: PigServer(org.apache.pig.PigServer) HiveConf(org.apache.hadoop.hive.conf.HiveConf) File(java.io.File) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) Before(org.junit.Before)
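
Since setup creates a fresh per-run directory under java.io.tmpdir, a matching teardown keeps test runs from leaking temp data. A minimal sketch (not shown in the snippet above), reusing Hadoop's FileUtil.fullyDelete:

@After
public void teardown() throws Exception {
    if (dataDir != null) {
        // Recursively remove the per-run temp directory created in setup().
        org.apache.hadoop.fs.FileUtil.fullyDelete(dataDir);
    }
}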

Example 8 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatLoaderEncryption, method setup:

@Before
public void setup() throws Exception {
    File f = new File(TEST_WAREHOUSE_DIR);
    if (f.exists()) {
        FileUtil.fullyDelete(f);
    }
    if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) {
        throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR);
    }
    HiveConf hiveConf = new HiveConf(this.getClass());
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR);
    hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    String s = hiveConf.get("hdfs.minidfs.basedir");
    if (s == null || s.length() <= 0) {
        // No minidfs base dir configured; fall back to a unique path under test.build.data.
        hiveConf.set("hdfs.minidfs.basedir", System.getProperty("test.build.data", "build/test/data") + "_" + System.currentTimeMillis() + "_" + salt.getAndIncrement() + "/dfs/");
    }
    initEncryptionShim(hiveConf);
    String encryptedTablePath = TEST_WAREHOUSE_DIR + "/encryptedTable";
    SessionState.start(new CliSessionState(hiveConf));
    driver = DriverFactory.newDriver(hiveConf);
    SessionState.get().out = System.out;
    createTable(BASIC_TABLE, "a int, b string");
    createTableInSpecifiedPath(ENCRYPTED_TABLE, "a int, b string", encryptedTablePath, driver);
    associateEncryptionZoneWithPath(encryptedTablePath);
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    basicInputData = new HashMap<Integer, Pair<Integer, String>>();
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
        String si = i + "";
        for (int j = 1; j <= LOOP_SIZE; j++) {
            String sj = "S" + j + "S";
            input[k] = si + "\t" + sj;
            basicInputData.put(k, new Pair<Integer, String>(i, sj));
            k++;
        }
    }
    HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    int i = 0;
    server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++i);
    server.registerQuery("store A into '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
    server.executeBatch();
}
Also used: AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PigServer(org.apache.pig.PigServer) HiveConf(org.apache.hadoop.hive.conf.HiveConf) File(java.io.File) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) Pair(org.apache.hive.hcatalog.data.Pair) Before(org.junit.Before)
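
To confirm the batch actually wrote into the encryption zone, the rows can be read back through HCatLoader, mirroring the load pattern of the earlier examples. A minimal sketch, assuming it runs after the setup above completes:

// Read the encrypted table back via HCatLoader and count the stored rows.
PigServer reader = new PigServer(ExecType.LOCAL);
reader.registerQuery("E = load '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
Iterator<Tuple> it = reader.openIterator("E");
int rows = 0;
while (it.hasNext()) {
    it.next();
    rows++;
}
Assert.assertEquals(basicInputData.size(), rows);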

Example 9 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatStorerMulti, method testStoreBasicTable:

@Test
public void testStoreBasicTable() throws Exception {
    assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
    createTable(BASIC_TABLE, "a int, b string");
    populateBasicFile();
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
    server.executeBatch();
    driver.run("select * from " + BASIC_TABLE);
    ArrayList<String> unpartitionedTableValuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(unpartitionedTableValuesReadFromHiveDriver);
    assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size());
}
Also used: PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test)
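
The assertion above only compares row counts. A light extra check on the shape of the returned rows is possible, assuming the driver returns each row as a tab-separated string (Hive's default result formatting):

// Hypothetical shape check: each row returned by the Hive driver should
// carry one field per column of BASIC_TABLE (a, b).
for (String row : unpartitionedTableValuesReadFromHiveDriver) {
    assertEquals(2, row.split("\t").length);
}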

Example 10 with PigServer

Use of org.apache.pig.PigServer in project hive by apache.

From the class TestHCatStorerMulti, method testStoreTableMulti:

@Test
public void testStoreTableMulti() throws Exception {
    assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
    createTable(BASIC_TABLE, "a int, b string");
    createTable(PARTITIONED_TABLE, "a int, b string", "bkt string");
    populateBasicFile();
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
    server.registerQuery("B2 = filter A by a < 2;");
    server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');");
    server.registerQuery("C2 = filter A by a >= 2;");
    server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');");
    server.executeBatch();
    driver.run("select * from " + BASIC_TABLE);
    ArrayList<String> unpartitionedTableValuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(unpartitionedTableValuesReadFromHiveDriver);
    driver.run("select * from " + PARTITIONED_TABLE);
    ArrayList<String> partitionedTableValuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(partitionedTableValuesReadFromHiveDriver);
    assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size());
    assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size());
}
Also used: PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Test(org.junit.Test)
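
Since the two HCatStorer calls write disjoint filters into bkt=0 and bkt=1, a per-partition query can confirm that the split covers the whole input. A minimal sketch along the same lines as the test:

// Verify the filter split: together, the two partitions hold every input row.
driver.run("select * from " + PARTITIONED_TABLE + " where bkt = '0'");
ArrayList<String> bkt0Values = new ArrayList<String>();
driver.getResults(bkt0Values);
driver.run("select * from " + PARTITIONED_TABLE + " where bkt = '1'");
ArrayList<String> bkt1Values = new ArrayList<String>();
driver.getResults(bkt1Values);
assertEquals(basicInputData.size(), bkt0Values.size() + bkt1Values.size());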

Aggregations

PigServer (org.apache.pig.PigServer): 114 usages
Tuple (org.apache.pig.data.Tuple): 74 usages
ArrayList (java.util.ArrayList): 68 usages
Test (org.junit.Test): 57 usages
HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 28 usages
File (java.io.File): 16 usages
Data (org.apache.pig.builtin.mock.Storage.Data): 15 usages
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14 usages
FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9 usages
Properties (java.util.Properties): 8 usages
Vector (java.util.Vector): 8 usages
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6 usages
CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse): 5 usages
FileWriter (java.io.FileWriter): 3 usages
IOException (java.io.IOException): 3 usages
List (java.util.List): 3 usages
Map (java.util.Map): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
Admin (org.apache.hadoop.hbase.client.Admin): 3 usages
Connection (org.apache.hadoop.hbase.client.Connection): 3 usages