
Example 11 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

From the class TestCompactor, method minorCompactWhileStreaming.

@Test
public void minorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setConf(conf);
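        // With stop already set to true, the Worker performs a single compaction pass and returns.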
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, 1);
    } finally {
        connection.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), FileStatus (org.apache.hadoop.fs.FileStatus), StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), FileSystem (org.apache.hadoop.fs.FileSystem), DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter), TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore), CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest), Test (org.junit.Test)
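
The directory names asserted above follow the ACID delta naming scheme, delta_<min>_<max>: each closed streaming batch lands in its own delta, minor compaction merges transactions 1 through 4 into delta_0000001_0000004, and the still-open batch (5..6) stays untouched. As a minimal, dependency-free sketch of decoding that naming convention (the DeltaNameDecoder class and its helper are illustrative, not part of the Hive API):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DeltaNameDecoder {

    // Matches directory names such as "delta_0000001_0000004".
    private static final Pattern DELTA = Pattern.compile("delta_(\\d+)_(\\d+)");

    // Returns {min, max} transaction ids for a delta directory name, or null if it does not match.
    static long[] decode(String dirName) {
        Matcher m = DELTA.matcher(dirName);
        if (!m.matches()) {
            return null;
        }
        return new long[] { Long.parseLong(m.group(1)), Long.parseLong(m.group(2)) };
    }

    public static void main(String[] args) {
        long[] range = decode("delta_0000001_0000004");
        // Minor compaction of deltas 1..4 yields one directory covering the whole range.
        System.out.println("min=" + range[0] + ", max=" + range[1]);
    }
}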

Example 12 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

From the class TestCompactor, method testMinorCompactionForSplitUpdateWithOnlyInserts.

@Test
public void testMinorCompactionForSplitUpdateWithOnlyInserts() throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    // Insert some data -> this will generate only insert deltas and no delete deltas: delta_1_1
    executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
    // Insert some data -> this will again generate only insert deltas and no delete deltas: delta_2_2
    executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(2, 'bar')", driver);
    // Now, compact
    // One important thing to note in this test is that minor compaction always produces
    // a delta_x_y and a counterpart delete_delta_x_y, even when there are no delete events.
    // This choice was made to simplify the processing in AcidUtils.getAcidState().
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setConf(conf);
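    // As above, stop=true makes the Worker run a single compaction pass and return.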
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    // Verify that we have got correct set of deltas.
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
    String[] deltas = new String[stat.length];
    Path minorCompactedDelta = null;
    for (int i = 0; i < deltas.length; i++) {
        deltas[i] = stat[i].getPath().getName();
        if (deltas[i].equals("delta_0000001_0000002")) {
            minorCompactedDelta = stat[i].getPath();
        }
    }
    Arrays.sort(deltas);
    String[] expectedDeltas = new String[] { "delta_0000001_0000001_0000", "delta_0000001_0000002", "delta_0000002_0000002_0000" };
    if (!Arrays.deepEquals(expectedDeltas, deltas)) {
        Assert.fail("Expected: " + Arrays.toString(expectedDeltas) + ", found: " + Arrays.toString(deltas));
    }
    checkExpectedTxnsPresent(null, new Path[] { minorCompactedDelta }, columnNamesProperty, columnTypesProperty, 0, 1L, 2L, 1);
    // Verify that we have got correct set of delete_deltas.
    FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
    String[] deleteDeltas = new String[deleteDeltaStat.length];
    Path minorCompactedDeleteDelta = null;
    for (int i = 0; i < deleteDeltas.length; i++) {
        deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
        if (deleteDeltas[i].equals("delete_delta_0000001_0000002")) {
            minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
        }
    }
    Arrays.sort(deleteDeltas);
    String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000002" };
    if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
        Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
    }
    // There should be no rows in the delete_delta because there have been no delete events.
    checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 0L, 0L, 1);
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), FileStatus (org.apache.hadoop.fs.FileStatus), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), FileSystem (org.apache.hadoop.fs.FileSystem), TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore), CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest), Test (org.junit.Test)
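
To make the insert/delete split visible outside the test, here is a minimal sketch that lists both kinds of delta directories with hand-rolled PathFilters. Real code should prefer AcidUtils.deltaFileFilter and AcidUtils.deleteEventDeltaDirFilter as the test does; the table location below is illustrative:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListAcidDirs {

    // Hand-rolled filters for illustration only. Note that "delete_delta_..." does
    // not start with "delta_", so the two filters are disjoint.
    static final PathFilter INSERT_DELTAS = p -> p.getName().startsWith("delta_");
    static final PathFilter DELETE_DELTAS = p -> p.getName().startsWith("delete_delta_");

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Illustrative location; in the test it comes from table.getSd().getLocation().
        Path tableDir = new Path("/user/hive/warehouse/cws");
        FileSystem fs = FileSystem.get(conf);
        for (FileStatus s : fs.listStatus(tableDir, INSERT_DELTAS)) {
            System.out.println("insert delta: " + s.getPath().getName());
        }
        for (FileStatus s : fs.listStatus(tableDir, DELETE_DELTAS)) {
            System.out.println("delete delta: " + s.getPath().getName());
        }
    }
}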

Example 13 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

From the class TestCompactor, method setup.

@Before
public void setup() throws Exception {
    File f = new File(TEST_WAREHOUSE_DIR);
    if (f.exists()) {
        FileUtil.fullyDelete(f);
    }
    if (!f.mkdirs()) {
        throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR);
    }
    HiveConf hiveConf = new HiveConf(this.getClass());
    hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, "");
    hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, "");
    hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR);
    hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName());
    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
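    // Point the transaction system at the embedded test database, then drop and
    // re-create its tables so every test starts from a clean slate.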
    TxnDbUtil.setConfValues(hiveConf);
    TxnDbUtil.cleanDb(hiveConf);
    TxnDbUtil.prepDb(hiveConf);
    conf = hiveConf;
    msClient = new HiveMetaStoreClient(conf);
    driver = DriverFactory.newDriver(hiveConf);
    SessionState.start(new CliSessionState(hiveConf));
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
        String si = i + "";
        for (int j = 1; j <= LOOP_SIZE; j++) {
            String sj = "S" + j + "S";
            input[k] = si + "\t" + sj;
            k++;
        }
    }
    createTestDataFile(BASIC_FILE_NAME, input);
}
Also used: HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), HiveConf (org.apache.hadoop.hive.conf.HiveConf), OrcFile (org.apache.hadoop.hive.ql.io.orc.OrcFile), File (java.io.File), CliSessionState (org.apache.hadoop.hive.cli.CliSessionState), HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint), Before (org.junit.Before)
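
Once setup() has run, the msClient field can be used like any other HiveMetaStoreClient. A minimal, self-contained sketch of that usage, separate from the test (the class name and printed message are illustrative):

import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;

public class MetastoreSmokeCheck {

    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        IMetaStoreClient client = new HiveMetaStoreClient(conf);
        try {
            // The "default" database always exists, so this doubles as a connectivity check.
            List<String> tables = client.getAllTables("default");
            System.out.println("tables in default: " + tables);
        } finally {
            client.close();
        }
    }
}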

Example 14 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

From the class WarehouseInstance, method initialize.

private void initialize(String cmRoot, String warehouseRoot, Map<String, String> overridesForHiveConf) throws Exception {
    hiveConf = new HiveConf(miniDFSCluster.getConfiguration(0), TestReplicationScenarios.class);
    for (Map.Entry<String, String> entry : overridesForHiveConf.entrySet()) {
        hiveConf.set(entry.getKey(), entry.getValue());
    }
    String metaStoreUri = System.getProperty("test." + HiveConf.ConfVars.METASTOREURIS.varname);
    String hiveWarehouseLocation = System.getProperty("test.warehouse.dir", "/tmp") + Path.SEPARATOR + TestReplicationScenarios.class.getCanonicalName().replace('.', '_') + "_" + System.nanoTime();
    if (metaStoreUri != null) {
        hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreUri);
        return;
    }
    // hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_IN_TEST, hiveInTest);
    // turn on db notification listener on meta store
    hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, warehouseRoot);
    hiveConf.setVar(HiveConf.ConfVars.METASTORE_TRANSACTIONAL_EVENT_LISTENERS, LISTENER_CLASS);
    hiveConf.setBoolVar(HiveConf.ConfVars.REPLCMENABLED, true);
    hiveConf.setBoolVar(HiveConf.ConfVars.FIRE_EVENTS_FOR_DML, true);
    hiveConf.setVar(HiveConf.ConfVars.REPLCMDIR, cmRoot);
    hiveConf.setVar(HiveConf.ConfVars.REPL_FUNCTIONS_ROOT_DIR, functionsRoot);
    hiveConf.setVar(HiveConf.ConfVars.METASTORECONNECTURLKEY, "jdbc:derby:memory:${test.tmp.dir}/APP;create=true");
    hiveConf.setVar(HiveConf.ConfVars.REPLDIR, hiveWarehouseLocation + "/hrepl" + uniqueIdentifier + "/");
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
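    // Bring up a Thrift metastore service for this warehouse instance, retrying if startup fails.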
    MetaStoreTestUtils.startMetaStoreWithRetry(hiveConf);
    Path testPath = new Path(hiveWarehouseLocation);
    FileSystem testPathFileSystem = FileSystem.get(testPath.toUri(), hiveConf);
    testPathFileSystem.mkdirs(testPath);
    driver = DriverFactory.newDriver(hiveConf);
    SessionState.start(new CliSessionState(hiveConf));
    client = new HiveMetaStoreClient(hiveConf);
    // change the value for the next instance.
    ++uniqueIdentifier;
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), FileSystem (org.apache.hadoop.fs.FileSystem), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), HiveConf (org.apache.hadoop.hive.conf.HiveConf), Map (java.util.Map), CliSessionState (org.apache.hadoop.hive.cli.CliSessionState)
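
The early return in initialize() is what switches WarehouseInstance between a remote and an embedded metastore: when hive.metastore.uris is set, HiveMetaStoreClient connects to that Thrift service; otherwise the client runs against the local, JDBC-backed store configured later in the method. A minimal sketch of the same decision (the property name and URI below are illustrative assumptions, not taken from the test):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;

public class RemoteOrEmbeddedClient {

    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        // Hypothetical property; WarehouseInstance reads "test." + METASTOREURIS.varname instead.
        String uri = System.getProperty("example.metastore.uri");
        if (uri != null) {
            // e.g. thrift://localhost:9083 -> talk to a running metastore service.
            conf.setVar(HiveConf.ConfVars.METASTOREURIS, uri);
        }
        // With no URI configured, the client falls back to an embedded metastore.
        HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
        try {
            System.out.println("databases: " + client.getAllDatabases());
        } finally {
            client.close();
        }
    }
}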

Example 15 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

From the class StorageBasedMetastoreTestBase, method setUp.

@Before
public void setUp() throws Exception {
    // Turn on metastore-side authorization
    System.setProperty(HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS.varname, AuthorizationPreEventListener.class.getName());
    System.setProperty(HiveConf.ConfVars.HIVE_METASTORE_AUTHORIZATION_MANAGER.varname, getAuthorizationProvider());
    System.setProperty(HiveConf.ConfVars.HIVE_METASTORE_AUTHENTICATOR_MANAGER.varname, InjectableDummyAuthenticator.class.getName());
    clientHiveConf = createHiveConf();
    MetaStoreTestUtils.startMetaStoreWithRetry(clientHiveConf);
    // Turn off client-side authorization
    clientHiveConf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, false);
    clientHiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    clientHiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    ugi = Utils.getUGI();
    SessionState.start(new CliSessionState(clientHiveConf));
    msc = new HiveMetaStoreClient(clientHiveConf);
    driver = DriverFactory.newDriver(clientHiveConf);
    setupFakeUser();
    InjectableDummyAuthenticator.injectMode(false);
}
Also used: HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), AuthorizationPreEventListener (org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener), CliSessionState (org.apache.hadoop.hive.cli.CliSessionState), Before (org.junit.Before)
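
The three system properties at the top of setUp() are what move authorization into the metastore: a pre-event listener that fires before every metastore operation, an authorization manager that makes the allow/deny decision, and an authenticator that supplies the user identity. A minimal sketch of that wiring using the storage-based provider; since the concrete test chooses its provider via getAuthorizationProvider(), the provider class below is an assumption:

import org.apache.hadoop.hive.conf.HiveConf;

public class MetastoreSideAuthConfig {

    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Run authorization checks before each metastore event (create table, drop, ...).
        conf.setVar(HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS,
            "org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener");
        // Assumed provider: derives allow/deny from file-system permissions on the storage.
        conf.setVar(HiveConf.ConfVars.HIVE_METASTORE_AUTHORIZATION_MANAGER,
            "org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider");
        System.out.println(conf.getVar(HiveConf.ConfVars.HIVE_METASTORE_AUTHORIZATION_MANAGER));
    }
}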

Aggregations

HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 45 usages
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 20 usages
Path (org.apache.hadoop.fs.Path): 14 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 14 usages
CliSessionState (org.apache.hadoop.hive.cli.CliSessionState): 12 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 11 usages
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 11 usages
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint): 9 usages
Test (org.junit.Test): 9 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 8 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 8 usages
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 8 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 8 usages
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 8 usages
File (java.io.File): 7 usages
WindowingException (com.sap.hadoop.windowing.WindowingException): 6 usages
DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter): 6 usages
StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection): 6 usages
Before (org.junit.Before): 6 usages
BeforeClass (org.junit.BeforeClass): 5 usages