
Example 1 with ReplLoadWork

Use of org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork in project hive by apache, in class TestReplicationScenarios, method getReplLoadRootTask.

private Task getReplLoadRootTask(String sourceDb, String replicadb, boolean isIncrementalDump, Tuple tuple) throws Throwable {
    HiveConf confTemp = driverMirror.getConf();
    Path loadPath = new Path(tuple.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    ReplicationMetricCollector metricCollector;
    if (isIncrementalDump) {
        metricCollector = new IncrementalLoadMetricCollector(replicadb, tuple.dumpLocation, 0, confTemp);
    } else {
        metricCollector = new BootstrapLoadMetricCollector(replicadb, tuple.dumpLocation, 0, confTemp);
    }
    /* When 'hive.repl.retain.custom.db.locations.on.target' is enabled, the first iteration of REPL LOAD runs
       only the database-creation task; the remaining tasks execute only on the next iteration of the Repl Load
       task. Disable it here so the test can exercise task optimization in a single pass. */
    confTemp.setBoolVar(HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET, false);
    ReplLoadWork replLoadWork = new ReplLoadWork(confTemp, loadPath.toString(), sourceDb, replicadb,
            null, null, isIncrementalDump, Long.valueOf(tuple.lastReplId), 0L, metricCollector, false);
    Task replLoadTask = TaskFactory.get(replLoadWork, confTemp);
    replLoadTask.initialize(null, null, new TaskQueue(driver.getContext()), driver.getContext());
    replLoadTask.executeTask(null);
    Hive.closeCurrent();
    return replLoadWork.getRootTask();
}
Also used : Path(org.apache.hadoop.fs.Path) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) ReplLoadWork(org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork) TaskQueue(org.apache.hadoop.hive.ql.TaskQueue) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ReplicationMetricCollector(org.apache.hadoop.hive.ql.parse.repl.metric.ReplicationMetricCollector) IncrementalLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector) BootstrapLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.BootstrapLoadMetricCollector)
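
A usage sketch for this helper: the test method below is hypothetical (not from TestReplicationScenarios), replDumpDb is an assumed dump helper in the same class, and the assertion target is illustrative only.

// Hypothetical caller: dump a source db, build the load plan for it, and
// check the type of the plan's root task. Names below are stand-ins.
@Test
public void testBootstrapLoadRootTask() throws Throwable {
    Tuple bootstrapDump = replDumpDb("srcdb");   // assumed dump helper in the test class
    Task rootTask = getReplLoadRootTask("srcdb", "tgtdb", false, bootstrapDump);
    // Illustrative assertion: a bootstrap load plan is expected to start with
    // a DDL task that creates the target database.
    Assert.assertTrue(rootTask instanceof DDLTask);
}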

Example 2 with ReplLoadWork

Use of org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork in project hive by apache, in class TestReplicationMetricUpdateOnFailure, method testReplLoadRecoverableMissingStage.

@Test
public void testReplLoadRecoverableMissingStage() throws Exception {
    String dumpDir = TEST_PATH + Path.SEPARATOR + testName.getMethodName();
    MetricCollector.getInstance().deinit();
    BootstrapLoadMetricCollector metricCollector = new BootstrapLoadMetricCollector(null, TEST_PATH, 0, conf);
    ReplLoadWork replLoadWork = Mockito.mock(ReplLoadWork.class);
    Mockito.when(replLoadWork.getTargetDatabase()).thenReturn("dummy");
    Mockito.when(replLoadWork.getDumpDirectory()).thenReturn(new Path(dumpDir + Path.SEPARATOR + "test").toString());
    Mockito.when(replLoadWork.getMetricCollector()).thenReturn(metricCollector);
    Mockito.when(replLoadWork.getRootTask()).thenThrow(recoverableException);
    Task replLoadTask = TaskFactory.get(replLoadWork, conf);
    // ensure no stages have been reported yet, then execute without reporting start metrics
    Assert.assertEquals(0, MetricCollector.getInstance().getMetrics().size());
    Assert.assertThrows(RuntimeException.class, () -> replLoadTask.execute());
    performRecoverableChecks("REPL_LOAD");
}
Also used : Path(org.apache.hadoop.fs.Path) Task(org.apache.hadoop.hive.ql.exec.Task) ReplLoadWork(org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork) BootstrapLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.BootstrapLoadMetricCollector) Test(org.junit.Test)
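
The recoverableException fixture and the performRecoverableChecks helper are defined elsewhere in the test class and not shown here. A plausible shape for the two exception fixtures, stated as an assumption: a plain RuntimeException is treated as recoverable, while one carrying the non-recoverable error message (the same ErrorMsg used in Example 4) is not.

// Assumed fixture shapes, not copied from the original class. The distinction
// that matters to the checks: a non-recoverable failure is expected to mark
// the dump directory as failed, a recoverable one is not.
RuntimeException recoverableException = new RuntimeException("transient failure");
RuntimeException nonRecoverableException =
        new RuntimeException(ErrorMsg.REPL_FAILED_WITH_NON_RECOVERABLE_ERROR.getMsg());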

Example 3 with ReplLoadWork

Use of org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork in project hive by apache, in class TestReplicationMetricUpdateOnFailure, method testReplLoadFailure.

@Test
public void testReplLoadFailure() throws Exception {
    String dumpDir = TEST_PATH + Path.SEPARATOR + testName.getMethodName();
    MetricCollector.getInstance().deinit();
    IncrementalLoadMetricCollector metricCollector = new IncrementalLoadMetricCollector(null, TEST_PATH, 0, conf);
    ReplLoadWork replLoadWork = Mockito.mock(ReplLoadWork.class);
    Mockito.when(replLoadWork.getTargetDatabase()).thenReturn("dummy");
    Mockito.when(replLoadWork.getDumpDirectory()).thenReturn(new Path(dumpDir + Path.SEPARATOR + "test").toString());
    Mockito.when(replLoadWork.getMetricCollector()).thenReturn(metricCollector);
    Mockito.when(replLoadWork.getRootTask()).thenThrow(recoverableException, nonRecoverableException);
    Task replLoadTask = TaskFactory.get(replLoadWork, conf);
    String stageName = "REPL_LOAD";
    metricCollector.reportStageStart(stageName, new HashMap<>());
    Assert.assertThrows(RuntimeException.class, () -> replLoadTask.execute());
    performRecoverableChecks(stageName);
    metricCollector.reportStageStart(stageName, new HashMap<>());
    Assert.assertThrows(RuntimeException.class, () -> replLoadTask.execute());
    performNonRecoverableChecks(dumpDir, stageName);
}
Also used : Path(org.apache.hadoop.fs.Path) Task(org.apache.hadoop.hive.ql.exec.Task) ReplLoadWork(org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork) IncrementalLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector) Test(org.junit.Test)
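
Note the two-argument thenThrow: Mockito stubs consecutive calls, so the first replLoadTask.execute() hits the recoverable exception and the second hits the non-recoverable one, which is why the test can execute the same mocked task twice. A minimal, Hive-independent demonstration of that Mockito behavior:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Iterator;

// Standalone demo of consecutive-throw stubbing: the first stubbed call throws
// the first exception; every later call throws the last one given.
public class ConsecutiveThrowDemo {
    public static void main(String[] args) {
        Iterator<?> mocked = mock(Iterator.class);
        when(mocked.next()).thenThrow(
                new IllegalStateException("first call"),
                new UnsupportedOperationException("every call after the first"));
        try { mocked.next(); } catch (IllegalStateException e) {
            System.out.println("1st: " + e.getMessage());
        }
        try { mocked.next(); } catch (UnsupportedOperationException e) {
            System.out.println("2nd: " + e.getMessage());
        }
    }
}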

Example 4 with ReplLoadWork

Use of org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork in project hive by apache, in class ReplicationSemanticAnalyzer, method analyzeReplLoad.

/*
   * Example dump dirs we need to be able to handle :
   *
   * for: hive.repl.rootdir = staging/
   * Then, repl dumps will be created in staging/<dumpdir>
   *
   * single-db-dump: staging/blah12345 will contain a db dir for the db specified
   *  blah12345/
   *   default/
   *    _metadata
   *    tbl1/
   *      _metadata
   *      dt=20160907/
   *        _files
   *    tbl2/
   *    tbl3/
   *    unptn_tbl/
   *      _metadata
   *      _files
   *
   * multi-db-dump: staging/bar12347 will contain dirs for each db covered
   * staging/
   *  bar12347/
   *   default/
   *     ...
   *   sales/
   *     ...
   *
   * single table-dump: staging/baz123 will contain a table object dump inside
   * staging/
   *  baz123/
   *    _metadata
   *    dt=20150931/
   *      _files
   *
   * incremental dump : staging/blue123 will contain dirs for each event inside.
   * staging/
   *  blue123/
   *    34/
   *    35/
   *    36/
   */
private void analyzeReplLoad(ASTNode ast) throws SemanticException {
    try {
        initReplLoad(ast);
    } catch (HiveException e) {
        throw new SemanticException(e);
    }
    try {
        assert (sourceDbNameOrPattern != null);
        Path loadPath = getCurrentLoadPath();
        // If the target's repl status is ahead of the dump, do nothing: the load for the latest dump is already done
        if (ReplUtils.failedWithNonRecoverableError(ReplUtils.getLatestDumpPath(ReplUtils.getEncodedDumpRootPath(conf, sourceDbNameOrPattern.toLowerCase()), conf), conf)) {
            throw new Exception(ErrorMsg.REPL_FAILED_WITH_NON_RECOVERABLE_ERROR.getMsg());
        }
        if (loadPath != null) {
            DumpMetaData dmd = new DumpMetaData(loadPath, conf);
            boolean evDump = false;
            // We also decide here which HDFS locations need to be copied over.
            if (dmd.isIncrementalDump()) {
                LOG.debug("{} contains an incremental dump", loadPath);
                evDump = true;
            } else {
                LOG.debug("{} contains an bootstrap dump", loadPath);
            }
            ReplLoadWork replLoadWork = new ReplLoadWork(conf, loadPath.toString(), sourceDbNameOrPattern,
                    replScope.getDbName(), dmd.getReplScope(), queryState.getLineageState(), evDump,
                    dmd.getEventTo(), dmd.getDumpExecutionId(),
                    initMetricCollection(!evDump, loadPath.toString(), replScope.getDbName(), dmd.getDumpExecutionId()),
                    dmd.isReplScopeModified());
            rootTasks.add(TaskFactory.get(replLoadWork, conf));
        } else {
            LOG.warn("Previous Dump Already Loaded");
        }
    } catch (Exception e) {
        // TODO : simple wrap & rethrow for now, clean up with error codes
        throw new SemanticException(e.getMessage(), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) ReplLoadWork(org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork) IOException(java.io.IOException)
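
As a minimal sketch of the branch above, here is how a caller might inspect a dump directory before loading it. The dump location is hypothetical; only DumpMetaData accessors already used in analyzeReplLoad appear.

// Hedged sketch: decide between bootstrap and incremental handling of a dump.
// '/staging/blue123/hive' is a made-up location; 'conf' is an existing HiveConf.
Path loadPath = new Path("/staging/blue123/hive");
DumpMetaData dmd = new DumpMetaData(loadPath, conf);
if (dmd.isIncrementalDump()) {
    // Incremental dumps carry per-event subdirectories; getEventTo() is the
    // last event id covered by this dump.
    LOG.debug("{} is an incremental dump up to event {}", loadPath, dmd.getEventTo());
} else {
    LOG.debug("{} is a bootstrap dump with execution id {}", loadPath, dmd.getDumpExecutionId());
}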

Example 5 with ReplLoadWork

Use of org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork in project hive by apache, in class TestReplicationMetricUpdateOnFailure, method testReplLoadNonRecoverableMissingStage.

@Test
public void testReplLoadNonRecoverableMissingStage() throws Exception {
    String dumpDir = TEST_PATH + Path.SEPARATOR + testName.getMethodName();
    MetricCollector.getInstance().deinit();
    IncrementalLoadMetricCollector metricCollector = new IncrementalLoadMetricCollector(null, TEST_PATH, 0, conf);
    ReplLoadWork replLoadWork = Mockito.mock(ReplLoadWork.class);
    Mockito.when(replLoadWork.getTargetDatabase()).thenReturn("dummy");
    Mockito.when(replLoadWork.getDumpDirectory()).thenReturn(new Path(dumpDir + Path.SEPARATOR + "test").toString());
    Mockito.when(replLoadWork.getMetricCollector()).thenReturn(metricCollector);
    Mockito.when(replLoadWork.getRootTask()).thenThrow(nonRecoverableException);
    Task replLoadTask = TaskFactory.get(replLoadWork, conf);
    // ensure no stages have been reported yet, then execute without reporting start metrics
    Assert.assertEquals(0, MetricCollector.getInstance().getMetrics().size());
    Assert.assertThrows(RuntimeException.class, () -> replLoadTask.execute());
    performNonRecoverableChecks(dumpDir, "REPL_LOAD");
}
Also used : Path(org.apache.hadoop.fs.Path) Task(org.apache.hadoop.hive.ql.exec.Task) ReplLoadWork(org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork) IncrementalLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector) Test(org.junit.Test)
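
All three failure tests start with MetricCollector.getInstance().deinit(). Since MetricCollector is a process-wide singleton, metrics left over from an earlier test could otherwise satisfy, or break, the size() == 0 assertion. A refactoring sketch (not code from the test class) that factors the reset into shared setup:

// Possible shared setup: reset the process-wide MetricCollector so each test
// starts from an empty metrics set.
@Before
public void resetMetricCollector() {
    MetricCollector.getInstance().deinit();
    Assert.assertEquals(0, MetricCollector.getInstance().getMetrics().size());
}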

Aggregations

Path (org.apache.hadoop.fs.Path): 5
ReplLoadWork (org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork): 5
Task (org.apache.hadoop.hive.ql.exec.Task): 4
IncrementalLoadMetricCollector (org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector): 3
Test (org.junit.Test): 3
BootstrapLoadMetricCollector (org.apache.hadoop.hive.ql.parse.repl.load.metric.BootstrapLoadMetricCollector): 2
IOException (java.io.IOException): 1
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 1
TaskQueue (org.apache.hadoop.hive.ql.TaskQueue): 1
DDLTask (org.apache.hadoop.hive.ql.ddl.DDLTask): 1
MoveTask (org.apache.hadoop.hive.ql.exec.MoveTask): 1
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1
DumpMetaData (org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData): 1
ReplicationMetricCollector (org.apache.hadoop.hive.ql.parse.repl.metric.ReplicationMetricCollector): 1