
Example 1 with NotificationEvent

Use of org.apache.hadoop.hive.metastore.api.NotificationEvent in project hive by apache.

From class ReplicationSemanticAnalyzer, method analyzeReplDump:

// REPL DUMP
private void analyzeReplDump(ASTNode ast) throws SemanticException {
    LOG.debug("ReplicationSemanticAnalyzer.analyzeReplDump: " + String.valueOf(dbNameOrPattern) + "." + String.valueOf(tblNameOrPattern) + " from " + String.valueOf(eventFrom) + " to " + String.valueOf(eventTo) + " maxEventLimit " + String.valueOf(maxEventLimit));
    String replRoot = conf.getVar(HiveConf.ConfVars.REPLDIR);
    Path dumpRoot = new Path(replRoot, getNextDumpDir());
    DumpMetaData dmd = new DumpMetaData(dumpRoot);
    Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR));
    Long lastReplId;
    try {
        if (eventFrom == null) {
            // bootstrap case
            Long bootDumpBeginReplId = db.getMSC().getCurrentNotificationEventId().getEventId();
            for (String dbName : matchesDb(dbNameOrPattern)) {
                LOG.debug("ReplicationSemanticAnalyzer: analyzeReplDump dumping db: " + dbName);
                Path dbRoot = dumpDbMetadata(dbName, dumpRoot);
                for (String tblName : matchesTbl(dbName, tblNameOrPattern)) {
                    LOG.debug("ReplicationSemanticAnalyzer: analyzeReplDump dumping table: " + tblName + " to db root " + dbRoot.toUri());
                    dumpTbl(ast, dbName, tblName, dbRoot);
                }
            }
            Long bootDumpEndReplId = db.getMSC().getCurrentNotificationEventId().getEventId();
            LOG.info("Bootstrap object dump phase took from {} to {}", bootDumpBeginReplId, bootDumpEndReplId);
            // Now that bootstrap has dumped all objects related, we have to account for the changes
            // that occurred while bootstrap was happening - i.e. we have to look through all events
            // during the bootstrap period and consolidate them with our dump.
            IMetaStoreClient.NotificationFilter evFilter = EventUtils.getDbTblNotificationFilter(dbNameOrPattern, tblNameOrPattern);
            EventUtils.MSClientNotificationFetcher evFetcher = new EventUtils.MSClientNotificationFetcher(db.getMSC());
            EventUtils.NotificationEventIterator evIter = new EventUtils.NotificationEventIterator(evFetcher, bootDumpBeginReplId, Ints.checkedCast(bootDumpEndReplId - bootDumpBeginReplId) + 1, evFilter);
            // Now we consolidate all the events that happened during the object dump into the dump
            while (evIter.hasNext()) {
                NotificationEvent ev = evIter.next();
                Path evRoot = new Path(dumpRoot, String.valueOf(ev.getEventId()));
                // FIXME : implement consolidateEvent(..) similar to dumpEvent(ev,evRoot)
            }
            LOG.info("Consolidation done, preparing to return {},{}->{}", dumpRoot.toUri(), bootDumpBeginReplId, bootDumpEndReplId);
            dmd.setDump(DUMPTYPE.BOOTSTRAP, bootDumpBeginReplId, bootDumpEndReplId, cmRoot);
            dmd.write();
            // Set the correct last repl id to return to the user
            lastReplId = bootDumpEndReplId;
        } else {
            // go through each event, and dump out each event to an event-level dump dir inside dumproot
            if (eventTo == null) {
                eventTo = db.getMSC().getCurrentNotificationEventId().getEventId();
                LOG.debug("eventTo not specified, using current event id : {}", eventTo);
            }
            Integer maxRange = Ints.checkedCast(eventTo - eventFrom + 1);
            if ((maxEventLimit == null) || (maxEventLimit > maxRange)) {
                maxEventLimit = maxRange;
            }
            // TODO : instead of simply restricting by message format, we should eventually
            // move to a jdbc-driver-style registering of message format, and picking message
            // factory per event to decode. For now, however, since all messages have the
            // same factory, restricting by message format is effectively a guard against
            // older leftover data that would cause us problems.
            IMetaStoreClient.NotificationFilter evFilter = EventUtils.andFilter(EventUtils.getDbTblNotificationFilter(dbNameOrPattern, tblNameOrPattern), EventUtils.getEventBoundaryFilter(eventFrom, eventTo), EventUtils.restrictByMessageFormat(MessageFactory.getInstance().getMessageFormat()));
            EventUtils.MSClientNotificationFetcher evFetcher = new EventUtils.MSClientNotificationFetcher(db.getMSC());
            EventUtils.NotificationEventIterator evIter = new EventUtils.NotificationEventIterator(evFetcher, eventFrom, maxEventLimit, evFilter);
            while (evIter.hasNext()) {
                NotificationEvent ev = evIter.next();
                Path evRoot = new Path(dumpRoot, String.valueOf(ev.getEventId()));
                dumpEvent(ev, evRoot, cmRoot);
            }
            LOG.info("Done dumping events, preparing to return {},{}", dumpRoot.toUri(), eventTo);
            writeOutput(Arrays.asList("incremental", String.valueOf(eventFrom), String.valueOf(eventTo)), dmd.getDumpFilePath());
            dmd.setDump(DUMPTYPE.INCREMENTAL, eventFrom, eventTo, cmRoot);
            dmd.write();
            // Set the correct last repl id to return to the user
            lastReplId = eventTo;
        }
        prepareReturnValues(Arrays.asList(dumpRoot.toUri().toString(), String.valueOf(lastReplId)), dumpSchema);
        setFetchTask(createFetchTask(dumpSchema));
    } catch (Exception e) {
        // TODO : simple wrap & rethrow for now, clean up with error codes
        LOG.warn("Error during analyzeReplDump", e);
        throw new SemanticException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) EventUtils(org.apache.hadoop.hive.metastore.messaging.EventUtils) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException)
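For experimentation outside ReplicationSemanticAnalyzer, here is a minimal sketch of the same iteration pattern. It reuses only calls visible above (getCurrentNotificationEventId, MSClientNotificationFetcher, NotificationEventIterator); building the client from a bare HiveConf and the accept-all filter are illustrative assumptions, since the dump code obtains its client via db.getMSC() and filters by db/table pattern.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.NotificationEvent;
import org.apache.hadoop.hive.metastore.messaging.EventUtils;

public class NotificationIterationSketch {
    public static void main(String[] args) throws Exception {
        // Assumption: a plain HiveMetaStoreClient built from local configuration.
        IMetaStoreClient msc = new HiveMetaStoreClient(new HiveConf());
        long currentId = msc.getCurrentNotificationEventId().getEventId();
        // Accept-all filter; real callers would use EventUtils.getDbTblNotificationFilter(...).
        IMetaStoreClient.NotificationFilter acceptAll = new IMetaStoreClient.NotificationFilter() {
            @Override
            public boolean accept(NotificationEvent event) {
                return true;
            }
        };
        EventUtils.MSClientNotificationFetcher fetcher =
            new EventUtils.MSClientNotificationFetcher(msc);
        // Iterate at most 100 events starting from event id 0 (both bounds illustrative).
        EventUtils.NotificationEventIterator iter =
            new EventUtils.NotificationEventIterator(fetcher, 0L, 100, acceptAll);
        while (iter.hasNext()) {
            NotificationEvent ev = iter.next();
            System.out.println(ev.getEventId() + " " + ev.getEventType()
                + " " + ev.getDbName() + "." + ev.getTableName());
        }
        System.out.println("current event id: " + currentId);
    }
}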

Example 2 with NotificationEvent

Use of org.apache.hadoop.hive.metastore.api.NotificationEvent in project hive by apache.

From class TestHCatClient, method testReplicationTaskIter:

/**
   * Test for event-based replication scenario
   *
   * Does not test whether replication actually happened; merely tests that we can consume a
   * replication task iterator appropriately, calling all the functions the interface expects, without errors.
   */
@Test
public void testReplicationTaskIter() throws Exception {
    Configuration cfg = new Configuration(hcatConf);
    // set really low batch size to ensure batching
    cfg.set(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX.varname, "10");
    cfg.set(HiveConf.ConfVars.HIVE_REPL_TASK_FACTORY.varname, EximReplicationTaskFactory.class.getName());
    HCatClient sourceMetastore = HCatClient.create(cfg);
    String dbName = "testReplicationTaskIter";
    long baseId = sourceMetastore.getCurrentNotificationEventId();
    {
        // Perform some operations
        // 1: Create a db after dropping if needed => 1 or 2 events
        sourceMetastore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
        sourceMetastore.createDatabase(HCatCreateDBDesc.create(dbName).ifNotExists(false).build());
        // 2: Create an unpartitioned table T1 => 1 event
        String tblName1 = "T1";
        List<HCatFieldSchema> cols1 = HCatSchemaUtils.getHCatSchema("a:int,b:string").getFields();
        HCatTable table1 = (new HCatTable(dbName, tblName1)).cols(cols1);
        sourceMetastore.createTable(HCatCreateTableDesc.create(table1).build());
        // 3: Create a partitioned table T2 => 1 event
        String tblName2 = "T2";
        List<HCatFieldSchema> cols2 = HCatSchemaUtils.getHCatSchema("a:int").getFields();
        List<HCatFieldSchema> pcols2 = HCatSchemaUtils.getHCatSchema("b:string").getFields();
        HCatTable table2 = (new HCatTable(dbName, tblName2)).cols(cols2).partCols(pcols2);
        sourceMetastore.createTable(HCatCreateTableDesc.create(table2).build());
        // 4: Add a partition P1 to T2 => 1 event
        HCatTable table2Created = sourceMetastore.getTable(dbName, tblName2);
        Map<String, String> ptnDesc1 = new HashMap<String, String>();
        ptnDesc1.put("b", "test1");
        HCatPartition ptn1 = (new HCatPartition(table2Created, ptnDesc1, makePartLocation(table2Created, ptnDesc1)));
        sourceMetastore.addPartition(HCatAddPartitionDesc.create(ptn1).build());
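        // 5 : Add and then drop 20 partitions, one at a time => 40 events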
        for (int i = 0; i < 20; i++) {
            Map<String, String> ptnDesc = new HashMap<String, String>();
            ptnDesc.put("b", "testmul" + i);
            HCatPartition ptn = (new HCatPartition(table2Created, ptnDesc, makePartLocation(table2Created, ptnDesc)));
            sourceMetastore.addPartition(HCatAddPartitionDesc.create(ptn).build());
            sourceMetastore.dropPartitions(dbName, tblName2, ptnDesc, true);
        }
        // 6 : Drop table T1 => 1 event
        sourceMetastore.dropTable(dbName, tblName1, true);
        // 7 : Drop table T2 => 1 event
        sourceMetastore.dropTable(dbName, tblName2, true);
        // verify that the number of events since we began is at least 25 more
        long currId = sourceMetastore.getCurrentNotificationEventId();
        assertTrue("currId[" + currId + "] must be more than 25 greater than baseId[" + baseId + "]", currId > baseId + 25);
    }
    // Do the rest of the tests against the db we just set up above.
    List<HCatNotificationEvent> notifs = sourceMetastore.getNextNotification(0, 0, new IMetaStoreClient.NotificationFilter() {

        @Override
        public boolean accept(NotificationEvent event) {
            return true;
        }
    });
    for (HCatNotificationEvent n : notifs) {
        LOG.info("notif from dblistener:" + n.getEventId() + ":" + n.getEventTime() + ",t:" + n.getEventType() + ",o:" + n.getDbName() + "." + n.getTableName());
    }
    Iterator<ReplicationTask> taskIter = sourceMetastore.getReplicationTasks(0, -1, dbName, null);
    while (taskIter.hasNext()) {
        ReplicationTask task = taskIter.next();
        HCatNotificationEvent n = task.getEvent();
        LOG.info("notif from tasks:" + n.getEventId() + ":" + n.getEventTime() + ",t:" + n.getEventType() + ",o:" + n.getDbName() + "." + n.getTableName() + ",s:" + n.getEventScope());
        LOG.info("task :" + task.getClass().getName());
        if (task.needsStagingDirs()) {
            StagingDirectoryProvider provider = new StagingDirectoryProvider() {

                @Override
                public String getStagingDirectory(String key) {
                    LOG.info("getStagingDirectory(" + key + ") called!");
                    return "/tmp/" + key.replaceAll(" ", "_");
                }
            };
            task.withSrcStagingDirProvider(provider).withDstStagingDirProvider(provider);
        }
        if (task.isActionable()) {
            LOG.info("task was actionable!");
            Function<Command, String> commandDebugPrinter = new Function<Command, String>() {

                @Override
                public String apply(@Nullable Command cmd) {
                    StringBuilder sb = new StringBuilder();
                    String serializedCmd = null;
                    try {
                        serializedCmd = ReplicationUtils.serializeCommand(cmd);
                    } catch (IOException e) {
                        e.printStackTrace();
                        throw new RuntimeException(e);
                    }
                    sb.append("SERIALIZED:" + serializedCmd + "\n");
                    Command command = null;
                    try {
                        command = ReplicationUtils.deserializeCommand(serializedCmd);
                    } catch (IOException e) {
                        e.printStackTrace();
                        throw new RuntimeException(e);
                    }
                    sb.append("CMD:[" + command.getClass().getName() + "]\n");
                    sb.append("EVENTID:[" + command.getEventId() + "]\n");
                    for (String s : command.get()) {
                        sb.append("CMD:" + s);
                        sb.append("\n");
                    }
                    sb.append("Retriable:" + command.isRetriable() + "\n");
                    sb.append("Undoable:" + command.isUndoable() + "\n");
                    if (command.isUndoable()) {
                        for (String s : command.getUndo()) {
                            sb.append("UNDO:" + s);
                            sb.append("\n");
                        }
                    }
                    List<String> locns = command.cleanupLocationsPerRetry();
                    sb.append("cleanupLocationsPerRetry entries :" + locns.size());
                    for (String s : locns) {
                        sb.append("RETRY_CLEANUP:" + s);
                        sb.append("\n");
                    }
                    locns = command.cleanupLocationsAfterEvent();
                    sb.append("cleanupLocationsAfterEvent entries :" + locns.size());
                    for (String s : locns) {
                        sb.append("AFTER_EVENT_CLEANUP:" + s);
                        sb.append("\n");
                    }
                    return sb.toString();
                }
            };
            LOG.info("On src:");
            for (String s : Iterables.transform(task.getSrcWhCommands(), commandDebugPrinter)) {
                LOG.info(s);
            }
            LOG.info("On dest:");
            for (String s : Iterables.transform(task.getDstWhCommands(), commandDebugPrinter)) {
                LOG.info(s);
            }
        } else {
            LOG.info("task was not actionable.");
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Function(com.google.common.base.Function) ReplicationTask(org.apache.hive.hcatalog.api.repl.ReplicationTask) List(java.util.List) ArrayList(java.util.ArrayList) StagingDirectoryProvider(org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent) IOException(java.io.IOException) Command(org.apache.hive.hcatalog.api.repl.Command) EximReplicationTaskFactory(org.apache.hive.hcatalog.api.repl.exim.EximReplicationTaskFactory) Map(java.util.Map) HashMap(java.util.HashMap) Nullable(javax.annotation.Nullable) Test(org.junit.Test)
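The filter passed to getNextNotification above accepts everything. A short sketch of a more selective NotificationFilter, assuming only the HCatConstants event-type constants already used in Examples 3 through 5, keeps just create-table events:

// Hedged sketch: keep only create-table events.
// HCatConstants.HCAT_CREATE_TABLE_EVENT is the same constant used in the later examples.
IMetaStoreClient.NotificationFilter createTablesOnly = new IMetaStoreClient.NotificationFilter() {
    @Override
    public boolean accept(NotificationEvent event) {
        return event != null
            && HCatConstants.HCAT_CREATE_TABLE_EVENT.equals(event.getEventType());
    }
};
// Usage mirrors the test above:
// sourceMetastore.getNextNotification(0, 0, createTablesOnly);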

Example 3 with NotificationEvent

Use of org.apache.hadoop.hive.metastore.api.NotificationEvent in project hive by apache.

From class TestReplicationTask, method testCreate:

@Test
public void testCreate() throws HCatException {
    Table t = new Table();
    t.setDbName("testdb");
    t.setTableName("testtable");
    // Event time is in seconds; dividing avoids the int overflow from casting currentTimeMillis().
    NotificationEvent event = new NotificationEvent(0, (int) (System.currentTimeMillis() / 1000), HCatConstants.HCAT_CREATE_TABLE_EVENT, msgFactory.buildCreateTableMessage(t).toString());
    event.setDbName(t.getDbName());
    event.setTableName(t.getTableName());
    ReplicationTask.resetFactory(null);
    ReplicationTask rtask = ReplicationTask.create(HCatClient.create(new HiveConf()), new HCatNotificationEvent(event));
    assertTrue("Provided factory instantiation should yield CreateTableReplicationTask", rtask instanceof CreateTableReplicationTask);
    ReplicationTask.resetFactory(NoopFactory.class);
    rtask = ReplicationTask.create(HCatClient.create(new HiveConf()), new HCatNotificationEvent(event));
    assertTrue("Provided factory instantiation should yield NoopReplicationTask", rtask instanceof NoopReplicationTask);
    ReplicationTask.resetFactory(null);
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) CreateTableReplicationTask(org.apache.hive.hcatalog.api.repl.exim.CreateTableReplicationTask) CreateTableReplicationTask(org.apache.hive.hcatalog.api.repl.exim.CreateTableReplicationTask) HCatNotificationEvent(org.apache.hive.hcatalog.api.HCatNotificationEvent) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HCatNotificationEvent(org.apache.hive.hcatalog.api.HCatNotificationEvent) Test(org.junit.Test)
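ReplicationTask.resetFactory(Class) is what switches between the default EXIM factory and NoopFactory above. A minimal sketch of a custom factory follows; it assumes ReplicationTask.Factory declares a create(HCatClient, HCatNotificationEvent) method, which matches how ReplicationTask.create dispatches in this test, but the exact contract should be checked against the Hive sources.

// Hedged sketch: a factory that logs every event and returns a NoopReplicationTask.
public class LoggingNoopFactory implements ReplicationTask.Factory {
    @Override
    public ReplicationTask create(HCatClient client, HCatNotificationEvent event) {
        System.out.println("repl event " + event.getEventId() + " type " + event.getEventType());
        return new NoopReplicationTask(event);
    }
}

// Registered the same way the test switches factories:
// ReplicationTask.resetFactory(LoggingNoopFactory.class);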

Example 4 with NotificationEvent

Use of org.apache.hadoop.hive.metastore.api.NotificationEvent in project hive by apache.

From class TestEximReplicationTasks, method testCreateTable:

@Test
public void testCreateTable() throws IOException {
    Table t = new Table();
    t.setDbName("testdb");
    t.setTableName("testtable");
    NotificationEvent event = new NotificationEvent(getEventId(), getTime(), HCatConstants.HCAT_CREATE_TABLE_EVENT, msgFactory.buildCreateTableMessage(t).toString());
    event.setDbName(t.getDbName());
    event.setTableName(t.getTableName());
    HCatNotificationEvent hev = new HCatNotificationEvent(event);
    ReplicationTask rtask = ReplicationTask.create(client, hev);
    assertEquals(hev.toString(), rtask.getEvent().toString());
    verifyCreateTableReplicationTask(rtask);
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) NoopReplicationTask(org.apache.hive.hcatalog.api.repl.NoopReplicationTask) ReplicationTask(org.apache.hive.hcatalog.api.repl.ReplicationTask) HCatNotificationEvent(org.apache.hive.hcatalog.api.HCatNotificationEvent) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent) HCatNotificationEvent(org.apache.hive.hcatalog.api.HCatNotificationEvent) Test(org.junit.Test)
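The same construction pattern extends to other event types. Below is a hedged sketch for a drop-table event; it assumes msgFactory exposes a buildDropTableMessage(Table) counterpart to the buildCreateTableMessage(Table) used above, and reuses the test's getEventId()/getTime() helpers.

// Hedged sketch: build a drop-table NotificationEvent analogous to the create-table one.
Table t = new Table();
t.setDbName("testdb");
t.setTableName("testtable");
NotificationEvent dropEvent = new NotificationEvent(getEventId(), getTime(),
    HCatConstants.HCAT_DROP_TABLE_EVENT,
    // buildDropTableMessage(Table) is assumed to exist alongside buildCreateTableMessage(Table).
    msgFactory.buildDropTableMessage(t).toString());
dropEvent.setDbName(t.getDbName());
dropEvent.setTableName(t.getTableName());
ReplicationTask dropTask = ReplicationTask.create(client, new HCatNotificationEvent(dropEvent));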

Example 5 with NotificationEvent

Use of org.apache.hadoop.hive.metastore.api.NotificationEvent in project hive by apache.

From class TestEximReplicationTasks, method testInsert:

@Test
public void testInsert() throws HCatException {
    Table t = new Table();
    t.setDbName("testdb");
    t.setTableName("testtable");
    List<FieldSchema> pkeys = HCatSchemaUtils.getFieldSchemas(HCatSchemaUtils.getHCatSchema("a:int,b:string").getFields());
    t.setPartitionKeys(pkeys);
    Partition p = createPtn(t, Arrays.asList("102", "lmn"));
    List<String> files = Arrays.asList("/tmp/test123");
    NotificationEvent event = new NotificationEvent(getEventId(), getTime(), HCatConstants.HCAT_INSERT_EVENT, msgFactory.buildInsertMessage(t.getDbName(), t.getTableName(), getPtnDesc(t, p), files).toString());
    event.setDbName(t.getDbName());
    event.setTableName(t.getTableName());
    HCatNotificationEvent hev = new HCatNotificationEvent(event);
    ReplicationTask rtask = ReplicationTask.create(client, hev);
    assertEquals(hev.toString(), rtask.getEvent().toString());
    verifyInsertReplicationTask(rtask, t, p);
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) NoopReplicationTask(org.apache.hive.hcatalog.api.repl.NoopReplicationTask) ReplicationTask(org.apache.hive.hcatalog.api.repl.ReplicationTask) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) HCatNotificationEvent(org.apache.hive.hcatalog.api.HCatNotificationEvent) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent) HCatNotificationEvent(org.apache.hive.hcatalog.api.HCatNotificationEvent) Test(org.junit.Test)

Aggregations

NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent): 52
Test (org.junit.Test): 34
Table (org.apache.hadoop.hive.metastore.api.Table): 26
NotificationEventResponse (org.apache.hadoop.hive.metastore.api.NotificationEventResponse): 23
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 16
ArrayList (java.util.ArrayList): 14
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 12
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 12
Partition (org.apache.hadoop.hive.metastore.api.Partition): 10
HCatNotificationEvent (org.apache.hive.hcatalog.api.HCatNotificationEvent): 10
ReplicationTask (org.apache.hive.hcatalog.api.repl.ReplicationTask): 10
NoopReplicationTask (org.apache.hive.hcatalog.api.repl.NoopReplicationTask): 9
Database (org.apache.hadoop.hive.metastore.api.Database): 8
Index (org.apache.hadoop.hive.metastore.api.Index): 6
HashMap (java.util.HashMap): 5
LinkedHashMap (java.util.LinkedHashMap): 5
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 5
Function (org.apache.hadoop.hive.metastore.api.Function): 4
Order (org.apache.hadoop.hive.metastore.api.Order): 3
IOException (java.io.IOException): 2