Search in sources :

Example 1 with PartitionFiles

Use of org.apache.hadoop.hive.metastore.messaging.PartitionFiles in project hive by apache.

The class AddPartitionHandler, method handle.

@Override
public void handle(Context withinContext) throws Exception {
    LOG.info("Processing#{} ADD_PARTITION message : {}", fromEventId(), eventMessageAsJSON);
    // Don't dump partition-related events for a metadata-only dump.
    if (withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY)) {
        return;
    }
    AddPartitionMessage apm = (AddPartitionMessage) eventMessage;
    org.apache.hadoop.hive.metastore.api.Table tobj = apm.getTableObj();
    if (tobj == null) {
        LOG.debug("Event#{} was a ADD_PTN_EVENT with no table listed", fromEventId());
        return;
    }
    final Table qlMdTable = new Table(tobj);
    if (!Utils.shouldReplicate(withinContext.replicationSpec, qlMdTable, true, withinContext.getTablesForBootstrap(), withinContext.oldReplScope, withinContext.hiveConf)) {
        return;
    }
    Iterable<org.apache.hadoop.hive.metastore.api.Partition> ptns = apm.getPartitionObjs();
    if ((ptns == null) || (!ptns.iterator().hasNext())) {
        LOG.debug("Event#{} was an ADD_PTN_EVENT with no partitions", fromEventId());
        return;
    }
    Iterable<Partition> qlPtns = StreamSupport.stream(ptns.spliterator(), true).map(input -> {
        if (input == null) {
            return null;
        }
        try {
            return new Partition(qlMdTable, input);
        } catch (HiveException e) {
            throw new IllegalArgumentException(e);
        }
    }).collect(Collectors.toList());
    Path metaDataPath = new Path(withinContext.eventRoot, EximUtil.METADATA_NAME);
    EximUtil.createExportDump(metaDataPath.getFileSystem(withinContext.hiveConf), metaDataPath, qlMdTable, qlPtns, withinContext.replicationSpec, withinContext.hiveConf);
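    // REPL_RUN_DATA_COPY_TASKS_ON_TARGET defers the actual data copy to the target at load
    // time; in that case only the encoded file list is recorded below.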
    boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET);
    Iterator<PartitionFiles> partitionFilesIter = apm.getPartitionFilesIter().iterator();
    // Partitions and their file iterators map one-to-one. For an external table, this
    // list would be empty. So, it is enough to check hasNext outside the loop.
    if (partitionFilesIter.hasNext()) {
        for (Partition qlPtn : qlPtns) {
            Iterable<String> files = partitionFilesIter.next().getFiles();
            if (files != null) {
                if (copyAtLoad) {
                    // encoded filename/checksum of files, write into _files
                    Path ptnDataPath = new Path(withinContext.eventRoot, EximUtil.DATA_PATH_NAME + File.separator + qlPtn.getName());
                    writeEncodedDumpFiles(withinContext, files, ptnDataPath);
                } else {
                    for (String file : files) {
                        writeFileEntry(qlMdTable, qlPtn, file, withinContext);
                    }
                }
            }
        }
    }
    withinContext.createDmd(this).write();
}
Also used : Iterator(java.util.Iterator) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Table(org.apache.hadoop.hive.ql.metadata.Table) DumpType(org.apache.hadoop.hive.ql.parse.repl.DumpType) Collectors(java.util.stream.Collectors) File(java.io.File) PartitionFiles(org.apache.hadoop.hive.metastore.messaging.PartitionFiles) Partition(org.apache.hadoop.hive.ql.metadata.Partition) EventMessage(org.apache.hadoop.hive.metastore.messaging.EventMessage) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent) EximUtil(org.apache.hadoop.hive.ql.parse.EximUtil) Path(org.apache.hadoop.fs.Path) Utils(org.apache.hadoop.hive.ql.parse.repl.dump.Utils) StreamSupport(java.util.stream.StreamSupport) AddPartitionMessage(org.apache.hadoop.hive.metastore.messaging.AddPartitionMessage) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
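
The handler pairs each element of qlPtns, in order, with the next PartitionFiles entry from apm.getPartitionFilesIter(), so it relies on the two iterables lining up one-to-one. Below is a minimal sketch of that pairing contract as a standalone helper; it uses only the getFiles() accessor seen above, and the class and method names are hypothetical.

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.metastore.messaging.PartitionFiles;
import org.apache.hadoop.hive.ql.metadata.Partition;

public final class PartitionFilesPairing {

    // Hypothetical helper: zip partitions with their PartitionFiles entries,
    // assuming both arrive in the same order (the contract handle() relies on).
    public static Map<Partition, Iterable<String>> pairWithFiles(
            Iterable<Partition> partitions, Iterable<PartitionFiles> partitionFiles) {
        Map<Partition, Iterable<String>> filesByPartition = new LinkedHashMap<>();
        Iterator<PartitionFiles> filesIter = partitionFiles.iterator();
        for (Partition ptn : partitions) {
            // For external tables the PartitionFiles list is empty, so stop pairing.
            if (!filesIter.hasNext()) {
                break;
            }
            filesByPartition.put(ptn, filesIter.next().getFiles());
        }
        return filesByPartition;
    }
}

Checking hasNext per iteration makes the empty external-table case explicit, whereas the handler above relies on a single check before the loop.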

Example 2 with PartitionFiles

Use of org.apache.hadoop.hive.metastore.messaging.PartitionFiles in project hive by apache.

The class ReplicationSemanticAnalyzer, method dumpEvent.

private void dumpEvent(NotificationEvent ev, Path evRoot, Path cmRoot) throws Exception {
    long evid = ev.getEventId();
    String evidStr = String.valueOf(evid);
    ReplicationSpec replicationSpec = getNewEventOnlyReplicationSpec(evidStr);
    MessageDeserializer md = MessageFactory.getInstance().getDeserializer();
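    // The deserializer turns each notification event's JSON payload into a typed
    // message (getCreateTableMessage, getAddPartitionMessage, ...).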
    switch(ev.getEventType()) {
        case MessageFactory.CREATE_TABLE_EVENT:
            {
                CreateTableMessage ctm = md.getCreateTableMessage(ev.getMessage());
                LOG.info("Processing#{} CREATE_TABLE message : {}", ev.getEventId(), ev.getMessage());
                org.apache.hadoop.hive.metastore.api.Table tobj = ctm.getTableObj();
                if (tobj == null) {
                    LOG.debug("Event#{} was a CREATE_TABLE_EVENT with no table listed");
                    break;
                }
                Table qlMdTable = new Table(tobj);
                if (qlMdTable.isView()) {
                    replicationSpec.setIsMetadataOnly(true);
                }
                Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, null, replicationSpec);
                Path dataPath = new Path(evRoot, "data");
                Iterable<String> files = ctm.getFiles();
                if (files != null) {
                    // encoded filename/checksum of files, write into _files
                    FileSystem fs = dataPath.getFileSystem(conf);
                    Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
                    BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
                    try {
                        for (String file : files) {
                            fileListWriter.write(file + "\n");
                        }
                    } finally {
                        fileListWriter.close();
                    }
                }
                (new DumpMetaData(evRoot, DUMPTYPE.EVENT_CREATE_TABLE, evid, evid, cmRoot)).write();
                break;
            }
        case MessageFactory.ADD_PARTITION_EVENT:
            {
                AddPartitionMessage apm = md.getAddPartitionMessage(ev.getMessage());
                LOG.info("Processing#{} ADD_PARTITION message : {}", ev.getEventId(), ev.getMessage());
                Iterable<org.apache.hadoop.hive.metastore.api.Partition> ptns = apm.getPartitionObjs();
                if ((ptns == null) || (!ptns.iterator().hasNext())) {
                    LOG.debug("Event#{} was an ADD_PTN_EVENT with no partitions");
                    break;
                }
                org.apache.hadoop.hive.metastore.api.Table tobj = apm.getTableObj();
                if (tobj == null) {
                    LOG.debug("Event#{} was a ADD_PTN_EVENT with no table listed");
                    break;
                }
                final Table qlMdTable = new Table(tobj);
                Iterable<Partition> qlPtns = Iterables.transform(ptns, new Function<org.apache.hadoop.hive.metastore.api.Partition, Partition>() {

                    @Nullable
                    @Override
                    public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition input) {
                        if (input == null) {
                            return null;
                        }
                        try {
                            return new Partition(qlMdTable, input);
                        } catch (HiveException e) {
                            throw new IllegalArgumentException(e);
                        }
                    }
                });
                Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
                Iterator<PartitionFiles> partitionFilesIter = apm.getPartitionFilesIter().iterator();
                for (Partition qlPtn : qlPtns) {
                    PartitionFiles partitionFiles = partitionFilesIter.next();
                    Iterable<String> files = partitionFiles.getFiles();
                    if (files != null) {
                        // encoded filename/checksum of files, write into _files
                        Path ptnDataPath = new Path(evRoot, qlPtn.getName());
                        FileSystem fs = ptnDataPath.getFileSystem(conf);
                        Path filesPath = new Path(ptnDataPath, EximUtil.FILES_NAME);
                        BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
                        try {
                            for (String file : files) {
                                fileListWriter.write(file + "\n");
                            }
                        } finally {
                            fileListWriter.close();
                        }
                    }
                }
                (new DumpMetaData(evRoot, DUMPTYPE.EVENT_ADD_PARTITION, evid, evid, cmRoot)).write();
                break;
            }
        case MessageFactory.DROP_TABLE_EVENT:
            {
                LOG.info("Processing#{} DROP_TABLE message : {}", ev.getEventId(), ev.getMessage());
                DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_TABLE, evid, evid, cmRoot);
                dmd.setPayload(ev.getMessage());
                dmd.write();
                break;
            }
        case MessageFactory.DROP_PARTITION_EVENT:
            {
                LOG.info("Processing#{} DROP_PARTITION message : {}", ev.getEventId(), ev.getMessage());
                DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_PARTITION, evid, evid, cmRoot);
                dmd.setPayload(ev.getMessage());
                dmd.write();
                break;
            }
        case MessageFactory.ALTER_TABLE_EVENT:
            {
                LOG.info("Processing#{} ALTER_TABLE message : {}", ev.getEventId(), ev.getMessage());
                AlterTableMessage atm = md.getAlterTableMessage(ev.getMessage());
                org.apache.hadoop.hive.metastore.api.Table tobjBefore = atm.getTableObjBefore();
                org.apache.hadoop.hive.metastore.api.Table tobjAfter = atm.getTableObjAfter();
                if (tobjBefore.getDbName().equals(tobjAfter.getDbName()) && tobjBefore.getTableName().equals(tobjAfter.getTableName())) {
                    // regular alter scenario
                    replicationSpec.setIsMetadataOnly(true);
                    Table qlMdTableAfter = new Table(tobjAfter);
                    Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                    EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTableAfter, null, replicationSpec);
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_TABLE, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                } else {
                    // rename scenario
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_TABLE, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                }
                break;
            }
        case MessageFactory.ALTER_PARTITION_EVENT:
            {
                LOG.info("Processing#{} ALTER_PARTITION message : {}", ev.getEventId(), ev.getMessage());
                AlterPartitionMessage apm = md.getAlterPartitionMessage(ev.getMessage());
                org.apache.hadoop.hive.metastore.api.Table tblObj = apm.getTableObj();
                org.apache.hadoop.hive.metastore.api.Partition pobjBefore = apm.getPtnObjBefore();
                org.apache.hadoop.hive.metastore.api.Partition pobjAfter = apm.getPtnObjAfter();
                boolean renameScenario = false;
                // A rename shows up as a difference in one of the partition key values.
                Iterator<String> beforeValIter = pobjBefore.getValuesIterator();
                Iterator<String> afterValIter = pobjAfter.getValuesIterator();
                while (beforeValIter.hasNext()) {
                    if (!beforeValIter.next().equals(afterValIter.next())) {
                        renameScenario = true;
                        break;
                    }
                }
                if (!renameScenario) {
                    // regular partition alter
                    replicationSpec.setIsMetadataOnly(true);
                    Table qlMdTable = new Table(tblObj);
                    List<Partition> qlPtns = new ArrayList<Partition>();
                    qlPtns.add(new Partition(qlMdTable, pobjAfter));
                    Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                    EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_PARTITION, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                    break;
                } else {
                    // rename scenario
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_PARTITION, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                    break;
                }
            }
        case MessageFactory.INSERT_EVENT:
            {
                InsertMessage insertMsg = md.getInsertMessage(ev.getMessage());
                String dbName = insertMsg.getDB();
                String tblName = insertMsg.getTable();
                org.apache.hadoop.hive.metastore.api.Table tobj = db.getMSC().getTable(dbName, tblName);
                Table qlMdTable = new Table(tobj);
                Map<String, String> partSpec = insertMsg.getPartitionKeyValues();
                List<Partition> qlPtns = null;
                if (qlMdTable.isPartitioned() && !partSpec.isEmpty()) {
                    qlPtns = Arrays.asList(db.getPartition(qlMdTable, partSpec, false));
                }
                Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                // Mark the replication type as insert into to avoid overwrite while import
                replicationSpec.setIsInsert(true);
                EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
                Iterable<String> files = insertMsg.getFiles();
                if (files != null) {
                    // encoded filename/checksum of files, write into _files
                    Path dataPath = new Path(evRoot, EximUtil.DATA_PATH_NAME);
                    Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
                    FileSystem fs = dataPath.getFileSystem(conf);
                    BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
                    try {
                        for (String file : files) {
                            fileListWriter.write(file + "\n");
                        }
                    } finally {
                        fileListWriter.close();
                    }
                }
                LOG.info("Processing#{} INSERT message : {}", ev.getEventId(), ev.getMessage());
                DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_INSERT, evid, evid, cmRoot);
                dmd.setPayload(ev.getMessage());
                dmd.write();
                break;
            }
        // TODO : handle other event types
        default:
            LOG.info("Dummy processing#{} message : {}", ev.getEventId(), ev.getMessage());
            DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_UNKNOWN, evid, evid, cmRoot);
            dmd.setPayload(ev.getMessage());
            dmd.write();
            break;
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) CreateTableMessage(org.apache.hadoop.hive.metastore.messaging.CreateTableMessage) BufferedWriter(java.io.BufferedWriter) PartitionFiles(org.apache.hadoop.hive.metastore.messaging.PartitionFiles) Function(com.google.common.base.Function) FileSystem(org.apache.hadoop.fs.FileSystem) Iterator(java.util.Iterator) List(java.util.List) ArrayList(java.util.ArrayList) AlterPartitionMessage(org.apache.hadoop.hive.metastore.messaging.AlterPartitionMessage) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) MessageDeserializer(org.apache.hadoop.hive.metastore.messaging.MessageDeserializer) Table(org.apache.hadoop.hive.ql.metadata.Table) InsertMessage(org.apache.hadoop.hive.metastore.messaging.InsertMessage) AlterTableMessage(org.apache.hadoop.hive.metastore.messaging.AlterTableMessage) OutputStreamWriter(java.io.OutputStreamWriter) AddPartitionMessage(org.apache.hadoop.hive.metastore.messaging.AddPartitionMessage) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Nullable(javax.annotation.Nullable)
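
Every data-bearing branch above writes the encoded file list the same way: open EximUtil.FILES_NAME under the event's data path and write one encoded filename/checksum entry per line. Below is a sketch of that repeated step factored into a helper, assuming only the Hadoop FileSystem and EximUtil APIs already used in the snippet; try-with-resources replaces the explicit try/finally close, and the class and method names are hypothetical.

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.parse.EximUtil;

public final class FilesListWriter {

    // Hypothetical helper: write the "_files" list for one dump directory.
    public static void writeFilesList(Configuration conf, Path dataPath, Iterable<String> files)
            throws IOException {
        FileSystem fs = dataPath.getFileSystem(conf);
        Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
        // try-with-resources closes the writer on all paths, like the try/finally above.
        try (BufferedWriter fileListWriter = new BufferedWriter(
                new OutputStreamWriter(fs.create(filesPath), StandardCharsets.UTF_8))) {
            for (String file : files) {
                // Each entry is an encoded filename/checksum, one per line.
                fileListWriter.write(file + "\n");
            }
        }
    }
}

Passing an explicit UTF-8 charset is a deliberate change: the original snippet uses the JVM default charset, which can vary across hosts.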

Aggregations

Iterator (java.util.Iterator) 2
Path (org.apache.hadoop.fs.Path) 2
AddPartitionMessage (org.apache.hadoop.hive.metastore.messaging.AddPartitionMessage) 2
PartitionFiles (org.apache.hadoop.hive.metastore.messaging.PartitionFiles) 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 2
Partition (org.apache.hadoop.hive.ql.metadata.Partition) 2
Table (org.apache.hadoop.hive.ql.metadata.Table) 2
Function (com.google.common.base.Function) 1
BufferedWriter (java.io.BufferedWriter) 1
File (java.io.File) 1
OutputStreamWriter (java.io.OutputStreamWriter) 1
ArrayList (java.util.ArrayList) 1
HashMap (java.util.HashMap) 1
LinkedHashMap (java.util.LinkedHashMap) 1
List (java.util.List) 1
Map (java.util.Map) 1
Collectors (java.util.stream.Collectors) 1
StreamSupport (java.util.stream.StreamSupport) 1
Nullable (javax.annotation.Nullable) 1
FileSystem (org.apache.hadoop.fs.FileSystem) 1