Example 61 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class ExportSemanticAnalyzer, method prepareExport.

// FIXME : Move to EximUtil - it's okay for this to stay here for a little while more till we finalize the statics
public static void prepareExport(ASTNode ast, URI toURI, TableSpec ts, ReplicationSpec replicationSpec, Hive db, HiveConf conf, Context ctx, List<Task<? extends Serializable>> rootTasks, HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs, Logger LOG) throws SemanticException {
    if (ts != null) {
        try {
            EximUtil.validateTable(ts.tableHandle);
            if (replicationSpec.isInReplicationScope() && ts.tableHandle.isTemporary()) {
                // No replication for temporary tables either
                ts = null;
            } else if (ts.tableHandle.isView()) {
                replicationSpec.setIsMetadataOnly(true);
            }
        } catch (SemanticException e) {
            // In replication scope, ignore the error and skip this table; otherwise rethrow.
            if (replicationSpec.isInReplicationScope()) {
                // null out ts so we can't use it.
                ts = null;
            } else {
                throw e;
            }
        }
    }
    try {
        FileSystem fs = FileSystem.get(toURI, conf);
        Path toPath = new Path(toURI.getScheme(), toURI.getAuthority(), toURI.getPath());
        try {
            FileStatus tgt = fs.getFileStatus(toPath);
            // target exists
            if (!tgt.isDirectory()) {
                throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not a directory : " + toURI));
            } else {
                FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
                if (files != null && files.length != 0) {
                    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not an empty directory : " + toURI));
                }
            }
        } catch (FileNotFoundException e) {
            // Target does not exist yet; that's acceptable, it will be created by the copy task.
        }
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
    }
    PartitionIterable partitions = null;
    try {
        replicationSpec.setCurrentReplicationState(String.valueOf(db.getMSC().getCurrentNotificationEventId().getEventId()));
        if ((ts != null) && (ts.tableHandle.isPartitioned())) {
            if (ts.specType == TableSpec.SpecType.TABLE_ONLY) {
                // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only
                if (replicationSpec.isMetadataOnly()) {
                    partitions = null;
                } else {
                    partitions = new PartitionIterable(db, ts.tableHandle, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                }
            } else {
                // PARTITIONS specified - partitions inside tableSpec
                partitions = new PartitionIterable(ts.partitions);
            }
        } else {
            // Either the table isn't partitioned, or ts was nulled out above for a
            // no-op replication export, so there are no partitions to fetch.
            partitions = null;
        }
        Path path = new Path(ctx.getLocalTmpPath(), EximUtil.METADATA_NAME);
        EximUtil.createExportDump(FileSystem.getLocal(conf), path, (ts != null ? ts.tableHandle : null), partitions, replicationSpec);
        Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, path, new Path(toURI), conf);
        rootTasks.add(rTask);
        LOG.debug("_metadata file written into " + path.toString() + " and then copied to " + toURI.toString());
    } catch (Exception e) {
        throw new SemanticException(ErrorMsg.IO_ERROR.getMsg("Exception while writing out the local file"), e);
    }
    if (!(replicationSpec.isMetadataOnly() || (ts == null))) {
        Path parentPath = new Path(toURI);
        if (ts.tableHandle.isPartitioned()) {
            for (Partition partition : partitions) {
                Path fromPath = partition.getDataLocation();
                Path toPartPath = new Path(parentPath, partition.getName());
                Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf);
                rootTasks.add(rTask);
                inputs.add(new ReadEntity(partition));
            }
        } else {
            Path fromPath = ts.tableHandle.getDataLocation();
            Path toDataPath = new Path(parentPath, EximUtil.DATA_PATH_NAME);
            Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf);
            rootTasks.add(rTask);
            inputs.add(new ReadEntity(ts.tableHandle));
        }
        outputs.add(toWriteEntity(parentPath, conf));
    }
}
Also used :
Path (org.apache.hadoop.fs.Path)
FileStatus (org.apache.hadoop.fs.FileStatus)
FileSystem (org.apache.hadoop.fs.FileSystem)
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)
Partition (org.apache.hadoop.hive.ql.metadata.Partition)
PartitionIterable (org.apache.hadoop.hive.ql.metadata.PartitionIterable)
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException)
FileNotFoundException (java.io.FileNotFoundException)
IOException (java.io.IOException)
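
The notable pattern above is the lazy PartitionIterable: for a table-level export that isn't metadata-only, partitions are pulled from the metastore in batches rather than materialized up front. Below is a minimal standalone sketch of that usage, not taken from the Hive sources; the HiveConf setup and the table name default.sales are assumptions for illustration.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
import org.apache.hadoop.hive.ql.metadata.Table;

public class PartitionIterableSketch {
    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        // Hive.get() returns the thread-local metastore client wrapper.
        Hive db = Hive.get(conf);
        Table table = db.getTable("default", "sales"); // hypothetical table
        // A null partial spec iterates all partitions; they are fetched lazily
        // from the metastore, batchSize names at a time.
        int batchSize = conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
        for (Partition p : new PartitionIterable(db, table, null, batchSize)) {
            System.out.println(p.getName() + " -> " + p.getDataLocation());
        }
    }
}

Batched retrieval keeps memory bounded when a table has tens of thousands of partitions, which is why the export path above prefers it over fetching the full list.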

Example 62 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class ProcessAnalyzeTable, method genTableStats.

private Task<?> genTableStats(GenTezProcContext context, TableScanOperator tableScan) throws HiveException {
    Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata().getInputFormatClass();
    ParseContext parseContext = context.parseContext;
    Table table = tableScan.getConf().getTableMetadata();
    List<Partition> partitions = new ArrayList<>();
    if (table.isPartitioned()) {
        partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
        for (Partition partn : partitions) {
            LOG.debug("XXX: adding part: " + partn);
            context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
        }
    }
    TableSpec tableSpec = new TableSpec(table, partitions);
    tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
    if (inputFormat.equals(OrcInputFormat.class)) {
        // For ORC, there is no Tez Job for table stats.
        StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
        snjWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        // If partition is specified, get pruned partition list
        if (!partitions.isEmpty()) {
            snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
        }
        return TaskFactory.get(snjWork, parseContext.getConf());
    } else {
        StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
        statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
        statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
        statsWork.setSourceTask(context.currentTask);
        statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        return TaskFactory.get(statsWork, parseContext.getConf());
    }
}
Also used :
Partition (org.apache.hadoop.hive.ql.metadata.Partition)
TableSpec (org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec)
Table (org.apache.hadoop.hive.ql.metadata.Table)
StatsWork (org.apache.hadoop.hive.ql.plan.StatsWork)
ArrayList (java.util.ArrayList)
StatsNoJobWork (org.apache.hadoop.hive.ql.plan.StatsNoJobWork)
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
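
The input-format check above is the crux of genTableStats: ORC tables can compute basic statistics from file footers, so they get a StatsNoJobWork with no Tez job, while every other format gets a StatsWork aggregation task. A minimal sketch of just that dispatch follows; the class and helper names are hypothetical.

import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.mapred.InputFormat;

public class StatsDispatchSketch {
    // Hypothetical helper mirroring the branch above: ORC stats come from file
    // footers (no cluster job); every other format needs a stats aggregation task.
    static boolean needsStatsJob(Table table) throws Exception {
        Class<? extends InputFormat> inputFormat = table.getInputFormatClass();
        return !inputFormat.equals(OrcInputFormat.class);
    }
}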

Example 63 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class ReplicationSemanticAnalyzer, method dumpEvent.

private void dumpEvent(NotificationEvent ev, Path evRoot, Path cmRoot) throws Exception {
    long evid = ev.getEventId();
    String evidStr = String.valueOf(evid);
    ReplicationSpec replicationSpec = getNewEventOnlyReplicationSpec(evidStr);
    MessageDeserializer md = MessageFactory.getInstance().getDeserializer();
    switch(ev.getEventType()) {
        case MessageFactory.CREATE_TABLE_EVENT:
            {
                CreateTableMessage ctm = md.getCreateTableMessage(ev.getMessage());
                LOG.info("Processing#{} CREATE_TABLE message : {}", ev.getEventId(), ev.getMessage());
                org.apache.hadoop.hive.metastore.api.Table tobj = ctm.getTableObj();
                if (tobj == null) {
                    LOG.debug("Event#{} was a CREATE_TABLE_EVENT with no table listed");
                    break;
                }
                Table qlMdTable = new Table(tobj);
                if (qlMdTable.isView()) {
                    replicationSpec.setIsMetadataOnly(true);
                }
                Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, null, replicationSpec);
                Path dataPath = new Path(evRoot, EximUtil.DATA_PATH_NAME);
                Iterable<String> files = ctm.getFiles();
                if (files != null) {
                    // encoded filename/checksum of files, write into _files
                    FileSystem fs = dataPath.getFileSystem(conf);
                    Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
                    BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
                    try {
                        for (String file : files) {
                            fileListWriter.write(file + "\n");
                        }
                    } finally {
                        fileListWriter.close();
                    }
                }
                (new DumpMetaData(evRoot, DUMPTYPE.EVENT_CREATE_TABLE, evid, evid, cmRoot)).write();
                break;
            }
        case MessageFactory.ADD_PARTITION_EVENT:
            {
                AddPartitionMessage apm = md.getAddPartitionMessage(ev.getMessage());
                LOG.info("Processing#{} ADD_PARTITION message : {}", ev.getEventId(), ev.getMessage());
                Iterable<org.apache.hadoop.hive.metastore.api.Partition> ptns = apm.getPartitionObjs();
                if ((ptns == null) || (!ptns.iterator().hasNext())) {
                    LOG.debug("Event#{} was an ADD_PTN_EVENT with no partitions");
                    break;
                }
                org.apache.hadoop.hive.metastore.api.Table tobj = apm.getTableObj();
                if (tobj == null) {
                    LOG.debug("Event#{} was a ADD_PTN_EVENT with no table listed");
                    break;
                }
                final Table qlMdTable = new Table(tobj);
                Iterable<Partition> qlPtns = Iterables.transform(ptns, new Function<org.apache.hadoop.hive.metastore.api.Partition, Partition>() {

                    @Nullable
                    @Override
                    public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition input) {
                        if (input == null) {
                            return null;
                        }
                        try {
                            return new Partition(qlMdTable, input);
                        } catch (HiveException e) {
                            throw new IllegalArgumentException(e);
                        }
                    }
                });
                Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
                Iterator<PartitionFiles> partitionFilesIter = apm.getPartitionFilesIter().iterator();
                for (Partition qlPtn : qlPtns) {
                    PartitionFiles partitionFiles = partitionFilesIter.next();
                    Iterable<String> files = partitionFiles.getFiles();
                    if (files != null) {
                        // encoded filename/checksum of files, write into _files
                        Path ptnDataPath = new Path(evRoot, qlPtn.getName());
                        FileSystem fs = ptnDataPath.getFileSystem(conf);
                        Path filesPath = new Path(ptnDataPath, EximUtil.FILES_NAME);
                        BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
                        try {
                            for (String file : files) {
                                fileListWriter.write(file + "\n");
                            }
                        } finally {
                            fileListWriter.close();
                        }
                    }
                }
                (new DumpMetaData(evRoot, DUMPTYPE.EVENT_ADD_PARTITION, evid, evid, cmRoot)).write();
                break;
            }
        case MessageFactory.DROP_TABLE_EVENT:
            {
                LOG.info("Processing#{} DROP_TABLE message : {}", ev.getEventId(), ev.getMessage());
                DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_TABLE, evid, evid, cmRoot);
                dmd.setPayload(ev.getMessage());
                dmd.write();
                break;
            }
        case MessageFactory.DROP_PARTITION_EVENT:
            {
                LOG.info("Processing#{} DROP_PARTITION message : {}", ev.getEventId(), ev.getMessage());
                DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_PARTITION, evid, evid, cmRoot);
                dmd.setPayload(ev.getMessage());
                dmd.write();
                break;
            }
        case MessageFactory.ALTER_TABLE_EVENT:
            {
                LOG.info("Processing#{} ALTER_TABLE message : {}", ev.getEventId(), ev.getMessage());
                AlterTableMessage atm = md.getAlterTableMessage(ev.getMessage());
                org.apache.hadoop.hive.metastore.api.Table tobjBefore = atm.getTableObjBefore();
                org.apache.hadoop.hive.metastore.api.Table tobjAfter = atm.getTableObjAfter();
                if (tobjBefore.getDbName().equals(tobjAfter.getDbName()) && tobjBefore.getTableName().equals(tobjAfter.getTableName())) {
                    // regular alter scenario
                    replicationSpec.setIsMetadataOnly(true);
                    Table qlMdTableAfter = new Table(tobjAfter);
                    Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                    EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTableAfter, null, replicationSpec);
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_TABLE, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                } else {
                    // rename scenario
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_TABLE, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                }
                break;
            }
        case MessageFactory.ALTER_PARTITION_EVENT:
            {
                LOG.info("Processing#{} ALTER_PARTITION message : {}", ev.getEventId(), ev.getMessage());
                AlterPartitionMessage apm = md.getAlterPartitionMessage(ev.getMessage());
                org.apache.hadoop.hive.metastore.api.Table tblObj = apm.getTableObj();
                org.apache.hadoop.hive.metastore.api.Partition pobjBefore = apm.getPtnObjBefore();
                org.apache.hadoop.hive.metastore.api.Partition pobjAfter = apm.getPtnObjAfter();
                boolean renameScenario = false;
                Iterator<String> beforeValIter = pobjBefore.getValuesIterator();
                Iterator<String> afterValIter = pobjAfter.getValuesIterator();
                while (beforeValIter.hasNext()) {
                    if (!beforeValIter.next().equals(afterValIter.next())) {
                        renameScenario = true;
                        break;
                    }
                }
                if (!renameScenario) {
                    // regular partition alter
                    replicationSpec.setIsMetadataOnly(true);
                    Table qlMdTable = new Table(tblObj);
                    List<Partition> qlPtns = new ArrayList<Partition>();
                    qlPtns.add(new Partition(qlMdTable, pobjAfter));
                    Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                    EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_PARTITION, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                    break;
                } else {
                    // rename scenario
                    DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_PARTITION, evid, evid, cmRoot);
                    dmd.setPayload(ev.getMessage());
                    dmd.write();
                    break;
                }
            }
        case MessageFactory.INSERT_EVENT:
            {
                InsertMessage insertMsg = md.getInsertMessage(ev.getMessage());
                String dbName = insertMsg.getDB();
                String tblName = insertMsg.getTable();
                org.apache.hadoop.hive.metastore.api.Table tobj = db.getMSC().getTable(dbName, tblName);
                Table qlMdTable = new Table(tobj);
                Map<String, String> partSpec = insertMsg.getPartitionKeyValues();
                List<Partition> qlPtns = null;
                if (qlMdTable.isPartitioned() && !partSpec.isEmpty()) {
                    qlPtns = Arrays.asList(db.getPartition(qlMdTable, partSpec, false));
                }
                Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
                // Mark the replication type as insert-into so the import side appends instead of overwriting.
                replicationSpec.setIsInsert(true);
                EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
                Iterable<String> files = insertMsg.getFiles();
                if (files != null) {
                    // encoded filename/checksum of files, write into _files
                    Path dataPath = new Path(evRoot, EximUtil.DATA_PATH_NAME);
                    Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
                    FileSystem fs = dataPath.getFileSystem(conf);
                    BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
                    try {
                        for (String file : files) {
                            fileListWriter.write(file + "\n");
                        }
                    } finally {
                        fileListWriter.close();
                    }
                }
                LOG.info("Processing#{} INSERT message : {}", ev.getEventId(), ev.getMessage());
                DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_INSERT, evid, evid, cmRoot);
                dmd.setPayload(ev.getMessage());
                dmd.write();
                break;
            }
        // TODO : handle other event types
        default:
            LOG.info("Dummy processing#{} message : {}", ev.getEventId(), ev.getMessage());
            DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_UNKNOWN, evid, evid, cmRoot);
            dmd.setPayload(ev.getMessage());
            dmd.write();
            break;
    }
}
Also used :
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
CreateTableMessage (org.apache.hadoop.hive.metastore.messaging.CreateTableMessage)
BufferedWriter (java.io.BufferedWriter)
PartitionFiles (org.apache.hadoop.hive.metastore.messaging.PartitionFiles)
Function (com.google.common.base.Function)
FileSystem (org.apache.hadoop.fs.FileSystem)
Iterator (java.util.Iterator)
List (java.util.List)
ArrayList (java.util.ArrayList)
AlterPartitionMessage (org.apache.hadoop.hive.metastore.messaging.AlterPartitionMessage)
Path (org.apache.hadoop.fs.Path)
Partition (org.apache.hadoop.hive.ql.metadata.Partition)
MessageDeserializer (org.apache.hadoop.hive.metastore.messaging.MessageDeserializer)
Table (org.apache.hadoop.hive.ql.metadata.Table)
InsertMessage (org.apache.hadoop.hive.metastore.messaging.InsertMessage)
AlterTableMessage (org.apache.hadoop.hive.metastore.messaging.AlterTableMessage)
OutputStreamWriter (java.io.OutputStreamWriter)
AddPartitionMessage (org.apache.hadoop.hive.metastore.messaging.AddPartitionMessage)
Map (java.util.Map)
HashMap (java.util.HashMap)
LinkedHashMap (java.util.LinkedHashMap)
Nullable (javax.annotation.Nullable)
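
A step worth isolating from the ADD_PARTITION case above is the conversion from thrift-level org.apache.hadoop.hive.metastore.api.Partition objects to ql-level Partition objects via Guava's Iterables.transform. The standalone sketch below mirrors that code; the class and method names are hypothetical.

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class PartitionTransformSketch {
    static Iterable<Partition> toQlPartitions(final Table qlMdTable,
            Iterable<org.apache.hadoop.hive.metastore.api.Partition> thriftPtns) {
        // The transform is lazy: each thrift partition is wrapped only when the
        // consumer iterates, matching how EximUtil.createExportDump reads them.
        return Iterables.transform(thriftPtns,
                new Function<org.apache.hadoop.hive.metastore.api.Partition, Partition>() {

                    @Nullable
                    @Override
                    public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition input) {
                        if (input == null) {
                            return null;
                        }
                        try {
                            return new Partition(qlMdTable, input);
                        } catch (HiveException e) {
                            // Function.apply cannot throw checked exceptions, so rewrap.
                            throw new IllegalArgumentException(e);
                        }
                    }
                });
    }
}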

Example 64 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class TestDbTxnManager, method addPartitionInput.

private void addPartitionInput(Table t) throws Exception {
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("version", Integer.toString(nextInput++));
    Partition p = new Partition(t, partSpec, new Path("/dev/null"));
    ReadEntity re = new ReadEntity(p);
    readEntities.add(re);
}
Also used :
Path (org.apache.hadoop.fs.Path)
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)
DummyPartition (org.apache.hadoop.hive.ql.metadata.DummyPartition)
Partition (org.apache.hadoop.hive.ql.metadata.Partition)
HashMap (java.util.HashMap)

Example 65 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class TestDbTxnManager, method addPartitionOutput.

private WriteEntity addPartitionOutput(Table t, WriteEntity.WriteType writeType) throws Exception {
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("version", Integer.toString(nextInput++));
    Partition p = new Partition(t, partSpec, new Path("/dev/null"));
    WriteEntity we = new WriteEntity(p, writeType);
    writeEntities.add(we);
    return we;
}
Also used :
Path (org.apache.hadoop.fs.Path)
DummyPartition (org.apache.hadoop.hive.ql.metadata.DummyPartition)
Partition (org.apache.hadoop.hive.ql.metadata.Partition)
HashMap (java.util.HashMap)
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
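
Both test helpers (Examples 64 and 65) fabricate a Partition from just a table, a partition spec, and a placeholder Path; /dev/null works because the transaction manager only inspects entity names and specs, never the data location. A minimal sketch of that shared piece, with hypothetical class and method names:

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class FakePartitionSketch {
    // Hypothetical helper: a throwaway partition for entity/lock tests. The
    // Path argument is only recorded as the partition location, never opened.
    static Partition fakePartition(Table t, int version) throws Exception {
        Map<String, String> partSpec = new HashMap<String, String>();
        partSpec.put("version", Integer.toString(version));
        return new Partition(t, partSpec, new Path("/dev/null"));
    }
}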

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 102
Table (org.apache.hadoop.hive.ql.metadata.Table): 56
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 48
ArrayList (java.util.ArrayList): 43
Path (org.apache.hadoop.fs.Path): 25
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 25
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 24
IOException (java.io.IOException): 18
HashMap (java.util.HashMap): 18
LinkedHashMap (java.util.LinkedHashMap): 18
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 18
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 18
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 17
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14
FileNotFoundException (java.io.FileNotFoundException): 12
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 11
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 11
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 11