Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class ExportSemanticAnalyzer, method prepareExport.
// FIXME : Move to EximUtil - it's okay for this to stay here for a little while more till we finalize the statics
public static void prepareExport(ASTNode ast, URI toURI, TableSpec ts, ReplicationSpec replicationSpec, Hive db, HiveConf conf, Context ctx, List<Task<? extends Serializable>> rootTasks, HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs, Logger LOG) throws SemanticException {
if (ts != null) {
try {
EximUtil.validateTable(ts.tableHandle);
if (replicationSpec.isInReplicationScope() && ts.tableHandle.isTemporary()) {
// No replication for temporary tables either
ts = null;
} else if (ts.tableHandle.isView()) {
replicationSpec.setIsMetadataOnly(true);
}
} catch (SemanticException e) {
// ignore the error when in replication scope; otherwise rethrow.
if (replicationSpec.isInReplicationScope()) {
// null out ts so we can't use it.
ts = null;
} else {
throw e;
}
}
}
try {
FileSystem fs = FileSystem.get(toURI, conf);
Path toPath = new Path(toURI.getScheme(), toURI.getAuthority(), toURI.getPath());
try {
FileStatus tgt = fs.getFileStatus(toPath);
// target exists
if (!tgt.isDir()) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not a directory : " + toURI));
} else {
FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
if (files != null && files.length != 0) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not an empty directory : " + toURI));
}
}
} catch (FileNotFoundException e) {
// Target directory does not exist yet; nothing to validate, the export will create it.
}
} catch (IOException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
}
PartitionIterable partitions = null;
try {
replicationSpec.setCurrentReplicationState(String.valueOf(db.getMSC().getCurrentNotificationEventId().getEventId()));
if ((ts != null) && (ts.tableHandle.isPartitioned())) {
if (ts.specType == TableSpec.SpecType.TABLE_ONLY) {
// TABLE-ONLY, fetch partitions if regular export, don't if metadata-only
if (replicationSpec.isMetadataOnly()) {
partitions = null;
} else {
partitions = new PartitionIterable(db, ts.tableHandle, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
}
} else {
// PARTITIONS specified - partitions inside tableSpec
partitions = new PartitionIterable(ts.partitions);
}
} else {
// Either the table isn't partitioned, or ts was nulled out above for replication,
// or this is a noop-replication export, so we can skip looking at partitions.
partitions = null;
}
Path path = new Path(ctx.getLocalTmpPath(), EximUtil.METADATA_NAME);
EximUtil.createExportDump(FileSystem.getLocal(conf), path, (ts != null ? ts.tableHandle : null), partitions, replicationSpec);
Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, path, new Path(toURI), conf);
rootTasks.add(rTask);
LOG.debug("_metadata file written into " + path.toString() + " and then copied to " + toURI.toString());
} catch (Exception e) {
throw new SemanticException(ErrorMsg.IO_ERROR.getMsg("Exception while writing out the local file"), e);
}
if (!(replicationSpec.isMetadataOnly() || (ts == null))) {
Path parentPath = new Path(toURI);
if (ts.tableHandle.isPartitioned()) {
for (Partition partition : partitions) {
Path fromPath = partition.getDataLocation();
Path toPartPath = new Path(parentPath, partition.getName());
Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf);
rootTasks.add(rTask);
inputs.add(new ReadEntity(partition));
}
} else {
Path fromPath = ts.tableHandle.getDataLocation();
Path toDataPath = new Path(parentPath, EximUtil.DATA_PATH_NAME);
Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf);
rootTasks.add(rTask);
inputs.add(new ReadEntity(ts.tableHandle));
}
outputs.add(toWriteEntity(parentPath, conf));
}
}
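The data-copy half of prepareExport is the only place the ql Partition object is actually consumed: one dump-copy task and one ReadEntity per partition. Below is a minimal sketch of that loop factored into a standalone helper, assuming the same imports as the snippet above; the helper name exportPartitionData is ours, not Hive's.
// Hypothetical helper: one ReplCopyTask plus one ReadEntity per exported partition.
// Only calls that already appear in prepareExport are used here.
private static void exportPartitionData(Iterable<Partition> partitions, Path exportRoot,
    ReplicationSpec replicationSpec, HiveConf conf,
    List<Task<? extends Serializable>> rootTasks, Set<ReadEntity> inputs) {
  for (Partition partition : partitions) {
    Path fromPath = partition.getDataLocation();                  // source data of this partition
    Path toPartPath = new Path(exportRoot, partition.getName());  // <export-root>/<partition-name>
    rootTasks.add(ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf));
    inputs.add(new ReadEntity(partition));                        // record the partition as a read input
  }
}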
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class ProcessAnalyzeTable, method genTableStats.
private Task<?> genTableStats(GenTezProcContext context, TableScanOperator tableScan) throws HiveException {
Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata().getInputFormatClass();
ParseContext parseContext = context.parseContext;
Table table = tableScan.getConf().getTableMetadata();
List<Partition> partitions = new ArrayList<>();
if (table.isPartitioned()) {
partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
for (Partition partn : partitions) {
LOG.debug("XXX: adding part: " + partn);
context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
}
}
TableSpec tableSpec = new TableSpec(table, partitions);
tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
if (inputFormat.equals(OrcInputFormat.class)) {
// For ORC, there is no Tez Job for table stats.
StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
snjWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
// If partition is specified, get pruned partition list
if (partitions.size() > 0) {
snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
}
return TaskFactory.get(snjWork, parseContext.getConf());
} else {
StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
statsWork.setSourceTask(context.currentTask);
statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
return TaskFactory.get(statsWork, parseContext.getConf());
}
}
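Outside of the stats-work branching, the Partition usage in genTableStats is limited to registering each pruned partition as a DDL_NO_LOCK write entity before the TableSpec is built. A minimal sketch of just that step follows; the helper name collectPartitionOutputs and its parameter types are our assumptions, not Hive code.
// Hypothetical helper isolating the Partition -> WriteEntity step of genTableStats.
private static List<Partition> collectPartitionOutputs(Table table, ParseContext parseContext,
    TableScanOperator tableScan, Set<WriteEntity> outputs) throws HiveException {
  List<Partition> partitions = new ArrayList<>();
  if (table.isPartitioned()) {
    // Only partitions that survive pruning are registered as outputs.
    partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
    for (Partition partition : partitions) {
      outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_NO_LOCK));
    }
  }
  return partitions;
}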
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class ReplicationSemanticAnalyzer, method dumpEvent.
private void dumpEvent(NotificationEvent ev, Path evRoot, Path cmRoot) throws Exception {
long evid = ev.getEventId();
String evidStr = String.valueOf(evid);
ReplicationSpec replicationSpec = getNewEventOnlyReplicationSpec(evidStr);
MessageDeserializer md = MessageFactory.getInstance().getDeserializer();
switch(ev.getEventType()) {
case MessageFactory.CREATE_TABLE_EVENT:
{
CreateTableMessage ctm = md.getCreateTableMessage(ev.getMessage());
LOG.info("Processing#{} CREATE_TABLE message : {}", ev.getEventId(), ev.getMessage());
org.apache.hadoop.hive.metastore.api.Table tobj = ctm.getTableObj();
if (tobj == null) {
LOG.debug("Event#{} was a CREATE_TABLE_EVENT with no table listed");
break;
}
Table qlMdTable = new Table(tobj);
if (qlMdTable.isView()) {
replicationSpec.setIsMetadataOnly(true);
}
Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, null, replicationSpec);
Path dataPath = new Path(evRoot, "data");
Iterable<String> files = ctm.getFiles();
if (files != null) {
// encoded filename/checksum of files, write into _files
FileSystem fs = dataPath.getFileSystem(conf);
Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
try {
for (String file : files) {
fileListWriter.write(file + "\n");
}
} finally {
fileListWriter.close();
}
}
(new DumpMetaData(evRoot, DUMPTYPE.EVENT_CREATE_TABLE, evid, evid, cmRoot)).write();
break;
}
case MessageFactory.ADD_PARTITION_EVENT:
{
AddPartitionMessage apm = md.getAddPartitionMessage(ev.getMessage());
LOG.info("Processing#{} ADD_PARTITION message : {}", ev.getEventId(), ev.getMessage());
Iterable<org.apache.hadoop.hive.metastore.api.Partition> ptns = apm.getPartitionObjs();
if ((ptns == null) || (!ptns.iterator().hasNext())) {
LOG.debug("Event#{} was an ADD_PTN_EVENT with no partitions");
break;
}
org.apache.hadoop.hive.metastore.api.Table tobj = apm.getTableObj();
if (tobj == null) {
LOG.debug("Event#{} was a ADD_PTN_EVENT with no table listed");
break;
}
final Table qlMdTable = new Table(tobj);
Iterable<Partition> qlPtns = Iterables.transform(ptns, new Function<org.apache.hadoop.hive.metastore.api.Partition, Partition>() {
@Nullable
@Override
public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition input) {
if (input == null) {
return null;
}
try {
return new Partition(qlMdTable, input);
} catch (HiveException e) {
throw new IllegalArgumentException(e);
}
}
});
Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
Iterator<PartitionFiles> partitionFilesIter = apm.getPartitionFilesIter().iterator();
for (Partition qlPtn : qlPtns) {
PartitionFiles partitionFiles = partitionFilesIter.next();
Iterable<String> files = partitionFiles.getFiles();
if (files != null) {
// encoded filename/checksum of files, write into _files
Path ptnDataPath = new Path(evRoot, qlPtn.getName());
FileSystem fs = ptnDataPath.getFileSystem(conf);
Path filesPath = new Path(ptnDataPath, EximUtil.FILES_NAME);
BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
try {
for (String file : files) {
fileListWriter.write(file + "\n");
}
} finally {
fileListWriter.close();
}
}
}
(new DumpMetaData(evRoot, DUMPTYPE.EVENT_ADD_PARTITION, evid, evid, cmRoot)).write();
break;
}
case MessageFactory.DROP_TABLE_EVENT:
{
LOG.info("Processing#{} DROP_TABLE message : {}", ev.getEventId(), ev.getMessage());
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_TABLE, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
break;
}
case MessageFactory.DROP_PARTITION_EVENT:
{
LOG.info("Processing#{} DROP_PARTITION message : {}", ev.getEventId(), ev.getMessage());
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_PARTITION, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
break;
}
case MessageFactory.ALTER_TABLE_EVENT:
{
LOG.info("Processing#{} ALTER_TABLE message : {}", ev.getEventId(), ev.getMessage());
AlterTableMessage atm = md.getAlterTableMessage(ev.getMessage());
org.apache.hadoop.hive.metastore.api.Table tobjBefore = atm.getTableObjBefore();
org.apache.hadoop.hive.metastore.api.Table tobjAfter = atm.getTableObjAfter();
if (tobjBefore.getDbName().equals(tobjAfter.getDbName()) && tobjBefore.getTableName().equals(tobjAfter.getTableName())) {
// regular alter scenario
replicationSpec.setIsMetadataOnly(true);
Table qlMdTableAfter = new Table(tobjAfter);
Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTableAfter, null, replicationSpec);
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_TABLE, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
} else {
// rename scenario
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_TABLE, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
}
break;
}
case MessageFactory.ALTER_PARTITION_EVENT:
{
LOG.info("Processing#{} ALTER_PARTITION message : {}", ev.getEventId(), ev.getMessage());
AlterPartitionMessage apm = md.getAlterPartitionMessage(ev.getMessage());
org.apache.hadoop.hive.metastore.api.Table tblObj = apm.getTableObj();
org.apache.hadoop.hive.metastore.api.Partition pobjBefore = apm.getPtnObjBefore();
org.apache.hadoop.hive.metastore.api.Partition pobjAfter = apm.getPtnObjAfter();
boolean renameScenario = false;
Iterator<String> beforeValIter = pobjBefore.getValuesIterator();
Iterator<String> afterValIter = pobjAfter.getValuesIterator();
while (beforeValIter.hasNext()) {
if (!beforeValIter.next().equals(afterValIter.next())) {
renameScenario = true;
break;
}
}
if (!renameScenario) {
// regular partition alter
replicationSpec.setIsMetadataOnly(true);
Table qlMdTable = new Table(tblObj);
List<Partition> qlPtns = new ArrayList<Partition>();
qlPtns.add(new Partition(qlMdTable, pobjAfter));
Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_PARTITION, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
break;
} else {
// rename scenario
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_PARTITION, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
break;
}
}
case MessageFactory.INSERT_EVENT:
{
InsertMessage insertMsg = md.getInsertMessage(ev.getMessage());
String dbName = insertMsg.getDB();
String tblName = insertMsg.getTable();
org.apache.hadoop.hive.metastore.api.Table tobj = db.getMSC().getTable(dbName, tblName);
Table qlMdTable = new Table(tobj);
Map<String, String> partSpec = insertMsg.getPartitionKeyValues();
List<Partition> qlPtns = null;
if (qlMdTable.isPartitioned() && !partSpec.isEmpty()) {
qlPtns = Arrays.asList(db.getPartition(qlMdTable, partSpec, false));
}
Path metaDataPath = new Path(evRoot, EximUtil.METADATA_NAME);
// Mark the replication type as insert-into to avoid an overwrite during import
replicationSpec.setIsInsert(true);
EximUtil.createExportDump(metaDataPath.getFileSystem(conf), metaDataPath, qlMdTable, qlPtns, replicationSpec);
Iterable<String> files = insertMsg.getFiles();
if (files != null) {
// encoded filename/checksum of files, write into _files
Path dataPath = new Path(evRoot, EximUtil.DATA_PATH_NAME);
Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
FileSystem fs = dataPath.getFileSystem(conf);
BufferedWriter fileListWriter = new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)));
try {
for (String file : files) {
fileListWriter.write(file + "\n");
}
} finally {
fileListWriter.close();
}
}
LOG.info("Processing#{} INSERT message : {}", ev.getEventId(), ev.getMessage());
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_INSERT, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
break;
}
// TODO : handle other event types
default:
LOG.info("Dummy processing#{} message : {}", ev.getEventId(), ev.getMessage());
DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_UNKNOWN, evid, evid, cmRoot);
dmd.setPayload(ev.getMessage());
dmd.write();
break;
}
}
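The _files listing logic is written out inline three times in dumpEvent (CREATE_TABLE, each ADD_PARTITION partition, and INSERT). A hedged sketch of a shared helper that would cover all three call sites is shown below, using only calls that already appear above; the helper itself is not part of ReplicationSemanticAnalyzer.
// Hypothetical helper: write one encoded filename/checksum entry per line into <dataPath>/_files.
// Mirrors the inline blocks above; EximUtil.FILES_NAME and the entry format are unchanged,
// and conf is the analyzer's HiveConf field, as used in dumpEvent.
private void writeFilesList(Path dataPath, Iterable<String> files) throws IOException {
  if (files == null) {
    return; // nothing to record for this event
  }
  FileSystem fs = dataPath.getFileSystem(conf);
  Path filesPath = new Path(dataPath, EximUtil.FILES_NAME);
  // try-with-resources replaces the explicit try/finally close used in the snippets above
  try (BufferedWriter fileListWriter =
      new BufferedWriter(new OutputStreamWriter(fs.create(filesPath)))) {
    for (String file : files) {
      fileListWriter.write(file + "\n");
    }
  }
}
With such a helper, the CREATE_TABLE branch would reduce to writeFilesList(new Path(evRoot, "data"), ctm.getFiles()), and each ADD_PARTITION iteration to writeFilesList(new Path(evRoot, qlPtn.getName()), partitionFiles.getFiles()).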
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class TestDbTxnManager, method addPartitionInput.
private void addPartitionInput(Table t) throws Exception {
Map<String, String> partSpec = new HashMap<String, String>();
partSpec.put("version", Integer.toString(nextInput++));
Partition p = new Partition(t, partSpec, new Path("/dev/null"));
ReadEntity re = new ReadEntity(p);
readEntities.add(re);
}
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class TestDbTxnManager, method addPartitionOutput.
private WriteEntity addPartitionOutput(Table t, WriteEntity.WriteType writeType) throws Exception {
Map<String, String> partSpec = new HashMap<String, String>();
partSpec.put("version", Integer.toString(nextInput++));
Partition p = new Partition(t, partSpec, new Path("/dev/null"));
WriteEntity we = new WriteEntity(p, writeType);
writeEntities.add(we);
return we;
}
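Both test helpers construct an in-memory Partition directly from a Table, a partition spec, and a dummy /dev/null location, then wrap it in a read or write entity. A hypothetical usage from a test method is sketched below; newPartitionedTable() stands in for whatever table-construction helper the test class actually uses and is not taken from TestDbTxnManager.
// Hypothetical test exercising the two helpers above.
@Test
public void testReadAndWriteSamePartitionedTable() throws Exception {
  Table t = newPartitionedTable();  // assumed table-setup helper, not part of the real test class
  addPartitionInput(t);             // adds a ReadEntity for partition version=<nextInput>
  WriteEntity we = addPartitionOutput(t, WriteEntity.WriteType.INSERT);
  // Each helper bumps the shared nextInput counter, so the read and write entities
  // refer to two different "version" partitions of the same table.
  Assert.assertEquals(WriteEntity.WriteType.INSERT, we.getWriteType());
}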