Use of org.apache.hadoop.hive.ql.parse.repl.dump.io.FileOperations in project hive by apache.
The class PartitionExport, method write:
void write(final ReplicationSpec forReplicationSpec) throws InterruptedException {
    ExecutorService producer = Executors.newFixedThreadPool(1);
    producer.submit(() -> {
        for (Partition partition : partitionIterable) {
            try {
                queue.put(partition);
            } catch (InterruptedException e) {
                throw new RuntimeException("Error while queuing up the partitions for export of data files", e);
            }
        }
    });
    producer.shutdown();
    ThreadFactory namingThreadFactory = new ThreadFactoryBuilder().setNameFormat("partition-dump-thread-%d").build();
    ExecutorService consumer = Executors.newFixedThreadPool(nThreads, namingThreadFactory);
    while (!producer.isTerminated() || !queue.isEmpty()) {
        /*
         * Poll with a timeout because the partitions iterator can be empty: the producer and
         * consumer are started simultaneously, so this loop runs while the producer has not yet
         * terminated even though it may never enqueue anything. In that case the queue stays
         * empty, and we only want to wait a bounded time before re-checking the loop condition.
         */
        Partition partition = queue.poll(1, TimeUnit.SECONDS);
        if (partition == null) {
            continue;
        }
        LOG.debug("scheduling partition dump {}", partition.getName());
        consumer.submit(() -> {
            String partitionName = partition.getName();
            String threadName = Thread.currentThread().getName();
            LOG.debug("Thread: {}, start partition dump {}", threadName, partitionName);
            Path fromPath = partition.getDataLocation();
            try {
                // this is the data copy
                Path rootDataDumpDir = paths.partitionExportDir(partitionName);
                new FileOperations(fromPath, rootDataDumpDir, distCpDoAsUser, hiveConf).export(forReplicationSpec);
                LOG.debug("Thread: {}, finish partition dump {}", threadName, partitionName);
            } catch (Exception e) {
                throw new RuntimeException("Error while exporting data files", e);
            }
        });
    }
    consumer.shutdown();
    // maybe drive this timeout via configuration as well.
    consumer.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
}
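The write method above is a producer-consumer pattern: a single producer thread feeds partitions into a blocking queue while a fixed pool of nThreads consumers drains it, running one FileOperations export per partition. Below is a minimal, self-contained sketch of the same pattern; the names items, workers, and the println body are illustrative stand-ins for Hive's partition iterator and export call, not part of the Hive source.

import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ProducerConsumerSketch {
    public static void main(String[] args) throws InterruptedException {
        List<String> items = List.of("p=2018-01-01", "p=2018-01-02", "p=2018-01-03");
        int workers = 2;
        BlockingQueue<String> queue = new ArrayBlockingQueue<>(2);

        // Single producer fills the queue; put() blocks once the queue is full,
        // which throttles the producer to the consumers' pace.
        ExecutorService producer = Executors.newFixedThreadPool(1);
        producer.submit(() -> {
            for (String item : items) {
                try {
                    queue.put(item);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
            }
        });
        producer.shutdown();

        ExecutorService consumer = Executors.newFixedThreadPool(workers);
        // Same termination condition as PartitionExport.write: keep draining until the
        // producer has terminated AND the queue is empty; poll with a timeout so an
        // already-finished producer does not leave us blocked on an empty queue.
        while (!producer.isTerminated() || !queue.isEmpty()) {
            String item = queue.poll(1, TimeUnit.SECONDS);
            if (item == null) {
                continue;
            }
            consumer.submit(() ->
                System.out.println(Thread.currentThread().getName() + " exported " + item));
        }
        consumer.shutdown();
        consumer.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
    }
}

The real method additionally names its consumer threads via Guava's ThreadFactoryBuilder ("partition-dump-thread-%d"), which this sketch omits for brevity.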
Use of org.apache.hadoop.hive.ql.parse.repl.dump.io.FileOperations in project hive by apache.
The class TableExport, method writeData:
private void writeData(PartitionIterable partitions) throws SemanticException {
    try {
        if (tableSpec.tableHandle.isPartitioned()) {
            if (partitions == null) {
                throw new IllegalStateException("partitions cannot be null for partitioned table: " + tableSpec.tableName);
            }
            new PartitionExport(paths, partitions, distCpDoAsUser, conf).write(replicationSpec);
        } else {
            Path fromPath = tableSpec.tableHandle.getDataLocation();
            // this is the data copy
            new FileOperations(fromPath, paths.dataExportDir(), distCpDoAsUser, conf).export(replicationSpec);
        }
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
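Both call sites reduce to the same FileOperations call shape: construct it with the source data location, the export destination directory, the doAs user for distcp, and the Hive configuration, then invoke export with the replication spec. A hedged sketch of that shape follows; the concrete paths and the "hive" user are illustrative assumptions, not values from the Hive source.

// Sketch only: paths, user, and spec below are illustrative assumptions.
Path fromPath = new Path("/warehouse/sales.db/orders");   // source table or partition data
Path exportDir = new Path("/tmp/repl/dump/data");         // destination under the dump root
String distCpDoAsUser = "hive";                           // user distcp should impersonate
// conf and replicationSpec are assumed to come from the surrounding export context,
// as they do in PartitionExport.write and TableExport.writeData above.
new FileOperations(fromPath, exportDir, distCpDoAsUser, conf).export(replicationSpec);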