use of java.util.concurrent.ConcurrentHashMap in project hive by apache.
the class Utilities method getInputSummary.
/**
* Calculate the total size of input files.
*
* @param ctx
* the hadoop job context
* @param work
* map reduce job plan
* @param filter
* filter to apply to the input paths before calculating size
* @return the summary of all the input paths.
* @throws IOException
*/
public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter) throws IOException {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
long[] summary = { 0, 0, 0 };
final Set<Path> pathNeedProcess = new HashSet<>();
// this method will avoid number of threads out of control.
synchronized (INPUT_SUMMARY_LOCK) {
// For each input path, calculate the total size.
for (Path path : work.getPathToAliases().keySet()) {
Path p = path;
if (filter != null && !filter.accept(p)) {
continue;
}
ContentSummary cs = ctx.getCS(path);
if (cs == null) {
if (path == null) {
continue;
}
pathNeedProcess.add(path);
} else {
summary[0] += cs.getLength();
summary[1] += cs.getFileCount();
summary[2] += cs.getDirectoryCount();
}
}
// Process the case when name node call is needed
final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
ArrayList<Future<?>> results = new ArrayList<Future<?>>();
final ExecutorService executor;
int numExecutors = getMaxExecutorsForInputListing(ctx.getConf(), pathNeedProcess.size());
if (numExecutors > 1) {
LOG.info("Using " + numExecutors + " threads for getContentSummary");
executor = Executors.newFixedThreadPool(numExecutors, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Summary-%d").build());
} else {
executor = null;
}
HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {
@Override
public void interrupt() {
for (Path path : pathNeedProcess) {
try {
path.getFileSystem(ctx.getConf()).close();
} catch (IOException ignore) {
LOG.debug("Failed to close filesystem", ignore);
}
}
if (executor != null) {
executor.shutdownNow();
}
}
});
try {
Configuration conf = ctx.getConf();
JobConf jobConf = new JobConf(conf);
for (Path path : pathNeedProcess) {
final Path p = path;
final String pathStr = path.toString();
// All threads share the same Configuration and JobConf based on the
// assumption that they are thread safe if only read operations are
// executed. It is not stated in Hadoop's javadoc, the sourcce codes
// clearly showed that they made efforts for it and we believe it is
// thread safe. Will revisit this piece of codes if we find the assumption
// is not correct.
final Configuration myConf = conf;
final JobConf myJobConf = jobConf;
final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
final Map<Path, ArrayList<String>> pathToAlias = work.getPathToAliases();
final PartitionDesc partDesc = work.getPathToPartitionInfo().get(p);
Runnable r = new Runnable() {
@Override
public void run() {
try {
Class<? extends InputFormat> inputFormatCls = partDesc.getInputFileFormatClass();
InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(inputFormatCls, myJobConf);
if (inputFormatObj instanceof ContentSummaryInputFormat) {
ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
return;
}
String metaTableStorage = null;
if (partDesc.getTableDesc() != null && partDesc.getTableDesc().getProperties() != null) {
metaTableStorage = partDesc.getTableDesc().getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, null);
}
if (partDesc.getProperties() != null) {
metaTableStorage = partDesc.getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, metaTableStorage);
}
HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, metaTableStorage);
if (handler instanceof InputEstimator) {
long total = 0;
TableDesc tableDesc = partDesc.getTableDesc();
InputEstimator estimator = (InputEstimator) handler;
for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
JobConf jobConf = new JobConf(myJobConf);
TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
Utilities.setColumnNameList(jobConf, scanOp, true);
Utilities.setColumnTypeList(jobConf, scanOp, true);
PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
total += estimator.estimate(jobConf, scanOp, -1).getTotalLength();
}
resultMap.put(pathStr, new ContentSummary(total, -1, -1));
} else {
// todo: should nullify summary for non-native tables,
// not to be selected as a mapjoin target
FileSystem fs = p.getFileSystem(myConf);
resultMap.put(pathStr, fs.getContentSummary(p));
}
} catch (Exception e) {
// We safely ignore this exception for summary data.
// We don't update the cache to protect it from polluting other
// usages. The worst case is that IOException will always be
// retried for another getInputSummary(), which is fine as
// IOException is not considered as a common case.
LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
}
}
};
if (executor == null) {
r.run();
} else {
Future<?> result = executor.submit(r);
results.add(result);
}
}
if (executor != null) {
for (Future<?> result : results) {
boolean executorDone = false;
do {
try {
result.get();
executorDone = true;
} catch (InterruptedException e) {
LOG.info("Interrupted when waiting threads: ", e);
Thread.currentThread().interrupt();
break;
} catch (ExecutionException e) {
throw new IOException(e);
}
} while (!executorDone);
}
executor.shutdown();
}
HiveInterruptUtils.checkInterrupted();
for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
ContentSummary cs = entry.getValue();
summary[0] += cs.getLength();
summary[1] += cs.getFileCount();
summary[2] += cs.getDirectoryCount();
ctx.addCS(entry.getKey(), cs);
LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength() + " file count: " + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
return new ContentSummary(summary[0], summary[1], summary[2]);
} finally {
HiveInterruptUtils.remove(interrup);
}
}
}
use of java.util.concurrent.ConcurrentHashMap in project hive by apache.
the class StatsTask method aggregateStats.
private int aggregateStats(Hive db) {
StatsAggregator statsAggregator = null;
int ret = 0;
StatsCollectionContext scc = null;
EnvironmentContext environmentContext = null;
try {
// Stats setup:
final Warehouse wh = new Warehouse(conf);
if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
try {
scc = getContext();
statsAggregator = createStatsAggregator(scc, conf);
} catch (HiveException e) {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw e;
}
console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
}
}
List<Partition> partitions = getPartitionsList(db);
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
String tableFullName = table.getDbName() + "." + table.getTableName();
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
// acidTable will not have accurate stats unless it is set through analyze command.
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// non-partitioned tables:
if (!existStats(parameters) && atomic) {
return 0;
}
// For eg. if a file is being loaded, the old number of rows are not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(wh, parameters, tTable.getSd());
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, null);
updateStats(statsAggregator, parameters, prefix, atomic);
}
// write table stats to metastore
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
}
LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
} else {
// Partitioned table:
// Need to get the old stats of the partition
// and update the table stats based on the old and new stats.
List<Partition> updates = new ArrayList<Partition>();
//Get the file status up-front for all partitions. Beneficial in cases of blob storage systems
final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
// In case thread count is set to 0, use single thread.
poolSize = Math.max(poolSize, 1);
final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
final List<Future<Void>> futures = Lists.newLinkedList();
LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
try {
for (final Partition partn : partitions) {
final String partitionName = partn.getName();
final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (!existStats(parameters) && atomic) {
continue;
}
futures.add(pool.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
fileStatusMap.put(partitionName, partfileStatus);
return null;
}
}));
}
pool.shutdown();
for (Future<Void> future : futures) {
future.get();
}
} catch (InterruptedException e) {
LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
//cancel other futures
for (Future future : futures) {
future.cancel(true);
}
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (pool != null) {
pool.shutdownNow();
}
LOG.debug("Finished getting file stats of all partitions");
}
for (Partition partn : partitions) {
//
// get the old partition stats
//
org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
//only when the stats exist, it is added to fileStatusMap
if (!fileStatusMap.containsKey(partn.getName())) {
continue;
}
// For eg. if a file is being loaded, the old number of rows are not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, partn);
updateStats(statsAggregator, parameters, prefix, atomic);
}
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
updates.add(new Partition(table, tPart));
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
if (!updates.isEmpty()) {
db.alterPartitions(tableFullName, updates, environmentContext);
}
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (statsAggregator != null) {
statsAggregator.closeConnection(scc);
}
}
// anything else indicates failure
return ret;
}
use of java.util.concurrent.ConcurrentHashMap in project tomcat by apache.
the class DeltaSession method doReadObject.
private void doReadObject(ObjectInput stream) throws ClassNotFoundException, IOException {
// Deserialize the scalar instance variables (except Manager)
// Transient only
authType = null;
creationTime = ((Long) stream.readObject()).longValue();
lastAccessedTime = ((Long) stream.readObject()).longValue();
maxInactiveInterval = ((Integer) stream.readObject()).intValue();
isNew = ((Boolean) stream.readObject()).booleanValue();
isValid = ((Boolean) stream.readObject()).booleanValue();
thisAccessedTime = ((Long) stream.readObject()).longValue();
version = ((Long) stream.readObject()).longValue();
boolean hasPrincipal = stream.readBoolean();
principal = null;
if (hasPrincipal) {
principal = (Principal) stream.readObject();
}
// setId((String) stream.readObject());
id = (String) stream.readObject();
if (log.isDebugEnabled())
log.debug(sm.getString("deltaSession.readSession", id));
// Deserialize the attribute count and attribute values
if (attributes == null)
attributes = new ConcurrentHashMap<>();
int n = ((Integer) stream.readObject()).intValue();
boolean isValidSave = isValid;
isValid = true;
for (int i = 0; i < n; i++) {
String name = (String) stream.readObject();
final Object value;
try {
value = stream.readObject();
} catch (WriteAbortedException wae) {
if (wae.getCause() instanceof NotSerializableException) {
// Skip non serializable attributes
continue;
}
throw wae;
}
// the web application was stopped.
if (exclude(name, value)) {
continue;
}
attributes.put(name, value);
}
isValid = isValidSave;
// Session listeners
n = ((Integer) stream.readObject()).intValue();
if (listeners == null || n > 0) {
listeners = new ArrayList<>();
}
for (int i = 0; i < n; i++) {
SessionListener listener = (SessionListener) stream.readObject();
listeners.add(listener);
}
if (notes == null) {
notes = new Hashtable<>();
}
activate();
}
use of java.util.concurrent.ConcurrentHashMap in project storm by apache.
the class PacemakerTest method testServerGetPulse.
@Test
public void testServerGetPulse() {
Pacemaker handler = new Pacemaker(new ConcurrentHashMap());
makeNode(handler, "/some-root/GET_PULSE");
messageWithRandId(HBServerMessageType.GET_PULSE, HBMessageData.path("/some-root/GET_PULSE"));
HBMessage badResponse = handler.handleMessage(hbMessage, false);
HBMessage goodResponse = handler.handleMessage(hbMessage, true);
HBPulse goodPulse = goodResponse.get_data().get_pulse();
Assert.assertEquals(mid, badResponse.get_message_id());
Assert.assertEquals(HBServerMessageType.NOT_AUTHORIZED, badResponse.get_type());
Assert.assertNull(badResponse.get_data());
Assert.assertEquals(mid, goodResponse.get_message_id());
Assert.assertEquals(HBServerMessageType.GET_PULSE_RESPONSE, goodResponse.get_type());
Assert.assertEquals("/some-root/GET_PULSE", goodPulse.get_id());
Assert.assertEquals("nothing", Utils.javaDeserialize(goodPulse.get_details(), String.class));
}
use of java.util.concurrent.ConcurrentHashMap in project storm by apache.
the class PacemakerTest method testServerGetAllNodesForPath.
@Test
public void testServerGetAllNodesForPath() {
Pacemaker handler = new Pacemaker(new ConcurrentHashMap());
makeNode(handler, "/some-root-path/foo");
makeNode(handler, "/some-root-path/bar");
makeNode(handler, "/some-root-path/baz");
makeNode(handler, "/some-root-path/boo");
messageWithRandId(HBServerMessageType.GET_ALL_NODES_FOR_PATH, HBMessageData.path("/some-root-path"));
HBMessage badResponse = handler.handleMessage(hbMessage, false);
HBMessage goodResponse = handler.handleMessage(hbMessage, true);
List<String> pulseIds = goodResponse.get_data().get_nodes().get_pulseIds();
Assert.assertEquals(mid, badResponse.get_message_id());
Assert.assertEquals(HBServerMessageType.NOT_AUTHORIZED, badResponse.get_type());
Assert.assertEquals(mid, goodResponse.get_message_id());
Assert.assertEquals(HBServerMessageType.GET_ALL_NODES_FOR_PATH_RESPONSE, goodResponse.get_type());
Assert.assertTrue(pulseIds.contains("foo"));
Assert.assertTrue(pulseIds.contains("bar"));
Assert.assertTrue(pulseIds.contains("baz"));
Assert.assertTrue(pulseIds.contains("boo"));
makeNode(handler, "/some/deeper/path/foo");
makeNode(handler, "/some/deeper/path/bar");
makeNode(handler, "/some/deeper/path/baz");
messageWithRandId(HBServerMessageType.GET_ALL_NODES_FOR_PATH, HBMessageData.path("/some/deeper/path"));
badResponse = handler.handleMessage(hbMessage, false);
goodResponse = handler.handleMessage(hbMessage, true);
pulseIds = goodResponse.get_data().get_nodes().get_pulseIds();
Assert.assertEquals(mid, badResponse.get_message_id());
Assert.assertEquals(HBServerMessageType.NOT_AUTHORIZED, badResponse.get_type());
Assert.assertEquals(mid, goodResponse.get_message_id());
Assert.assertEquals(HBServerMessageType.GET_ALL_NODES_FOR_PATH_RESPONSE, goodResponse.get_type());
Assert.assertTrue(pulseIds.contains("foo"));
Assert.assertTrue(pulseIds.contains("bar"));
Assert.assertTrue(pulseIds.contains("baz"));
}
Aggregations