use of java.util.TreeSet in project hbase by apache.
the class SnapshotTestingUtils method confirmSnapshotValid.
/**
 * Confirm that the snapshot contains references to all the files that should
 * be in the snapshot. This method also performs some redundant checks, such as
 * verifying the existence of the snapshot info and the region info, which are
 * always done by the MasterSnapshotVerifier at the end of the snapshot operation.
 */
public static void confirmSnapshotValid(HBaseProtos.SnapshotDescription snapshotDescriptor,
    TableName tableName, List<byte[]> nonEmptyTestFamilies, List<byte[]> emptyTestFamilies,
    Path rootDir, Admin admin, FileSystem fs) throws IOException {
  final Configuration conf = admin.getConfiguration();
  // check snapshot dir
  Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotDescriptor, rootDir);
  assertTrue(fs.exists(snapshotDir));
  HBaseProtos.SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
  // Extract regions and families with store files
  final Set<byte[]> snapshotFamilies = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, desc);
  Map<String, SnapshotRegionManifest> regionManifests = manifest.getRegionManifestsMap();
  for (SnapshotRegionManifest regionManifest : regionManifests.values()) {
    SnapshotReferenceUtil.visitRegionStoreFiles(regionManifest,
        new SnapshotReferenceUtil.StoreFileVisitor() {
      @Override
      public void storeFile(final HRegionInfo regionInfo, final String family,
          final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
        snapshotFamilies.add(Bytes.toBytes(family));
      }
    });
  }
  // Verify that there are store files in the specified families
  if (nonEmptyTestFamilies != null) {
    for (final byte[] familyName : nonEmptyTestFamilies) {
      assertTrue(snapshotFamilies.contains(familyName));
    }
  }
  // Verify that there are no store files in the specified families
  if (emptyTestFamilies != null) {
    for (final byte[] familyName : emptyTestFamilies) {
      assertFalse(snapshotFamilies.contains(familyName));
    }
  }
  // check the region snapshot for all the regions
  List<HRegionInfo> regions = admin.getTableRegions(tableName);
  // remove the non-default regions
  RegionReplicaUtil.removeNonDefaultRegions(regions);
  boolean hasMob = regionManifests.containsKey(MobUtils.getMobRegionInfo(tableName).getEncodedName());
  if (hasMob) {
    assertEquals(regions.size(), regionManifests.size() - 1);
  } else {
    assertEquals(regions.size(), regionManifests.size());
  }
  // Verify Regions (redundant check, see MasterSnapshotVerifier)
  for (HRegionInfo info : regions) {
    String regionName = info.getEncodedName();
    assertTrue(regionManifests.containsKey(regionName));
  }
}
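The snapshotFamilies set above is built with Bytes.BYTES_COMPARATOR because byte[] has no content-based equals and does not implement Comparable: without an explicit comparator, contains() would never match a freshly-allocated array and a plain TreeSet<byte[]> would throw ClassCastException on insertion. A minimal standalone sketch of the same idiom, using only java.util; the lexicographic comparator here merely stands in for HBase's Bytes.BYTES_COMPARATOR, and the class and family names are made up for illustration:

import java.util.Comparator;
import java.util.Set;
import java.util.TreeSet;

public class ByteArraySetSketch {
    // Unsigned, byte-by-byte lexicographic comparator (stand-in for Bytes.BYTES_COMPARATOR).
    static final Comparator<byte[]> LEXICOGRAPHIC = (a, b) -> {
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
            int cmp = Integer.compare(a[i] & 0xff, b[i] & 0xff);
            if (cmp != 0) return cmp;
        }
        return Integer.compare(a.length, b.length);
    };

    public static void main(String[] args) {
        Set<byte[]> families = new TreeSet<>(LEXICOGRAPHIC);
        families.add("cf1".getBytes());
        // Lookups go through the comparator, so containment is by content, not reference.
        System.out.println(families.contains("cf1".getBytes())); // true
    }
}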
use of java.util.TreeSet in project lucida by claritylab.
the class TextProcessor method parse.
/**
 * Returns the date range, as a two-element array of strings, obtained by
 * parsing the temporal expressions in text.
 *
 * @param text String representing the original query
 * @return a two-element array holding the start and end of the range;
 *         an element is null if that endpoint could not be determined
 */
public String[] parse(String text) {
  // The pipelines may produce the same results, so store results in a set.
  TreeSet<Temporal> has_seen = new TreeSet<Temporal>(new TemporalComparator());
  // Time is comparable, so add temporals of type TIME and DATE into a TreeSet to get
  // the minimum and maximum time, which define the range for event retrieval.
  TreeSet<Time> times = new TreeSet<Time>();
  for (AnnotationPipeline pipeline : pipelines) {
    Annotation annotation = new Annotation(text);
    annotation.set(CoreAnnotations.DocDateAnnotation.class,
        new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
    pipeline.annotate(annotation);
    List<CoreMap> timexAnnsAll = annotation.get(TimeAnnotations.TimexAnnotations.class);
    for (CoreMap cm : timexAnnsAll) {
      Temporal temporal = cm.get(TimeExpression.Annotation.class).getTemporal();
      if (has_seen.contains(temporal)) {
        continue;
      }
      has_seen.add(temporal);
      if (temporal.getTimexType().name().equals("TIME") || temporal.getTimexType().name().equals("DATE")) {
        if (temporal.getTime() != null) {
          try {
            times.add(temporal.getTime());
          } catch (NullPointerException e) {
            // Skip temporals whose underlying time cannot be ordered.
          }
        }
      }
    }
  }
  // Get the minimum and maximum time only if there are at least two Time objects in times.
  if (times.size() >= 2) {
    return new String[] { regexNormalize(Collections.min(times).toString(), 0),
        regexNormalize(Collections.max(times).toString(), 1) };
  }
  // Since the range couldn't be defined by times, define the range from has_seen.
  for (Temporal temporal : has_seen) {
    // Due to a bug (?) in CoreNLP, getRange() for "current week" will resolve to year 2015.
    // Thus, try parsing as a week before calling getRange().
    String[] try_parse_as_week = parseAsWeek(temporal.toString(), text);
    if (try_parse_as_week != null) {
      return try_parse_as_week;
    }
    if (isReadbleTime(temporal.getRange().toString())) {
      List<String> string_list = Arrays.asList(temporal.getRange().toString().split(","));
      String s1 = regexNormalize(string_list.get(0), 0);
      String s2 = regexNormalize(string_list.get(1), 1);
      // If both endpoints normalize to the same day, keep only the endpoint the query implies.
      if (s1.length() >= 10 && s2.length() >= 10 && s1.substring(0, 10).equals(s2.substring(0, 10))) {
        if (text.contains("from") || text.contains("start") || text.contains("begin")) {
          s2 = null;
        } else if (text.contains("until")) {
          s1 = null;
        }
      }
      return new String[] { s1, s2 };
    }
  }
  // No temporal expression was found by any pipeline.
  return new String[] { null, null };
}
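What makes both sets work here is that a TreeSet's comparator defines membership as well as ordering: has_seen deduplicates temporals that its TemporalComparator considers equal, and times keeps its elements sorted so the range endpoints fall out directly. A minimal sketch of the same pattern, assuming nothing beyond java.util; the Event type and its timestamp field are hypothetical stand-ins for CoreNLP's Temporal and Time:

import java.util.Comparator;
import java.util.TreeSet;

public class RangeSketch {
    // Hypothetical value type standing in for a temporal expression.
    record Event(String label, long timestamp) {}

    public static void main(String[] args) {
        // The comparator defines "sameness": events with equal timestamps deduplicate,
        // mirroring how has_seen uses TemporalComparator.
        TreeSet<Event> seen = new TreeSet<>(Comparator.comparingLong(Event::timestamp));
        seen.add(new Event("breakfast", 100));
        seen.add(new Event("brunch", 100)); // dropped: compares equal to "breakfast"
        seen.add(new Event("dinner", 900));
        // A TreeSet keeps elements sorted, so first()/last() give the range endpoints;
        // Collections.min/max on a sorted set reduce to the same thing.
        System.out.println(seen.first().label() + " .. " + seen.last().label());
    }
}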
use of java.util.TreeSet in project hive by apache.
the class HBaseUtils method hashStorageDescriptor.
/**
 * Produce a hash for the storage descriptor
 * @param sd storage descriptor to hash
 * @param md message digest to use to generate the hash
 * @return the hash as a byte array
 */
static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
  // Note all maps and lists have to be absolutely sorted. Otherwise we'll produce different
  // results for hashes based on the OS or JVM being used.
  md.reset();
  for (FieldSchema fs : sd.getCols()) {
    md.update(fs.getName().getBytes(ENCODING));
    md.update(fs.getType().getBytes(ENCODING));
    if (fs.getComment() != null) {
      md.update(fs.getComment().getBytes(ENCODING));
    }
  }
  if (sd.getInputFormat() != null) {
    md.update(sd.getInputFormat().getBytes(ENCODING));
  }
  if (sd.getOutputFormat() != null) {
    md.update(sd.getOutputFormat().getBytes(ENCODING));
  }
  md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
  md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING));
  if (sd.getSerdeInfo() != null) {
    SerDeInfo serde = sd.getSerdeInfo();
    if (serde.getName() != null) {
      md.update(serde.getName().getBytes(ENCODING));
    }
    if (serde.getSerializationLib() != null) {
      md.update(serde.getSerializationLib().getBytes(ENCODING));
    }
    if (serde.getParameters() != null) {
      SortedMap<String, String> params = new TreeMap<>(serde.getParameters());
      for (Map.Entry<String, String> param : params.entrySet()) {
        md.update(param.getKey().getBytes(ENCODING));
        md.update(param.getValue().getBytes(ENCODING));
      }
    }
  }
  if (sd.getBucketCols() != null) {
    SortedSet<String> bucketCols = new TreeSet<>(sd.getBucketCols());
    for (String bucket : bucketCols) {
      md.update(bucket.getBytes(ENCODING));
    }
  }
  if (sd.getSortCols() != null) {
    SortedSet<Order> orders = new TreeSet<>(sd.getSortCols());
    for (Order order : orders) {
      md.update(order.getCol().getBytes(ENCODING));
      md.update(Integer.toString(order.getOrder()).getBytes(ENCODING));
    }
  }
  if (sd.getSkewedInfo() != null) {
    SkewedInfo skewed = sd.getSkewedInfo();
    if (skewed.getSkewedColNames() != null) {
      SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames());
      for (String colname : colnames) {
        md.update(colname.getBytes(ENCODING));
      }
    }
    if (skewed.getSkewedColValues() != null) {
      SortedSet<String> sortedOuterList = new TreeSet<>();
      for (List<String> innerList : skewed.getSkewedColValues()) {
        SortedSet<String> sortedInnerList = new TreeSet<>(innerList);
        sortedOuterList.add(StringUtils.join(sortedInnerList, "."));
      }
      for (String colval : sortedOuterList) {
        md.update(colval.getBytes(ENCODING));
      }
    }
    if (skewed.getSkewedColValueLocationMaps() != null) {
      SortedMap<String, String> sortedMap = new TreeMap<>();
      for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) {
        SortedSet<String> sortedKey = new TreeSet<>(smap.getKey());
        sortedMap.put(StringUtils.join(sortedKey, "."), smap.getValue());
      }
      for (Map.Entry<String, String> e : sortedMap.entrySet()) {
        md.update(e.getKey().getBytes(ENCODING));
        md.update(e.getValue().getBytes(ENCODING));
      }
    }
  }
  return md.digest();
}
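Every map and list is copied into a TreeMap or TreeSet before being fed to the digest because HashMap and HashSet iteration order is unspecified, so the same logical content could otherwise hash differently across runs, JVMs, or operating systems. A minimal, self-contained sketch of that principle; the parameter names are made up for illustration:

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Map;
import java.util.TreeMap;

public class StableHashSketch {
    // Hash a map of parameters deterministically by iterating in sorted key order.
    static byte[] hashParams(Map<String, String> params) throws Exception {
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        // TreeMap fixes the iteration order regardless of the source map's implementation.
        for (Map.Entry<String, String> e : new TreeMap<>(params).entrySet()) {
            md.update(e.getKey().getBytes(StandardCharsets.UTF_8));
            md.update(e.getValue().getBytes(StandardCharsets.UTF_8));
        }
        return md.digest();
    }

    public static void main(String[] args) throws Exception {
        // Two maps with identical content hash identically, whatever their insertion order.
        Map<String, String> a = Map.of("serde.lib", "LazySimpleSerDe", "field.delim", ",");
        Map<String, String> b = Map.of("field.delim", ",", "serde.lib", "LazySimpleSerDe");
        System.out.println(java.util.Arrays.equals(hashParams(a), hashParams(b))); // true
    }
}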
use of java.util.TreeSet in project hive by apache.
the class SparkJobMonitor method getReport.
private String getReport(Map<String, SparkStageProgress> progressMap) {
  StringBuilder reportBuffer = new StringBuilder();
  SimpleDateFormat dt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
  String currentDate = dt.format(new Date());
  reportBuffer.append(currentDate + "\t");
  // Num of total and completed tasks
  int sumTotal = 0;
  int sumComplete = 0;
  SortedSet<String> keys = new TreeSet<String>(progressMap.keySet());
  for (String s : keys) {
    SparkStageProgress progress = progressMap.get(s);
    final int complete = progress.getSucceededTaskCount();
    final int total = progress.getTotalTaskCount();
    final int running = progress.getRunningTaskCount();
    final int failed = progress.getFailedTaskCount();
    sumTotal += total;
    sumComplete += complete;
    String stageName = "Stage-" + s;
    if (total <= 0) {
      reportBuffer.append(String.format("%s: -/-\t", stageName));
    } else {
      if (complete == total && !completed.contains(s)) {
        completed.add(s);
        if (!perfLogger.startTimeHasMethod(PerfLogger.SPARK_RUN_STAGE + s)) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_RUN_STAGE + s);
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_RUN_STAGE + s);
      }
      if (complete < total && (complete > 0 || running > 0 || failed > 0)) {
        /* stage is started, but not complete */
        if (!perfLogger.startTimeHasMethod(PerfLogger.SPARK_RUN_STAGE + s)) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_RUN_STAGE + s);
        }
        if (failed > 0) {
          reportBuffer.append(String.format("%s: %d(+%d,-%d)/%d\t", stageName, complete, running, failed, total));
        } else {
          reportBuffer.append(String.format("%s: %d(+%d)/%d\t", stageName, complete, running, total));
        }
      } else {
        /* stage is waiting for input/slots or complete */
        if (failed > 0) {
          /* tasks finished but some failed */
          reportBuffer.append(String.format("%s: %d(-%d)/%d Finished with failed tasks\t", stageName, complete, failed, total));
        } else {
          if (complete == total) {
            reportBuffer.append(String.format("%s: %d/%d Finished\t", stageName, complete, total));
          } else {
            reportBuffer.append(String.format("%s: %d/%d\t", stageName, complete, total));
          }
        }
      }
    }
  }
  if (SessionState.get() != null) {
    final float progress = (sumTotal == 0) ? 1.0f : (float) sumComplete / (float) sumTotal;
    SessionState.get().updateProgressedPercentage(progress);
  }
  return reportBuffer.toString();
}
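One property of the TreeSet<String> built over progressMap.keySet() is worth noting: the report lines come out in lexicographic order, so if the stage ids happen to be numeric strings, "Stage-10" prints before "Stage-2". A small sketch of the difference, with hypothetical stage ids (the numeric comparator assumes ids are purely numeric):

import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;

public class StageOrderSketch {
    public static void main(String[] args) {
        // Natural String ordering is lexicographic, so "10" sorts before "2".
        TreeSet<String> lexical = new TreeSet<>(List.of("2", "10", "1"));
        System.out.println(lexical); // [1, 10, 2]

        // A numeric comparator restores the expected order for purely numeric ids.
        TreeSet<String> numeric = new TreeSet<>(Comparator.comparingInt(Integer::parseInt));
        numeric.addAll(List.of("2", "10", "1"));
        System.out.println(numeric); // [1, 2, 10]
    }
}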
use of java.util.TreeSet in project hbase by apache.
the class TestRollingRestart method getDoubleAssignedRegions.
private NavigableSet<String> getDoubleAssignedRegions(MiniHBaseCluster cluster) throws IOException {
  NavigableSet<String> online = new TreeSet<>();
  NavigableSet<String> doubled = new TreeSet<>();
  for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
    for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer().getRSRpcServices())) {
      if (!online.add(region.getRegionNameAsString())) {
        doubled.add(region.getRegionNameAsString());
      }
    }
  }
  return doubled;
}
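The trick here is that Set.add returns false when the element is already present, which turns the online set into a one-pass duplicate detector across all region servers. The same idiom in miniature; the sample region names are made up:

import java.util.NavigableSet;
import java.util.TreeSet;

public class DuplicateFinderSketch {
    public static void main(String[] args) {
        String[] observed = { "region-a", "region-b", "region-a" };
        NavigableSet<String> online = new TreeSet<>();
        NavigableSet<String> doubled = new TreeSet<>();
        for (String name : observed) {
            // add() returns false if the name was already seen, i.e. a double assignment.
            if (!online.add(name)) {
                doubled.add(name);
            }
        }
        System.out.println(doubled); // [region-a]
    }
}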
Aggregations