Search in sources :

Example 11 with TreeSet

use of java.util.TreeSet in project hbase by apache.

the class SnapshotTestingUtils method confirmSnapshotValid.

/**
 * Checks that a completed snapshot references every file it is expected to contain.
 * <p>
 * Some of the checks made here (presence of the snapshot info and of the per-region
 * manifests) are redundant: the MasterSnapshotVerifier always performs them at the end of
 * the snapshot operation.
 *
 * @param snapshotDescriptor descriptor used to locate the completed snapshot directory
 * @param tableName table whose regions are compared against the snapshot's region manifests
 * @param nonEmptyTestFamilies families that must own at least one store file in the
 *          snapshot; {@code null} skips this check
 * @param emptyTestFamilies families that must own no store file in the snapshot;
 *          {@code null} skips this check
 * @param rootDir root directory under which the completed snapshot directory is resolved
 * @param admin source of the configuration and of the table's region list
 * @param fs file system the snapshot is read from
 * @throws IOException declared by the snapshot-reading and region-listing calls
 */
public static void confirmSnapshotValid(HBaseProtos.SnapshotDescription snapshotDescriptor, TableName tableName, List<byte[]> nonEmptyTestFamilies, List<byte[]> emptyTestFamilies, Path rootDir, Admin admin, FileSystem fs) throws IOException {
    final Configuration conf = admin.getConfiguration();

    // The completed snapshot directory has to exist before anything is read from it.
    Path completedDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotDescriptor, rootDir);
    assertTrue(fs.exists(completedDir));
    HBaseProtos.SnapshotDescription storedDescription = SnapshotDescriptionUtils.readSnapshotInfo(fs, completedDir);

    // Gather every family that has a store file in any region manifest. The set is ordered
    // with Bytes.BYTES_COMPARATOR so that lookups by byte[] use that comparator.
    final Set<byte[]> familiesWithFiles = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, completedDir, storedDescription);
    Map<String, SnapshotRegionManifest> manifestsByRegion = manifest.getRegionManifestsMap();
    SnapshotReferenceUtil.StoreFileVisitor familyCollector = new SnapshotReferenceUtil.StoreFileVisitor() {

        @Override
        public void storeFile(final HRegionInfo regionInfo, final String family, final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
            familiesWithFiles.add(Bytes.toBytes(family));
        }
    };
    for (SnapshotRegionManifest regionManifest : manifestsByRegion.values()) {
        SnapshotReferenceUtil.visitRegionStoreFiles(regionManifest, familyCollector);
    }

    // Families expected to hold data must show up among the collected families.
    if (nonEmptyTestFamilies != null) {
        for (final byte[] expectedFamily : nonEmptyTestFamilies) {
            assertTrue(familiesWithFiles.contains(expectedFamily));
        }
    }
    // Families expected to be empty must not show up.
    if (emptyTestFamilies != null) {
        for (final byte[] unexpectedFamily : emptyTestFamilies) {
            assertFalse(familiesWithFiles.contains(unexpectedFamily));
        }
    }

    // Compare the table's default-replica regions with the region manifests.
    List<HRegionInfo> defaultRegions = admin.getTableRegions(tableName);
    RegionReplicaUtil.removeNonDefaultRegions(defaultRegions);
    // When the manifest map has an entry for the table's MOB region, that entry is left out
    // of the count comparison.
    String mobRegionName = MobUtils.getMobRegionInfo(tableName).getEncodedName();
    int comparableManifestCount = manifestsByRegion.containsKey(mobRegionName) ? manifestsByRegion.size() - 1 : manifestsByRegion.size();
    assertEquals(defaultRegions.size(), comparableManifestCount);

    // Every region must have a manifest (redundant check, see MasterSnapshotVerifier).
    for (HRegionInfo region : defaultRegions) {
        assertTrue(manifestsByRegion.containsKey(region.getEncodedName()));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SnapshotRegionManifest(org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest) IOException(java.io.IOException) HBaseProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TreeSet(java.util.TreeSet)

Example 12 with TreeSet

use of java.util.TreeSet in project lucida by claritylab.

the class TextProcessor method parse.

/**
 * Returns the date range expressed by a query.
 * <p>
 * The text is annotated by each pipeline in {@code pieplines}. If at least two distinct
 * TIME/DATE values are found, the earliest and the latest of them bound the range. Otherwise
 * the range is derived from the first recognised temporal expression that yields one.
 *
 * @param text String representing the original query
 * @return an array holding the start and the end of the range, in that order; either element
 *         may be {@code null}, and both are {@code null} when no temporal expression yields
 *         a range
 */
public String[] parse(String text) {
    // The pipelines may produce the same results, so store results in a set.
    TreeSet<Temporal> has_seen = new TreeSet<Temporal>(new TemporalComparator());
    // Time is comparable, so temporals of type TIME and DATE go into a sorted set whose first
    // and last elements define the range for event retrieval.
    TreeSet<Time> times = new TreeSet<Time>();
    for (AnnotationPipeline pipeline : pieplines) {
        Annotation annotation = new Annotation(text);
        annotation.set(CoreAnnotations.DocDateAnnotation.class, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
        pipeline.annotate(annotation);
        List<CoreMap> timexAnnsAll = annotation.get(TimeAnnotations.TimexAnnotations.class);
        if (timexAnnsAll == null) {
            // This pipeline attached no list of time expressions; nothing to collect from it.
            continue;
        }
        for (CoreMap cm : timexAnnsAll) {
            Temporal temporal = cm.get(TimeExpression.Annotation.class).getTemporal();
            // Resolve the time once and reuse it below.
            Time time = temporal.getTime();
            // add() reports whether the temporal was new, so no separate contains() lookup.
            if (!has_seen.add(temporal)) {
                continue;
            }
            String timexType = temporal.getTimexType().name();
            if (time != null && ("TIME".equals(timexType) || "DATE".equals(timexType))) {
                try {
                    times.add(time);
                } catch (NullPointerException ignored) {
                    // Deliberate best effort kept from the original code: a time that cannot be
                    // inserted into the sorted set is left out of the min/max computation.
                    // NOTE(review): presumably comparing some Time values fails -- confirm.
                }
            }
        }
    }
    // Use the minimum and maximum time only if there are at least two Time objects in times.
    if (times.size() >= 2) {
        return new String[] { regexNormalize(times.first().toString(), 0), regexNormalize(times.last().toString(), 1) };
    }
    // Since the range couldn't be defined by times, define the range from has_seen.
    for (Temporal temporal : has_seen) {
        // Due to a bug (?) in coreNLP, getRange() for "current week" will result in year 2015.
        // Thus, try parsing as week before getRange().
        String[] try_parse_as_week = parseAsWeek(temporal.toString(), text);
        if (try_parse_as_week != null) {
            return try_parse_as_week;
        }
        if (temporal.getRange() == null) {
            // NOTE(review): assumes getRange() may return null for some temporals; such a
            // temporal cannot define a range, so move on to the next one.
            continue;
        }
        String range_text = temporal.getRange().toString();
        if (isReadbleTime(range_text)) {
            String[] range_parts = range_text.split(",");
            String s1 = regexNormalize(range_parts[0], 0);
            String s2 = regexNormalize(range_parts[1], 1);
            // When both ends fall on the same day (same first 10 characters), wording in the
            // query decides which end stays open.
            if (s1.length() >= 10 && s2.length() >= 10 && s1.substring(0, 10).equals(s2.substring(0, 10))) {
                if (text.contains("from") || text.contains("start") || text.contains("begin")) {
                    s2 = null;
                } else if (text.contains("until")) {
                    s1 = null;
                }
            }
            return new String[] { s1, s2 };
        }
    }
    // No temporal expression is found by any pipeline.
    return new String[] { null, null };
}
Also used : Time(edu.stanford.nlp.time.SUTime.Time) Date(java.util.Date) Temporal(edu.stanford.nlp.time.SUTime.Temporal) TreeSet(java.util.TreeSet) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SimpleDateFormat(java.text.SimpleDateFormat) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 13 with TreeSet

use of java.util.TreeSet in project hive by apache.

the class HBaseUtils method hashStorageDescriptor.

/**
 * Produces a hash over the contents of a storage descriptor.
 * <p>
 * Columns are fed to the digest in the order returned by {@code sd.getCols()}. Every other
 * collection is first copied into a sorted set or map, because an unsorted traversal could
 * yield different hashes depending on the OS or JVM being used.
 *
 * @param sd storage descriptor to hash
 * @param md message digest used to generate the hash; it is reset before any data is added
 * @return the hash as a byte array
 */
static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
    md.reset();

    // Columns: name, type and (when present) comment.
    for (FieldSchema column : sd.getCols()) {
        md.update(column.getName().getBytes(ENCODING));
        md.update(column.getType().getBytes(ENCODING));
        if (column.getComment() != null) {
            md.update(column.getComment().getBytes(ENCODING));
        }
    }

    // Scalar attributes.
    if (sd.getInputFormat() != null) {
        md.update(sd.getInputFormat().getBytes(ENCODING));
    }
    if (sd.getOutputFormat() != null) {
        md.update(sd.getOutputFormat().getBytes(ENCODING));
    }
    md.update((sd.isCompressed() ? "true" : "false").getBytes(ENCODING));
    md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING));

    // Serde: name, serialization library, then parameters in key order.
    SerDeInfo serdeInfo = sd.getSerdeInfo();
    if (serdeInfo != null) {
        if (serdeInfo.getName() != null) {
            md.update(serdeInfo.getName().getBytes(ENCODING));
        }
        if (serdeInfo.getSerializationLib() != null) {
            md.update(serdeInfo.getSerializationLib().getBytes(ENCODING));
        }
        if (serdeInfo.getParameters() != null) {
            SortedMap<String, String> sortedParams = new TreeMap<>(serdeInfo.getParameters());
            for (Map.Entry<String, String> entry : sortedParams.entrySet()) {
                md.update(entry.getKey().getBytes(ENCODING));
                md.update(entry.getValue().getBytes(ENCODING));
            }
        }
    }

    // Bucket columns, sorted.
    if (sd.getBucketCols() != null) {
        SortedSet<String> sortedBuckets = new TreeSet<>(sd.getBucketCols());
        for (String bucketCol : sortedBuckets) {
            md.update(bucketCol.getBytes(ENCODING));
        }
    }

    // Sort columns, sorted by the natural ordering of Order.
    if (sd.getSortCols() != null) {
        SortedSet<Order> sortedOrders = new TreeSet<>(sd.getSortCols());
        for (Order sortCol : sortedOrders) {
            md.update(sortCol.getCol().getBytes(ENCODING));
            md.update(Integer.toString(sortCol.getOrder()).getBytes(ENCODING));
        }
    }

    // Skew information: column names, column values, value-to-location map.
    SkewedInfo skewedInfo = sd.getSkewedInfo();
    if (skewedInfo != null) {
        if (skewedInfo.getSkewedColNames() != null) {
            SortedSet<String> sortedNames = new TreeSet<>(skewedInfo.getSkewedColNames());
            for (String skewedName : sortedNames) {
                md.update(skewedName.getBytes(ENCODING));
            }
        }
        if (skewedInfo.getSkewedColValues() != null) {
            // Each inner list is sorted and joined with "." and the joined strings are then
            // sorted themselves.
            SortedSet<String> joinedValues = new TreeSet<>();
            for (List<String> valueList : skewedInfo.getSkewedColValues()) {
                SortedSet<String> sortedValues = new TreeSet<>(valueList);
                joinedValues.add(StringUtils.join(sortedValues, "."));
            }
            for (String joined : joinedValues) {
                md.update(joined.getBytes(ENCODING));
            }
        }
        if (skewedInfo.getSkewedColValueLocationMaps() != null) {
            // Keys get the same sort-and-join treatment so the map can be walked in order.
            SortedMap<String, String> locationsByJoinedKey = new TreeMap<>();
            for (Map.Entry<List<String>, String> location : skewedInfo.getSkewedColValueLocationMaps().entrySet()) {
                SortedSet<String> sortedKeyParts = new TreeSet<>(location.getKey());
                locationsByJoinedKey.put(StringUtils.join(sortedKeyParts, "."), location.getValue());
            }
            for (Map.Entry<String, String> entry : locationsByJoinedKey.entrySet()) {
                md.update(entry.getKey().getBytes(ENCODING));
                md.update(entry.getValue().getBytes(ENCODING));
            }
        }
    }

    return md.digest();
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ByteString(com.google.protobuf.ByteString) TreeMap(java.util.TreeMap) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) TreeSet(java.util.TreeSet) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 14 with TreeSet

use of java.util.TreeSet in project hive by apache.

the class SparkJobMonitor method getReport.

/**
 * Builds a single tab-separated progress line: a timestamp followed by one entry per stage,
 * with stages visited in the sorted order of their keys.
 * <p>
 * Besides building the string, this method records newly finished stages in
 * {@code completed}, begins/ends the per-stage perf-log entries, and, when a session state
 * is available, pushes the overall completion ratio to it.
 *
 * @param progressMap progress of each stage, keyed by the string appended to "Stage-"
 * @return the formatted report line
 */
private String getReport(Map<String, SparkStageProgress> progressMap) {
    final String timestamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS").format(new Date());
    final StringBuilder report = new StringBuilder();
    report.append(timestamp).append("\t");

    // Running totals across all stages, used for the overall percentage.
    int totalTasks = 0;
    int succeededTasks = 0;

    SortedSet<String> stageIds = new TreeSet<String>(progressMap.keySet());
    for (String stageId : stageIds) {
        SparkStageProgress stage = progressMap.get(stageId);
        final int complete = stage.getSucceededTaskCount();
        final int total = stage.getTotalTaskCount();
        final int running = stage.getRunningTaskCount();
        final int failed = stage.getFailedTaskCount();
        totalTasks += total;
        succeededTasks += complete;

        final String stageName = "Stage-" + stageId;
        if (total <= 0) {
            // No task count known for this stage.
            report.append(String.format("%s: -/-\t", stageName));
            continue;
        }

        final String perfKey = PerfLogger.SPARK_RUN_STAGE + stageId;

        // First time this stage is seen fully complete: close its perf-log entry, opening it
        // first if it was never opened.
        if (complete == total && !completed.contains(stageId)) {
            completed.add(stageId);
            if (!perfLogger.startTimeHasMethod(perfKey)) {
                perfLogger.PerfLogBegin(CLASS_NAME, perfKey);
            }
            perfLogger.PerfLogEnd(CLASS_NAME, perfKey);
        }

        /* stage is started, but not complete */
        final boolean inProgress = complete < total && (complete > 0 || running > 0 || failed > 0);
        if (inProgress && !perfLogger.startTimeHasMethod(perfKey)) {
            perfLogger.PerfLogBegin(CLASS_NAME, perfKey);
        }

        final String entry;
        if (inProgress) {
            entry = failed > 0
                ? String.format("%s: %d(+%d,-%d)/%d\t", stageName, complete, running, failed, total)
                : String.format("%s: %d(+%d)/%d\t", stageName, complete, running, total);
        } else if (failed > 0) {
            /* tasks finished but some failed */
            entry = String.format("%s: %d(-%d)/%d Finished with failed tasks\t", stageName, complete, failed, total);
        } else if (complete == total) {
            entry = String.format("%s: %d/%d Finished\t", stageName, complete, total);
        } else {
            /* stage is waiting for input/slots */
            entry = String.format("%s: %d/%d\t", stageName, complete, total);
        }
        report.append(entry);
    }

    if (SessionState.get() != null) {
        // With no tasks at all the job is reported as fully progressed.
        final float ratio = (totalTasks == 0) ? 1.0f : (float) succeededTasks / (float) totalTasks;
        SessionState.get().updateProgressedPercentage(ratio);
    }
    return report.toString();
}
Also used : TreeSet(java.util.TreeSet) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date)

Example 15 with TreeSet

use of java.util.TreeSet in project hbase by apache.

the class TestRollingRestart method getDoubleAssignedRegions.

/**
 * Collects the names of regions that are reported online more than once across the live
 * region server threads of the cluster.
 *
 * @param cluster cluster whose live region server threads are queried
 * @return sorted set of the region names seen more than once; empty when there are none
 * @throws IOException declared by the online-region lookup
 */
private NavigableSet<String> getDoubleAssignedRegions(MiniHBaseCluster cluster) throws IOException {
    NavigableSet<String> seenOnce = new TreeSet<>();
    NavigableSet<String> seenAgain = new TreeSet<>();
    for (RegionServerThread serverThread : cluster.getLiveRegionServerThreads()) {
        for (HRegionInfo region : ProtobufUtil.getOnlineRegions(serverThread.getRegionServer().getRSRpcServices())) {
            String regionName = region.getRegionNameAsString();
            // add() returns false when the name was already recorded by an earlier sighting.
            boolean firstSighting = seenOnce.add(regionName);
            if (!firstSighting) {
                seenAgain.add(regionName);
            }
        }
    }
    return seenAgain;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TreeSet(java.util.TreeSet) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread)

Aggregations

TreeSet (java.util.TreeSet): 3795, ArrayList (java.util.ArrayList): 835, Test (org.junit.Test): 544, HashMap (java.util.HashMap): 502, HashSet (java.util.HashSet): 430, Set (java.util.Set): 424, Map (java.util.Map): 405, IOException (java.io.IOException): 378, File (java.io.File): 341, List (java.util.List): 323, TreeMap (java.util.TreeMap): 229, Iterator (java.util.Iterator): 189, SortedSet (java.util.SortedSet): 186, LinkedList (java.util.LinkedList): 110, LinkedHashSet (java.util.LinkedHashSet): 106, Date (java.util.Date): 94, Collection (java.util.Collection): 92, Comparator (java.util.Comparator): 85, Test (org.testng.annotations.Test): 81, Text (org.apache.hadoop.io.Text): 79