Search in sources :

Example 16 with Multiset

use of com.google.common.collect.Multiset in project pyramid by cheng-li.

the class NgramEnumerator method gatherNgram.

/**
 * Counts ngrams over the given documents and keeps only the sufficiently frequent ones.
 *
 * <p>For every id, the term vector of the template's field is read from {@code index} and
 * passed to {@code add(...)} together with a shared accumulator multiset. The ids are
 * processed on a parallel stream, so the accumulator is a concurrent multiset.
 *
 * @param index index the term vectors are read from
 * @param ids ids of the documents to scan
 * @param template ngram template; supplies the field name and is forwarded to {@code add(...)}
 * @param minDF inclusive lower bound on an ngram's accumulated count for it to be kept
 *     (presumably a document frequency; confirm against {@code add(...)})
 * @return a new multiset containing each ngram whose count is at least {@code minDF},
 *     with that count preserved
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF) {
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer, String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector, multiset, template);
    });
    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Parameterized entry type: no raw-type warning and no unchecked cast on the element.
    for (Multiset.Entry<Ngram> entry : multiset.entrySet()) {
        int count = entry.getCount();
        if (count >= minDF) {
            filtered.add(entry.getElement(), count);
        }
    }
    return filtered;
}
Also used : ConcurrentHashMultiset(com.google.common.collect.ConcurrentHashMultiset) Multiset(com.google.common.collect.Multiset) Ngram(edu.neu.ccs.pyramid.feature.Ngram)

Example 17 with Multiset

use of com.google.common.collect.Multiset in project pyramid by cheng-li.

the class App1 method gather.

/**
 * Collects the distinct ngrams for every configured combination of field, n and slop.
 *
 * <p>The frequency threshold passed to {@code NgramEnumerator.gatherNgram} is
 * {@code floor(ids.length * minDf)}. Every gathered ngram has its in-order flag set from the
 * configuration; it is then added to the overall collection when {@code interesting(...)}
 * accepts it and, unless duplicate words are allowed, it has no duplicate.
 *
 * @param config source of the {@code output.folder} and {@code train.feature.ngram.*} settings
 * @param index index handed to the ngram enumerator
 * @param ids ids of the documents to scan
 * @param logger receives progress messages
 * @return the distinct ngrams that were collected
 * @throws Exception declared by the signature; propagated from the calls made here
 */
static Set<Ngram> gather(Config config, ESIndex index, String[] ids, Logger logger) throws Exception {
    // The meta_data folder is created under the output folder; nothing is written to it here.
    File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
    metaDataFolder.mkdirs();

    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ns = config.getIntegers("train.feature.ngram.n");
    double minDf = config.getDouble("train.feature.ngram.minDf");
    int minDFrequency = (int) Math.floor(ids.length * minDf);
    List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
    boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");

    for (String field : fields) {
        for (int n : ns) {
            for (int slop : slops) {
                logger.info("gathering " + n + "-grams from field " + field
                        + " with slop " + slop + " and minDf " + minDf
                        + ", (actual frequency threshold = " + minDFrequency + ")");
                NgramTemplate template = new NgramTemplate(field, n, slop);
                Multiset<Ngram> found = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                logger.info("gathered " + found.elementSet().size() + " ngrams");

                int added = 0;
                for (Multiset.Entry<Ngram> candidate : found.entrySet()) {
                    Ngram ngram = candidate.getElement();
                    ngram.setInOrder(inorder);
                    int count = candidate.getCount();
                    if (!interesting(allNgrams, ngram, count)) {
                        continue;
                    }
                    // hasDuplicate() is only consulted when duplicate words are disallowed.
                    if (allowDuplicates || !ngram.hasDuplicate()) {
                        allNgrams.add(ngram, count);
                        added++;
                    }
                }
                logger.info(added + " are really new");
            }
        }
    }

    logger.info("there are " + allNgrams.elementSet().size() + " ngrams in total");
    return allNgrams.elementSet();
}
Also used : NgramTemplate(edu.neu.ccs.pyramid.feature_extraction.NgramTemplate) Multiset(com.google.common.collect.Multiset) ConcurrentHashMultiset(com.google.common.collect.ConcurrentHashMultiset) File(java.io.File)

Example 18 with Multiset

use of com.google.common.collect.Multiset in project pyramid by cheng-li.

the class GeneralF1Predictor method predict.

/**
 * Predicts from sampled multi-labels by first turning them into an empirical distribution.
 *
 * <p>Equal samples are merged; each distinct multi-label gets the probability
 * {@code occurrences / samples.size()}. The distinct labels and their probabilities are then
 * delegated to {@code predict(numClasses, uniqueOnes, probs)}.
 *
 * @param numClasses forwarded unchanged to the delegate
 * @param samples sampled multi-labels; may contain duplicates
 * @return the result of the delegate call
 */
public MultiLabel predict(int numClasses, List<MultiLabel> samples) {
    Multiset<MultiLabel> occurrences = ConcurrentHashMultiset.create();
    samples.forEach(occurrences::add);

    final int total = samples.size();
    List<MultiLabel> distinctLabels = new ArrayList<>();
    List<Double> empiricalProbs = new ArrayList<>();
    for (Multiset.Entry<MultiLabel> bucket : occurrences.entrySet()) {
        MultiLabel label = bucket.getElement();
        double share = (double) bucket.getCount() / total;
        distinctLabels.add(label);
        empiricalProbs.add(share);
    }
    return predict(numClasses, distinctLabels, empiricalProbs);
}
Also used : MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) ArrayList(java.util.ArrayList) Multiset(com.google.common.collect.Multiset) ConcurrentHashMultiset(com.google.common.collect.ConcurrentHashMultiset)

Example 19 with Multiset

use of com.google.common.collect.Multiset in project bazel by bazelbuild.

the class AppleWatch2Extension method validateAttributesAndConfiguration.

/**
 * Reports rule errors for repeated files and for mismatched watch/iOS platform kinds.
 *
 * <p>Two checks are made: (1) within the app attributes ({@code app_resources},
 * {@code app_strings}) and, separately, within the extension attributes
 * ({@code ext_resources}, {@code ext_strings}), no artifact may appear more than once;
 * (2) the watchOS and iOS multi-arch platforms must agree on {@code isDevice()}.
 * All problems are reported before the exception is thrown.
 *
 * @param ruleContext rule context the attributes and configuration are read from and the
 *     errors are reported on
 * @throws RuleErrorException if at least one error was reported
 */
private void validateAttributesAndConfiguration(RuleContext ruleContext) throws RuleErrorException {
    boolean failed = false;

    // Each row is one group of attributes whose files must not repeat within the group.
    String[][] attributeGroups = {
        { "app_resources", "app_strings" },
        { "ext_resources", "ext_strings" }
    };
    for (String[] group : attributeGroups) {
        Multiset<Artifact> files = HashMultiset.create();
        for (String attribute : group) {
            files.addAll(ruleContext.getPrerequisiteArtifacts(attribute, Mode.TARGET).list());
        }
        for (Multiset.Entry<Artifact> file : files.entrySet()) {
            if (file.getCount() <= 1) {
                continue;
            }
            ruleContext.ruleError("The same file was included multiple times in this rule: " + file.getElement().getRootRelativePathString());
            failed = true;
        }
    }

    AppleConfiguration appleConfiguration = ruleContext.getFragment(AppleConfiguration.class);
    Platform watchPlatform = appleConfiguration.getMultiArchPlatform(PlatformType.WATCHOS);
    Platform iosPlatform = appleConfiguration.getMultiArchPlatform(PlatformType.IOS);
    boolean watchIsDevice = watchPlatform.isDevice();
    boolean iosIsDevice = iosPlatform.isDevice();
    if (watchIsDevice != iosIsDevice) {
        failed = true;
        String message;
        if (watchIsDevice) {
            String watchArchs = Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.WATCHOS));
            String iosArchs = Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.IOS));
            message = String.format(
                "Building a watch extension for watch device architectures [%s] "
                    + "requires a device ios architecture. Found [%s] instead.",
                watchArchs, iosArchs);
        } else {
            String iosArchs = Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.IOS));
            String watchArchs = Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.WATCHOS));
            message = String.format(
                "Building a watch extension for ios device architectures [%s] "
                    + "requires a device watch architecture. Found [%s] instead.",
                iosArchs, watchArchs);
        }
        ruleContext.ruleError(message);
        ruleContext.ruleError("For building watch extension, there may only be a watch device "
            + "architecture if and only if there is an ios device architecture");
    }

    if (failed) {
        throw new RuleErrorException();
    }
}
Also used : Platform(com.google.devtools.build.lib.rules.apple.Platform) Multiset(com.google.common.collect.Multiset) HashMultiset(com.google.common.collect.HashMultiset) AppleConfiguration(com.google.devtools.build.lib.rules.apple.AppleConfiguration) Artifact(com.google.devtools.build.lib.actions.Artifact)

Example 20 with Multiset

use of com.google.common.collect.Multiset in project weave by continuuity.

the class ApplicationMasterService method handleCompleted.

/**
 * Handles containers that have completed.
 *
 * <p>Each status is logged and passed to {@code runningContainers.handleCompleted} along with
 * a multiset of runnable names (presumably filled there with the runnables to restart, one
 * occurrence per instance; confirm against that method). One container request is then
 * queued per occurrence, and the request time of the affected runnables is refreshed.
 *
 * @param completedContainersStatuses statuses of the containers that completed
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
    Multiset<String> toRestart = HashMultiset.create();
    for (YarnContainerStatus completed : completedContainersStatuses) {
        LOG.info("Container {} completed with {}:{}.", completed.getContainerId(), completed.getState(), completed.getDiagnostics());
        runningContainers.handleCompleted(completed, toRestart);
    }

    for (Multiset.Entry<String> restart : toRestart.entrySet()) {
        String runnableName = restart.getElement();
        int instances = restart.getCount();
        LOG.info("Re-request container for {} with {} instances.", runnableName, instances);
        for (int remaining = instances; remaining > 0; remaining--) {
            runnableContainerRequests.add(createRunnableContainerRequest(runnableName));
        }
    }

    // For every runnable that needs to re-request containers, update the expected-count
    // timestamp so that the EventHandler is triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(toRestart.elementSet());
}
Also used : YarnContainerStatus(com.continuuity.weave.internal.yarn.YarnContainerStatus) HashMultiset(com.google.common.collect.HashMultiset) Multiset(com.google.common.collect.Multiset)

Aggregations

Multiset (com.google.common.collect.Multiset): 34 · HashMultiset (com.google.common.collect.HashMultiset): 20 · List (java.util.List): 11 · ArrayList (java.util.ArrayList): 10 · Set (java.util.Set): 10 · File (java.io.File): 9 · Map (java.util.Map): 9 · IOException (java.io.IOException): 7 · HashMap (java.util.HashMap): 7 · Collectors (java.util.stream.Collectors): 7 · Test (org.junit.Test): 6 · Collection (java.util.Collection): 5 · Entry (java.util.Map.Entry): 5 · UUID (java.util.UUID): 5 · ImmutableSet (com.google.common.collect.ImmutableSet): 4 · Lists (com.google.common.collect.Lists): 4 · Multimap (com.google.common.collect.Multimap): 4 · Predicate (java.util.function.Predicate): 4 · ConcurrentHashMultiset (com.google.common.collect.ConcurrentHashMultiset): 3 · ImmutableMap (com.google.common.collect.ImmutableMap): 3