use of com.google.common.collect.Multiset in project pyramid by cheng-li.
the class NgramEnumerator method gatherNgram.
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF) {
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    // count ngram occurrences across all documents in parallel
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer, String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector, multiset, template);
    });
    // keep only ngrams whose count reaches the minimum document frequency
    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    for (Multiset.Entry<Ngram> entry : multiset.entrySet()) {
        Ngram ngram = entry.getElement();
        int count = entry.getCount();
        if (count >= minDF) {
            filtered.add(ngram, count);
        }
    }
    return filtered;
}
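The count-then-filter idiom above does not depend on the pyramid types. As a minimal, self-contained sketch using hypothetical String tokens and a made-up threshold (class and method names are invented for illustration), the same Multiset usage looks roughly like this:

import com.google.common.collect.ConcurrentHashMultiset;
import com.google.common.collect.Multiset;
import java.util.Arrays;
import java.util.List;

public class MinCountFilterSketch {

    // Count how often each token occurs, then keep only tokens seen at least minCount times.
    static Multiset<String> countAndFilter(List<String> tokens, int minCount) {
        Multiset<String> counts = ConcurrentHashMultiset.create();
        counts.addAll(tokens);
        Multiset<String> filtered = ConcurrentHashMultiset.create();
        for (Multiset.Entry<String> entry : counts.entrySet()) {
            if (entry.getCount() >= minCount) {
                filtered.add(entry.getElement(), entry.getCount());
            }
        }
        return filtered;
    }

    public static void main(String[] args) {
        List<String> tokens = Arrays.asList("data", "mining", "data", "data", "text");
        System.out.println(countAndFilter(tokens, 2));  // expected: [data x 3]
    }
}

ConcurrentHashMultiset is used here only to mirror the snippet; a plain HashMultiset behaves the same way when no concurrent writes are involved.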
use of com.google.common.collect.Multiset in project pyramid by cheng-li.
the class App1 method gather.
static Set<Ngram> gather(Config config, ESIndex index, String[] ids, Logger logger) throws Exception {
    File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
    metaDataFolder.mkdirs();
    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ns = config.getIntegers("train.feature.ngram.n");
    double minDf = config.getDouble("train.feature.ngram.minDf");
    int minDFrequency = (int) Math.floor(ids.length * minDf);
    List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
    boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");
    for (String field : fields) {
        for (int n : ns) {
            for (int slop : slops) {
                logger.info("gathering " + n + "-grams from field " + field + " with slop " + slop + " and minDf " + minDf + ", (actual frequency threshold = " + minDFrequency + ")");
                NgramTemplate template = new NgramTemplate(field, n, slop);
                Multiset<Ngram> ngrams = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                logger.info("gathered " + ngrams.elementSet().size() + " ngrams");
                int newCounter = 0;
                for (Multiset.Entry<Ngram> entry : ngrams.entrySet()) {
                    Ngram ngram = entry.getElement();
                    ngram.setInOrder(inorder);
                    int count = entry.getCount();
                    if (interesting(allNgrams, ngram, count)) {
                        if (allowDuplicates) {
                            allNgrams.add(ngram, count);
                            newCounter += 1;
                        } else {
                            if (!ngram.hasDuplicate()) {
                                allNgrams.add(ngram, count);
                                newCounter += 1;
                            }
                        }
                    }
                }
                logger.info(newCounter + " are really new");
            }
        }
    }
    logger.info("there are " + allNgrams.elementSet().size() + " ngrams in total");
    // Serialization.serialize(uniques, new File(metaDataFolder, "all_ngrams.ser"));
    return allNgrams.elementSet();
}
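The gather method accumulates every per-template multiset into allNgrams with add(element, count). A generic sketch of that merge step, under the assumption that the inputs are already counted and using invented names, might look like this:

import com.google.common.collect.ConcurrentHashMultiset;
import com.google.common.collect.Multiset;
import java.util.Arrays;
import java.util.List;

public class MergeCountsSketch {

    // Fold several per-pass multisets into one aggregate while preserving counts.
    static <T> Multiset<T> merge(List<Multiset<T>> parts) {
        Multiset<T> total = ConcurrentHashMultiset.create();
        for (Multiset<T> part : parts) {
            for (Multiset.Entry<T> entry : part.entrySet()) {
                total.add(entry.getElement(), entry.getCount());
            }
        }
        return total;
    }

    public static void main(String[] args) {
        Multiset<String> a = ConcurrentHashMultiset.create();
        a.add("ngram", 3);
        Multiset<String> b = ConcurrentHashMultiset.create();
        b.add("ngram", 2);
        System.out.println(merge(Arrays.asList(a, b)));  // expected: [ngram x 5]
    }
}

When only two multisets are involved, Guava's Multisets.sum(a, b) offers an unmodifiable summed view instead of a copy.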
use of com.google.common.collect.Multiset in project pyramid by cheng-li.
the class GeneralF1Predictor method predict.
/**
 * @param numClasses the total number of label classes
 * @param samples sampled multi-labels; can have duplicates; their empirical probabilities will be estimated
 * @return the multi-label predicted from the estimated empirical distribution
 */
public MultiLabel predict(int numClasses, List<MultiLabel> samples) {
    Multiset<MultiLabel> multiset = ConcurrentHashMultiset.create();
    for (MultiLabel multiLabel : samples) {
        multiset.add(multiLabel);
    }
    int sampleSize = samples.size();
    List<MultiLabel> uniqueOnes = new ArrayList<>();
    List<Double> probs = new ArrayList<>();
    // each distinct multi-label gets probability count / sampleSize
    for (Multiset.Entry<MultiLabel> entry : multiset.entrySet()) {
        uniqueOnes.add(entry.getElement());
        probs.add((double) entry.getCount() / sampleSize);
    }
    return predict(numClasses, uniqueOnes, probs);
}
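The predict method turns raw samples into an empirical distribution by dividing each element's Multiset count by the sample size. A standalone sketch of just that estimation step, with a hypothetical generic type instead of MultiLabel, could be:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class EmpiricalDistributionSketch {

    // Estimate the empirical probability of each distinct sample from its Multiset count.
    static <T> Map<T, Double> empiricalProbabilities(List<T> samples) {
        Multiset<T> counts = HashMultiset.create(samples);
        Map<T, Double> probs = new HashMap<>();
        for (Multiset.Entry<T> entry : counts.entrySet()) {
            probs.put(entry.getElement(), (double) entry.getCount() / samples.size());
        }
        return probs;
    }

    public static void main(String[] args) {
        System.out.println(empiricalProbabilities(Arrays.asList("{0,1}", "{0,1}", "{1}", "{0}")));
    }
}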
use of com.google.common.collect.Multiset in project bazel by bazelbuild.
the class AppleWatch2Extension method validateAttributesAndConfiguration.
private void validateAttributesAndConfiguration(RuleContext ruleContext) throws RuleErrorException {
    boolean hasError = false;
    Multiset<Artifact> appResources = HashMultiset.create();
    appResources.addAll(ruleContext.getPrerequisiteArtifacts("app_resources", Mode.TARGET).list());
    appResources.addAll(ruleContext.getPrerequisiteArtifacts("app_strings", Mode.TARGET).list());
    for (Multiset.Entry<Artifact> entry : appResources.entrySet()) {
        if (entry.getCount() > 1) {
            ruleContext.ruleError("The same file was included multiple times in this rule: " + entry.getElement().getRootRelativePathString());
            hasError = true;
        }
    }
    Multiset<Artifact> extResources = HashMultiset.create();
    extResources.addAll(ruleContext.getPrerequisiteArtifacts("ext_resources", Mode.TARGET).list());
    extResources.addAll(ruleContext.getPrerequisiteArtifacts("ext_strings", Mode.TARGET).list());
    for (Multiset.Entry<Artifact> entry : extResources.entrySet()) {
        if (entry.getCount() > 1) {
            ruleContext.ruleError("The same file was included multiple times in this rule: " + entry.getElement().getRootRelativePathString());
            hasError = true;
        }
    }
    AppleConfiguration appleConfiguration = ruleContext.getFragment(AppleConfiguration.class);
    Platform watchPlatform = appleConfiguration.getMultiArchPlatform(PlatformType.WATCHOS);
    Platform iosPlatform = appleConfiguration.getMultiArchPlatform(PlatformType.IOS);
    if (watchPlatform.isDevice() != iosPlatform.isDevice()) {
        hasError = true;
        if (watchPlatform.isDevice()) {
            ruleContext.ruleError(String.format("Building a watch extension for watch device architectures [%s] " + "requires a device ios architecture. Found [%s] instead.", Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.WATCHOS)), Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.IOS))));
        } else {
            ruleContext.ruleError(String.format("Building a watch extension for ios device architectures [%s] " + "requires a device watch architecture. Found [%s] instead.", Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.IOS)), Joiner.on(",").join(appleConfiguration.getMultiArchitectures(PlatformType.WATCHOS))));
        }
        ruleContext.ruleError("For building watch extension, there may only be a watch device " + "architecture if and only if there is an ios device architecture");
    }
    if (hasError) {
        throw new RuleErrorException();
    }
}
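Both resource checks above follow the same duplicate-detection pattern: pour two attribute lists into one Multiset and report every element whose count exceeds one. A small sketch of that pattern with plain strings in place of Bazel Artifacts (names invented for illustration):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DuplicateCheckSketch {

    // Report every item that appears more than once across the combined lists.
    static List<String> findDuplicates(List<String> first, List<String> second) {
        Multiset<String> combined = HashMultiset.create();
        combined.addAll(first);
        combined.addAll(second);
        List<String> duplicates = new ArrayList<>();
        for (Multiset.Entry<String> entry : combined.entrySet()) {
            if (entry.getCount() > 1) {
                duplicates.add(entry.getElement());
            }
        }
        return duplicates;
    }

    public static void main(String[] args) {
        System.out.println(findDuplicates(Arrays.asList("a.png", "b.png"), Arrays.asList("a.png")));  // [a.png]
    }
}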
use of com.google.common.collect.Multiset in project weave by continuuity.
the class ApplicationMasterService method handleCompleted.
/**
 * Handles containers that are completed.
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
    Multiset<String> restartRunnables = HashMultiset.create();
    for (YarnContainerStatus status : completedContainersStatuses) {
        LOG.info("Container {} completed with {}:{}.", status.getContainerId(), status.getState(), status.getDiagnostics());
        runningContainers.handleCompleted(status, restartRunnables);
    }
    for (Multiset.Entry<String> entry : restartRunnables.entrySet()) {
        LOG.info("Re-request container for {} with {} instances.", entry.getElement(), entry.getCount());
        for (int i = 0; i < entry.getCount(); i++) {
            runnableContainerRequests.add(createRunnableContainerRequest(entry.getElement()));
        }
    }
    // For all runnables that need to re-request containers, update the expected count timestamp
    // so that the EventHandler is triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(restartRunnables.elementSet());
}
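The restart loop issues one container request per recorded occurrence, i.e. it replays each Multiset element as many times as it was counted. A minimal sketch of that replay pattern, with a hypothetical RequestSink interface standing in for the real request queue:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class RepeatPerCountSketch {

    // Stand-in for whatever actually receives the re-issued requests.
    interface RequestSink {
        void request(String name);
    }

    // Issue one request per recorded occurrence of each key.
    static void reissue(Multiset<String> pending, RequestSink sink) {
        for (Multiset.Entry<String> entry : pending.entrySet()) {
            for (int i = 0; i < entry.getCount(); i++) {
                sink.request(entry.getElement());
            }
        }
    }

    public static void main(String[] args) {
        Multiset<String> pending = HashMultiset.create();
        pending.add("worker", 2);
        pending.add("driver");
        reissue(pending, name -> System.out.println("re-requesting " + name));
    }
}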