Use of gnu.trove.map.hash.THashMap in project spf4j by zolyfarkas.
The class SampleNode, method addSample.
void addSample(final StackTraceElement[] stackTrace, final int from) {
  sampleCount++;
  if (from >= 0) {
    Method method = Method.getMethod(stackTrace[from]);
    SampleNode subNode = null;
    if (subNodes == null) {
      // Lazily allocate the child map; most nodes have only a few children.
      subNodes = new THashMap<>(4);
    } else {
      subNode = subNodes.get(method);
    }
    if (subNode == null) {
      // First time this frame is seen at this depth: create a new subtree.
      subNodes.put(method, new SampleNode(stackTrace, from - 1));
    } else {
      // Frame already present: descend and count the remaining frames.
      subNode.addSample(stackTrace, from - 1);
    }
  }
}
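A minimal driver sketch, for illustration only: it assumes the SampleNode(stackTrace, from) constructor used above creates a one-sample tree, and that each trace is folded in starting from the deepest frame (the end of the array) toward index 0.
// Hypothetical sampling loop; not part of spf4j.
StackTraceElement[] first = Thread.currentThread().getStackTrace();
SampleNode root = new SampleNode(first, first.length - 1);
for (int i = 0; i < 100; i++) {
  StackTraceElement[] trace = Thread.currentThread().getStackTrace();
  root.addSample(trace, trace.length - 1);
}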
Use of gnu.trove.map.hash.THashMap in project cogcomp-nlp by CogComp.
The class BrownClusters, method get.
/**
 * Initialize the brown cluster data. Clusters are stored in a static data structure to avoid
 * reloading the same (read-only) clusters over and over.
 * @param pathsToClusterFiles the files containing the data.
 * @param thresholds the minimum occurrence count a word needs to be included, one per resource.
 * @param isLowercaseBrownClusters whether each resource holds lowercased clusters.
 */
public static BrownClusters get(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
  boolean useLocalBrownCluster = true;
  String key = null;
  synchronized (INIT_SYNC) {
    // First check for a cluster already loaded for this data.
    key = getKey(pathsToClusterFiles);
    if (!clusters.containsKey(key)) {
      // Check to see if all the paths exist on the local file system.
      for (String path : pathsToClusterFiles) {
        if (!new File(path).exists()) {
          useLocalBrownCluster = false;
          break;
        }
      }
      // Create the cluster data structure.
      BrownClusters brownclusters = new BrownClusters();
      brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
      brownclusters.wordToPathByResource = new ArrayList<>();
      brownclusters.resources = new ArrayList<>();
      if (!useLocalBrownCluster) {
        // At least one path is missing locally, so load everything from Minio.
        try {
          Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
          File bcDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
          for (int i = 0; i < pathsToClusterFiles.size(); i++) {
            THashMap<String, String> h = new THashMap<>();
            // Resolve the cluster file inside the datastore directory.
            String bcFilePath = bcDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i);
            InputStream is = new FileInputStream(bcFilePath);
            InFile in = new InFile(is);
            String line = in.readLine();
            while (line != null) {
              // Each line holds a bit-string path, a word, and an occurrence count.
              StringTokenizer st = new StringTokenizer(line);
              String path = st.nextToken();
              String word = st.nextToken();
              int occ = Integer.parseInt(st.nextToken());
              if (occ >= thresholds.elementAt(i)) {
                h.put(word, path);
              }
              line = in.readLine();
            }
            brownclusters.wordToPathByResource.add(h);
            brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
            brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
            in.close();
          }
          logger.info("Loaded brown cluster " + key + " from the Minio system.");
          clusters.put(key, brownclusters);
        } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
          throw new RuntimeException("Brown Clusters could not be loaded.", e);
        }
      } else {
        // Load the clusters from the local file system.
        try {
          for (int i = 0; i < pathsToClusterFiles.size(); i++) {
            THashMap<String, String> h = new THashMap<>();
            String bcFilePath = pathsToClusterFiles.elementAt(i);
            InputStream is = new FileInputStream(bcFilePath);
            InFile in = new InFile(is);
            String line = in.readLine();
            while (line != null) {
              StringTokenizer st = new StringTokenizer(line);
              String path = st.nextToken();
              String word = st.nextToken();
              int occ = Integer.parseInt(st.nextToken());
              if (occ >= thresholds.elementAt(i)) {
                h.put(word, path);
              }
              line = in.readLine();
            }
            brownclusters.wordToPathByResource.add(h);
            brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
            brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
            in.close();
          }
          logger.info("Loaded brown cluster " + key + " from the local file system.");
          clusters.put(key, brownclusters);
        } catch (FileNotFoundException e) {
          throw new RuntimeException("Brown Clusters files existed on local disk, but could not be loaded.", e);
        }
      }
    }
  }
  return clusters.get(key);
}
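A hedged usage sketch: the three Vectors are parallel, one entry per cluster resource, and the file name below is hypothetical.
// Hypothetical resource name, for illustration only.
Vector<String> paths = new Vector<>();
paths.add("brown-clusters/clusters-c1000.txt");
Vector<Integer> thresholds = new Vector<>();
thresholds.add(5); // keep only words occurring at least 5 times
Vector<Boolean> lowercase = new Vector<>();
lowercase.add(false);
BrownClusters bc = BrownClusters.get(paths, thresholds, lowercase);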
Use of gnu.trove.map.hash.THashMap in project cogcomp-nlp by CogComp.
The class BrownClusters, method init.
/**
 * Initialize the brown cluster data. This is a singleton, so this process is synchronized and
 * atomic with respect to the <code>get()</code> method above.
 * @param pathsToClusterFiles the files containing the data.
 * @param thresholds the minimum occurrence count a word needs to be included, one per resource.
 * @param isLowercaseBrownClusters whether each resource holds lowercased clusters.
 */
public static void init(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
  try {
    Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
    File gazDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
    synchronized (INIT_SYNC) {
      brownclusters = new BrownClusters();
      brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
      brownclusters.wordToPathByResource = new ArrayList<>();
      brownclusters.resources = new ArrayList<>();
      for (int i = 0; i < pathsToClusterFiles.size(); i++) {
        THashMap<String, String> h = new THashMap<>();
        // We used to access the files as resources. Now we are accessing them programmatically.
        // InFile in = new InFile(ResourceUtilities.loadResource(pathsToClusterFiles.elementAt(i)));
        InputStream is = new FileInputStream(gazDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i));
        InFile in = new InFile(is);
        String line = in.readLine();
        int wordsAdded = 0;
        while (line != null) {
          StringTokenizer st = new StringTokenizer(line);
          String path = st.nextToken();
          String word = st.nextToken();
          int occ = Integer.parseInt(st.nextToken());
          if (occ >= thresholds.elementAt(i)) {
            h.put(word, path);
            wordsAdded++;
          }
          line = in.readLine();
        }
        if (ParametersForLbjCode.currentParameters.debug) {
          logger.info(wordsAdded + " words added");
        }
        brownclusters.wordToPathByResource.add(h);
        brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
        brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
        in.close();
      }
    }
  } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
    // Failures are reported but not rethrown; the clusters remain uninitialized.
    e.printStackTrace();
  }
}
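The same call shape works for init, which always resolves the files inside the Minio datastore directory, whereas get prefers local files when every path exists. A hedged sketch with two hypothetical resources:
// Hypothetical file names; one threshold and one lowercase flag per resource.
Vector<String> paths = new Vector<>(Arrays.asList("clusters-a.txt", "clusters-b.txt"));
Vector<Integer> thresholds = new Vector<>(Arrays.asList(5, 10));
Vector<Boolean> lowercase = new Vector<>(Arrays.asList(false, true));
BrownClusters.init(paths, thresholds, lowercase);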
Use of gnu.trove.map.hash.THashMap in project spf4j by zolyfarkas.
The class Converter, method loadLabeledDumps.
@SuppressFBWarnings("NP_LOAD_OF_KNOWN_NULL_VALUE")
public static Map<String, SampleNode> loadLabeledDumps(final File file) throws IOException {
  try (MemorizingBufferedInputStream bis = new MemorizingBufferedInputStream(Files.newInputStream(file.toPath()))) {
    final SpecificDatumReader<ASample> reader = new SpecificDatumReader<>(ASample.SCHEMA$);
    final BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(bis, null);
    // Avro encodes maps and arrays in blocks; read them block by block.
    long nrItems = decoder.readMapStart();
    ASample asmp = new ASample();
    Map<String, SampleNode> result = new HashMap<>((int) nrItems);
    while (nrItems > 0) {
      for (int i = 0; i < nrItems; i++) {
        String key = decoder.readString();
        TIntObjectMap<SampleNode> index = new TIntObjectHashMap<>();
        long nrArrayItems = decoder.readArrayStart();
        while (nrArrayItems > 0) {
          for (int j = 0; j < nrArrayItems; j++) {
            asmp = reader.read(asmp, decoder);
            SampleNode sn = new SampleNode(asmp.count, new THashMap<Method, SampleNode>(4));
            SampleNode parent = index.get(asmp.parentId);
            if (parent != null) {
              AMethod method = asmp.getMethod();
              Method m = Method.getMethod(method.declaringClass, method.getName());
              final Map<Method, SampleNode> subNodes = parent.getSubNodes();
              if (subNodes == null) {
                throw new IllegalStateException("Bug, state " + index + "; at node " + asmp);
              }
              subNodes.put(m, sn);
            }
            index.put(asmp.id, sn);
          }
          nrArrayItems = decoder.arrayNext();
        }
        // The root of each labeled dump is the sample with id 0.
        result.put(key, index.get(0));
      }
      nrItems = decoder.mapNext();
    }
    return result;
  }
}
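A hedged usage sketch; the file path is hypothetical, and the dump is assumed to have been written by the matching save side of Converter.
// Hypothetical path, for illustration only.
Map<String, SampleNode> dumps = Converter.loadLabeledDumps(new File("samples.ssdump"));
for (Map.Entry<String, SampleNode> e : dumps.entrySet()) {
  System.out.println(e.getKey() + ": " + e.getValue().getSampleCount() + " samples");
}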
Use of gnu.trove.map.hash.THashMap in project spf4j by zolyfarkas.
The class SampleNode, method filteredBy.
@Nullable
public SampleNode filteredBy(final Predicate<Method> predicate) {
  int newCount = this.sampleCount;
  THashMap<Method, SampleNode> sns = null;
  if (this.subNodes != null) {
    for (Map.Entry<Method, SampleNode> entry : this.subNodes.entrySet()) {
      Method method = entry.getKey();
      SampleNode sn = entry.getValue();
      if (predicate.test(method)) {
        // The whole subtree is filtered out; subtract its samples.
        newCount -= sn.getSampleCount();
      } else {
        if (sns == null) {
          sns = new THashMap<>(4);
        }
        SampleNode sn2 = sn.filteredBy(predicate);
        if (sn2 == null) {
          newCount -= sn.getSampleCount();
        } else {
          // Keep the subtree, subtracting only the samples removed within it.
          newCount -= sn.getSampleCount() - sn2.getSampleCount();
          sns.put(method, sn2);
        }
      }
    }
  }
  if (newCount == 0) {
    return null;
  } else if (newCount < 0) {
    throw new IllegalStateException("child sample counts must be <= parent sample count, detail: " + this);
  } else {
    return new SampleNode(newCount, sns);
  }
}
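A hedged pruning sketch: drop every subtree rooted at a frame matching the predicate. The getDeclaringClass() accessor on Method is an assumption; substitute the real accessor if it differs.
// Remove JDK frames; a null result means every sample was filtered out.
SampleNode pruned = root.filteredBy(m -> m.getDeclaringClass().startsWith("java."));
if (pruned == null) {
  System.out.println("all samples matched the filter");
}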