Search in sources :

Example 1 with THashMap

use of gnu.trove.map.hash.THashMap in project spf4j by zolyfarkas.

the class SampleNode method addSample.

/**
 * Records one stack-trace sample into this node's subtree.
 * Increments this node's sample count, then descends one frame: the frame at
 * {@code stackTrace[from]} becomes (or extends) a child node, and the remaining
 * frames are recorded recursively with {@code from - 1}. A negative {@code from}
 * means the trace is exhausted and only the count is bumped.
 *
 * @param stackTrace the full stack trace being sampled; frames are consumed from
 *                   index {@code from} down to 0
 * @param from index of the next frame to record; recursion terminates below 0
 */
void addSample(final StackTraceElement[] stackTrace, final int from) {
    sampleCount++;
    if (from >= 0) {
        Method method = Method.getMethod(stackTrace[from]);
        SampleNode subNode = null;
        if (subNodes == null) {
            // Diamond instead of the raw `new THashMap(4)` — the raw type caused an
            // unchecked-conversion warning; capacity 4 kept since fan-out is usually small.
            subNodes = new THashMap<>(4);
        } else {
            subNode = subNodes.get(method);
        }
        if (subNode == null) {
            // First time we see this method at this depth: the SampleNode constructor
            // records the remainder of the trace.
            subNodes.put(method, new SampleNode(stackTrace, from - 1));
        } else {
            subNode.addSample(stackTrace, from - 1);
        }
    }
}
Also used : THashMap(gnu.trove.map.hash.THashMap) Method(org.spf4j.base.Method)

Example 2 with THashMap

use of gnu.trove.map.hash.THashMap in project cogcomp-nlp by CogComp.

the class BrownClusters method get.

/**
 * Initialize the brown cluster data. Clusters are stored in a static data structure to avoid
 * reloading the same (read-only) clusters over and over. Thread-safe: lookup and load happen
 * under {@code INIT_SYNC}.
 * @param pathsToClusterFiles the files containing the data.
 * @param thresholds minimum occurrence count per resource for a word to be kept.
 * @param isLowercaseBrownClusters per-resource flag, recorded on the loaded instance.
 * @return the cached (or freshly loaded) {@link BrownClusters} for this set of paths.
 * @throws RuntimeException if any cluster file cannot be loaded.
 */
public static BrownClusters get(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
    String key = null;
    synchronized (INIT_SYNC) {
        // first check for a cluster already loaded for this data.
        key = getKey(pathsToClusterFiles);
        if (!clusters.containsKey(key)) {
            // check to see if all the paths exist on the local file system.
            boolean useLocalBrownCluster = true;
            for (String path : pathsToClusterFiles) {
                if (!new File(path).exists()) {
                    useLocalBrownCluster = false;
                    break;
                }
            }
            // create the cluster data structure.
            BrownClusters brownclusters = new BrownClusters();
            brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
            brownclusters.wordToPathByResource = new ArrayList<>();
            brownclusters.resources = new ArrayList<>();
            if (!useLocalBrownCluster) {
                // load everything from Minio
                try {
                    Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
                    File bcDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
                    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                        loadClusterFile(brownclusters, i, bcDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i), pathsToClusterFiles, thresholds, isLowercaseBrownClusters);
                    }
                    logger.info("Loaded brown cluster from " + key + " from Minio system.");
                    clusters.put(key, brownclusters);
                } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters could not be loaded.", e);
                }
            } else {
                // load the clusters from the local file system.
                try {
                    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                        loadClusterFile(brownclusters, i, pathsToClusterFiles.elementAt(i), pathsToClusterFiles, thresholds, isLowercaseBrownClusters);
                    }
                    logger.info("Loaded brown cluster from " + key + " from the local file system.");
                    clusters.put(key, brownclusters);
                } catch (FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters files existed on local disk, but could not be loaded.", e);
                }
            }
        }
    }
    return clusters.get(key);
}

/**
 * Reads one cluster file into {@code brownclusters} at resource index {@code i}: keeps every
 * (word, path) pair whose occurrence count meets the i-th threshold. Extracted because the
 * Minio and local-filesystem branches of {@link #get} previously duplicated this loop verbatim.
 *
 * @param brownclusters the instance being populated.
 * @param i resource index into the parallel vectors/arrays.
 * @param bcFilePath resolved path of the file to read.
 * @param pathsToClusterFiles original resource names (recorded in {@code resources}).
 * @param thresholds minimum occurrence count per resource.
 * @param isLowercaseBrownClusters per-resource lowercase flags.
 * @throws FileNotFoundException if {@code bcFilePath} cannot be opened.
 */
private static void loadClusterFile(BrownClusters brownclusters, int i, String bcFilePath, Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) throws FileNotFoundException {
    THashMap<String, String> h = new THashMap<>();
    InputStream is = new FileInputStream(bcFilePath);
    InFile in = new InFile(is);
    String line = in.readLine();
    while (line != null) {
        // each line: <bit-string path> <word> <occurrence count>
        StringTokenizer st = new StringTokenizer(line);
        String path = st.nextToken();
        String word = st.nextToken();
        int occ = Integer.parseInt(st.nextToken());
        if (occ >= thresholds.elementAt(i)) {
            h.put(word, path);
        }
        line = in.readLine();
    }
    brownclusters.wordToPathByResource.add(h);
    brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
    brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
    in.close();
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) FileInputStream(java.io.FileInputStream) StringTokenizer(java.util.StringTokenizer) Datastore(org.cogcomp.Datastore) THashMap(gnu.trove.map.hash.THashMap) File(java.io.File) InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 3 with THashMap

use of gnu.trove.map.hash.THashMap in project cogcomp-nlp by CogComp.

the class BrownClusters method init.

/**
     * Initialize the brown cluster data. This is a singleton, so this process is synchronized and
     * atomic with respect to the <code>get()</code> method above.
     * @param pathsToClusterFiles the files containing the data.
     * @param thresholds minimum occurrence count per resource for a word to be kept.
     * @param isLowercaseBrownClusters per-resource flag, recorded on the loaded instance.
     */
public static void init(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
    try {
        Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
        synchronized (INIT_SYNC) {
            brownclusters = new BrownClusters();
            brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
            brownclusters.wordToPathByResource = new ArrayList<>();
            brownclusters.resources = new ArrayList<>();
            for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                THashMap<String, String> h = new THashMap<>();
                // We used to access the files as resources. Now we are accessing them programmatically.
                // InFile in = new InFile(ResourceUtilities.loadResource(pathsToClusterFiles.elementAt(i)));
                InputStream is = new FileInputStream(gazDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i));
                InFile in = new InFile(is);
                String line = in.readLine();
                int wordsAdded = 0;
                while (line != null) {
                    // each line: <bit-string path> <word> <occurrence count>
                    StringTokenizer st = new StringTokenizer(line);
                    String path = st.nextToken();
                    String word = st.nextToken();
                    int occ = Integer.parseInt(st.nextToken());
                    if (occ >= thresholds.elementAt(i)) {
                        h.put(word, path);
                        wordsAdded++;
                    }
                    line = in.readLine();
                }
                if (ParametersForLbjCode.currentParameters.debug) {
                    logger.info(wordsAdded + " words added");
                }
                brownclusters.wordToPathByResource.add(h);
                brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
                brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
                in.close();
            }
        }
    } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
        // Log through the class logger instead of the former e.printStackTrace(); the
        // swallow-and-continue semantics are preserved so existing callers are unaffected.
        // NOTE(review): get() above throws RuntimeException here instead — consider aligning.
        logger.error("Brown Clusters could not be loaded.", e);
    }
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) FileInputStream(java.io.FileInputStream) StringTokenizer(java.util.StringTokenizer) Datastore(org.cogcomp.Datastore) THashMap(gnu.trove.map.hash.THashMap) File(java.io.File) InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 4 with THashMap

use of gnu.trove.map.hash.THashMap in project spf4j by zolyfarkas.

the class Converter method loadLabeledDumps.

@SuppressFBWarnings("NP_LOAD_OF_KNOWN_NULL_VALUE")
/**
 * Loads a map of labeled sample trees from an Avro binary dump.
 * The file encodes a map of (label -> array of ASample records); each array is rebuilt
 * into a SampleNode tree by linking children to parents via parentId, with the node of
 * id 0 as the root for that label.
 *
 * @param file the dump file to read.
 * @return label -> root SampleNode of the reconstructed tree.
 * @throws IOException on read/decode failure.
 */
public static Map<String, SampleNode> loadLabeledDumps(final File file) throws IOException {
    try (MemorizingBufferedInputStream bis = new MemorizingBufferedInputStream(Files.newInputStream(file.toPath()))) {
        final SpecificDatumReader<ASample> reader = new SpecificDatumReader<>(ASample.SCHEMA$);
        final BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(bis, null);
        long nrItems = decoder.readMapStart();
        ASample asmp = new ASample();
        Map<String, SampleNode> result = new HashMap<>((int) nrItems);
        while (nrItems > 0) {
            // Consume the whole block of nrItems map entries before asking for the next
            // block. (mapNext() was previously called inside this for-loop, mutating the
            // loop bound mid-iteration — inconsistent with the arrayNext() handling below
            // and incorrect for multi-entry blocks per the Avro Decoder contract.)
            for (int i = 0; i < nrItems; i++) {
                String key = decoder.readString();
                // id -> node, used to attach children to already-decoded parents.
                TIntObjectMap<SampleNode> index = new TIntObjectHashMap<>();
                long nrArrayItems = decoder.readArrayStart();
                while (nrArrayItems > 0) {
                    for (int j = 0; j < nrArrayItems; j++) {
                        asmp = reader.read(asmp, decoder);
                        SampleNode sn = new SampleNode(asmp.count, new THashMap<Method, SampleNode>(4));
                        SampleNode parent = index.get(asmp.parentId);
                        if (parent != null) {
                            AMethod method = asmp.getMethod();
                            Method m = Method.getMethod(method.declaringClass, method.getName());
                            final Map<Method, SampleNode> subNodes = parent.getSubNodes();
                            if (subNodes == null) {
                                throw new IllegalStateException("Bug, state " + index + "; at node " + asmp);
                            }
                            subNodes.put(m, sn);
                        }
                        index.put(asmp.id, sn);
                    }
                    nrArrayItems = decoder.arrayNext();
                }
                // node with id 0 is the root of this label's tree.
                result.put(key, index.get(0));
            }
            nrItems = decoder.mapNext();
        }
        return result;
    }
}
Also used : MemorizingBufferedInputStream(org.spf4j.io.MemorizingBufferedInputStream) TIntObjectHashMap(gnu.trove.map.hash.TIntObjectHashMap) HashMap(java.util.HashMap) THashMap(gnu.trove.map.hash.THashMap) ASample(org.spf4j.ssdump2.avro.ASample) Method(org.spf4j.base.Method) AMethod(org.spf4j.ssdump2.avro.AMethod) BinaryDecoder(org.apache.avro.io.BinaryDecoder) TIntObjectHashMap(gnu.trove.map.hash.TIntObjectHashMap) SampleNode(org.spf4j.stackmonitor.SampleNode) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) AMethod(org.spf4j.ssdump2.avro.AMethod) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Example 5 with THashMap

use of gnu.trove.map.hash.THashMap in project spf4j by zolyfarkas.

the class SampleNode method filteredBy.

@Nullable
/**
 * Produces a copy of this sample tree with every subtree whose root method matches
 * {@code predicate} removed, decrementing sample counts by whatever was pruned.
 *
 * @param predicate methods to filter out.
 * @return the filtered tree, or {@code null} when nothing of this node survives.
 * @throws IllegalStateException if counts are inconsistent (child totals exceed parent).
 */
public SampleNode filteredBy(final Predicate<Method> predicate) {
    int remaining = this.sampleCount;
    THashMap<Method, SampleNode> survivors = null;
    if (this.subNodes != null) {
        for (Map.Entry<Method, SampleNode> entry : this.subNodes.entrySet()) {
            final Method childMethod = entry.getKey();
            final SampleNode child = entry.getValue();
            if (predicate.test(childMethod)) {
                // whole subtree filtered out: drop its samples from this node's total.
                remaining -= child.getSampleCount();
                continue;
            }
            // Allocate the survivor map as soon as any child escapes the predicate,
            // even if its filtered form turns out to be null.
            if (survivors == null) {
                survivors = new THashMap<>(4);
            }
            final SampleNode filteredChild = child.filteredBy(predicate);
            if (filteredChild == null) {
                remaining -= child.getSampleCount();
            } else {
                // subtract only the samples that the recursion pruned away.
                remaining -= child.getSampleCount() - filteredChild.getSampleCount();
                survivors.put(childMethod, filteredChild);
            }
        }
    }
    if (remaining == 0) {
        return null;
    }
    if (remaining < 0) {
        throw new IllegalStateException("child sample counts must be <= parent sample count, detail: " + this);
    }
    return new SampleNode(remaining, survivors);
}
Also used : Method(org.spf4j.base.Method) TMap(gnu.trove.map.TMap) HashMap(java.util.HashMap) Map(java.util.Map) THashMap(gnu.trove.map.hash.THashMap) Nullable(javax.annotation.Nullable)

Aggregations

THashMap (gnu.trove.map.hash.THashMap)6 HashMap (java.util.HashMap)3 Method (org.spf4j.base.Method)3 ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator)2 InFile (edu.illinois.cs.cogcomp.ner.IO.InFile)2 InvalidEndpointException (io.minio.errors.InvalidEndpointException)2 InvalidPortException (io.minio.errors.InvalidPortException)2 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 FileNotFoundException (java.io.FileNotFoundException)2 InputStream (java.io.InputStream)2 StringTokenizer (java.util.StringTokenizer)2 Datastore (org.cogcomp.Datastore)2 DatastoreException (org.cogcomp.DatastoreException)2 LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)1 NEWord (edu.illinois.cs.cogcomp.ner.LbjTagger.NEWord)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 TMap (gnu.trove.map.TMap)1 TIntObjectHashMap (gnu.trove.map.hash.TIntObjectHashMap)1 ArrayList (java.util.ArrayList)1