Search in sources:

Example 36 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class Units method loadUnits.

/**
 * Loads unit definitions from a comma-separated file.
 *
 * <p>The first line is a header naming the columns; the columns {@code unit},
 * {@code prefix}, {@code symbol}, {@code type}, {@code system},
 * {@code defaultUnit} and {@code defaultUnitScale} are looked up by name, so
 * their order in the file does not matter. Every following line describes one
 * unit. Fields are separated by commas with optional surrounding whitespace.
 *
 * <p>A row may be shorter than the header ({@link Pattern#split(CharSequence)}
 * drops trailing empty fields), so the prefix, default unit and default unit
 * scale are only read when the row is long enough to contain them. The scale
 * defaults to {@code 1.0}.
 *
 * @param filename file to read the unit table from
 * @return the units in file order
 * @throws IOException if the file cannot be read or has no header line
 */
public static List<Unit> loadUnits(String filename) throws IOException {
    Pattern commaPattern = Pattern.compile("\\s*,\\s*");
    List<Unit> list = new ArrayList<>();
    Map<String, Unit> unitsByName = new HashMap<>();
    Map<String, Pair<String, Double>> unitToDefaultUnits = new HashMap<>();
    // try-with-resources: the reader is released even when a row is malformed
    // and parsing throws part-way through the file.
    try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
        String headerString = br.readLine();
        if (headerString == null) {
            throw new IOException("Units file " + filename + " is empty: missing header line");
        }
        String[] header = commaPattern.split(headerString);
        Map<String, Integer> headerIndex = new HashMap<>();
        for (int i = 0; i < header.length; i++) {
            headerIndex.put(header[i], i);
        }
        // Unboxing throws NullPointerException if a column is absent from the header.
        int iName = headerIndex.get("unit");
        int iPrefix = headerIndex.get("prefix");
        int iSymbol = headerIndex.get("symbol");
        int iType = headerIndex.get("type");
        int iSystem = headerIndex.get("system");
        int iDefaultUnit = headerIndex.get("defaultUnit");
        int iDefaultUnitScale = headerIndex.get("defaultUnitScale");
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = commaPattern.split(line);
            Unit unit = new Unit(fields[iName], fields[iSymbol], fields[iType].toUpperCase());
            unit.system = fields[iSystem];
            if (fields.length > iPrefix) {
                unit.prefixSystem = fields[iPrefix];
            }
            if (fields.length > iDefaultUnit) {
                double scale = 1.0;
                if (fields.length > iDefaultUnitScale) {
                    scale = Double.parseDouble(fields[iDefaultUnitScale]);
                }
                // Only the default unit's name is recorded here; it is resolved
                // below once every unit in the file is known.
                unitToDefaultUnits.put(unit.getName(), Pair.makePair(fields[iDefaultUnit], scale));
            }
            unitsByName.put(unit.getName(), unit);
            list.add(unit);
        }
    }
    // Second pass: link each unit to its default unit by name, so a unit may
    // refer to a default unit that appears later in the file.
    for (Map.Entry<String, Pair<String, Double>> entry : unitToDefaultUnits.entrySet()) {
        Unit unit = unitsByName.get(entry.getKey());
        Unit defaultUnit = unitsByName.get(entry.getValue().first);
        if (defaultUnit != null) {
            unit.defaultUnit = defaultUnit;
            unit.defaultUnitScale = entry.getValue().second;
        } else {
            Redwood.Util.warn("Unknown default unit " + entry.getValue().first + " for " + entry.getKey());
        }
    }
    return list;
}
Also used : Pattern(java.util.regex.Pattern) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BufferedReader(java.io.BufferedReader) HashMap(java.util.HashMap) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair)

Example 37 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class CMMClassifier method getThresholds.

/**
 * Reads a threshold file in which every line has the form
 * {@code <regex> <integer>}: everything before the last space is compiled as
 * a regular expression and everything after it is parsed as the threshold.
 *
 * @param filename location passed to {@code IOUtils.readerFromString}
 * @return one (pattern, threshold) pair per line, in file order
 * @throws RuntimeIOException if reading the file fails
 */
private static List<Pair<Pattern, Integer>> getThresholds(String filename) {
    // try-with-resources closes the reader exactly once on every path.
    try (BufferedReader in = IOUtils.readerFromString(filename)) {
        List<Pair<Pattern, Integer>> thresholds = new ArrayList<>();
        for (String line; (line = in.readLine()) != null; ) {
            // Split on the LAST space so the regex itself may contain spaces.
            int i = line.lastIndexOf(' ');
            Pattern p = Pattern.compile(line.substring(0, i));
            Integer t = Integer.valueOf(line.substring(i + 1));
            thresholds.add(new Pair<>(p, t));
        }
        return thresholds;
    } catch (IOException e) {
        throw new RuntimeIOException("Error reading threshold file", e);
    }
}
Also used : Pattern(java.util.regex.Pattern) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException)

Example 38 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class MultiWordStringMatcher method getPattern.

/**
 * Returns the pattern for {@code targetString}, creating and caching it on
 * first request.
 *
 * @param targetString string to obtain a pattern for
 * @return the cached pattern if one exists, otherwise a newly created one
 */
public Pattern getPattern(String targetString) {
    Pattern cached = targetStringPatternCache.get(targetString);
    if (cached != null) {
        return cached;
    }
    // Cache miss: build the pattern once and remember it for later lookups.
    Pattern created = createPattern(targetString);
    targetStringPatternCache.put(targetString, created);
    return created;
}
Also used : Pattern(java.util.regex.Pattern)

Example 39 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class TreeToTSV method main.

/**
 * Reads a tree file and prints one {@code word<TAB>tag} line per
 * pre-terminal to standard output, with an empty line after each tree.
 *
 * <p>The tag is chosen from a single type character: the character following
 * the {@code grup.nom.} prefix of the node one level above the pre-terminal
 * if that label has the prefix, otherwise the last character of the
 * pre-terminal's own label when it has the form {@code np0000x}. The
 * characters {@code p}, {@code l}, {@code o} and {@code 0} map to
 * {@code PERS}, {@code LUG}, {@code ORG} and {@code OTROS}; anything else
 * yields {@code O}.
 *
 * @param args {@code args[0]} is the path of the tree file (read as UTF-8)
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.printf("Usage: java %s tree_file%n", TreeToTSV.class.getName());
        System.exit(-1);
    }
    String treeFile = args[0];
    // try-with-resources releases the file even if reading a tree throws.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"))) {
        TreeReaderFactory trf = new SpanishTreeReaderFactory();
        TreeReader tr = trf.newTreeReader(br);
        StringBuilder sb = new StringBuilder();
        String nl = System.getProperty("line.separator");
        Pattern nePattern = Pattern.compile("^grup\\.nom\\.");
        Pattern npPattern = Pattern.compile("^np0000.$");
        for (Tree tree; (tree = tr.readTree()) != null; ) {
            for (Tree t : tree) {
                if (!t.isPreTerminal()) {
                    continue;
                }
                char type = 'O';
                Tree grandma = t.ancestor(1, tree);
                String grandmaValue = ((CoreLabel) grandma.label()).value();
                if (nePattern.matcher(grandmaValue).find()) {
                    // grup.nom.x -- index 9 is the character right after the prefix
                    type = grandmaValue.charAt(9);
                } else {
                    // otherwise check the POS tag for np0000x; index 6 is the x
                    String pos = ((CoreLabel) t.label()).value();
                    if (npPattern.matcher(pos).find()) {
                        type = pos.charAt(6);
                    }
                }
                Tree wordNode = t.firstChild();
                String word = ((CoreLabel) wordNode.label()).value();
                sb.append(word).append("\t");
                switch (type) {
                    case 'p':
                        sb.append("PERS");
                        break;
                    case 'l':
                        sb.append("LUG");
                        break;
                    case 'o':
                        sb.append("ORG");
                        break;
                    case '0':
                        sb.append("OTROS");
                        break;
                    default:
                        sb.append("O");
                }
                sb.append(nl);
            }
            // Blank line between trees.
            sb.append(nl);
        }
        System.out.print(sb.toString());
        // Closing the tree reader here is kept from the original; the later
        // automatic close of br is then a no-op.
        tr.close();
    } catch (IOException e) {
        // UnsupportedEncodingException and FileNotFoundException are both
        // IOExceptions and were handled identically, so one handler suffices.
        e.printStackTrace();
    }
}
Also used : Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) FileNotFoundException(java.io.FileNotFoundException) TreeReader(edu.stanford.nlp.trees.TreeReader) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) CoreLabel(edu.stanford.nlp.ling.CoreLabel) BufferedReader(java.io.BufferedReader) Tree(edu.stanford.nlp.trees.Tree) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 40 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class ChineseSimWordAvgDepGrammar method getMap.

/**
 * Reads a similar-word map file (UTF-8) whose lines have the form
 * {@code sim(word1/tag1:word2/tag2)=score}.
 *
 * <p>Each well-formed line adds the triple (index of word2, tag2, score) to
 * the list stored under the key (index of word1, tag1). Words are turned
 * into indices with {@code wordIndex.addToIndex}. Lines that do not match
 * the expected form are logged and skipped.
 *
 * @param filename path of the similar-word map file
 * @return map from (word index, tag) to its list of (word index, tag, score) entries
 * @throws RuntimeException if the file cannot be read; the underlying
 *         {@link IOException} is attached as the cause
 */
public Map<Pair<Integer, String>, List<Triple<Integer, String, Double>>> getMap(String filename) {
    Map<Pair<Integer, String>, List<Triple<Integer, String, Double>>> hashMap = Generics.newHashMap();
    Pattern linePattern = Pattern.compile("sim\\((.+)/(.+):(.+)/(.+)\\)=(.+)");
    // try-with-resources: the original never closed this reader.
    try (BufferedReader wordMapBReader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"))) {
        String wordMapLine;
        while ((wordMapLine = wordMapBReader.readLine()) != null) {
            Matcher m = linePattern.matcher(wordMapLine);
            if (!m.matches()) {
                log.info("Ill-formed line in similar word map file: " + wordMapLine);
                continue;
            }
            // Index the first word before the second so index assignment
            // order follows the order of appearance on the line.
            Pair<Integer, String> iTW = new Pair<>(wordIndex.addToIndex(m.group(1)), m.group(2));
            double score = Double.parseDouble(m.group(5));
            List<Triple<Integer, String, Double>> tripleList = hashMap.get(iTW);
            if (tripleList == null) {
                tripleList = new ArrayList<>();
                hashMap.put(iTW, tripleList);
            }
            tripleList.add(new Triple<>(wordIndex.addToIndex(m.group(3)), m.group(4), score));
        }
    } catch (IOException e) {
        // Keep the cause so the underlying I/O failure is not lost.
        throw new RuntimeException("Problem reading similar words file!", e);
    }
    return hashMap;
}
Also used : Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Triple(edu.stanford.nlp.util.Triple) BufferedReader(java.io.BufferedReader) Pair(edu.stanford.nlp.util.Pair)

Aggregations

Pattern (java.util.regex.Pattern)3181 Matcher (java.util.regex.Matcher)2116 ArrayList (java.util.ArrayList)387 IOException (java.io.IOException)247 Test (org.junit.Test)238 File (java.io.File)193 HashMap (java.util.HashMap)163 BufferedReader (java.io.BufferedReader)127 Field (java.lang.reflect.Field)119 PatternSyntaxException (java.util.regex.PatternSyntaxException)119 Map (java.util.Map)110 List (java.util.List)93 HashSet (java.util.HashSet)79 InputStreamReader (java.io.InputStreamReader)67 InputStream (java.io.InputStream)43 FileReader (java.io.FileReader)41 FileInputStream (java.io.FileInputStream)40 URL (java.net.URL)35 SmallTest (android.test.suitebuilder.annotation.SmallTest)31 LinkedHashMap (java.util.LinkedHashMap)31