Search in sources :

Example 36 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class Meka2Trec method main.

/**
 * Converts MEKA-format multi-label classification datasets to TREC format.
 * Only multi-label classification datasets are supported.
 *
 * <p>The properties file supplies the input paths ("meka"), the output paths
 * ("trec"), and the "numLabels", "numFeatures" and "dataMode" settings that are
 * passed to the MEKA loader. The i-th "meka" entry is written to the i-th
 * "trec" entry.
 *
 * @param args a single element: the path of the properties file
 * @throws IOException declared for the dataset load/save calls
 * @throws IllegalArgumentException if the argument count is not one, or if there
 *         are fewer "trec" outputs than "meka" inputs
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    List<String> trecs = config.getStrings("trec");
    List<String> mekas = config.getStrings("meka");
    // Fail before any conversion starts: otherwise a short "trec" list would only be
    // noticed part-way through, after some datasets had already been written.
    if (trecs.size() < mekas.size()) {
        throw new IllegalArgumentException("Found " + mekas.size() + " meka inputs but only "
                + trecs.size() + " trec outputs; every meka input needs a trec output.");
    }
    int numLabels = config.getInt("numLabels");
    int numFeatures = config.getInt("numFeatures");
    String dataMode = config.getString("dataMode");
    for (int i = 0; i < mekas.size(); i++) {
        String trecPath = trecs.get(i);
        System.out.println("processing on: " + trecPath);
        MultiLabelClfDataSet dataSet = MekaFormat.loadMLClfDataset(mekas.get(i), numFeatures, numLabels, dataMode);
        TRECFormat.save(dataSet, trecPath);
    }
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Example 37 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class Regex method main.

/**
 * Checks whether the configured "string" value fully matches the configured
 * "regularExpression" value and prints the outcome to standard output.
 *
 * @param args a single element: the path of the properties file
 * @throws IllegalArgumentException if the argument count is not one
 */
public static void main(String[] args) {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    String candidate = config.getString("string");
    String pattern = config.getString("regularExpression");
    String verdict = candidate.matches(pattern) ? "match!" : "not match!";
    System.out.println(verdict);
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config)

Example 38 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class TRECFormat method parseNumDataPoints.

//==========PRIVATE==========
/**
 * Reads the number of data points recorded in a TREC dataset's config file.
 *
 * @param trecFile parent path under which the config file (TREC_CONFIG_FILE_NAME) is resolved
 * @return the integer stored under the TREC_CONFIG_NUM_DATA_POINTS key
 * @throws IOException if the config file does not exist, is not a regular file,
 *         or is not readable
 */
private static int parseNumDataPoints(File trecFile) throws IOException {
    File configFile = new File(trecFile, TREC_CONFIG_FILE_NAME);
    // Config is constructed from the File itself, so no separate reader is opened here.
    // The explicit check keeps a missing/unreadable config surfacing as an IOException,
    // which is what callers of this method are declared to handle.
    if (!configFile.isFile() || !configFile.canRead()) {
        throw new java.io.FileNotFoundException(configFile.getPath() + " (missing or not readable)");
    }
    Config config = new Config(configFile);
    return config.getInt(TREC_CONFIG_NUM_DATA_POINTS);
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config)

Example 39 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class App1 method keywordsFilter.

//    public static String splitListToString(List<String> splitValues){
//        String splitValueAll = "";
//        for (int i=0;i<splitValues.size();i++){
//            splitValueAll = splitValueAll+splitValues.get(i);
//            if (i<splitValues.size()-1){
//                splitValueAll = splitValueAll+"_";
//            }
//        }
//        return splitValueAll;
//    }
/**
 * Filters n-gram candidates against an external keyword list.
 *
 * <p>Each line of the file named by "train.feature.filterNgrams.keyWordsFile" is
 * analyzed with the analyzer named by "train.feature.analyzer", and the resulting
 * n-gram string is collected as a keyword. Unigram candidates are always kept;
 * longer candidates are kept only when containsKeyWords accepts them.
 *
 * @param config source of the keywords-file path and the analyzer name
 * @param index  index used to analyze each keyword line
 * @param ngrams candidate n-grams to filter
 * @return the candidates that pass the filter
 * @throws IOException declared for reading the keywords file
 */
private static Set<Ngram> keywordsFilter(Config config, ESIndex index, Set<Ngram> ngrams) throws IOException {
    File keywordsFile = new File(config.getString("train.feature.filterNgrams.keyWordsFile"));
    List<String> rawKeywords = FileUtils.readLines(keywordsFile);
    String analyzerName = config.getString("train.feature.analyzer");

    Set<String> analyzedKeywords = new HashSet<>();
    for (String rawKeyword : rawKeywords) {
        String analyzedKeyword = index.analyze(rawKeyword, analyzerName).getNgram();
        analyzedKeywords.add(analyzedKeyword);
    }

    return ngrams.parallelStream()
            .filter(candidate -> candidate.getN() == 1 || containsKeyWords(candidate, analyzedKeywords))
            .collect(Collectors.toSet());
}
Also used : java.util.logging(java.util.logging) java.util(java.util) BoundedBlockPriorityQueue(edu.neu.ccs.pyramid.util.BoundedBlockPriorityQueue) Multiset(com.google.common.collect.Multiset) NgramEnumerator(edu.neu.ccs.pyramid.feature_extraction.NgramEnumerator) edu.neu.ccs.pyramid.feature(edu.neu.ccs.pyramid.feature) Pair(edu.neu.ccs.pyramid.util.Pair) Config(edu.neu.ccs.pyramid.configuration.Config) FeatureLoader(edu.neu.ccs.pyramid.elasticsearch.FeatureLoader) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) BufferedWriter(java.io.BufferedWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileWriter(java.io.FileWriter) FileUtils(org.apache.commons.io.FileUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) MultiLabelIndex(edu.neu.ccs.pyramid.elasticsearch.MultiLabelIndex) ConcurrentHashMultiset(com.google.common.collect.ConcurrentHashMultiset) ESIndex(edu.neu.ccs.pyramid.elasticsearch.ESIndex) NgramTemplate(edu.neu.ccs.pyramid.feature_extraction.NgramTemplate) Serialization(edu.neu.ccs.pyramid.util.Serialization) Paths(java.nio.file.Paths) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) StumpSelector(edu.neu.ccs.pyramid.feature_extraction.StumpSelector) Pattern(java.util.regex.Pattern) File(java.io.File)

Example 40 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class App1 method loadLabels.

/**
 * Builds a 0/1 label indicator matrix for all data points.
 *
 * <p>For every data point the external labels are fetched from the index. When the
 * saved config's "train.label.filter" flag is set, only labels starting with
 * "train.label.filter.prefix" are used. Each remaining label sets the matching
 * cell of the matrix to 1.
 *
 * @param config          supplies "output.folder", under which the saved config is located
 * @param index           source of each data point's external labels
 * @param idTranslator    maps internal data indices to external ids and gives the data count
 * @param labelTranslator maps external labels to internal label indices and gives the class count
 * @return a 2d array indexed as [label][data point]
 */
private static double[][] loadLabels(Config config, MultiLabelIndex index, IdTranslator idTranslator, LabelTranslator labelTranslator) {
    File metaDataDir = new File(config.getString("output.folder"), "meta_data");
    Config savedConfig = new Config(new File(metaDataDir, "saved_config_app1"));
    int dataCount = idTranslator.numData();
    int classCount = labelTranslator.getNumClasses();
    double[][] indicator = new double[classCount][dataCount];
    int dataIdx = 0;
    while (dataIdx < dataCount) {
        List<String> extLabels = index.getExtMultiLabel(idTranslator.toExtId(dataIdx));
        // The saved config is consulted per data point, after its labels are fetched.
        boolean filterOn = savedConfig.getBoolean("train.label.filter");
        String requiredPrefix = filterOn ? savedConfig.getString("train.label.filter.prefix") : null;
        for (String extLabel : extLabels) {
            if (filterOn && !extLabel.startsWith(requiredPrefix)) {
                continue;
            }
            indicator[labelTranslator.toIntLabel(extLabel)][dataIdx] = 1;
        }
        dataIdx++;
    }
    return indicator;
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config) File(java.io.File)

Aggregations

Config (edu.neu.ccs.pyramid.configuration.Config)59 File (java.io.File)35 Collectors (java.util.stream.Collectors)18 FileUtils (org.apache.commons.io.FileUtils)18 StopWatch (org.apache.commons.lang3.time.StopWatch)18 Serialization (edu.neu.ccs.pyramid.util.Serialization)17 BufferedWriter (java.io.BufferedWriter)17 FileWriter (java.io.FileWriter)17 Paths (java.nio.file.Paths)17 IntStream (java.util.stream.IntStream)16 Pair (edu.neu.ccs.pyramid.util.Pair)15 edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset)14 EarlyStopper (edu.neu.ccs.pyramid.optimization.EarlyStopper)14 java.util (java.util)14 MLMeasures (edu.neu.ccs.pyramid.eval.MLMeasures)13 PrintUtil (edu.neu.ccs.pyramid.util.PrintUtil)13 MultiLabelClassifier (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier)12 edu.neu.ccs.pyramid.multilabel_classification.cbm (edu.neu.ccs.pyramid.multilabel_classification.cbm)12 ListUtil (edu.neu.ccs.pyramid.util.ListUtil)12 IOException (java.io.IOException)8