use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.
the class Meka2Trec method main.
/**
 * Converts data sets listed in a properties file: each {@code meka} entry is loaded with
 * {@code MekaFormat.loadMLClfDataset} and written with {@code TRECFormat.save} to the
 * {@code trec} entry at the same position.
 * Only multi-label classification data sets are supported.
 *
 * <p>Properties read: {@code trec}, {@code meka}, {@code numLabels}, {@code numFeatures},
 * {@code dataMode}.
 *
 * @param args exactly one element: the path of the properties file
 * @throws IOException declared for the load/save steps
 * @throws IllegalArgumentException if no single properties file is given, or if there are
 *         fewer {@code trec} outputs than {@code meka} inputs
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    List<String> trecs = config.getStrings("trec");
    List<String> mekas = config.getStrings("meka");
    int numLabels = config.getInt("numLabels");
    int numFeatures = config.getInt("numFeatures");
    String dataMode = config.getString("dataMode");
    // Fail fast: the loop below pairs mekas.get(i) with trecs.get(i), so a shorter trec list
    // would otherwise blow up with an IndexOutOfBoundsException after some data sets had
    // already been converted and saved.
    if (trecs.size() < mekas.size()) {
        throw new IllegalArgumentException("Found " + mekas.size() + " meka inputs but only "
                + trecs.size() + " trec outputs; each meka input needs a matching trec output.");
    }
    for (int i = 0; i < mekas.size(); i++) {
        System.out.println("processing on: " + trecs.get(i));
        MultiLabelClfDataSet dataSet = MekaFormat.loadMLClfDataset(mekas.get(i), numFeatures, numLabels, dataMode);
        TRECFormat.save(dataSet, trecs.get(i));
    }
}
use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.
the class Regex method main.
/**
 * Reads the {@code string} and {@code regularExpression} properties from the given
 * properties file and prints whether the whole string matches the expression.
 *
 * @param args exactly one element: the path of the properties file
 * @throws IllegalArgumentException if the argument count is not one
 */
public static void main(String[] args) {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);

    // Read the subject first, then the pattern, mirroring the lookup order of the config keys.
    String subject = config.getString("string");
    String pattern = config.getString("regularExpression");

    String verdict = subject.matches(pattern) ? "match!" : "not match!";
    System.out.println(verdict);
}
use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.
the class TRECFormat method parseNumDataPoints.
//==========PRIVATE==========
/**
 * Looks up the number of data points recorded in the config file of a TREC data set.
 *
 * @param trecFile parent path under which {@code TREC_CONFIG_FILE_NAME} is resolved
 * @return the int stored under {@code TREC_CONFIG_NUM_DATA_POINTS}
 * @throws IOException if the config file cannot be opened for reading
 */
private static int parseNumDataPoints(File trecFile) throws IOException {
    File trecConfigFile = new File(trecFile, TREC_CONFIG_FILE_NAME);
    // NOTE(review): this reader is never read from; its only visible effect is to fail with
    // an IOException when the file cannot be opened. Config loads the file on its own.
    try (BufferedReader openCheck = new BufferedReader(new FileReader(trecConfigFile))) {
        return new Config(trecConfigFile).getInt(TREC_CONFIG_NUM_DATA_POINTS);
    }
}
use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.
the class App1 method keywordsFilter.
// public static String splitListToString(List<String> splitValues){
// String splitValueAll = "";
// for (int i=0;i<splitValues.size();i++){
// splitValueAll = splitValueAll+splitValues.get(i);
// if (i<splitValues.size()-1){
// splitValueAll = splitValueAll+"_";
// }
// }
// return splitValueAll;
// }
/**
 * Filters n-gram candidates against the keywords listed in an external file.
 * Unigram candidates are always kept; any longer n-gram is kept only when
 * {@code containsKeyWords} accepts it for the analyzed keyword set.
 *
 * @param config supplies {@code train.feature.filterNgrams.keyWordsFile} (path of the
 *               keywords file, one keyword per line) and {@code train.feature.analyzer}
 * @param index  used to analyze every keyword line with the configured analyzer
 * @param ngrams the candidates to filter
 * @return a new set holding the candidates that passed the filter
 * @throws IOException declared for reading the keywords file
 */
private static Set<Ngram> keywordsFilter(Config config, ESIndex index, Set<Ngram> ngrams) throws IOException {
    String keywordsPath = config.getString("train.feature.filterNgrams.keyWordsFile");
    List<String> rawKeywords = FileUtils.readLines(new File(keywordsPath));
    String analyzerName = config.getString("train.feature.analyzer");

    // Normalise each keyword line through the index analyzer before comparing against n-grams.
    Set<String> analyzedKeywords = new HashSet<>();
    for (String rawKeyword : rawKeywords) {
        analyzedKeywords.add(index.analyze(rawKeyword, analyzerName).getNgram());
    }

    return ngrams.parallelStream()
            .filter(candidate -> {
                if (candidate.getN() == 1) {
                    // Unigrams bypass the keyword check.
                    return true;
                }
                return containsKeyWords(candidate, analyzedKeywords);
            })
            .collect(Collectors.toSet());
}
use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.
the class App1 method loadLabels.
/**
 * Builds a label indicator matrix for all data points known to the id translator.
 * When the saved app1 config enables {@code train.label.filter}, only labels starting with
 * {@code train.label.filter.prefix} are considered.
 *
 * @param config          supplies {@code output.folder}, under which
 *                        {@code meta_data/saved_config_app1} is read
 * @param index           source of the external multi-label of each data point
 * @param idTranslator    maps internal data point positions to external ids
 * @param labelTranslator maps external labels to internal label positions
 * @return a 2d array indexed as [label][data point]; an entry is 1 when the data point
 *         carries the label and 0 otherwise
 */
private static double[][] loadLabels(Config config, MultiLabelIndex index, IdTranslator idTranslator, LabelTranslator labelTranslator) {
    File metaDataDir = new File(config.getString("output.folder"), "meta_data");
    Config savedConfig = new Config(new File(metaDataDir, "saved_config_app1"));
    int dataCount = idTranslator.numData();
    int classCount = labelTranslator.getNumClasses();
    double[][] indicator = new double[classCount][dataCount];

    for (int dataPos = 0; dataPos < dataCount; dataPos++) {
        List<String> extLabels = index.getExtMultiLabel(idTranslator.toExtId(dataPos));

        // The filter settings are looked up per data point, and the prefix only when filtering is on.
        boolean filterByPrefix = savedConfig.getBoolean("train.label.filter");
        String requiredPrefix = filterByPrefix ? savedConfig.getString("train.label.filter.prefix") : null;

        for (String extLabel : extLabels) {
            if (filterByPrefix && !extLabel.startsWith(requiredPrefix)) {
                continue;
            }
            indicator[labelTranslator.toIntLabel(extLabel)][dataPos] = 1;
        }
    }
    return indicator;
}
Aggregations