Search in sources :

Example 1 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class TRECFormat method writeFeatureList.

/**
 * Writes the data set's feature list into the given TREC directory in two forms:
 * a Java-serialized copy of the {@code FeatureList} object and a plain-text file
 * with one feature (its {@code toString()}) per line.
 *
 * <p>I/O failures are not propagated: each write is attempted independently and
 * an {@link IOException} from either one is only printed to standard error.
 *
 * @param dataSet  data set whose feature list is written
 * @param trecFile directory under which both feature-list files are created
 */
private static void writeFeatureList(DataSet dataSet, File trecFile) {
    File serializedTarget = new File(trecFile, TREC_FEATURE_LIST_FILE_NAME);
    FeatureList features = dataSet.getFeatureList();

    // Serialized form. The three streams are declared separately so that each
    // one is closed even if wrapping it in the next one fails.
    try (FileOutputStream rawOut = new FileOutputStream(serializedTarget);
        BufferedOutputStream bufferedOut = new BufferedOutputStream(rawOut);
        ObjectOutputStream objectOut = new ObjectOutputStream(bufferedOut)) {
        objectOut.writeObject(features);
    } catch (IOException serializationFailure) {
        serializationFailure.printStackTrace();
    }

    // Human-readable form: one line per feature.
    File textTarget = new File(trecFile, TREC_FEATURE_LIST_TEXT_FILE_NAME);
    try (BufferedWriter textOut = new BufferedWriter(new FileWriter(textTarget))) {
        for (Feature current : features.getAll()) {
            textOut.write(current.toString());
            textOut.newLine();
        }
    } catch (IOException textFailure) {
        textFailure.printStackTrace();
    }
}
Also used : FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 2 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class MekaFormat method loadMLClfDatasetDense.

/**
 * Loads a multi-label data set from a file whose data rows are dense: every line
 * after the {@code @data} marker is a comma-separated list of values, and the
 * zero-based position of a value is its column index.
 *
 * <p>A column index that is a key of {@code labelMap} is a label column: a value of
 * {@code 1.0} adds label {@code index - numFeatures} to the current data point.
 * A column index that is a key of {@code featureMap} is a feature column and its
 * value is stored as that feature's value. Surrounding braces on a row, if both
 * present, are stripped before splitting.
 *
 * @param file        file to read
 * @param numClasses  number of classes of the built data set
 * @param numFeatures number of features of the built data set; also the offset
 *                    subtracted from a label column index
 * @param numData     number of data points of the built data set
 * @param labelMap    column index (as a decimal string) to label name
 * @param featureMap  column index (as a decimal string) to feature name
 * @return the populated data set
 * @throws IOException      if the file cannot be opened or read
 * @throws RuntimeException if a column index is in neither map
 */
private static MultiLabelClfDataSet loadMLClfDatasetDense(File file, int numClasses, int numFeatures, int numData, Map<String, String> labelMap, Map<String, String> featureMap) throws IOException {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numDataPoints(numData).numClasses(numClasses).numFeatures(numFeatures).build();
    // set features
    List<Feature> featureList = new LinkedList<>();
    for (int m = 0; m < numFeatures; m++) {
        String featureIndex = Integer.toString(m);
        String featureName = featureMap.get(featureIndex);
        Feature feature = new Feature();
        feature.setIndex(m);
        feature.setName(featureName);
        featureList.add(feature);
    }
    dataSet.setFeatureList(new FeatureList(featureList));
    // set Label
    Map<Integer, String> labelIndexMap = new HashMap<>();
    for (Map.Entry<String, String> entry : labelMap.entrySet()) {
        String labelString = entry.getKey();
        String labelName = entry.getValue();
        // label keys are column indices; labels come after the features
        labelIndexMap.put(Integer.parseInt(labelString) - numFeatures, labelName);
    }
    LabelTranslator labelTranslator = new LabelTranslator(labelIndexMap);
    dataSet.setLabelTranslator(labelTranslator);
    // create feature matrix; try-with-resources closes the reader even when a
    // parse error or the RuntimeException below aborts the load
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        int dataCount = 0;
        boolean ifData = false;
        while ((line = br.readLine()) != null) {
            if (line.startsWith("@data")) {
                ifData = true;
                continue;
            }
            if (!ifData) {
                // still in the header section
                continue;
            }
            if ((line.startsWith("{")) && (line.endsWith("}"))) {
                line = line.substring(1, line.length() - 1);
            }
            String[] values = line.split(",");
            for (int column = 0; column < values.length; column++) {
                String index = Integer.toString(column);
                String value = values[column];
                if (labelMap.containsKey(index)) {
                    double valueDouble = Double.parseDouble(value);
                    if (valueDouble == 1.0) {
                        dataSet.addLabel(dataCount, column - numFeatures);
                    }
                } else if (featureMap.containsKey(index)) {
                    double valueDouble = Double.parseDouble(value);
                    dataSet.setFeatureValue(dataCount, column, valueDouble);
                } else {
                    throw new RuntimeException("Index not found in the line: " + line);
                }
            }
            dataCount++;
        }
    }
    return dataSet;
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList)

Example 3 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class MekaFormat method save.

/**
 * Writes a multi-label data set to a file in sparse MEKA/ARFF form.
 *
 * <p>The output consists of a {@code @relation} header carrying the data name and
 * the number of classes, one numeric {@code @attribute} per feature named
 * {@code f<i>}, one {@code {0,1}} {@code @attribute} per label (spaces in the label
 * name replaced by underscores), and a {@code @data} section with one
 * {@code {index value,...}} row per data point. In each row the non-zero features
 * come first in ascending index order, followed by the matched labels written as
 * {@code (labelIndex + numFeatures) 1}.
 *
 * @param dataSet  data set to write
 * @param mekaFile path of the file to create
 * @param dataName relation name placed in the header
 * @throws IOException if the file cannot be created or written
 */
public static void save(MultiLabelClfDataSet dataSet, String mekaFile, String dataName) throws IOException {
    // try-with-resources so the writer is closed (and flushed) even if a write fails
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(mekaFile))) {
        // writing the header: @relation 'data_name: -C number_classes\n\n'
        LabelTranslator labelTranslator = dataSet.getLabelTranslator();
        int numClasses = labelTranslator.getNumClasses();
        bw.write("@relation " + "'" + dataName + ": -C " + numClasses + "'\n\n");
        // starting writing features; attributes are named by position only
        FeatureList featureList = dataSet.getFeatureList();
        for (int i = 0; i < featureList.size(); i++) {
            String featureName = "f" + i;
            bw.write("@attribute " + featureName + " numeric\n");
        }
        // starting writing labels
        for (int i = 0; i < numClasses; i++) {
            String labelName = labelTranslator.toExtLabel(i);
            bw.write("@attribute " + labelName.replace(" ", "_") + " {0,1}\n");
        }
        // starting @data
        MultiLabel[] multiLabels = dataSet.getMultiLabels();
        int numFeatures = dataSet.getNumFeatures();
        bw.write("\n@data\n\n");
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            StringBuilder row = new StringBuilder("{");
            // true until the first entry is written; a separator is emitted only
            // between entries, so a row with labels but no non-zero features does
            // not start with a stray comma
            boolean firstEntry = true;
            // sort the non-zero features by index
            Map<Integer, Double> sortedKeys = new TreeMap<>();
            for (Vector.Element element : dataSet.getRow(i).nonZeroes()) {
                sortedKeys.put(element.index(), element.get());
            }
            for (Map.Entry<Integer, Double> entry : sortedKeys.entrySet()) {
                int index = entry.getKey();
                double value = entry.getValue();
                if (!firstEntry) {
                    row.append(",");
                }
                row.append(index).append(" ").append(value);
                firstEntry = false;
            }
            // labels follow the features, shifted past the feature indices
            List<Integer> matchedLabels = multiLabels[i].getMatchedLabelsOrdered();
            for (int j = 0; j < matchedLabels.size(); j++) {
                int matchedLabel = matchedLabels.get(j) + numFeatures;
                if (!firstEntry) {
                    row.append(",");
                }
                row.append(matchedLabel).append(" ").append("1");
                firstEntry = false;
            }
            row.append("}\n");
            bw.write(row.toString());
        }
    }
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector)

Example 4 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class MekaFormat method loadMLClfDataset.

/**
 * Loads a multi-label data set from a file whose data rows are sparse: each row
 * is a line of the form {@code {index value, index value, ...}}. Lines that do
 * not both start with <code>{</code> and end with <code>}</code> are ignored.
 *
 * <p>An index that is a key of {@code labelMap} is a label entry: a value of
 * {@code 1.0} adds label {@code index - numFeatures} to the current data point.
 * An index that is a key of {@code featureMap} is a feature entry and its value is
 * stored as that feature's value. Entries may be separated by a comma with or
 * without surrounding whitespace, so rows written as {@code {0 1.0,3 1}} and as
 * {@code {0 1.0, 3 1}} are both accepted. An empty row <code>{}</code> counts as a
 * data point with no features and no labels.
 *
 * @param file        file to read
 * @param numClasses  number of classes of the built data set
 * @param numFeatures number of features of the built data set; also the offset
 *                    subtracted from a label index
 * @param numData     number of data points of the built data set
 * @param labelMap    index (as a decimal string) to label name
 * @param featureMap  index (as a decimal string) to feature name
 * @return the populated data set
 * @throws IOException      if the file cannot be opened or read
 * @throws RuntimeException if an entry is not an index-value pair, or its index
 *                          is in neither map
 */
private static MultiLabelClfDataSet loadMLClfDataset(File file, int numClasses, int numFeatures, int numData, Map<String, String> labelMap, Map<String, String> featureMap) throws IOException {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numDataPoints(numData).numClasses(numClasses).numFeatures(numFeatures).build();
    // set features
    List<Feature> featureList = new LinkedList<>();
    for (int m = 0; m < numFeatures; m++) {
        String featureIndex = Integer.toString(m);
        String featureName = featureMap.get(featureIndex);
        Feature feature = new Feature();
        feature.setIndex(m);
        feature.setName(featureName);
        featureList.add(feature);
    }
    dataSet.setFeatureList(new FeatureList(featureList));
    // set Label
    Map<Integer, String> labelIndexMap = new HashMap<>();
    for (Map.Entry<String, String> entry : labelMap.entrySet()) {
        String labelString = entry.getKey();
        String labelName = entry.getValue();
        // label keys are indices that come after the feature indices
        labelIndexMap.put(Integer.parseInt(labelString) - numFeatures, labelName);
    }
    LabelTranslator labelTranslator = new LabelTranslator(labelIndexMap);
    dataSet.setLabelTranslator(labelTranslator);
    // create feature matrix; try-with-resources closes the reader even when a
    // parse error or one of the RuntimeExceptions below aborts the load
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        int dataCount = 0;
        while ((line = br.readLine()) != null) {
            if (!(line.startsWith("{") && line.endsWith("}"))) {
                // header, blank or otherwise non-data line
                continue;
            }
            line = line.substring(1, line.length() - 1);
            // "{}" is a data point with nothing set; splitting "" would yield one
            // empty entry, so it is handled by skipping the entry loop
            if (!line.trim().isEmpty()) {
                for (String indexValue : line.split(",")) {
                    String[] indexValuePair = indexValue.trim().split("\\s+");
                    if (indexValuePair.length < 2) {
                        throw new RuntimeException("Malformed index-value pair '" + indexValue + "' in the line: " + line);
                    }
                    String index = indexValuePair[0];
                    String value = indexValuePair[1];
                    if (labelMap.containsKey(index)) {
                        double valueDouble = Double.parseDouble(value);
                        if (valueDouble == 1.0) {
                            dataSet.addLabel(dataCount, Integer.parseInt(index) - numFeatures);
                        }
                    } else if (featureMap.containsKey(index)) {
                        double valueDouble = Double.parseDouble(value);
                        int indexInt = Integer.parseInt(index);
                        dataSet.setFeatureValue(dataCount, indexInt, valueDouble);
                    } else {
                        throw new RuntimeException("Index:" + index + " not found in the line: " + line);
                    }
                }
            }
            dataCount++;
        }
    }
    return dataSet;
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList)

Example 5 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class MekaFormat method loadMLClfDatasetPreDense.

/**
 * Loads a multi-label data set from a file whose data rows are dense and carry
 * one leading column that is not part of the data: every line after the
 * {@code @data} marker is a comma-separated list, its first value is skipped, and
 * the remaining values are numbered from column index 0.
 *
 * <p>A column index that is a key of {@code labelMap} is a label column: a value of
 * {@code 1.0} adds label {@code index - numFeatures} to the current data point.
 * A column index that is a key of {@code featureMap} is a feature column and its
 * value is stored as that feature's value. Surrounding braces on a row, if both
 * present, are stripped before splitting.
 *
 * @param file        file to read
 * @param numClasses  number of classes of the built data set
 * @param numFeatures number of features of the built data set; also the offset
 *                    subtracted from a label column index
 * @param numData     number of data points of the built data set
 * @param labelMap    column index (as a decimal string) to label name
 * @param featureMap  column index (as a decimal string) to feature name
 * @return the populated data set
 * @throws IOException      if the file cannot be opened or read
 * @throws RuntimeException if a column index is in neither map
 */
private static MultiLabelClfDataSet loadMLClfDatasetPreDense(File file, int numClasses, int numFeatures, int numData, Map<String, String> labelMap, Map<String, String> featureMap) throws IOException {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numDataPoints(numData).numClasses(numClasses).numFeatures(numFeatures).build();
    // set features
    List<Feature> featureList = new LinkedList<>();
    for (int m = 0; m < numFeatures; m++) {
        String featureIndex = Integer.toString(m);
        String featureName = featureMap.get(featureIndex);
        Feature feature = new Feature();
        feature.setIndex(m);
        feature.setName(featureName);
        featureList.add(feature);
    }
    dataSet.setFeatureList(new FeatureList(featureList));
    // set Label
    Map<Integer, String> labelIndexMap = new HashMap<>();
    for (Map.Entry<String, String> entry : labelMap.entrySet()) {
        String labelString = entry.getKey();
        String labelName = entry.getValue();
        // label keys are column indices; labels come after the features
        labelIndexMap.put(Integer.parseInt(labelString) - numFeatures, labelName);
    }
    LabelTranslator labelTranslator = new LabelTranslator(labelIndexMap);
    dataSet.setLabelTranslator(labelTranslator);
    // create feature matrix; try-with-resources closes the reader even when a
    // parse error or the RuntimeException below aborts the load
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        int dataCount = 0;
        boolean ifData = false;
        while ((line = br.readLine()) != null) {
            if (line.startsWith("@data")) {
                ifData = true;
                continue;
            }
            if (!ifData) {
                // still in the header section
                continue;
            }
            if ((line.startsWith("{")) && (line.endsWith("}"))) {
                line = line.substring(1, line.length() - 1);
            }
            String[] values = line.split(",");
            // position 0 is the leading column that is skipped, so the value at
            // position p has column index p - 1
            for (int position = 1; position < values.length; position++) {
                int column = position - 1;
                String index = Integer.toString(column);
                String value = values[position];
                if (labelMap.containsKey(index)) {
                    double valueDouble = Double.parseDouble(value);
                    if (valueDouble == 1.0) {
                        dataSet.addLabel(dataCount, column - numFeatures);
                    }
                } else if (featureMap.containsKey(index)) {
                    double valueDouble = Double.parseDouble(value);
                    dataSet.setFeatureValue(dataCount, column, valueDouble);
                } else {
                    throw new RuntimeException("Index not found in the line: " + line);
                }
            }
            dataCount++;
        }
    }
    return dataSet;
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList)

Aggregations

Feature (edu.neu.ccs.pyramid.feature.Feature): 23, Vector (org.apache.mahout.math.Vector): 14, FeatureList (edu.neu.ccs.pyramid.feature.FeatureList): 13, Collectors (java.util.stream.Collectors): 9, TopFeatures (edu.neu.ccs.pyramid.feature.TopFeatures): 8, RegTreeInspector (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector): 6, RegressionTree (edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree): 6, java.util (java.util): 6, MultiLabelPredictionAnalysis (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis): 5, edu.neu.ccs.pyramid.regression (edu.neu.ccs.pyramid.regression): 5, TreeRule (edu.neu.ccs.pyramid.regression.regression_tree.TreeRule): 5, ClassProbability (edu.neu.ccs.pyramid.classification.ClassProbability): 4, edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset): 4, LabelTranslator (edu.neu.ccs.pyramid.dataset.LabelTranslator): 4, Pair (edu.neu.ccs.pyramid.util.Pair): 4, IntStream (java.util.stream.IntStream): 4, PredictionAnalysis (edu.neu.ccs.pyramid.classification.PredictionAnalysis): 3, IdTranslator (edu.neu.ccs.pyramid.dataset.IdTranslator): 3, IMLGradientBoosting (edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting): 3, ArrayList (java.util.ArrayList): 3