Search in sources :

Example 1 with Instances

use of weka.core.Instances in project mcib3d-core by mcib3d.

the class DataSet method loadDataARFF.

/**
 * Loads a data set from an ARFF file and, if it matches the expected attribute
 * layout, stores it in {@code instances}.
 *
 * <p>The class index of the loaded data is taken from {@code attributes}. The
 * loaded data only replaces {@code instances} when its number of attributes
 * equals {@code attributes.size()}; otherwise a message is logged through
 * {@code IJ.log} and {@code instances} is left unchanged. If the file cannot
 * be read, the stack trace is printed and {@code instances} is left unchanged.
 *
 * @param fileName path of the ARFF file to read
 */
public void loadDataARFF(String fileName) {
    Instances dataTmp;
    // try-with-resources guarantees the reader is closed, even when parsing fails.
    try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
        dataTmp = new Instances(reader);
    } catch (IOException e) {
        e.printStackTrace();
        // Nothing was loaded: bail out instead of dereferencing a null data set below.
        return;
    }
    dataTmp.setClassIndex(attributes.getClassIndex());
    if (dataTmp.numAttributes() == attributes.size()) {
        instances = dataTmp;
    } else {
        IJ.log("Pb readind arff, number of attributes different");
    }
}
Also used : Instances(weka.core.Instances) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) IOException(java.io.IOException)

Example 2 with Instances

use of weka.core.Instances in project 490 by pauleibye.

the class MainFrame method createCSVandArff.

/**
 * Writes the extracted features to {@code csv.txt} and converts that file to
 * the ARFF file {@code data.arff}.
 *
 * <p>Each of the two files is only generated when it does not exist yet; an
 * existing file is left untouched. I/O failures are reported on standard
 * output together with the stack trace.
 *
 * @param FE feature extractor whose {@code getFileFeatures()} lines are written
 *           to the CSV file, one per line
 */
public void createCSVandArff(FeatureExtractor FE) {
    try {
        File file = new File("csv.txt");
        if (!file.exists()) {
            // PrintWriter(File) creates the file itself; try-with-resources makes sure
            // the writer is flushed and closed even if producing the features fails.
            try (PrintWriter writer = new PrintWriter(file)) {
                for (String s : FE.getFileFeatures()) {
                    writer.println(s);
                }
            }
        }
        File arff = new File("data.arff");
        if (!arff.exists()) {
            CSVLoader loader = new CSVLoader();
            loader.setSource(file);
            Instances data = loader.getDataSet();
            ArffSaver saver = new ArffSaver();
            saver.setInstances(data);
            saver.setFile(arff);
            saver.writeBatch();
        }
    } catch (IOException e) {
        System.out.println("Exception Occurred:");
        e.printStackTrace();
    }
}
Also used : Instances(weka.core.Instances) CSVLoader(weka.core.converters.CSVLoader) ArffSaver(weka.core.converters.ArffSaver) IOException(java.io.IOException) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 3 with Instances

use of weka.core.Instances in project iobserve-analysis by research-iobserve.

the class AbstractClustering method createInstances.

/**
 * Converts the user sessions (given as counts of their called operation signatures)
 * into a Weka data set usable for clustering.
 *
 * <p>The data set is named {@code "CallCounts"} and has one attribute per distinct
 * operation signature, named {@code "Attribute" + index}. Every user session becomes
 * one instance whose i-th value is the i-th entry of the session's absolute call counts.
 *
 * @param countModel
 *            the user sessions in form of counts of called operation signatures
 * @param listOfDistinctOperationSignatures
 *            the distinct operation signatures; only its size is used here
 * @return the Weka instances holding one row per user session
 */
protected Instances createInstances(final List<UserSessionAsCountsOfCalls> countModel, final List<String> listOfDistinctOperationSignatures) {
    final int signatureCount = listOfDistinctOperationSignatures.size();

    // Declare one attribute per distinct operation signature.
    final FastVector attributeVector = new FastVector(signatureCount);
    for (int index = 0; index < signatureCount; index++) {
        attributeVector.addElement(new Attribute("Attribute" + index));
    }

    // Fill the data set with one instance per user session.
    final Instances result = new Instances("CallCounts", attributeVector, countModel.size());
    for (final UserSessionAsCountsOfCalls session : countModel) {
        final Instance sessionRow = new Instance(signatureCount);
        for (int column = 0; column < signatureCount; column++) {
            final Attribute target = (Attribute) attributeVector.elementAt(column);
            sessionRow.setValue(target, session.getAbsoluteCountOfCalls()[column]);
        }
        result.add(sessionRow);
    }
    return result;
}
Also used : Instances(weka.core.Instances) FastVector(weka.core.FastVector) UserSessionAsCountsOfCalls(org.iobserve.analysis.userbehavior.data.UserSessionAsCountsOfCalls) Attribute(weka.core.Attribute) Instance(weka.core.Instance)

Example 4 with Instances

use of weka.core.Instances in project iobserve-analysis by research-iobserve.

the class BehaviorModelTable method toInstances.

/**
 * Creates an {@code Instances} object for clustering.
 *
 * <p>The attribute list consists of one attribute for every transition whose value
 * exceeds {@code AbstractBehaviorModelTable.TRANSITION_THRESHOLD}, followed by one
 * attribute for every call information entry of every signature. The resulting data
 * set contains the single instance returned by {@code toInstance()}.
 *
 * @return the data set holding exactly one instance
 */
public Instances toInstances() {
    final FastVector attributes = new FastVector();
    final int signatureCount = this.signatures.size();

    // Transition attributes: only edges above the threshold are represented.
    for (int from = 0; from < signatureCount; from++) {
        for (int to = 0; to < signatureCount; to++) {
            if (this.transitions[from][to] > AbstractBehaviorModelTable.TRANSITION_THRESHOLD) {
                attributes.addElement(new Attribute(AbstractBehaviorModelTable.EDGE_INDICATOR + this.inverseSignatures[from] + AbstractBehaviorModelTable.EDGE_DIVIDER + this.inverseSignatures[to]));
            }
        }
    }

    // Information attributes: one per call information entry of each signature.
    this.signatures.values().stream().forEach(pair -> {
        Arrays.stream(pair.getSecond()).forEach(callInformation -> {
            attributes.addElement(new Attribute(AbstractBehaviorModelTable.INFORMATION_INDICATOR + this.inverseSignatures[pair.getFirst()] + AbstractBehaviorModelTable.INFORMATION_DIVIDER + callInformation.getSignature()));
        });
    });

    // TODO name
    final Instances result = new Instances("Test", attributes, 0);
    result.add(this.toInstance());
    return result;
}
Also used : Arrays(java.util.Arrays) Logger(org.slf4j.Logger) FastVector(weka.core.FastVector) Pair(org.apache.commons.math3.util.Pair) SingleOrNoneCollector(org.iobserve.analysis.clustering.SingleOrNoneCollector) Instances(weka.core.Instances) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) PayloadAwareEntryCallEvent(org.iobserve.stages.general.data.PayloadAwareEntryCallEvent) EntryCallEvent(org.iobserve.stages.general.data.EntryCallEvent) Instance(weka.core.Instance) List(java.util.List) Map(java.util.Map) Optional(java.util.Optional) Attribute(weka.core.Attribute) Instances(weka.core.Instances) FastVector(weka.core.FastVector) Attribute(weka.core.Attribute) Instance(weka.core.Instance)

Example 5 with Instances

use of weka.core.Instances in project cia by Hack23.

the class WordCounterImpl method calculateWordCount.

/**
 * Counts the words in the content of the given document.
 *
 * <p>The content is placed into a one-instance Weka data set and passed through a
 * {@code StringToWordVector} filter configured with a unigram tokenizer, lower-casing,
 * word-count output and a stopword rule that discards words shorter than five
 * characters. Each attribute of the last filtered instance is then stored in the
 * result map under the attribute name. If anything fails, a warning is logged and
 * the entries collected so far (possibly none) are returned.
 *
 * @param documentContentData the document whose content is analysed
 * @param maxResult value passed to the filter's {@code setWordsToKeep}
 * @return map from attribute name to its integer value in the filtered instance
 */
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
    final String content = documentContentData.getContent();

    // Single string attribute holding the whole document content.
    final ArrayList<Attribute> attributeList = new ArrayList<>();
    attributeList.add(new Attribute(HTML, (ArrayList<String>) null));
    final Instances source = new Instances(HTML, attributeList, 1);
    source.add(new DenseInstance(1));
    source.instance(0).setValue(0, content);

    // Words with fewer than five characters are treated as stopwords.
    final StopwordsHandler shortWordHandler = new StopwordsHandler() {

        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    // Unigram tokenizer: minimum and maximum n-gram size are both 1.
    final NGramTokenizer unigramTokenizer = new NGramTokenizer();
    unigramTokenizer.setNGramMinSize(1);
    unigramTokenizer.setNGramMaxSize(1);
    unigramTokenizer.setDelimiters(TOKEN_DELIMITERS);

    final StringToWordVector wordVectorFilter = new StringToWordVector();
    wordVectorFilter.setTokenizer(unigramTokenizer);
    wordVectorFilter.setStopwordsHandler(shortWordHandler);
    wordVectorFilter.setLowerCaseTokens(true);
    wordVectorFilter.setOutputWordCounts(true);
    wordVectorFilter.setWordsToKeep(maxResult);

    final Map<String, Integer> wordCounts = new HashMap<>();
    try {
        wordVectorFilter.setInputFormat(source);
        final Instance filteredRow = Filter.useFilter(source, wordVectorFilter).lastInstance();
        final int attributeCount = filteredRow.numAttributes();
        for (int index = 0; index < attributeCount; index++) {
            final String word = filteredRow.attribute(index).name();
            final Integer count = Integer.valueOf(filteredRow.toString(index));
            wordCounts.put(word, count);
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }
    return wordCounts;
}
Also used : DenseInstance(weka.core.DenseInstance) Attribute(weka.core.Attribute) HashMap(java.util.HashMap) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) ArrayList(java.util.ArrayList) StopwordsHandler(weka.core.stopwords.StopwordsHandler) NGramTokenizer(weka.core.tokenizers.NGramTokenizer) Instances(weka.core.Instances) StringToWordVector(weka.filters.unsupervised.attribute.StringToWordVector)

Aggregations

Instances (weka.core.Instances)31 Attribute (weka.core.Attribute)12 ArrayList (java.util.ArrayList)9 File (java.io.File)8 Instance (org.dkpro.tc.api.features.Instance)8 Test (org.junit.Test)8 MultiLabelInstances (mulan.data.MultiLabelInstances)7 IOException (java.io.IOException)5 DenseInstance (weka.core.DenseInstance)5 Instance (weka.core.Instance)5 ArffSaver (weka.core.converters.ArffSaver)5 Feature (org.dkpro.tc.api.features.Feature)4 Classifier (weka.classifiers.Classifier)3 FastVector (weka.core.FastVector)3 SparseInstance (weka.core.SparseInstance)3 HashMap (java.util.HashMap)2 Result (meka.core.Result)2 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)2 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)2 FeatureType (org.dkpro.tc.api.features.FeatureType)2