use of weka.core.Instances in project mcib3d-core by mcib3d.
the class DataSet method loadDataARFF.
/**
 * Loads a data set from an ARFF file and stores it in {@code instances} when its number of
 * attributes matches {@code attributes.size()}.
 *
 * <p>If the file cannot be read, the stack trace is printed and {@code instances} is left
 * untouched. If the attribute counts differ, a message is logged through {@code IJ.log} and
 * {@code instances} is likewise left untouched.
 *
 * @param fileName path of the ARFF file to read
 */
public void loadDataARFF(String fileName) {
    Instances dataTmp;
    // try-with-resources: the reader is closed whether or not parsing succeeds.
    try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
        dataTmp = new Instances(reader);
    } catch (IOException e) {
        e.printStackTrace();
        // Nothing was loaded; stop here instead of dereferencing a null data set below.
        return;
    }
    // Validate the layout before touching the class index, so that a file with a different
    // number of attributes ends in the log message rather than in a failing setClassIndex call.
    if (dataTmp.numAttributes() != attributes.size()) {
        IJ.log("Pb readind arff, number of attributes different");
        return;
    }
    dataTmp.setClassIndex(attributes.getClassIndex());
    instances = dataTmp;
}
use of weka.core.Instances in project 490 by pauleibye.
the class MainFrame method createCSVandArff.
/**
 * Dumps the extracted features to {@code csv.txt} and converts that file to {@code data.arff}.
 *
 * <p>Each of the two files is only produced when it does not exist yet; an existing file is
 * left as it is. I/O failures are reported on standard output and are not rethrown.
 *
 * @param FE source of the feature lines, one CSV line per element of {@code getFileFeatures()}
 */
public void createCSVandArff(FeatureExtractor FE) {
    try {
        File file = new File("csv.txt");
        if (!file.exists()) {
            file.createNewFile();
            // try-with-resources: the writer is flushed and closed even if reading the
            // features throws half-way through.
            try (PrintWriter writer = new PrintWriter(file)) {
                for (String s : FE.getFileFeatures()) {
                    writer.println(s);
                }
            }
        }
        File arff = new File("data.arff");
        if (!arff.exists()) {
            // Read the CSV back through Weka and save the resulting data set as ARFF.
            CSVLoader loader = new CSVLoader();
            loader.setSource(file);
            Instances data = loader.getDataSet();
            ArffSaver saver = new ArffSaver();
            saver.setInstances(data);
            saver.setFile(arff);
            saver.writeBatch();
        }
    } catch (IOException e) {
        System.out.println("Exception Occurred:");
        e.printStackTrace();
    }
}
use of weka.core.Instances in project iobserve-analysis by research-iobserve.
the class AbstractClustering method createInstances.
/**
 * Converts user sessions, given as counts of their called operation signatures, into Weka
 * instances that can be fed to the clustering.
 *
 * @param countModel
 *            the user sessions, each holding the counts of its called operation signatures
 * @param listOfDistinctOperationSignatures
 *            the distinct operation signatures extracted from the input
 *            entryCallSequenceModel; its size determines the number of attributes
 * @return the Weka instances holding one instance per user session
 */
protected Instances createInstances(final List<UserSessionAsCountsOfCalls> countModel, final List<String> listOfDistinctOperationSignatures) {
    final int signatureCount = listOfDistinctOperationSignatures.size();

    // One attribute per distinct signature, named after its position.
    final FastVector wekaAttributes = new FastVector(signatureCount);
    for (int position = 0; position < signatureCount; position++) {
        wekaAttributes.addElement(new Attribute("Attribute" + position));
    }

    final Instances clusterSet = new Instances("CallCounts", wekaAttributes, countModel.size());
    for (final UserSessionAsCountsOfCalls userSession : countModel) {
        final Instance sessionInstance = new Instance(signatureCount);
        // Column k of the instance receives the k-th absolute call count of the session.
        for (int column = 0; column < signatureCount; column++) {
            final Attribute target = (Attribute) wekaAttributes.elementAt(column);
            sessionInstance.setValue(target, userSession.getAbsoluteCountOfCalls()[column]);
        }
        clusterSet.add(sessionInstance);
    }
    return clusterSet;
}
use of weka.core.Instances in project iobserve-analysis by research-iobserve.
the class BehaviorModelTable method toInstances.
/**
 * Builds a Weka {@code Instances} object for clustering. The attribute list consists of one
 * attribute per transition above the threshold, followed by one attribute per call
 * information entry; the single instance returned by {@code toInstance()} is then added.
 *
 * @return the data set containing that one instance
 */
public Instances toInstances() {
    final FastVector attributeList = new FastVector();
    final int signatureCount = this.signatures.size();

    // Transition attributes: only edges whose value exceeds the threshold get a column.
    for (int from = 0; from < signatureCount; from++) {
        for (int to = 0; to < signatureCount; to++) {
            if (this.transitions[from][to] > AbstractBehaviorModelTable.TRANSITION_THRESHOLD) {
                final String edgeName = AbstractBehaviorModelTable.EDGE_INDICATOR + this.inverseSignatures[from] + AbstractBehaviorModelTable.EDGE_DIVIDER + this.inverseSignatures[to];
                attributeList.addElement(new Attribute(edgeName));
            }
        }
    }

    // Information attributes: one column per call information of every signature.
    this.signatures.values().stream().forEach(pair -> Arrays.stream(pair.getSecond()).forEach(callInformation -> {
        final String informationName = AbstractBehaviorModelTable.INFORMATION_INDICATOR + this.inverseSignatures[pair.getFirst()] + AbstractBehaviorModelTable.INFORMATION_DIVIDER + callInformation.getSignature();
        attributeList.addElement(new Attribute(informationName));
    }));

    // TODO name
    final Instances result = new Instances("Test", attributeList, 0);
    result.add(this.toInstance());
    return result;
}
use of weka.core.Instances in project cia by Hack23.
the class WordCounterImpl method calculateWordCount.
/**
 * Counts the words of a document's content with Weka's {@code StringToWordVector} filter.
 *
 * <p>Tokens are single words split on {@code TOKEN_DELIMITERS}, lower-cased, and words shorter
 * than five characters are treated as stop words. Any exception raised while filtering is
 * logged as a warning, and whatever was collected up to that point is returned.
 *
 * @param documentContentData the document whose content is analysed
 * @param maxResult the value passed to the filter's {@code setWordsToKeep}
 * @return a map from attribute name of the filtered data to its count
 */
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
    final String content = documentContentData.getContent();

    // Data set with a single attribute and a single instance carrying the content.
    final ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(new Attribute(HTML, (ArrayList<String>) null));
    final Instances source = new Instances(HTML, attributes, 1);
    source.add(new DenseInstance(1));
    source.instance(0).setValue(0, content);

    final StopwordsHandler shortWordsAreStopwords = new StopwordsHandler() {
        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    // Unigram tokenizer: minimum and maximum n-gram size are both one.
    final NGramTokenizer unigramTokenizer = new NGramTokenizer();
    unigramTokenizer.setNGramMinSize(1);
    unigramTokenizer.setNGramMaxSize(1);
    unigramTokenizer.setDelimiters(TOKEN_DELIMITERS);

    final StringToWordVector wordVector = new StringToWordVector();
    wordVector.setTokenizer(unigramTokenizer);
    wordVector.setStopwordsHandler(shortWordsAreStopwords);
    wordVector.setLowerCaseTokens(true);
    wordVector.setOutputWordCounts(true);
    wordVector.setWordsToKeep(maxResult);

    final Map<String, Integer> counts = new HashMap<>();
    try {
        wordVector.setInputFormat(source);
        final Instance filtered = Filter.useFilter(source, wordVector).lastInstance();
        final int attributeCount = filtered.numAttributes();
        int index = 0;
        while (index < attributeCount) {
            final String word = filtered.attribute(index).name();
            counts.put(word, Integer.valueOf(filtered.toString(index)));
            index++;
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }
    return counts;
}
Aggregations