use of weka.core.Instance in project cia by Hack23.
the class WordCounterImpl method calculateWordCount.
/**
 * Counts the words found in a document's content by running it through a Weka
 * {@code StringToWordVector} filter.
 *
 * <p>The content is placed in a one-row dataset with a single string attribute. The filter is
 * configured with a unigram tokenizer splitting on {@code TOKEN_DELIMITERS}, lower-cased tokens,
 * word-count output, and a stopword handler that rejects every word shorter than five characters.
 *
 * @param documentContentData the document whose content is analysed
 * @param maxResult handed to the filter as its "words to keep" setting
 * @return a map from each output attribute name to its value parsed as an integer; if filtering
 *         fails the problem is logged and whatever was collected so far (possibly nothing) is
 *         returned
 */
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
    final String content = documentContentData.getContent();

    // One-row dataset holding the raw content in a single string attribute.
    final ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(new Attribute(HTML, (ArrayList<String>) null));
    final Instances dataset = new Instances(HTML, attributes, 1);
    dataset.add(new DenseInstance(1));
    // The value is written through the dataset's own first row, not the instance passed to add().
    dataset.instance(0).setValue(0, content);

    // Unigrams only: minimum and maximum n-gram size are both 1.
    final NGramTokenizer unigramTokenizer = new NGramTokenizer();
    unigramTokenizer.setDelimiters(TOKEN_DELIMITERS);
    unigramTokenizer.setNGramMinSize(1);
    unigramTokenizer.setNGramMaxSize(1);

    final StopwordsHandler shortWordRejecter = new StopwordsHandler() {

        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    final StringToWordVector wordVector = new StringToWordVector();
    wordVector.setTokenizer(unigramTokenizer);
    wordVector.setStopwordsHandler(shortWordRejecter);
    wordVector.setLowerCaseTokens(true);
    wordVector.setOutputWordCounts(true);
    wordVector.setWordsToKeep(maxResult);

    final Map<String, Integer> wordCounts = new HashMap<>();
    try {
        wordVector.setInputFormat(dataset);
        final Instance counted = Filter.useFilter(dataset, wordVector).lastInstance();
        for (int idx = 0, total = counted.numAttributes(); idx < total; idx++) {
            final String word = counted.attribute(idx).name();
            wordCounts.put(word, Integer.valueOf(counted.toString(idx)));
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }
    return wordCounts;
}
use of weka.core.Instance in project dkpro-tc by dkpro.
the class ReplaceMissingValuesWithZeroFilter method convertInstance.
/**
 * Converts a single instance and appends the result to the output queue.
 *
 * <p>A value is affected when it is missing, does not belong to the class attribute of the input
 * format, and its attribute is nominal or numeric. For a sparse instance the affected entries are
 * left out of the rebuilt value/index arrays; for any other instance the affected slots are set
 * to {@code 0.0}. The weight and dataset reference of the original are carried over.
 *
 * @param instance
 *            the instance to convert
 */
private void convertInstance(Instance instance) {
    final Instance converted;
    if (instance instanceof SparseInstance) {
        final int storedCount = instance.numValues();
        final double[] keptValues = new double[storedCount];
        final int[] keptIndices = new int[storedCount];
        int kept = 0;
        for (int pos = 0; pos < storedCount; pos++) {
            final boolean dropEntry = instance.isMissingSparse(pos)
                    && getInputFormat().classIndex() != instance.index(pos)
                    && (instance.attributeSparse(pos).isNominal() || instance.attributeSparse(pos).isNumeric());
            if (dropEntry) {
                continue;
            }
            keptValues[kept] = instance.valueSparse(pos);
            keptIndices[kept] = instance.index(pos);
            kept++;
        }
        if (kept < storedCount) {
            // Some entries were dropped: shrink both arrays to the number actually kept.
            final double[] trimmedValues = new double[kept];
            final int[] trimmedIndices = new int[kept];
            System.arraycopy(keptValues, 0, trimmedValues, 0, kept);
            System.arraycopy(keptIndices, 0, trimmedIndices, 0, kept);
            converted = new SparseInstance(instance.weight(), trimmedValues, trimmedIndices, instance.numAttributes());
        } else {
            converted = new SparseInstance(instance.weight(), keptValues, keptIndices, instance.numAttributes());
        }
    } else {
        final double[] denseValues = new double[getInputFormat().numAttributes()];
        final int attributeCount = instance.numAttributes();
        for (int att = 0; att < attributeCount; att++) {
            final boolean zeroOut = instance.isMissing(att)
                    && getInputFormat().classIndex() != att
                    && (getInputFormat().attribute(att).isNominal() || getInputFormat().attribute(att).isNumeric());
            denseValues[att] = zeroOut ? 0.0d : instance.value(att);
        }
        converted = new DenseInstance(instance.weight(), denseValues);
    }
    converted.setDataset(instance.dataset());
    push(converted);
}
use of weka.core.Instance in project TrakEM2 by trakem2.
the class LineageClassifier method classify.
/**
 * Classifies a feature vector using the {@code Operator} that belongs to the calling thread.
 *
 * <p>Operators are kept in {@code table}, keyed by thread, and created on first use. The lookup
 * and insertion happen while holding the monitor of {@code table}.
 *
 * @param vector the attribute values of the instance to classify
 * @return {@code true} when the classifier output, rounded to the nearest integer, equals 1
 * @throws Exception if the classifier fails
 */
public static final boolean classify(final double[] vector) throws Exception {
    final Thread current = Thread.currentThread();
    final Operator operator;
    // One Operator per thread, so concurrent callers never share classifier state.
    synchronized (table) {
        Operator cached = table.get(current);
        if (cached == null) {
            cached = new Operator();
            table.put(current, cached);
        }
        operator = cached;
    }
    final Instance candidate = new DenseInstance(1, vector);
    candidate.setDataset(operator.data);
    // The classifier was trained on a true/false outcome, which weka reports as 0 or 1.
    final double predicted = operator.c.classifyInstance(candidate);
    return ((int) Math.round(predicted)) == 1;
}
use of weka.core.Instance in project lobcder by skoulouzis.
the class LDClustering method getNextState.
/**
 * Determines the state that follows {@code currentState}.
 *
 * <p>The resource name of the current state is given a trailing slash if it lacks one, and the
 * first occurrence of {@code /lobcder/dav/} is removed. The logical data for that path is loaded,
 * the first instance built from it and the state's method is turned into a feature array, and
 * the lookup selected by {@code type} is run on those features.
 *
 * @param currentState the state to start from
 * @return the vertex produced by the lookup for the configured {@code type} (any value other than
 *         {@code resource} or {@code method} uses the state lookup), or {@code null} when an
 *         {@link SQLException} occurred and was logged
 */
@Override
public Vertex getNextState(Vertex currentState) {
    String rName = currentState.getResourceName();
    if (!rName.endsWith("/")) {
        rName += "/";
    }
    rName = rName.replaceFirst("/lobcder/dav/", "");
    try (Connection connection = getConnection()) {
        LogicalData data = getLogicalDataByPath(Path.path(rName), connection);
        Instance instance = getInstances(data, currentState.getMethod()).get(0);
        double[] features = instance.toDoubleArray();
        switch (type) {
            case resource:
                return getNextResourceState(connection, features);
            case method:
                return getNextMethodState(connection, features);
            case state:
            default:
                // The state lookup doubles as the fallback for any other type.
                return getNextLobState(connection, features);
        }
    } catch (SQLException ex) {
        Logger.getLogger(LDClustering.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
use of weka.core.Instance in project 490 by pauleibye.
the class MainFrame method initialize.
/**
 * Initialize the contents of the frame.
 *
 * <p>Builds the frame with a {@link GridBagLayout}, adds the "Parse" and "Play Audio" buttons,
 * the file list and the result labels, then calls {@code createCSVandArff(featureExtractor)} and
 * {@code buildModel()} before wiring the two button handlers:
 * <ul>
 * <li>Parse: extracts features from the selected file under {@code resources/testing/},
 * classifies them with {@code nb}, and shows the prediction together with whether the first
 * letter of the predicted class matches the first letter of the file path (ignoring case).</li>
 * <li>Play Audio: plays the file under {@code resources/testing/} named by
 * {@code convertToImagePath()}.</li>
 * </ul>
 */
private void initialize() {
    frame = new JFrame();
    frame.getContentPane().setFont(new Font("Tahoma", Font.PLAIN, 18));
    frame.setBounds(100, 100, 615, 377);
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    GridBagLayout gridBagLayout = new GridBagLayout();
    gridBagLayout.columnWidths = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    gridBagLayout.rowHeights = new int[] { 0, 0, 0, 0, 0 };
    gridBagLayout.columnWeights = new double[] { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, Double.MIN_VALUE };
    gridBagLayout.rowWeights = new double[] { 0.0, 0.0, 1.0, 1.0, Double.MIN_VALUE };
    frame.getContentPane().setLayout(gridBagLayout);

    JButton btnParse = new JButton("Parse");
    GridBagConstraints gbc_btnParse = new GridBagConstraints();
    gbc_btnParse.insets = new Insets(0, 0, 5, 5);
    gbc_btnParse.gridx = 2;
    gbc_btnParse.gridy = 1;
    frame.getContentPane().add(btnParse, gbc_btnParse);

    // One list entry per file, labelled by its index.
    for (int i = 0; i < numFiles; i++) {
        listModel.addElement("Audio File: " + i);
    }

    lblFeatures = new JLabel("Features");
    lblFeatures.setFont(new Font("Tahoma", Font.PLAIN, 36));
    GridBagConstraints gbc_lblFeatures = new GridBagConstraints();
    gbc_lblFeatures.insets = new Insets(0, 0, 5, 5);
    gbc_lblFeatures.gridx = 4;
    gbc_lblFeatures.gridy = 1;
    frame.getContentPane().add(lblFeatures, gbc_lblFeatures);

    btnPlayAudio = new JButton("Play Audio");
    GridBagConstraints gbc_btnPlayAudio = new GridBagConstraints();
    gbc_btnPlayAudio.insets = new Insets(0, 0, 5, 5);
    gbc_btnPlayAudio.gridx = 5;
    gbc_btnPlayAudio.gridy = 1;
    frame.getContentPane().add(btnPlayAudio, gbc_btnPlayAudio);

    // NOTE(review): the model is filled with String labels above, yet the list is typed
    // JList<JCheckBox> through a raw constructor call - confirm the intended element type.
    final JList<JCheckBox> list = new JList(listModel);
    GridBagConstraints gbc_list = new GridBagConstraints();
    gbc_list.gridheight = 2;
    gbc_list.insets = new Insets(0, 0, 0, 5);
    gbc_list.fill = GridBagConstraints.BOTH;
    gbc_list.gridx = 2;
    gbc_list.gridy = 2;
    frame.getContentPane().add(list, gbc_list);

    resultLabel = new JLabel("New label");
    resultLabel.setFont(new Font("Tahoma", Font.PLAIN, 18));
    GridBagConstraints gbc_resultLabel = new GridBagConstraints();
    gbc_resultLabel.insets = new Insets(0, 0, 5, 5);
    gbc_resultLabel.gridx = 4;
    gbc_resultLabel.gridy = 2;
    frame.getContentPane().add(resultLabel, gbc_resultLabel);

    featuresDisplay = new JLabel("No Image Proccessed");
    featuresDisplay.setFont(new Font("Tahoma", Font.PLAIN, 18));
    GridBagConstraints gbc_featuresDisplay = new GridBagConstraints();
    gbc_featuresDisplay.insets = new Insets(0, 0, 0, 5);
    gbc_featuresDisplay.gridx = 4;
    gbc_featuresDisplay.gridy = 3;
    frame.getContentPane().add(featuresDisplay, gbc_featuresDisplay);
    // Replaces the placeholder text the result label was created with.
    this.resultLabel.setText("No Image Processed");

    labelCorrect = new JLabel("");
    GridBagConstraints gbc_labelCorrect = new GridBagConstraints();
    gbc_labelCorrect.insets = new Insets(0, 0, 0, 5);
    gbc_labelCorrect.gridx = 5;
    gbc_labelCorrect.gridy = 3;
    frame.getContentPane().add(labelCorrect, gbc_labelCorrect);

    createCSVandArff(featureExtractor);
    buildModel();

    /*
     * Parse button handler
     * Takes selected file and runs it against the machine learning model
     * updates label to display if algorithm was correct or not
     */
    btnParse.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            try {
                if (list.isSelectionEmpty()) {
                    return;
                }
                selected = list.getSelectedIndex();
                String path = convertToImagePath();
                FeatureExtractor fe = new FeatureExtractor();
                Double[] features = fe.extractFeaturesForTest("resources/testing/" + path);
                Instance inst = new DenseInstance(features.length);
                inst.setDataset(trainDataSet);
                for (int i = 0; i < features.length; i++) {
                    inst.setValue(i, features[i]);
                }
                double result = nb.classifyInstance(inst);
                String predStr = trainDataSet.classAttribute().value((int) result);
                String actual = convertToImagePath();
                resultLabel.setText("Selected File: " + actual);
                featuresDisplay.setText("Algorithm Prediction: " + predStr);
                // The prediction counts as correct when both names start with the same letter.
                if (predStr.substring(0, 1).toLowerCase().equals(actual.substring(0, 1).toLowerCase())) {
                    labelCorrect.setText("Yes, Prediction is correct");
                } else {
                    labelCorrect.setText("No, Prediction is wrong");
                }
            } catch (IOException e1) {
                e1.printStackTrace();
            } catch (UnsupportedAudioFileException e1) {
                e1.printStackTrace();
            } catch (Exception e1) {
                e1.printStackTrace();
            }
        }
    });

    /*
     * Plays selected audio file on click, NOTE, file has to parsed first
     */
    btnPlayAudio.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            File file = new File("resources/testing/" + convertToImagePath());
            // try-with-resources closes the input stream once the clip has been opened from it;
            // previously the stream was never closed.
            try (AudioInputStream audioIn = AudioSystem.getAudioInputStream(file)) {
                final Clip clip = AudioSystem.getClip();
                // Close the clip when playback stops; otherwise every click leaves a line open.
                clip.addLineListener(new javax.sound.sampled.LineListener() {
                    @Override
                    public void update(javax.sound.sampled.LineEvent event) {
                        if (event.getType() == javax.sound.sampled.LineEvent.Type.STOP) {
                            event.getLine().close();
                        }
                    }
                });
                clip.open(audioIn);
                clip.start();
            } catch (UnsupportedAudioFileException e1) {
                e1.printStackTrace();
            } catch (IOException e1) {
                e1.printStackTrace();
            } catch (LineUnavailableException e1) {
                e1.printStackTrace();
            }
        }
    });
}
Aggregations