Use of structures._stat in project IR_Base by Linda-sunshine.
Class Analyzer, method expandVocabulary.
// Register a new token: map it to the next free feature index, append it to the
// feature-name list, and create a fresh _stat entry for it (built with m_classNo).
protected void expandVocabulary(String token) {
    // The new feature's index is the list size before the token is appended.
    final int nextIndex = m_featureNames.size();
    m_featureNameIndex.put(token, nextIndex);

    m_featureNames.add(token);

    _stat freshStat = new _stat(m_classNo);
    m_featureStat.put(token, freshStat);
}
Use of structures._stat in project IR_Base by Linda-sunshine.
Class Analyzer, method SaveCVStat.
// Save all the features and their statistics into a file, then print a short
// summary of the corpus to standard output.
//
// Each output line holds the feature name followed by every DF value and then
// every TTF value of that feature, separated by tabs.
//
// fvStatFile: path of the file to write; a null or empty path makes this a no-op.
//
// If the file cannot be opened, the stack trace is printed and no summary is
// emitted. The writer is always closed, even when writing fails half-way.
public void SaveCVStat(String fvStatFile) {
    if (fvStatFile == null || fvStatFile.isEmpty())
        return;

    // Only strictly positive counts contribute to the maxima and totals.
    double maxDF = 0, totalDF = 0;
    double maxTTF = 0, totalTTF = 0;

    PrintWriter writer = null;
    try {
        writer = new PrintWriter(new File(fvStatFile));
        for (int i = 0; i < m_featureNames.size(); i++) {
            writer.print(m_featureNames.get(i));
            _stat temp = m_featureStat.get(m_featureNames.get(i));

            for (int j = 0; j < temp.getDF().length; j++) {
                if (temp.getDF()[j] > 0) {
                    maxDF = Math.max(maxDF, temp.getDF()[j]);
                    totalDF += temp.getDF()[j];
                }
                writer.print("\t" + temp.getDF()[j]);
            }

            for (int j = 0; j < temp.getTTF().length; j++) {
                if (temp.getTTF()[j] > 0) {
                    maxTTF = Math.max(maxTTF, temp.getTTF()[j]);
                    totalTTF += temp.getTTF()[j];
                }
                writer.print("\t" + temp.getTTF()[j]);
            }
            writer.println();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return;
    } finally {
        // Release the file handle on every path, including unexpected runtime failures.
        if (writer != null)
            writer.close();
    }

    // Print out some basic statistics of the corpus. With an empty vocabulary
    // (or all-zero counts) the maxima and averages are reported as 0 instead of
    // failing or printing NaN.
    int featureCount = m_featureNames.size();
    double avgDF = featureCount > 0 ? totalDF / featureCount : 0;
    System.out.println("maxDF\t" + maxDF + "\t avgDF \t" + avgDF + "\t totalDF\t" + totalDF);
    double avgTTF = featureCount > 0 ? totalTTF / featureCount : 0;
    // A tab now separates the max value from the "avgTTF" label; they used to run together.
    System.out.println("maxTTF\t" + maxTTF + "\tavgTTF\t" + avgTTF + "\t totalTTF \t" + totalTTF);
}
Use of structures._stat in project IR_Base by Linda-sunshine.
Class FeatureSelector, method CHI.
// Feature selection by chi-square.
//
// For every feature whose summed DF lies strictly between m_minDF and m_maxDF,
// compute Utils.ChiSquare against each class and add the feature to
// m_selectedFeatures with the average of those scores weighted by
// classMemberNo[i] / N, where N is the sum of classMemberNo.
//
// featureStat:   feature name -> its _stat (the DF array is read via getDF()).
// classMemberNo: one count per class; its length defines the number of classes.
public void CHI(HashMap<String, _stat> featureStat, int[] classMemberNo) {
    m_selectedFeatures.clear();

    final int numClasses = classMemberNo.length;
    final int N = Utils.sumOfArray(classMemberNo);

    for (String feature : featureStat.keySet()) {
        _stat stat = featureStat.get(feature);
        int sumDF = Utils.sumOfArray(stat.getDF());

        // Skip features whose total DF falls outside the open interval (m_minDF, m_maxDF).
        if (!(sumDF > m_minDF && sumDF < m_maxDF))
            continue;

        // Class-size-weighted average of the per-class chi-square scores.
        double weightedChi = 0;
        for (int c = 0; c < numClasses; c++) {
            double chi = Utils.ChiSquare(N, sumDF, stat.getDF()[c], classMemberNo[c]);
            weightedChi += chi * classMemberNo[c] / N;
        }
        m_selectedFeatures.add(new _RankItem(feature, weightedChi));
    }
}
Use of structures._stat in project IR_Base by Linda-sunshine.
Class UserAnalyzer, method setVocabStat.
// Look up the _stat stored for the given term in m_featureStat and hand DFs to
// its setRawDF. The term is expected to have an entry already; no null check
// is performed on the lookup result.
void setVocabStat(String term, int[] DFs) {
    m_featureStat.get(term).setRawDF(DFs);
}
Aggregations