use of structures._Review in project IR_Base by Linda-sunshine.
the class MultiThreadedLMAnalyzer method getStat.
/**
 * Prints summary statistics of the loaded users to standard output: the median
 * number of reviews per user, the number of reviews whose label equals 1, the
 * total number of reviews, and the ratio of the two.
 *
 * When there are no users the median is reported as 0; when there are no
 * reviews the ratio is printed as NaN (0.0 / 0.0).
 */
public void getStat() {
    ArrayList<Integer> reviewCounts = new ArrayList<Integer>();
    // Kept as doubles so the final ratio is a floating-point division.
    double pos = 0, total = 0;
    for (_User u : m_users) {
        reviewCounts.add(u.getReviewSize());
        for (_Review r : u.getReviews()) {
            if (r.getYLabel() == 1)
                pos++;
            total++;
        }
    }
    Collections.sort(reviewCounts);

    double median = 0;
    int n = reviewCounts.size();
    // Guard against an empty user list, which has no middle element to read.
    if (n > 0) {
        if (n % 2 == 0) {
            // Divide by 2.0: integer division would truncate medians such as 2.5 to 2.
            median = (reviewCounts.get(n / 2) + reviewCounts.get(n / 2 - 1)) / 2.0;
        } else {
            median = reviewCounts.get(n / 2);
        }
    }

    System.out.println("median: " + median);
    System.out.println("pos: " + pos);
    System.out.println("total: " + total);
    System.out.println("pos ratio: " + pos / total);
}
use of structures._Review in project IR_Base by Linda-sunshine.
the class MultiThreadedLMAnalyzer method estimateGlobalLM.
/**
 * Estimates a global language model by traversing every review of every user
 * (rather than using the global TF) and accumulating the values of each
 * review's language-model sparse features.
 *
 * Each entry is the accumulated value of that feature divided by the total
 * accumulated value. Entries that end up exactly 0 are replaced by 0.0001;
 * the vector is not renormalized afterwards, so it may sum to slightly more
 * than 1.
 *
 * @return an array of length {@code getLMFeatureSize()} indexed by feature index
 */
public double[] estimateGlobalLM() {
    double[] lm = new double[getLMFeatureSize()];
    double sum = 0;
    for (_User u : m_users) {
        for (_Review r : u.getReviews()) {
            for (_SparseFeature fv : r.getLMSparse()) {
                lm[fv.getIndex()] += fv.getValue();
                sum += fv.getValue();
            }
        }
    }
    for (int i = 0; i < lm.length; i++) {
        // Skip the division when nothing was accumulated: 0/0 would yield NaN,
        // which also slips past the zero check below and poisons the whole model.
        if (sum != 0)
            lm[i] /= sum;
        // Floor unseen features so no entry has zero probability.
        if (lm[i] == 0)
            lm[i] = 0.0001;
    }
    return lm;
}
use of structures._Review in project IR_Base by Linda-sunshine.
the class UserAnalyzer method loadUser.
/**
 * Loads one file as a user.
 *
 * The user ID is extracted from the file name. The first line of the file (the
 * user name) is skipped; the remainder is read as consecutive five-line
 * records: product ID, review content, review category, rating label and
 * timestamp. Records whose label is 3 are dropped; labels of 4 or more become
 * class 1 and all others class 0.
 *
 * A user is registered only when more than one review was accepted by
 * {@code AnalyzeDoc}. With exactly one accepted review, {@code rollBack} is
 * invoked for that review and the user is ignored.
 *
 * I/O errors are printed to standard error and otherwise ignored. A label or
 * timestamp line that is missing or not a number raises
 * {@link NumberFormatException}; the reader is closed in every case.
 *
 * @param filename path of the user file to load
 */
public void loadUser(String filename) {
    try {
        File file = new File(filename);
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            // UserId is contained in the filename.
            String userID = extractUserID(file.getName());
            // Skip the first line since it is user name.
            reader.readLine();

            String productID, source, category;
            ArrayList<_Review> reviews = new ArrayList<_Review>();
            _Review review;
            int ylabel;
            long timestamp;
            while ((line = reader.readLine()) != null) {
                productID = line;
                // review content
                source = reader.readLine();
                // review category
                category = reader.readLine();
                ylabel = Integer.parseInt(reader.readLine());
                timestamp = Long.parseLong(reader.readLine());

                // Construct the new review; label 3 is excluded.
                if (ylabel != 3) {
                    ylabel = (ylabel >= 4) ? 1 : 0;
                    review = new _Review(m_corpus.getCollection().size(), source, ylabel, userID, productID, category, timestamp);
                    // Create the sparse vector for the review; keep it only on success.
                    if (AnalyzeDoc(review))
                        reviews.add(review);
                }
            }

            if (reviews.size() > 1) {
                // at least one for adaptation and one for testing
                allocateReviews(reviews);
                // create new user from the file.
                m_users.add(new _User(userID, m_classNo, reviews));
            } else if (reviews.size() == 1) {
                // Users with fewer than 2 reviews are ignored.
                // NOTE(review): rollBack presumably undoes what AnalyzeDoc recorded for this review - confirm.
                review = reviews.get(0);
                rollBack(Utils.revertSpVct(review.getSparse()), review.getYLabel());
            }
        } finally {
            // Release the file handle even when reading or parsing fails part-way.
            reader.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
use of structures._Review in project IR_Base by Linda-sunshine.
the class IndividualSVM method train.
/**
 * Trains one liblinear model per entry of the active user list
 * ({@code m_supUserList} when {@code m_supFlag} is set, {@code m_userList}
 * otherwise), using only that entry's reviews of type
 * {@code rType.ADAPTATION} as training instances.
 *
 * After each model is trained it is installed through
 * {@code setPersonalizedModelInCluster} (when {@code m_supFlag} is set) or
 * {@code setPersonalizedModel}.
 *
 * @return always 0
 */
@Override
public double train() {
    init();

    // Counts the users seen so far that own at least one adaptation review.
    int validUserIndex = 0;
    for (_AdaptStruct user : m_supFlag ? m_supUserList : m_userList) {
        // Fresh buffers for every user: sharing them across users would make
        // the copy loop below read earlier users' instances instead of this user's.
        ArrayList<Feature[]> fvs = new ArrayList<Feature[]>();
        ArrayList<Double> ys = new ArrayList<Double>();
        for (_Review r : user.getReviews()) {
            if (r.getType() == rType.ADAPTATION) {
                // we will only use the adaptation data for this purpose
                fvs.add(createLibLinearFV(r, validUserIndex));
                ys.add(Double.valueOf(r.getYLabel()));
            }
        }
        int trainSize = fvs.size();
        if (trainSize > 0)
            validUserIndex++;

        // Train individual model for each user.
        Problem libProblem = new Problem();
        libProblem.l = trainSize;
        libProblem.x = new Feature[trainSize][];
        libProblem.y = new double[trainSize];
        for (int i = 0; i < trainSize; i++) {
            libProblem.x[i] = fvs.get(i);
            libProblem.y[i] = ys.get(i);
        }
        if (m_bias) {
            // including bias term
            libProblem.n = m_featureSize + 1;
            // bias term in liblinear.
            libProblem.bias = 1;
        } else {
            libProblem.n = m_featureSize;
            // no bias term in liblinear.
            libProblem.bias = -1;
        }
        m_libModel = Linear.train(libProblem, new Parameter(m_solverType, m_C, SVM.EPS));

        // Set users in the same cluster.
        if (m_supFlag)
            setPersonalizedModelInCluster(user.getUser().getClusterIndex());
        else
            setPersonalizedModel(user);
    }
    return 0;
}
use of structures._Review in project IR_Base by Linda-sunshine.
the class asyncCoLinAdapt method train.
/**
 * Online training, one user at a time, in the order given by
 * {@code m_userOrder}.
 *
 * For every visited user that still has an adaptation instance: outside batch
 * test mode the latest test instance (if any) is predicted and recorded in the
 * user's performance statistics; then the gradient buffer is reset, gradients
 * are computed and a single gradient-descent step is taken. Finally the
 * personalized models are set.
 *
 * @return always 0, since the function value is not evaluated
 */
@Override
public double train() {
    double previousNorm = Double.MAX_VALUE;
    int updates = 0;

    initLBFGS();
    init();

    for (int idx = 0; idx < m_userOrder.length; idx++) {
        _CoLinAdaptStruct current = (_CoLinAdaptStruct) m_userList.get(m_userOrder[idx]);
        if (!current.hasNextAdaptationIns()) {
            continue;
        }

        // Test the latest model first; in batch mode performance is not
        // accumulated during adaptation, so the test instance is not even fetched.
        if (m_testmode != TestMode.TM_batch) {
            _Review latest = current.getLatestTestIns();
            if (latest != null) {
                _PerformanceStat stat = current.getPerfStat();
                int predicted = predict(latest, current);
                int actual = latest.getYLabel();
                stat.addOnePredResult(predicted, actual);
            }
        }

        // Prepare to adapt: start from a zeroed gradient.
        Arrays.fill(m_g, 0);
        calculateGradients(current);
        double currentNorm = gradientTest(current);

        // 'o' when the value reported by gradientTest dropped since the last update, 'x' otherwise.
        if (m_displayLv == 1) {
            System.out.print(currentNorm < previousNorm ? "o" : "x");
        }

        // Gradient descent step.
        gradientDescent(current, m_initStepSize, 1.0);
        previousNorm = currentNorm;

        // Break the progress line after every 100 updates.
        if (m_displayLv > 0) {
            updates++;
            if (updates % 100 == 0) {
                System.out.println();
            }
        }
    }

    if (m_displayLv > 0) {
        System.out.println();
    }

    setPersonalizedModel();
    // We do not evaluate the function value.
    return 0;
}
Aggregations