Search in sources:

Example 16 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

In class MultiThreadedLMAnalyzer, method getStat.

/**
 * Prints summary statistics over all users in {@code m_users}: the median
 * number of reviews per user, the number of reviews whose y-label is 1,
 * the total number of reviews, and the ratio of the two.
 *
 * With no users the median is reported as 0 and the ratio prints as NaN
 * (0 / 0 in floating point).
 */
public void getStat() {
    ArrayList<Integer> medians = new ArrayList<Integer>();
    double pos = 0, total = 0;
    for (_User u : m_users) {
        medians.add(u.getReviewSize());
        for (_Review r : u.getReviews()) {
            if (r.getYLabel() == 1)
                pos++;
            total++;
        }
    }
    Collections.sort(medians);
    double median = 0;
    int count = medians.size();
    // Guard the empty case: indexing into an empty list would throw.
    if (count > 0) {
        if (count % 2 == 0) {
            // Divide by 2.0 so the average of the two middle values keeps
            // its fractional part instead of being truncated by int division.
            median = (medians.get(count / 2) + medians.get(count / 2 - 1)) / 2.0;
        } else {
            median = medians.get(count / 2);
        }
    }
    System.out.println("median: " + median);
    System.out.println("pos: " + pos);
    System.out.println("total: " + total);
    System.out.println("pos ratio: " + pos / total);
}
Also used : structures._Review(structures._Review) ArrayList(java.util.ArrayList) structures._User(structures._User)

Example 17 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

In class MultiThreadedLMAnalyzer, method estimateGlobalLM.

/**
 * Estimates a global language model by traversing every review of every
 * user (rather than using the global TF) and accumulating the values of
 * each review's language-model sparse features.
 *
 * Each entry is divided by the total accumulated value; entries that end up
 * exactly 0 are replaced by a floor of 0.0001. The vector is not
 * re-normalised after flooring.
 *
 * @return an array of length {@code getLMFeatureSize()} indexed by feature index
 */
public double[] estimateGlobalLM() {
    double[] lm = new double[getLMFeatureSize()];
    double sum = 0;
    for (_User u : m_users) {
        for (_Review r : u.getReviews()) {
            for (_SparseFeature fv : r.getLMSparse()) {
                lm[fv.getIndex()] += fv.getValue();
                sum += fv.getValue();
            }
        }
    }
    for (int i = 0; i < lm.length; i++) {
        // Skip normalisation when nothing was accumulated: 0/0 would give
        // NaN, and NaN fails the == 0 test below, leaving the whole model NaN.
        if (sum != 0)
            lm[i] /= sum;
        if (lm[i] == 0)
            lm[i] = 0.0001;
    }
    return lm;
}
Also used : structures._Review(structures._Review) structures._User(structures._User) structures._SparseFeature(structures._SparseFeature)

Example 18 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

In class UserAnalyzer, method loadUser.

/**
 * Loads one file as a single user.
 *
 * The user ID is extracted from the file name. The first line of the file
 * (the user name) is skipped; after that each review occupies five
 * consecutive lines: product ID, review content, category, rating, timestamp.
 *
 * Reviews rated 3 are dropped; ratings of 4 or more become label 1 and all
 * other ratings become label 0. A review is kept only if {@code AnalyzeDoc}
 * accepts it. A user is added only when more than one review is kept; when
 * exactly one is kept, its contribution is rolled back via {@code rollBack}.
 *
 * I/O errors are printed and otherwise ignored. The reader is always closed,
 * including when a malformed rating/timestamp line raises an unchecked
 * {@link NumberFormatException}, which still propagates to the caller.
 *
 * @param filename path of the user file to load
 */
public void loadUser(String filename) {
    BufferedReader reader = null;
    try {
        File file = new File(filename);
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        String line;
        // UserId is contained in the filename.
        String userID = extractUserID(file.getName());
        // Skip the first line since it is user name.
        reader.readLine();
        String productID, source, category;
        ArrayList<_Review> reviews = new ArrayList<_Review>();
        _Review review;
        int ylabel;
        long timestamp;
        while ((line = reader.readLine()) != null) {
            productID = line;
            // review content
            source = reader.readLine();
            // review category
            category = reader.readLine();
            ylabel = Integer.parseInt(reader.readLine());
            timestamp = Long.parseLong(reader.readLine());
            // Construct the new review; neutral ratings (3) are discarded.
            if (ylabel != 3) {
                ylabel = (ylabel >= 4) ? 1 : 0;
                review = new _Review(m_corpus.getCollection().size(), source, ylabel, userID, productID, category, timestamp);
                // Create the sparse vector for the review.
                if (AnalyzeDoc(review))
                    reviews.add(review);
            }
        }
        if (reviews.size() > 1) {
            // at least one for adaptation and one for testing
            allocateReviews(reviews);
            // create new user from the file.
            m_users.add(new _User(userID, m_classNo, reviews));
        } else if (reviews.size() == 1) {
            // Users with fewer than 2 reviews are ignored, so undo the
            // statistics recorded for their single analysed review.
            review = reviews.get(0);
            rollBack(Utils.revertSpVct(review.getSparse()), review.getYLabel());
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close on every path so a failure mid-file does not leak the handle.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) structures._Review(structures._Review) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) structures._User(structures._User) IOException(java.io.IOException) File(java.io.File) FileInputStream(java.io.FileInputStream)

Example 19 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

In class IndividualSVM, method train.

/**
 * Trains one liblinear model per user, using only that user's reviews of
 * type {@code rType.ADAPTATION}, then stores the result as the personalized
 * model (for the user's whole cluster when {@code m_supFlag} is set).
 *
 * Iterates over {@code m_supUserList} when {@code m_supFlag} is true and
 * over {@code m_userList} otherwise.
 *
 * @return always 0
 */
@Override
public double train() {
    init();
    // Transfer all user reviews to instances recognized by SVM, indexed by users.
    int trainSize = 0, validUserIndex = 0;
    ArrayList<Feature[]> fvs = new ArrayList<Feature[]>();
    ArrayList<Double> ys = new ArrayList<Double>();
    ArrayList<_Review> reviews;
    for (_AdaptStruct user : m_supFlag ? m_supUserList : m_userList) {
        // Start each user with empty buffers. The problem below is filled from
        // indices 0..trainSize-1, so leftovers from earlier users would
        // otherwise be used as this user's training data.
        fvs.clear();
        ys.clear();
        trainSize = 0;
        reviews = user.getReviews();
        boolean validUser = false;
        for (_Review r : reviews) {
            if (r.getType() == rType.ADAPTATION) {
                // we will only use the adaptation data for this purpose
                fvs.add(createLibLinearFV(r, validUserIndex));
                ys.add(Double.valueOf(r.getYLabel()));
                trainSize++;
                validUser = true;
            }
        }
        // Only users contributing at least one adaptation review advance the index.
        if (validUser)
            validUserIndex++;
        // Train individual model for each user.
        Problem libProblem = new Problem();
        libProblem.l = trainSize;
        libProblem.x = new Feature[trainSize][];
        libProblem.y = new double[trainSize];
        for (int i = 0; i < trainSize; i++) {
            libProblem.x[i] = fvs.get(i);
            libProblem.y[i] = ys.get(i);
        }
        if (m_bias) {
            // including bias term; global model + user models
            libProblem.n = m_featureSize + 1;
            // bias term in liblinear.
            libProblem.bias = 1;
        } else {
            libProblem.n = m_featureSize;
            // no bias term in liblinear.
            libProblem.bias = -1;
        }
        m_libModel = Linear.train(libProblem, new Parameter(m_solverType, m_C, SVM.EPS));
        // Set users in the same cluster.
        if (m_supFlag)
            setPersonalizedModelInCluster(user.getUser().getClusterIndex());
        else
            setPersonalizedModel(user);
    }
    return 0;
}
Also used : structures._Review(structures._Review) Classifier.supervised.modelAdaptation._AdaptStruct(Classifier.supervised.modelAdaptation._AdaptStruct) ArrayList(java.util.ArrayList) Parameter(Classifier.supervised.liblinear.Parameter) Problem(Classifier.supervised.liblinear.Problem) Feature(Classifier.supervised.liblinear.Feature) structures._SparseFeature(structures._SparseFeature)

Example 20 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

In class asyncCoLinAdapt, method train.

/**
 * Online training: visits users in the order given by {@code m_userOrder}
 * and, for each user that still has an adaptation instance, optionally
 * scores the latest test instance with the current model and then takes
 * one gradient-descent step for that user.
 *
 * @return always 0; the objective function value is not evaluated
 */
@Override
public double train() {
    initLBFGS();
    init();

    double prevGradNorm = Double.MAX_VALUE;
    int stepsTaken = 0;

    for (int orderIdx = 0; orderIdx < m_userOrder.length; orderIdx++) {
        _CoLinAdaptStruct current = (_CoLinAdaptStruct) m_userList.get(m_userOrder[orderIdx]);
        if (!current.hasNextAdaptationIns())
            continue;

        // Test the latest model first; in batch mode performance is not
        // accumulated during adaptation.
        if (m_testmode != TestMode.TM_batch) {
            _Review latest = current.getLatestTestIns();
            if (latest != null) {
                _PerformanceStat stat = current.getPerfStat();
                int predicted = predict(latest, current);
                int actual = latest.getYLabel();
                stat.addOnePredResult(predicted, actual);
            }
        }

        // Prepare to adapt: reset the gradient buffer, then recompute it.
        Arrays.fill(m_g, 0);
        calculateGradients(current);
        double gradNorm = gradientTest(current);

        // Progress mark: "o" when the gradient norm dropped since the
        // previous update, "x" otherwise.
        if (m_displayLv == 1)
            System.out.print(gradNorm < prevGradNorm ? "o" : "x");

        // One gradient-descent step for this user.
        gradientDescent(current, m_initStepSize, 1.0);
        prevGradNorm = gradNorm;

        // Break the progress line after every 100 updates.
        if (m_displayLv > 0 && ++stepsTaken % 100 == 0)
            System.out.println();
    }

    if (m_displayLv > 0)
        System.out.println();

    setPersonalizedModel();
    return 0;
}
Also used : structures._Review(structures._Review) structures._PerformanceStat(structures._PerformanceStat)

Aggregations

structures._Review (structures._Review)44 structures._SparseFeature (structures._SparseFeature)24 structures._HDPThetaStar (structures._HDPThetaStar)9 ArrayList (java.util.ArrayList)8 Feature (Classifier.supervised.liblinear.Feature)6 Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct)6 structures._PerformanceStat (structures._PerformanceStat)6 IOException (java.io.IOException)5 File (java.io.File)4 structures._User (structures._User)4 FeatureNode (Classifier.supervised.liblinear.FeatureNode)3 Parameter (Classifier.supervised.liblinear.Parameter)3 Problem (Classifier.supervised.liblinear.Problem)3 structures._RankItem (structures._RankItem)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 InputStreamReader (java.io.InputStreamReader)2 PrintWriter (java.io.PrintWriter)2 MyPriorityQueue (structures.MyPriorityQueue)2 SolverType (Classifier.supervised.liblinear.SolverType)1