Search in sources :

Example 26 with structures._HDPThetaStar

use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.

the class CLRWithHDP method printInfo.

/**
 * Prints per-cluster statistics for the first {@code m_kBar} clusters to standard output.
 *
 * <p>The positive/negative counters of every cluster are reset and then rebuilt from the
 * labels of all reviews of type {@code rType.ADAPTATION} across {@code m_userList}.
 * After the per-cluster stat line, a summary line with the total cluster count is always
 * printed, so the stat line is always newline-terminated. When features are available and
 * {@code printDetails} is set, the top words of the ranked clusters are printed as well.
 *
 * @param printDetails whether to additionally print the top words of the ranked clusters;
 *                     only honoured when {@code m_features} is not {@code null}
 */
public void printInfo(boolean printDetails) {
    // NOTE(review): capacity 10 presumably keeps only the 10 largest clusters -- confirm
    // against MyPriorityQueue.
    MyPriorityQueue<_RankItem> clusterRanker = new MyPriorityQueue<_RankItem>(10);
    for (int i = 0; i < m_kBar; i++) {
        // clear the statistics before they are re-collected below
        m_hdpThetaStars[i].resetCount();
        // rank the clusters by their member size
        clusterRanker.add(new _RankItem(i, m_hdpThetaStars[i].getMemSize()));
    }

    // collect label statistics across users, touching only the adaptation data
    for (int i = 0; i < m_userList.size(); i++) {
        _HDPAdaptStruct user = (_HDPAdaptStruct) m_userList.get(i);
        for (_Review r : user.getReviews()) {
            if (r.getType() != rType.ADAPTATION) {
                continue;
            }
            _HDPThetaStar theta = r.getHDPThetaStar();
            if (r.getYLabel() == 1) {
                theta.incPosCount();
            } else {
                theta.incNegCount();
            }
        }
    }

    System.out.print("[Info]Clusters:");
    for (int i = 0; i < m_kBar; i++) {
        System.out.format("%s\t", m_hdpThetaStars[i].showStat());
    }

    if (m_features != null && printDetails) {
        System.out.format("\n[Info]%d Clusters are found in total! And the highlight is as follows\n", m_kBar);
        accumulateFeatureCount();
        for (_RankItem it : clusterRanker) {
            printTopWords(m_hdpThetaStars[it.m_index]);
        }
    } else {
        // Also covers "features available but no details requested", which previously
        // printed nothing and left the cluster stat line without a trailing newline.
        System.out.format("\n[Info]%d Clusters are found in total!\n", m_kBar);
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Review(structures._Review) MyPriorityQueue(structures.MyPriorityQueue) structures._HDPThetaStar(structures._HDPThetaStar)

Example 27 with structures._HDPThetaStar

use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.

the class CLRWithHDP method sampleGamma.

/**
 * Samples the global mixture proportion, \gamma ~ Dir(m1, m2, .., \alpha).
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>zero {@code m_hSize} of the first {@code m_kBar} clusters;</li>
 *   <li>for every user with a non-zero adaptation size, add {@code sampleH(user, cluster)}
 *       to {@code m_hSize} of each cluster returned by {@code getHDPTheta4Rvw()};</li>
 *   <li>draw one Gamma variate for the extra component (stored in {@code m_cache[m_kBar]})
 *       and one per cluster (stored in {@code m_cache[k]});</li>
 *   <li>divide every draw by the sum of all draws and store the results via
 *       {@code setGamma} and in {@code m_gamma_e}.</li>
 * </ol>
 */
protected void sampleGamma() {
    for (int c = 0; c < m_kBar; c++) {
        m_hdpThetaStars[c].m_hSize = 0;
    }

    for (int u = 0; u < m_userList.size(); u++) {
        _HDPAdaptStruct current = (_HDPAdaptStruct) m_userList.get(u);
        // users without adaptation data contribute nothing
        if (current.getAdaptationSize() != 0) {
            for (_HDPThetaStar star : current.getHDPTheta4Rvw()) {
                star.m_hSize += sampleH(current, star);
            }
        }
    }

    // The draw for gamma_e comes first, followed by the clusters in index order.
    m_cache[m_kBar] = Gamma.staticNextDouble(m_alpha, 1);
    double total = m_cache[m_kBar];
    int c = 0;
    while (c < m_kBar) {
        double draw = Gamma.staticNextDouble(m_hdpThetaStars[c].m_hSize + m_alpha, 1);
        m_cache[c] = draw;
        total += draw;
        c++;
    }

    // Normalize so that the cluster weights and gamma_e together sum to one.
    for (c = 0; c < m_kBar; c++) {
        m_hdpThetaStars[c].setGamma(m_cache[c] / total);
    }
    m_gamma_e = m_cache[m_kBar] / total;
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Example 28 with structures._HDPThetaStar

use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.

the class CLRWithHDP method updateDocMembership.

/**
 * Detaches review {@code r} from the cluster it is currently assigned to, updating both
 * the user-side and the cluster-side bookkeeping. If the cluster is left without members,
 * its gamma is returned to {@code m_gamma_e} and the cluster is swapped to position
 * {@code m_kBar - 1} before {@code m_kBar} is decremented.
 *
 * <p>The cluster reference stored in {@code r} itself is not modified here.
 *
 * @param user the user owning the review
 * @param r    the review to detach from its current cluster
 */
public void updateDocMembership(_HDPAdaptStruct user, _Review r) {
    final _HDPThetaStar assigned = r.getHDPThetaStar();

    // User side: one review fewer for this cluster.
    user.incHDPThetaStarMemSize(assigned, -1);

    // Cluster side: the language-model stat must be removed before the
    // document count is decreased.
    assigned.rmLMStat(r.getLMSparse());
    assigned.updateMemCount(-1);

    // The cluster still has data associated with it; nothing more to do.
    if (assigned.getMemSize() != 0) {
        return;
    }

    // Check that every dimension of the language model has dropped to zero.
    LMStatSanityCheck(assigned);

    // Recycle the gamma of the now-empty cluster.
    m_gamma_e += assigned.getGamma();
    assigned.resetGamma();

    // Swap the disabled cluster to the last active slot for later reuse,
    // then shrink the active range.
    final int position = findHDPThetaStar(assigned);
    swapTheta(m_kBar - 1, position);
    m_kBar--;
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Example 29 with structures._HDPThetaStar

use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.

the class MTCLinAdaptWithHDPMultipleE method calcLogLikelihoodY.

/**
 * Computes the log-likelihood of review {@code r} as the count-weighted sum of the
 * parent-class log-likelihood over every cluster in the review's theta-count map.
 *
 * <p>For each cluster the review is temporarily assigned to it, the parent implementation
 * is evaluated, and the result is multiplied by the cluster's count
 * (log(likelihood^k) = k * log likelihood). Clusters whose index, as returned by
 * {@code findHDPThetaStar}, is negative or not below {@code m_kBar} are skipped. The
 * review's original cluster is always restored before returning, even if the parent
 * implementation throws.
 *
 * <p>Original note: the log-likelihood and its gradients are updated in logLikelihood, so
 * only this function and its gradient counterpart need to be adapted.
 *
 * @param r the review to score
 * @return the weighted sum of log-likelihoods over the review's still-active clusters
 */
protected double calcLogLikelihoodY(_Review r) {
    _HDPThetaStar oldTheta = r.getHDPThetaStar();
    HashMap<_HDPThetaStar, Integer> thetaCountMap = r.getThetaCountMap();
    double likelihood = 0;
    try {
        // Iterate over entries to avoid a second hash lookup per cluster.
        for (java.util.Map.Entry<_HDPThetaStar, Integer> entry : thetaCountMap.entrySet()) {
            _HDPThetaStar theta = entry.getKey();
            int index = findHDPThetaStar(theta);
            // some of the clusters may have disappeared, ignore them.
            if (index < 0 || index >= m_kBar) {
                continue;
            }
            r.setHDPThetaStar(theta);
            // log(likelihood^k) = k * log likelihood.
            likelihood += entry.getValue() * super.calcLogLikelihoodY(r);
        }
    } finally {
        // Never leave the review pointing at a temporary cluster.
        r.setHDPThetaStar(oldTheta);
    }
    return likelihood;
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Aggregations

structures._HDPThetaStar (structures._HDPThetaStar)26 structures._Review (structures._Review)6 Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct)4 File (java.io.File)4 PrintWriter (java.io.PrintWriter)4 structures._HDPThetaStar._Connection (structures._HDPThetaStar._Connection)3 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 MyPriorityQueue (structures.MyPriorityQueue)2 structures._RankItem (structures._RankItem)2 structures._SparseFeature (structures._SparseFeature)2 structures._User (structures._User)2