Search in sources :

Example 21 with structures._HDPThetaStar._Connection

use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.

the class CLRWithMMB method MLEB.

/**
 * Computes the MLE of the B matrix over the first {@code m_kBar} theta stars.
 *
 * <p>For every ordered pair (g, h) the edge counts e_0 and e_1 are read from
 * theta g's connection to theta h, and the entry is
 * {@code (e_1 + m_abcd[0] - 1) / (e_0 + e_1 + m_abcd[0] + m_abcd[1] - 2)}.
 * The value is written to both {@code [g][h]} and {@code [h][g]}, so the
 * returned matrix is symmetric; a pair visited later overwrites the value
 * stored by its mirrored pair.
 *
 * @return a freshly allocated {@code m_kBar x m_kBar} matrix of estimates
 */
public double[][] MLEB() {
    double[][] estimate = new double[m_kBar][m_kBar];
    for (int row = 0; row < m_kBar; row++) {
        _HDPThetaStar rowTheta = m_hdpThetaStars[row];
        for (int col = 0; col < m_kBar; col++) {
            _HDPThetaStar colTheta = m_hdpThetaStars[col];
            int zeroEdges = rowTheta.getConnectionEdgeCount(colTheta, 0);
            int oneEdges = rowTheta.getConnectionEdgeCount(colTheta, 1);
            // Kept as one expression so numeric promotion matches the priors' declared type.
            double ratio = (oneEdges + m_abcd[0] - 1) / (zeroEdges + oneEdges + m_abcd[0] + m_abcd[1] - 2);
            // Mirror the value so both orientations of the pair agree.
            estimate[row][col] = ratio;
            estimate[col][row] = ratio;
        }
    }
    return estimate;
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Example 22 with structures._HDPThetaStar._Connection

use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.

the class CLRWithMMB method sampleEdge.

/**
 * Samples the group indicator for the edge from user {@code i} to user {@code j}
 * and records the outcome on the chosen theta star, on user i, and in
 * {@code m_indicator[i][j]}.
 *
 * <p>Each of the {@code m_kBar} existing groups is scored in log space as
 * edge likelihood (against the theta already stored in {@code m_indicator[j][i]})
 * plus log group popularity; the score is stored on the group via
 * {@code setProportion}. The new-cluster term
 * {@code log(m_eta * m_gamma_e) + m_pNew[e]} is added to the normalizer.
 *
 * @param i index of the source user in {@code m_userList}
 * @param j index of the target user in {@code m_userList}
 * @param e edge value; used as an index into {@code m_pNew} and {@code m_MNL}
 */
protected void sampleEdge(int i, int j, int e) {
    // Indicator already assigned for the opposite direction (j -> i).
    final _HDPThetaStar reverseTheta = m_indicator[j][i];
    final _MMBAdaptStruct source = (_MMBAdaptStruct) m_userList.get(i);
    final _MMBAdaptStruct target = (_MMBAdaptStruct) m_userList.get(j);

    double logTotal = 0;
    for (int idx = 0; idx < m_kBar; idx++) {
        final _HDPThetaStar candidate = m_hdpThetaStars[idx];
        if (!reverseTheta.isValid()) {
            System.out.println("[Error]Invalid theta inside sampleEdge()!!");
        }
        // log p(e_ij | z_{i->j}, z_{j->i}, B)
        double logProb = calcLogLikelihoodEMarginal(candidate, reverseTheta, e);
        if (Double.isInfinite(logProb)) {
            System.out.println("Infinite!");
        }
        // add log p(z = idx | gamma, eta)
        double gamma = candidate.getGamma();
        logProb += Math.log(calcGroupPopularity(source, idx, gamma));
        // The proportion is stored in log space.
        candidate.setProportion(logProb);
        logTotal = (idx == 0) ? logProb : Utils.logSum(logTotal, logProb);
    }

    // Fold in the probability of opening a new cluster.
    logTotal = Utils.logSum(logTotal, Math.log(m_eta * m_gamma_e) + m_pNew[e]);

    int chosen = sampleEdgeInLogSpace(logTotal, e);
    if (chosen == -1) {
        // -1 signals a new cluster; after creating it, the last valid slot is used.
        sampleNewCluster4Edge();
        chosen = m_kBar - 1;
    }

    // Book-keeping for the sampled indicator.
    final _HDPThetaStar picked = m_hdpThetaStars[chosen];
    picked.updateEdgeCount(e, 1);
    m_MNL[e]++;
    source.addNeighbor(target, picked, e);
    source.incHDPThetaStarEdgeSize(picked, 1);
    // Keep the reference in the indicator matrix for later lookups.
    m_indicator[i][j] = picked;
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Example 23 with structures._HDPThetaStar._Connection

use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.

the class CLRWithMMB method sampleZeroEdgeJoint.

/**
 * Samples a zero edge (e_ij = 0) between users i and j from the joint
 * probabilities of c_ij, z_ij and z_ji.
 *
 * Either the pair is attributed to the background model (nothing is assigned
 * to the users), or a pair of groups (g, h) is drawn and both directions
 * i->j and j->i are recorded on the theta stars, the users and m_indicator.
 *
 * @param i index of the first user in m_userList
 * @param j index of the second user in m_userList
 */
public void sampleZeroEdgeJoint(int i, int j) {
    /**
     * We consider all possible combinations of different memberships.
     * 1. cij=0, cji=0, prob: (1-\rho), 1 case
     * 2. cij=1, cji=1, known (Bgh, Bhg), prob: \rho(1-Bgh), k(k+1)/2 possible cases
     *    posterior prob: \rho*(b+e_0)/(a+b+e_0+e_1)
     * 3. cij=1, cji=1, unknown (Bgh, Bhg), prob: \rho*b/(a+b), k+1 possible cases
     * In total, we have (k+1)*(k+2)/2+1 possible cases.
     */
    // Step 1: calc prob for different cases of cij, cji.
    // case 0: background model; its log prob seeds the normalizer and is not stored in the two-dim array.
    double logSum = Math.log(1 - m_rho);
    /**
     * We maintain a (m_kBar+1) x (m_kBar+1) matrix of log probabilities; the extra
     * row/column stands for a new cluster. As the matrix is symmetric, only the
     * upper triangle is computed and then mirrored.
     */
    double[][] cacheB = new double[m_kBar + 1][m_kBar + 1];
    // Start every cell at log(0) so untouched cells carry no probability mass.
    for (double[] b : cacheB) Arrays.fill(b, Double.NEGATIVE_INFINITY);
    _MMBAdaptStruct ui = (_MMBAdaptStruct) m_userList.get(i);
    _MMBAdaptStruct uj = (_MMBAdaptStruct) m_userList.get(j);
    _HDPThetaStar theta_g, theta_h;
    // case 1: both memberships come from existing thetas.
    for (int g = 0; g < m_kBar; g++) {
        theta_g = m_hdpThetaStars[g];
        for (int h = g; h < m_kBar; h++) {
            theta_h = m_hdpThetaStars[h];
            // log likelihood of a zero edge between the two groups, plus the log gamma of each group.
            cacheB[g][h] = calcLogLikelihoodE(theta_g, theta_h, 0);
            cacheB[g][h] += Math.log(theta_g.getGamma()) + Math.log(theta_h.getGamma());
            if (g == h) {
                // diagonal cell: counted once.
                logSum = Utils.logSum(logSum, cacheB[g][h]);
            } else {
                cacheB[h][g] = cacheB[g][h];
                // off-diagonal: (g,h) and (h,g) both contribute, so add log(2) + logp.
                logSum = Utils.logSum(logSum, cacheB[h][g] + m_log2);
            }
        }
    }
    // case 2: either one is from new cluster.
    // pre-calculate log(\rho * b/(a+b)); the gamma terms are added per cell below.
    double pNew = Math.log(m_rho) + Math.log(m_abcd[1]) - Math.log(m_abcd[0] + m_abcd[1]);
    double gamma_g = 0;
    for (int k = 0; k < m_kBar; k++) {
        gamma_g = m_hdpThetaStars[k].getGamma();
        // if either one is 0, then prob is 0 -> log prob = -Infinity (cell keeps its initial value)
        if (m_gamma_e != 0 && gamma_g != 0) {
            cacheB[k][m_kBar] = pNew + Math.log(m_gamma_e) + Math.log(gamma_g);
            cacheB[m_kBar][k] = cacheB[k][m_kBar];
            // mirrored cell pair, hence the log(2) term.
            logSum = Utils.logSum(logSum, cacheB[k][m_kBar] + m_log2);
        }
    }
    // both are from new clusters.
    // NOTE(review): this cell uses log(m_gamma_e) + log(2), not 2*log(m_gamma_e) — confirm this is intended.
    if (m_gamma_e != 0) {
        cacheB[m_kBar][m_kBar] = pNew + Math.log(m_gamma_e) + m_log2;
        logSum = Utils.logSum(logSum, cacheB[m_kBar][m_kBar]);
    }
    // Step 2: sample one pair from the prob matrix.
    // The result is decoded below as -1 (background) or a flattened index g * (m_kBar + 1) + h.
    int k = sampleIn2DimArrayLogSpace(logSum, Math.log(1 - m_rho), cacheB);
    // Step 3: Analyze the sampled cluster results.
    // case 1: k == -1, sample from the background model;
    // case 2: k != -1, sample from mmb model.
    int g = 0, h = 0;
    if (k != -1) {
        // Decode before any new cluster is created, while m_kBar still matches cacheB's size.
        g = k / (m_kBar + 1);
        h = k % (m_kBar + 1);
        if (g == m_kBar || h == m_kBar) {
            // we need to sample the new cluster
            // shall we consider the current edge?? posterior sampling??
            // NOTE(review): presumably this makes index m_kBar (old value) a valid theta star — confirm.
            sampleNewCluster4Edge();
        }
        // Update the thetaStar and user info after getting z_ij.
        // -->1
        m_hdpThetaStars[g].updateEdgeCount(0, 1);
        ui.addNeighbor(uj, m_hdpThetaStars[g], 0);
        ui.incHDPThetaStarEdgeSize(m_hdpThetaStars[g], 1);
        m_indicator[i][j] = m_hdpThetaStars[g];
        updateSampleSize(0, 1);
        // Update the thetaStar and user info after getting z_ji.
        m_hdpThetaStars[h].updateEdgeCount(0, 1);
        uj.addNeighbor(ui, m_hdpThetaStars[h], 0);
        uj.incHDPThetaStarEdgeSize(m_hdpThetaStars[h], 1);
        m_indicator[j][i] = m_hdpThetaStars[h];
        updateSampleSize(0, 1);
        // Register the zero-edge connection between the two users.
        addConnection(ui, uj, 0);
    } else {
        // Background model chosen: no membership is assigned to either direction.
        // NOTE(review): presumably slot 2 counts background pairs, incremented by 2 for both directions — confirm.
        updateSampleSize(2, 2);
    }
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Example 24 with structures._HDPThetaStar._Connection

use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.

the class CLRWithMMB method updateDocMembership.

/**
 * Detaches review {@code r} from its current theta star, on both the user side
 * and the theta side, and retires the theta star if nothing is left in it.
 *
 * <p>Overridden because the removal condition differs here: a cluster is only
 * removed when it has neither member documents nor edges.
 *
 * @param user the user who owns the review
 * @param r    the review whose membership is being withdrawn
 */
@Override
public void updateDocMembership(_HDPAdaptStruct user, _Review r) {
    final _HDPThetaStar assigned = r.getHDPThetaStar();

    // User side: one fewer review attached to this theta star.
    user.incHDPThetaStarMemSize(assigned, -1);

    // Theta side: drop the language-model stats before lowering the document count.
    assigned.rmLMStat(r.getLMSparse());
    assigned.updateMemCount(-1);

    // The cluster still holds documents or edges: nothing more to do.
    if (assigned.getMemSize() != 0 || assigned.getTotalEdgeSize() != 0) {
        return;
    }

    System.out.println("[Debug]Zero cluster detected in updating doc!");
    // Check whether every dimension of the language model is back to a zero count.
    LMStatSanityCheck(assigned);
    // Recycle the gamma mass of the emptied cluster.
    m_gamma_e += assigned.getGamma();
    // Move the disabled theta to the last valid slot, then shrink the valid range.
    final int position = findHDPThetaStar(assigned);
    swapTheta(m_kBar - 1, position);
    assigned.reset();
    m_kBar--;
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Example 25 with structures._HDPThetaStar._Connection

use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.

the class CLRWithHDP method swapTheta.

/**
 * Exchanges the theta stars stored at positions {@code a} and {@code b} of
 * {@code m_hdpThetaStars}. Does nothing when both positions are the same.
 *
 * @param a first position
 * @param b second position
 */
@Override
protected void swapTheta(int a, int b) {
    if (a != b) {
        // kBar starts from 0; its value decides how many entries are valid.
        final _HDPThetaStar held = m_hdpThetaStars[a];
        m_hdpThetaStars[a] = m_hdpThetaStars[b];
        m_hdpThetaStars[b] = held;
    }
}
Also used : structures._HDPThetaStar(structures._HDPThetaStar)

Aggregations

structures._HDPThetaStar (structures._HDPThetaStar): 26; structures._Review (structures._Review): 6; Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct): 4; File (java.io.File): 4; PrintWriter (java.io.PrintWriter): 4; structures._HDPThetaStar._Connection (structures._HDPThetaStar._Connection): 3; FileNotFoundException (java.io.FileNotFoundException): 2; IOException (java.io.IOException): 2; MyPriorityQueue (structures.MyPriorityQueue): 2; structures._RankItem (structures._RankItem): 2; structures._SparseFeature (structures._SparseFeature): 2; structures._User (structures._User): 2