Search in sources :

Example 16 with org.haiku.haikudepotserver.dataobjects.auto._User

use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.

the class MultiThreadedNetworkAnalyzer method printDocs4Plane.

/**
 * Writes one document line per user, formatted for running baseline
 * models such as TADW and PLANE.
 *
 * Each user's review vectors are merged into a single sparse vector; each
 * feature index is then emitted repeated floor(value) times, reconstructing
 * a bag-of-words token stream, followed by a newline per user.
 *
 * @param filename path of the output file to create/overwrite
 */
public void printDocs4Plane(String filename) {
    // try-with-resources guarantees the writer is closed even if an
    // exception is thrown mid-write (the original leaked it on failure).
    try (PrintWriter writer = new PrintWriter(new File(filename))) {
        for (_User user : m_users) {
            // Collect every review's sparse vector for this user.
            ArrayList<_SparseFeature[]> vectors = new ArrayList<>();
            for (_Review r : user.getReviews()) {
                vectors.add(r.getSparse());
            }
            _SparseFeature[] fvs = Utils.mergeSpVcts(vectors);
            for (_SparseFeature fv : fvs) {
                int index = fv.getIndex();
                double val = fv.getValue();
                // Repeat the index once per unit of (fractional) count.
                for (int i = 0; i < val; i++) {
                    writer.write(index + " ");
                }
            }
            writer.write("\n");
        }
        System.out.println("Finish writing docs for PLANE!!");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : structures._Review(structures._Review) structures._User(structures._User) structures._SparseFeature(structures._SparseFeature)

Example 17 with org.haiku.haikudepotserver.dataobjects.auto._User

use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.

the class MultiThreadedNetworkAnalyzer method writeCVIndex4Edges.

/**
 * Writes out the training interactions (friend lists) for users that are
 * NOT in any of the sampled light/medium/heavy test groups.
 *
 * Output format: one line per user — {@code uid\tfrd1\tfrd2...\n}.
 * Users with no friends, and users belonging to any sampled group,
 * are skipped.
 *
 * @param filename path of the output file
 * @param light    sampled light-user ids to exclude
 * @param medium   sampled medium-user ids to exclude
 * @param heavy    sampled heavy-user ids to exclude
 */
public void writeCVIndex4Edges(String filename, HashSet<String> light, HashSet<String> medium, HashSet<String> heavy) {
    // try-with-resources: the original leaked the writer on exception.
    try (PrintWriter writer = new PrintWriter(new File(filename))) {
        int count = 0;
        for (String uid : m_networkMap.keySet()) {
            _User user = m_users.get(m_userIDIndex.get(uid));
            // Skip users without any interactions.
            if (user.getFriends() == null || user.getFriends().length == 0)
                continue;
            // Only write users outside every sampled test group.
            if (!light.contains(uid) && !medium.contains(uid) && !heavy.contains(uid)) {
                count++;
                writer.write(uid + "\t");
                for (String frd : user.getFriends()) writer.write(frd + "\t");
                writer.write("\n");
            }
        }
        // Fixed message: original printed the literal (misspelled) word
        // "filname" instead of the actual file name.
        System.out.format("[stat]%d/%d users' interactions are written in %s.\n", count, m_networkMap.keySet().size(), filename);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : structures._User(structures._User)

Example 18 with org.haiku.haikudepotserver.dataobjects.auto._User

use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.

the class MultiThreadedNetworkAnalyzer method assignCVIndex4Network.

// Assign interactions to different folds for CV, try to balance different folds.
// For each user i with friends, every edge (i, j) with j > i is assigned a fold
// mask; the edge is recorded symmetrically under both endpoints in
// m_uidInteractionsMap. Non-edges are sampled per fold into
// m_uidNonInteractionsMap, also symmetrically.
//
// Parameters:
//   kFold - number of cross-validation folds to distribute edges across
//   time  - multiplier controlling how many non-interactions are sampled
//           per fold, relative to the user's interaction count
public void assignCVIndex4Network(int kFold, int time) {
    // Start from a clean slate: discard any previous fold assignment.
    m_uidInteractionsMap.clear();
    m_uidNonInteractionsMap.clear();
    System.out.println("[Info]Start CV Index assignment for network....");
    ArrayList<Integer> interactions = new ArrayList<Integer>();
    ArrayList<Integer> nonInteractions = new ArrayList<Integer>();
    // orgTotal counts directed friend entries; realTotal counts the
    // symmetric edge records actually stored (compared at the end).
    int orgTotal = 0, realTotal = 0;
    for (int i = 0; i < m_users.size(); i++) {
        _User ui = m_users.get(i);
        String uiId = ui.getUserID();
        String[] friends = ui.getFriends();
        // Reused scratch lists — cleared per user.
        interactions.clear();
        nonInteractions.clear();
        // ignore the users without any interactions
        if (friends != null && friends.length > 0) {
            if (!m_uidInteractionsMap.containsKey(uiId))
                m_uidInteractionsMap.put(uiId, new ArrayList<_Edge4CV>());
            if (!m_uidNonInteractionsMap.containsKey(uiId))
                m_uidNonInteractionsMap.put(uiId, new ArrayList<_Edge4CV>());
            orgTotal += friends.length;
            // construct the friend indexes; only keep j > i so each
            // undirected edge is processed exactly once (from its lower端point).
            for (String frd : friends) {
                int frdIdx = m_userIDIndex.get(frd);
                if (frdIdx > i)
                    interactions.add(frdIdx);
            }
            // Every later user that is NOT a friend is a candidate non-interaction.
            for (int j = i + 1; j < m_users.size(); j++) {
                if (!interactions.contains(j))
                    nonInteractions.add(j);
            }
            // sample masks for interactions: assign fold number to interactions
            int[] masks4Interactions = generateMasks(interactions.size(), kFold);
            // collect the interactions in the hashmap — recorded under BOTH
            // endpoints with the same fold mask, so the edge stays in one fold.
            for (int m = 0; m < interactions.size(); m++) {
                String ujId = m_users.get(interactions.get(m)).getUserID();
                if (!m_uidInteractionsMap.containsKey(ujId))
                    m_uidInteractionsMap.put(ujId, new ArrayList<_Edge4CV>());
                m_uidInteractionsMap.get(uiId).add(new _Edge4CV(ujId, masks4Interactions[m]));
                m_uidInteractionsMap.get(ujId).add(new _Edge4CV(uiId, masks4Interactions[m]));
            }
            // sample non-interactions: select non-interactions for each fold, might be repetitive
            HashMap<Integer, HashSet<Integer>> foldNonInteractions = new HashMap<Integer, HashSet<Integer>>();
            for (int k = 0; k < kFold; k++) {
                // NOTE(review): divisor is hard-coded 5 rather than kFold —
                // presumably tuned for 5-fold CV; confirm intent if kFold != 5.
                int number = time * interactions.size() / 5;
                foldNonInteractions.put(k, sampleNonInteractions(nonInteractions, number));
            }
            // collect the non-interactions in the hashmap (symmetric, like edges).
            for (int k : foldNonInteractions.keySet()) {
                for (int ujIdx : foldNonInteractions.get(k)) {
                    String ujId = m_users.get(ujIdx).getUserID();
                    if (!m_uidNonInteractionsMap.containsKey(ujId))
                        m_uidNonInteractionsMap.put(ujId, new ArrayList<_Edge4CV>());
                    m_uidNonInteractionsMap.get(uiId).add(new _Edge4CV(ujId, k));
                    m_uidNonInteractionsMap.get(ujId).add(new _Edge4CV(uiId, k));
                }
            }
        }
    }
    System.out.println("Interaction user size: " + m_uidInteractionsMap.size());
    System.out.println("Non-interaction user size: " + m_uidNonInteractionsMap.size());
    // Sanity statistics: realTotal should equal orgTotal when the friend
    // lists are consistent (each directed entry yields one stored record).
    for (String uid : m_uidInteractionsMap.keySet()) {
        realTotal += m_uidInteractionsMap.get(uid).size();
    }
    System.out.format("Org Total: %d, real Total: %d\n", orgTotal, realTotal);
}
Also used : structures._User(structures._User)

Example 19 with org.haiku.haikudepotserver.dataobjects.auto._User

use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.

the class MultiThreadedNetworkAnalyzer method writeAggregatedUsers.

/**
 * Writes each user's reviews grouped by cross-validation fold.
 *
 * Output format per user: the user id on its own line, then for each fold
 * index 0..kFold-1 the fold number on one line followed by the fold's
 * review sources joined by spaces (a single-space placeholder line when
 * the fold has no reviews). Users with no reviews are skipped.
 *
 * @param filename path of the output file
 * @param kFold    number of CV folds (fold indices 0..kFold-1 are emitted)
 */
public void writeAggregatedUsers(String filename, int kFold) {
    // try-with-resources: the original leaked the writer on exception.
    try (PrintWriter writer = new PrintWriter(new File(filename))) {
        int count = 0;
        int[] masks = new int[kFold];
        for (int i = 0; i < kFold; i++) {
            masks[i] = i;
        }
        // Scratch map: fold index -> review sources in that fold; reused per user.
        HashMap<Integer, ArrayList<String>> indexContentMap = new HashMap<>();
        for (_User user : m_users) {
            // Debugging probe left by the original author for one specific user.
            if (user.getUserID().equals("-dF9A2Q3L8C0d2ZyEIgDSQ"))
                System.out.println("!!!!The user exists in the dataset!!!!");
            indexContentMap.clear();
            for (_Review r : user.getReviews()) {
                if (!indexContentMap.containsKey(r.getMask4CV())) {
                    indexContentMap.put(r.getMask4CV(), new ArrayList<String>());
                }
                indexContentMap.get(r.getMask4CV()).add(r.getSource());
            }
            // Skip users with no reviews at all.
            if (indexContentMap.size() == 0)
                continue;
            // write the data for the user
            count++;
            writer.write(user.getUserID() + "\n");
            for (int mask : masks) {
                if (!indexContentMap.containsKey(mask)) {
                    // Empty fold: keep the placeholder line so the file
                    // layout stays fixed at kFold sections per user.
                    writer.write(mask + "\n");
                    writer.write(" \n");
                } else {
                    writer.write(mask + "\n");
                    for (String cont : indexContentMap.get(mask)) {
                        writer.write(cont + " ");
                    }
                    writer.write("\n");
                }
            }
        }
        // Fixed typo in the original message ("writenn" -> "written").
        System.out.format("%d/%d users' data are written in %s.\n", count, m_users.size(), filename);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : structures._User(structures._User) structures._Review(structures._Review)

Example 20 with org.haiku.haikudepotserver.dataobjects.auto._User

use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.

the class MultiThreadedNetworkAnalyzer method sampleUsers4ColdStart4Docs.

/**
 * Reserves documents for perplexity calculation by grouping users on
 * connectivity and sampling a fixed number from each group.
 *
 * Users are bucketed by friend count: heavy (> e2), medium (> e1), light
 * (everything else, including 0 friends); users with a null friend list
 * are ignored entirely. {@code sampleSize} users are drawn from each
 * bucket and their documents written via {@link #writeCVIndex4Docs}.
 *
 * @param filename   output file for the sampled users' documents
 * @param e1         friend-count threshold separating light from medium
 * @param e2         friend-count threshold separating medium from heavy
 * @param sampleSize number of users to sample from each group
 */
public void sampleUsers4ColdStart4Docs(String filename, int e1, int e2, int sampleSize) {
    // (Removed an unused local Random — sampling happens inside sample().)
    ArrayList<String> light = new ArrayList<>();
    ArrayList<String> medium = new ArrayList<>();
    ArrayList<String> heavy = new ArrayList<>();
    // step 1: collect all the user ids in different groups
    for (_User user : m_users) {
        if (user.getFriends() == null)
            continue;
        String userId = user.getUserID();
        int frdSize = user.getFriends().length;
        if (frdSize > e2) {
            heavy.add(userId);
        } else if (frdSize > e1) {
            medium.add(userId);
        } else
            light.add(userId);
    }
    // step 2: sample specified number of users from each group
    HashSet<String> sampledLight = sample(light, sampleSize);
    HashSet<String> sampledMedium = sample(medium, sampleSize);
    HashSet<String> sampledHeavy = sample(heavy, sampleSize);
    // step 3: save the sampled users and their documents
    writeCVIndex4Docs(filename, sampledLight, sampledMedium, sampledHeavy);
}
Also used : structures._User(structures._User)

Aggregations

structures._User (structures._User)56 Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct)15 File (java.io.File)10 IOException (java.io.IOException)10 structures._Review (structures._Review)10 ArrayList (java.util.ArrayList)6 PrintWriter (java.io.PrintWriter)5 BufferedReader (java.io.BufferedReader)4 FileInputStream (java.io.FileInputStream)4 InputStreamReader (java.io.InputStreamReader)4 HashMap (java.util.HashMap)3 ObjectContext (org.apache.cayenne.ObjectContext)3 ObjectNotFoundException (org.haiku.haikudepotserver.api1.support.ObjectNotFoundException)3 User (org.haiku.haikudepotserver.dataobjects.User)3 org.haiku.haikudepotserver.dataobjects.auto._User (org.haiku.haikudepotserver.dataobjects.auto._User)3 AccessDeniedException (org.springframework.security.access.AccessDeniedException)3 InvalidFormatException (opennlp.tools.util.InvalidFormatException)2 structures._SparseFeature (structures._SparseFeature)2 FileNotFoundException (java.io.FileNotFoundException)1 HashSet (java.util.HashSet)1