use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedNetworkAnalyzer method printDocs4Plane.
/**
 * We need to output some files for running baselines,
 * e.g., TADW, PLANE, etc.
 */
public void printDocs4Plane(String filename) {
    try {
        PrintWriter writer = new PrintWriter(new File(filename));
        for (_User user : m_users) {
            // merge all of the user's reviews into one sparse vector
            ArrayList<_SparseFeature[]> vectors = new ArrayList<>();
            for (_Review r : user.getReviews()) {
                vectors.add(r.getSparse());
            }
            _SparseFeature[] fvs = Utils.mergeSpVcts(vectors);
            // expand the sparse vector into a token stream:
            // each feature index is repeated once per unit of its value
            for (_SparseFeature fv : fvs) {
                int index = fv.getIndex();
                double val = fv.getValue();
                for (int i = 0; i < val; i++) {
                    writer.write(index + " ");
                }
            }
            writer.write("\n");
        }
        writer.close();
        System.out.println("Finish writing docs for PLANE!!");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
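Utils.mergeSpVcts is defined elsewhere in IR_Base and is not shown on this page. A minimal sketch of the merge step, assuming it sums the values of features that share an index across the input vectors, that _SparseFeature exposes an (index, value) constructor, and that java.util.TreeMap/Map are imported:

// Hypothetical sketch of the merge step, not the IR_Base implementation:
// sum values of features sharing an index, returning them in index order.
static _SparseFeature[] mergeSpVctsSketch(ArrayList<_SparseFeature[]> vectors) {
    TreeMap<Integer, Double> merged = new TreeMap<>();
    for (_SparseFeature[] vct : vectors)
        for (_SparseFeature fv : vct)
            merged.merge(fv.getIndex(), fv.getValue(), Double::sum);
    _SparseFeature[] fvs = new _SparseFeature[merged.size()];
    int i = 0;
    for (Map.Entry<Integer, Double> e : merged.entrySet())
        fvs[i++] = new _SparseFeature(e.getKey(), e.getValue());
    return fvs;
}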
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedNetworkAnalyzer method writeCVIndex4Edges.
// write out the training interactions for users
public void writeCVIndex4Edges(String filename, HashSet<String> light, HashSet<String> medium, HashSet<String> heavy) {
    try {
        int count = 0;
        PrintWriter writer = new PrintWriter(new File(filename));
        for (String uid : m_networkMap.keySet()) {
            _User user = m_users.get(m_userIDIndex.get(uid));
            // skip users without any friends
            if (user.getFriends() == null || user.getFriends().length == 0)
                continue;
            // only write users that were not sampled into any of the three groups
            if (!light.contains(uid) && !medium.contains(uid) && !heavy.contains(uid)) {
                count++;
                writer.write(uid + "\t");
                for (String frd : user.getFriends())
                    writer.write(frd + "\t");
                writer.write("\n");
            }
        }
        writer.close();
        System.out.format("[stat]%d/%d users' interactions are written in %s.\n", count, m_networkMap.keySet().size(), filename);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
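Each remaining user yields one tab-separated line: the user id followed by that user's friend ids. With hypothetical ids, a line in the output file looks like:

u42	u7	u19	u23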
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedNetworkAnalyzer method assignCVIndex4Network.
// Assign interactions to different folds for CV, trying to balance the folds.
public void assignCVIndex4Network(int kFold, int time) {
    m_uidInteractionsMap.clear();
    m_uidNonInteractionsMap.clear();
    System.out.println("[Info]Start CV Index assignment for network....");
    ArrayList<Integer> interactions = new ArrayList<Integer>();
    ArrayList<Integer> nonInteractions = new ArrayList<Integer>();
    int orgTotal = 0, realTotal = 0;
    for (int i = 0; i < m_users.size(); i++) {
        _User ui = m_users.get(i);
        String uiId = ui.getUserID();
        String[] friends = ui.getFriends();
        interactions.clear();
        nonInteractions.clear();
        // ignore the users without any interactions
        if (friends != null && friends.length > 0) {
            if (!m_uidInteractionsMap.containsKey(uiId))
                m_uidInteractionsMap.put(uiId, new ArrayList<_Edge4CV>());
            if (!m_uidNonInteractionsMap.containsKey(uiId))
                m_uidNonInteractionsMap.put(uiId, new ArrayList<_Edge4CV>());
            orgTotal += friends.length;
            // construct the friend indexes; only keep j > i so each pair is processed once
            for (String frd : friends) {
                int frdIdx = m_userIDIndex.get(frd);
                if (frdIdx > i)
                    interactions.add(frdIdx);
            }
            // every remaining user with a larger index is a non-interaction candidate
            for (int j = i + 1; j < m_users.size(); j++) {
                if (!interactions.contains(j))
                    nonInteractions.add(j);
            }
            // sample masks for interactions: assign a fold number to each interaction
            int[] masks4Interactions = generateMasks(interactions.size(), kFold);
            // collect the interactions in the hashmap, recording the edge for both endpoints
            for (int m = 0; m < interactions.size(); m++) {
                String ujId = m_users.get(interactions.get(m)).getUserID();
                if (!m_uidInteractionsMap.containsKey(ujId))
                    m_uidInteractionsMap.put(ujId, new ArrayList<_Edge4CV>());
                m_uidInteractionsMap.get(uiId).add(new _Edge4CV(ujId, masks4Interactions[m]));
                m_uidInteractionsMap.get(ujId).add(new _Edge4CV(uiId, masks4Interactions[m]));
            }
            // sample non-interactions: select non-interactions for each fold,
            // which might be repetitive across folds
            HashMap<Integer, HashSet<Integer>> foldNonInteractions = new HashMap<Integer, HashSet<Integer>>();
            for (int k = 0; k < kFold; k++) {
                // number of non-interactions to sample for this fold ('time' controls the ratio)
                int number = time * interactions.size() / 5;
                foldNonInteractions.put(k, sampleNonInteractions(nonInteractions, number));
            }
            // collect the non-interactions in the hashmap, again for both endpoints
            for (int k : foldNonInteractions.keySet()) {
                for (int ujIdx : foldNonInteractions.get(k)) {
                    String ujId = m_users.get(ujIdx).getUserID();
                    if (!m_uidNonInteractionsMap.containsKey(ujId))
                        m_uidNonInteractionsMap.put(ujId, new ArrayList<_Edge4CV>());
                    m_uidNonInteractionsMap.get(uiId).add(new _Edge4CV(ujId, k));
                    m_uidNonInteractionsMap.get(ujId).add(new _Edge4CV(uiId, k));
                }
            }
        }
    }
    System.out.println("Interaction user size: " + m_uidInteractionsMap.size());
    System.out.println("Non-interaction user size: " + m_uidNonInteractionsMap.size());
    for (String uid : m_uidInteractionsMap.keySet()) {
        realTotal += m_uidInteractionsMap.get(uid).size();
    }
    System.out.format("Original total: %d, real total: %d\n", orgTotal, realTotal);
}
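generateMasks and sampleNonInteractions are defined elsewhere in the class. A minimal sketch of both, assuming the first assigns each interaction a fold uniformly at random and the second draws distinct candidates uniformly at random from the pool:

// Hypothetical sketches, not the IR_Base implementations.
// Assign each of the 'size' interactions a random fold in [0, kFold).
protected int[] generateMasks(int size, int kFold) {
    Random rand = new Random();
    int[] masks = new int[size];
    for (int i = 0; i < size; i++)
        masks[i] = rand.nextInt(kFold);
    return masks;
}

// Draw up to 'number' distinct candidates from the non-interaction pool.
protected HashSet<Integer> sampleNonInteractions(ArrayList<Integer> pool, int number) {
    Random rand = new Random();
    HashSet<Integer> sampled = new HashSet<>();
    int target = Math.min(number, pool.size());
    while (sampled.size() < target)
        sampled.add(pool.get(rand.nextInt(pool.size())));
    return sampled;
}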
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedNetworkAnalyzer method writeAggregatedUsers.
public void writeAggregatedUsers(String filename, int kFold) {
    try {
        int count = 0;
        int[] masks = new int[kFold];
        for (int i = 0; i < kFold; i++) {
            masks[i] = i;
        }
        PrintWriter writer = new PrintWriter(new File(filename));
        HashMap<Integer, ArrayList<String>> indexContentMap = new HashMap<>();
        for (_User user : m_users) {
            // debugging check for one specific user id
            if (user.getUserID().equals("-dF9A2Q3L8C0d2ZyEIgDSQ"))
                System.out.println("!!!!The user exists in the dataset!!!!");
            indexContentMap.clear();
            // group the user's reviews by their CV fold index
            for (_Review r : user.getReviews()) {
                if (!indexContentMap.containsKey(r.getMask4CV())) {
                    indexContentMap.put(r.getMask4CV(), new ArrayList<String>());
                }
                indexContentMap.get(r.getMask4CV()).add(r.getSource());
            }
            if (indexContentMap.size() == 0)
                continue;
            // write the data for the user
            count++;
            writer.write(user.getUserID() + "\n");
            for (int mask : masks) {
                if (!indexContentMap.containsKey(mask)) {
                    // empty fold: keep a placeholder line so the record stays aligned
                    writer.write(mask + "\n");
                    writer.write(" \n");
                } else {
                    writer.write(mask + "\n");
                    for (String cont : indexContentMap.get(mask)) {
                        writer.write(cont + " ");
                    }
                    writer.write("\n");
                }
            }
        }
        writer.close();
        System.out.format("%d/%d users' data are written in %s.\n", count, m_users.size(), filename);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
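Each user becomes one multi-line record in the output file: the user id on its own line, then for every fold its index on one line followed by the concatenated review sources assigned to that fold (a placeholder line when the fold is empty). With kFold = 2 and a hypothetical id and text, a record looks like:

-abc123XYZ
0
great food quick service
1
 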
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedNetworkAnalyzer method sampleUsers4ColdStart4Docs.
// reserve docs for perplexity calculation; group users based on connectivity
// e1 and e2 are thresholds for splitting users into light, medium and heavy groups
public void sampleUsers4ColdStart4Docs(String filename, int e1, int e2, int sampleSize) {
    ArrayList<String> light = new ArrayList<>();
    ArrayList<String> medium = new ArrayList<>();
    ArrayList<String> heavy = new ArrayList<>();
    // step 1: collect all the user ids in different groups
    for (_User user : m_users) {
        if (user.getFriends() == null)
            continue;
        String userId = user.getUserID();
        int frdSize = user.getFriends().length;
        if (frdSize > e2) {
            heavy.add(userId);
        } else if (frdSize > e1) {
            medium.add(userId);
        } else {
            light.add(userId);
        }
    }
    // step 2: sample the specified number of users from each group
    HashSet<String> sampledLight = sample(light, sampleSize);
    HashSet<String> sampledMedium = sample(medium, sampleSize);
    HashSet<String> sampledHeavy = sample(heavy, sampleSize);
    // step 3: save the sampled users and their documents
    writeCVIndex4Docs(filename, sampledLight, sampledMedium, sampledHeavy);
}
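sample and writeCVIndex4Docs are defined elsewhere in the class. A minimal sketch of the sampler, assuming it draws sampleSize distinct user ids uniformly at random:

// Hypothetical sketch, not the IR_Base implementation:
// draw up to 'sampleSize' distinct ids uniformly at random.
protected HashSet<String> sample(ArrayList<String> ids, int sampleSize) {
    Random rand = new Random();
    HashSet<String> sampled = new HashSet<>();
    int target = Math.min(sampleSize, ids.size());
    while (sampled.size() < target)
        sampled.add(ids.get(rand.nextInt(ids.size())));
    return sampled;
}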