use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class UserAnalyzer method loadSVDFile.
/**
 * Loads the low-dimensional (SVD) user representations from a text file and
 * attaches them to the users in {@code m_users}.
 *
 * <p>The first three lines of the file are skipped. Every remaining non-blank
 * line is split on whitespace: the first token is taken as the user ID and all
 * following tokens are parsed as doubles forming that user's profile. Users with
 * no entry in the file get an all-zero profile of length 11 and a warning is
 * printed.
 *
 * <p>If an {@link IOException} occurs, the stack trace is printed and no user
 * profile is modified. A malformed number still raises
 * {@link NumberFormatException}, but the file is closed before it propagates.
 *
 * @param filename path of the SVD file; it is decoded as UTF-8
 */
public void loadSVDFile(String filename) {
    // Length of the zero vector given to users missing from the file.
    // NOTE(review): presumably this should equal the dimension stored in the file - confirm.
    final int missingProfileDim = 11;
    // Number of leading lines that carry no user data.
    final int headerLines = 3;
    try {
        // Construct the <userID, low-dim profile> map first.
        int count = 0;
        HashMap<String, double[]> IDLowDimMap = new HashMap<String, double[]>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(filename)), "UTF-8"));
        try {
            for (int i = 0; i < headerLines; i++) {
                reader.readLine();
            }
            String line;
            while ((line = reader.readLine()) != null) {
                // Trim so leading whitespace cannot produce an empty first token,
                // and skip blank lines, which would otherwise split to an empty array.
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                String[] strs = line.split("\\s+");
                double[] lowDims = new double[strs.length - 1];
                for (int i = 1; i < strs.length; i++) {
                    lowDims[i - 1] = Double.parseDouble(strs[i]);
                }
                IDLowDimMap.put(strs[0], lowDims);
                count++;
            }
        } finally {
            // Always release the file handle, even when parsing fails.
            reader.close();
        }
        // Currently, some users have no low dimension representation in the file.
        for (_User u : m_users) {
            double[] profile = IDLowDimMap.get(u.getUserID());
            if (profile != null) {
                u.setLowDimProfile(profile);
            } else {
                System.out.println("[Warning]" + u.getUserID() + " : low dim profile missing.");
                u.setLowDimProfile(new double[missingProfileDim]);
            }
        }
        System.out.format("Ther are %d users and %d users' low dimension profile are loaded.\n", m_users.size(), count);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedLinkPredAnalyzer method checkFriendSize.
/**
 * Prints how many users in {@code m_users} have a non-zero friend size and how
 * many have a non-zero test friend size.
 */
public void checkFriendSize() {
    int withTrainFriends = 0;
    int withTestFriends = 0;
    for (_User user : m_users) {
        // Each user can contribute to both tallies independently.
        withTrainFriends += (user.getFriendSize() == 0) ? 0 : 1;
        withTestFriends += (user.getTestFriendSize() == 0) ? 0 : 1;
    }
    System.out.format("[Check]%d users have train friends, %d users have test friends.\n", withTrainFriends, withTestFriends);
}
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedLinkPredAnalyzer method loadTestFriendship.
/**
 * Loads the test friendships (used for link prediction only) and assigns them
 * to the matching users in {@code m_users}.
 *
 * <p>{@code m_testMap} is cleared first. Each line of the file is trimmed and
 * split on tabs: the first field is the user ID and the remaining fields are
 * that user's test friends. Lines without any friend field are ignored. After
 * the file is read, every user whose ID appears in {@code m_testMap} receives
 * its friend array, and {@link #checkFriendSize()} prints a summary.
 *
 * <p>If an {@link IOException} occurs, the stack trace is printed and neither
 * the assignment nor the summary is performed.
 *
 * @param filename path of the tab-separated friendship file; decoded as UTF-8
 */
public void loadTestFriendship(String filename) {
    try {
        m_testMap.clear();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(filename)), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] users = line.trim().split("\t");
                // Everything after the first field is a friend ID.
                String[] friends = Arrays.copyOfRange(users, 1, users.length);
                if (friends.length == 0) {
                    continue;
                }
                m_testMap.put(users[0], friends);
            }
        } finally {
            // Release the file handle even if reading fails part-way.
            reader.close();
        }
        // map friends to users.
        for (_User u : m_users) {
            if (m_testMap.containsKey(u.getUserID())) {
                u.setTestFriends(m_testMap.get(u.getUserID()));
            }
        }
        checkFriendSize();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedLinkPredAnalyzer method saveUserUserPairs.
/**
 * Writes the user-user pairs to {@code train.csv} and {@code test.csv} for
 * model training (the original note mentions graphlab as the consumer).
 *
 * <p>Both files start with the header {@code user_id,item_id,rating} and are
 * written as UTF-8, matching the encoding used by the loaders in this class.
 * <ul>
 *   <li>Training file: for every user with a non-zero friend size, each
 *       friendship is written in both directions with rating 1.</li>
 *   <li>Test file: for every user with a non-zero test friend size, every other
 *       user who is not already a (training) friend is considered. Test friends
 *       are written in both directions with rating 1; remaining users whose ID
 *       is a key of {@code m_trainMap} are written in both directions with
 *       rating 0.</li>
 * </ul>
 *
 * <p>I/O failures, including write errors detected on the writers, are reported
 * by printing a stack trace; in that case the summary line is not printed.
 *
 * @param dir prefix prepended verbatim to the file names, so it must already
 *            end with a path separator if it denotes a directory
 */
public void saveUserUserPairs(String dir) {
    int trainUser = 0, testUser = 0, trainPair = 0, testPair = 0;
    PrintWriter trainWriter = null;
    PrintWriter testWriter = null;
    try {
        trainWriter = new PrintWriter(new File(dir + "train.csv"), "UTF-8");
        testWriter = new PrintWriter(new File(dir + "test.csv"), "UTF-8");
        trainWriter.write("user_id,item_id,rating\n");
        testWriter.write("user_id,item_id,rating\n");
        for (_User u : m_users) {
            if (u.getFriendSize() != 0) {
                trainUser++;
                for (String frd : u.getFriends()) {
                    trainPair++;
                    // Each pair is emitted in both directions.
                    trainWriter.write(String.format("%s,%s,%d\n", u.getUserID(), frd, 1));
                    trainWriter.write(String.format("%s,%s,%d\n", frd, u.getUserID(), 1));
                }
            }
            // for test users, we also need to write out non-friends
            if (u.getTestFriendSize() != 0) {
                testUser++;
                for (_User nei : m_users) {
                    String neiID = nei.getUserID();
                    // Skip training friends and the user itself.
                    if (u.hasFriend(neiID) || u.getUserID().equals(neiID)) {
                        continue;
                    } else if (u.hasTestFriend(neiID)) {
                        testPair++;
                        testWriter.write(String.format("%s,%s,%d\n", u.getUserID(), neiID, 1));
                        testWriter.write(String.format("%s,%s,%d\n", neiID, u.getUserID(), 1));
                    } else if (m_trainMap.containsKey(neiID)) {
                        testPair++;
                        testWriter.write(String.format("%s,%s,%d\n", u.getUserID(), neiID, 0));
                        testWriter.write(String.format("%s,%s,%d\n", neiID, u.getUserID(), 0));
                    }
                }
            }
        }
        // PrintWriter never throws on write; surface any swallowed failure explicitly.
        boolean trainFailed = trainWriter.checkError();
        boolean testFailed = testWriter.checkError();
        if (trainFailed || testFailed) {
            throw new IOException("Failed to write user-user pairs with prefix " + dir);
        }
        System.out.format("[Info]Finish writing (%d,%d) training users/pairs, (%d,%d) testing users/pairs.\n", trainUser, trainPair, testUser, testPair);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close whichever writers were opened, even after a failure.
        if (trainWriter != null) {
            trainWriter.close();
        }
        if (testWriter != null) {
            testWriter.close();
        }
    }
}
use of org.haiku.haikudepotserver.dataobjects.auto._User in project IR_Base by Linda-sunshine.
the class MultiThreadedLinkPredAnalyzer method rmMultipleReviews4OneItem.
/**
 * For every user in {@code m_users}, keeps only the first review of each item
 * and removes any later review with the same item ID from the user's review
 * list, then asks the user to rebuild its train/test reviews. Finally prints
 * how many users had duplicates and how many reviews were removed in total.
 */
public void rmMultipleReviews4OneItem() {
    Set<String> seenItems = new HashSet<String>();
    int usersWithDuplicates = 0;
    int duplicateReviews = 0;
    for (_User user : m_users) {
        ArrayList<_Review> reviews = user.getReviews();
        seenItems.clear();
        int removedForUser = 0;
        int pos = 0;
        while (pos < reviews.size()) {
            if (seenItems.add(reviews.get(pos).getItemID())) {
                // First review seen for this item: keep it and move on.
                pos++;
            } else {
                // Repeated item: drop the review; the next one shifts into pos.
                reviews.remove(pos);
                removedForUser++;
            }
        }
        // record the user number
        if (removedForUser > 0) {
            usersWithDuplicates++;
            duplicateReviews += removedForUser;
        }
        user.constructTrainTestReviews();
    }
    System.out.format("%d users have %d duplicate reviews for items.\n", usersWithDuplicates, duplicateReviews);
}
Aggregations