use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.
the class FriendshipMerger method apply.
/**
 * Merges several copies of a Person into one whose knows list holds a single edge per target.
 * <p>
 * The result is a copy of the first element of {@code valueSet}. The knows edges of all
 * elements are pooled, sorted with {@link Knows.FullComparator}, and for each run of
 * consecutive edges pointing at the same target account id only the first one is kept.
 * Every dropped edge increments {@code numRepeated}.
 *
 * @param valueSet the Person copies to merge
 * @return the merged Person
 */
public Person apply(Iterable<Person> valueSet) {
    Person merged = null;
    List<Knows> allEdges = new ArrayList<>();
    for (Person candidate : valueSet) {
        if (merged == null) {
            // the first element seen is the template for the merged person
            merged = new Person(candidate);
        }
        allEdges.addAll(candidate.getKnows());
    }

    // start from an empty edge list; it is refilled from the pooled, sorted edges below
    merged.getKnows().clear();
    allEdges.sort(new Knows.FullComparator());

    if (!allEdges.isEmpty()) {
        Knows head = allEdges.get(0);
        long lastTarget = head.to().getAccountId();
        merged.getKnows().add(head);
        for (Knows edge : allEdges.subList(1, allEdges.size())) {
            long target = edge.to().getAccountId();
            if (target == lastTarget) {
                // same target as the previously kept edge: count it and drop it
                numRepeated++;
            } else {
                merged.getKnows().add(edge);
                lastTarget = target;
            }
        }
    }
    return merged;
}
use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.
the class ForumGenerator method createGroup.
/**
 * Creates a Group with the Person as the moderator and fills it with members.
 * <p>
 * Each iteration of the membership loop takes one of two paths: when a KNOWS_LEVEL draw is
 * below 0.3 and the moderator has at least one knows edge, a random friend of the moderator is
 * tried; otherwise a random Person from the block is tried and only considered when a
 * MEMBERSHIP draw is below 0.1. A candidate who is already a member, or whose valid membership
 * date range is empty, is not added.
 *
 * @param randomFarm random number generator
 * @param forumId    forumID
 * @param moderator  moderator
 * @param block      person block
 * @param blockId    block id, passed to {@code SN.formId} when building the forum id
 * @return Group
 */
Forum createGroup(RandomGeneratorFarm randomFarm, long forumId, Person moderator, List<Person> block, long blockId) {
    // creation date
    long groupMinCreationDate = moderator.getCreationDate() + DatagenParams.delta;
    long groupMaxCreationDate = Math.min(moderator.getDeletionDate(), Dictionaries.dates.getSimulationEnd());
    long groupCreationDate = Dictionaries.dates.randomDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), groupMinCreationDate, groupMaxCreationDate);

    // deletion date
    long groupDeletionDate;
    boolean isExplicitlyDeleted;
    if (randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_FORUM).nextDouble() < DatagenParams.probForumDeleted) {
        isExplicitlyDeleted = true;
        long groupMinDeletionDate = groupCreationDate + DatagenParams.delta;
        long groupMaxDeletionDate = Dictionaries.dates.getSimulationEnd();
        groupDeletionDate = Dictionaries.dates.randomDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), groupMinDeletionDate, groupMaxDeletionDate);
    } else {
        isExplicitlyDeleted = false;
        groupDeletionDate = Dictionaries.dates.getNetworkCollapse();
    }

    // the hasModerator edge is deleted if either the Forum (group) or the Person (moderator) is deleted
    long moderatorDeletionDate = Math.min(groupDeletionDate, moderator.getDeletionDate());

    int language = moderator.getLanguages().get(randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE).nextInt(moderator.getLanguages().size()));

    // pick one of the moderator's interests at a random position; it becomes the group's only tag
    Iterator<Integer> iter = moderator.getInterests().iterator();
    int idx = randomFarm.get(RandomGeneratorFarm.Aspect.FORUM_INTEREST).nextInt(moderator.getInterests().size());
    for (int i = 0; i < idx; i++) {
        iter.next();
    }
    int interestId = iter.next();
    List<Integer> interest = new ArrayList<>();
    interest.add(interestId);

    // Create group
    Forum forum = new Forum(SN.formId(SN.composeId(forumId, groupCreationDate), blockId), groupCreationDate, groupDeletionDate, new PersonSummary(moderator), moderatorDeletionDate, StringUtils.clampString("Group for " + Dictionaries.tags.getName(interestId).replace("\"", "\\\"") + " in " + Dictionaries.places.getPlaceName(moderator.getCityId()), 256), moderator.getCityId(), language, Forum.ForumType.GROUP, isExplicitlyDeleted);

    // Set tags of this forum
    forum.setTags(interest);

    // Add members
    TreeSet<Long> groupMembers = new TreeSet<>();
    List<Knows> moderatorKnows = new ArrayList<>(moderator.getKnows());
    int numModeratorKnows = moderatorKnows.size();
    int groupSize = randomFarm.get(RandomGeneratorFarm.Aspect.NUM_USERS_PER_FORUM).nextInt(DatagenParams.maxGroupSize);
    int numLoop = 0;
    // NOTE(review): the two `continue` statements below skip the numLoop++ at the bottom of the
    // loop, so those rejected draws do not count towards the DatagenParams.blockSize cap.
    // Left unchanged because altering it would change the generated data; confirm it is intended.
    while ((forum.getMemberships().size() < groupSize) && (numLoop < DatagenParams.blockSize)) {
        // controls the proportion of members that are friends
        double prob = randomFarm.get(RandomGeneratorFarm.Aspect.KNOWS_LEVEL).nextDouble();
        if (prob < 0.3 && numModeratorKnows > 0) {
            // pick random knows edge from friends
            int knowsIndex = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(numModeratorKnows);
            Knows knows = moderatorKnows.get(knowsIndex);
            if (!groupMembers.contains(knows.to().getAccountId())) {
                // if friend not already member of group
                long minCreationDate = Math.max(forum.getCreationDate(), knows.to().getCreationDate()) + DatagenParams.delta;
                long maxCreationDate = Collections.min(Arrays.asList(forum.getDeletionDate(), knows.to().getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                if (maxCreationDate - minCreationDate > 0) {
                    Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
                    long hasMemberCreationDate = Dictionaries.dates.randomDate(random, minCreationDate, maxCreationDate);
                    long hasMemberDeletionDate;
                    boolean isHasMemberExplicitlyDeleted;
                    if (randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_MEMB).nextDouble() < DatagenParams.probMembDeleted) {
                        isHasMemberExplicitlyDeleted = true;
                        long minDeletionDate = hasMemberCreationDate + DatagenParams.delta;
                        long maxDeletionDate = Collections.min(Arrays.asList(knows.to().getDeletionDate(), forum.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                        if (maxDeletionDate - minDeletionDate < 0) {
                            // no valid deletion date range: give up on this candidate
                            continue;
                        }
                        hasMemberDeletionDate = Dictionaries.dates.randomDate(random, minDeletionDate, maxDeletionDate);
                    } else {
                        isHasMemberExplicitlyDeleted = false;
                        // the membership ends when either the member or the forum is deleted
                        hasMemberDeletionDate = Collections.min(Arrays.asList(knows.to().getDeletionDate(), forum.getDeletionDate()));
                    }
                    ForumMembership hasMember = new ForumMembership(forum.getId(), hasMemberCreationDate, hasMemberDeletionDate, knows.to(), Forum.ForumType.GROUP, isHasMemberExplicitlyDeleted);
                    forum.addMember(hasMember);
                    groupMembers.add(knows.to().getAccountId());
                }
            }
        } else {
            // pick from the person block
            int candidateIndex = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(block.size());
            Person member = block.get(candidateIndex);
            prob = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP).nextDouble();
            if ((prob < 0.1) && !groupMembers.contains(member.getAccountId())) {
                long minHasMemberCreationDate = Math.max(forum.getCreationDate(), member.getCreationDate()) + DatagenParams.delta;
                long maxHasMemberCreationDate = Collections.min(Arrays.asList(forum.getDeletionDate(), member.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                if (maxHasMemberCreationDate - minHasMemberCreationDate > 0) {
                    Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
                    long hasMemberCreationDate = Dictionaries.dates.randomDate(random, minHasMemberCreationDate, maxHasMemberCreationDate);
                    long hasMemberDeletionDate;
                    boolean isHasMemberExplicitlyDeleted;
                    if (randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_MEMB).nextDouble() < DatagenParams.probMembDeleted) {
                        isHasMemberExplicitlyDeleted = true;
                        long minHasMemberDeletionDate = hasMemberCreationDate + DatagenParams.delta;
                        long maxHasMemberDeletionDate = Collections.min(Arrays.asList(member.getDeletionDate(), forum.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                        // check the deletion range against the deletion upper bound, mirroring the friend branch
                        if (maxHasMemberDeletionDate - minHasMemberDeletionDate < 0) {
                            // no valid deletion date range: give up on this candidate
                            continue;
                        }
                        hasMemberDeletionDate = Dictionaries.dates.randomDate(random, minHasMemberDeletionDate, maxHasMemberDeletionDate);
                    } else {
                        isHasMemberExplicitlyDeleted = false;
                        // the membership ends when either the member or the forum is deleted
                        hasMemberDeletionDate = Collections.min(Arrays.asList(member.getDeletionDate(), forum.getDeletionDate()));
                    }
                    forum.addMember(new ForumMembership(forum.getId(), hasMemberCreationDate, hasMemberDeletionDate, new PersonSummary(member), Forum.ForumType.GROUP, isHasMemberExplicitlyDeleted));
                    groupMembers.add(member.getAccountId());
                }
            }
        }
        numLoop++;
    }
    return forum;
}
use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.
the class PersonGenerator method generatePerson.
/**
 * Builds one Person with randomly drawn attributes.
 * <p>
 * Values are drawn from the per-aspect generators of {@code randomFarm} and the
 * {@code Dictionaries} lookups. The order of the draws is significant, since several aspects
 * (DATE, EXTRA_INFO, RANDOM, LANGUAGE) are used more than once.
 *
 * @return the newly generated Person
 */
private Person generatePerson() {
    long creationDate = Dictionaries.dates.randomPersonCreationDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE));
    int countryId = Dictionaries.places.getCountryForPerson(randomFarm.get(RandomGeneratorFarm.Aspect.COUNTRY));

    Person person = new Person();
    person.setCreationDate(creationDate);

    // gender is stored as a byte: 1 when the draw is above 0.5, otherwise 0
    if (randomFarm.get(RandomGeneratorFarm.Aspect.GENDER).nextDouble() > 0.5) {
        person.setGender((byte) 1);
    } else {
        person.setGender((byte) 0);
    }
    person.setBirthday(Dictionaries.dates.getBirthDay(randomFarm.get(RandomGeneratorFarm.Aspect.BIRTH_DAY)));
    person.setBrowserId(Dictionaries.browsers.getRandomBrowserId(randomFarm.get(RandomGeneratorFarm.Aspect.BROWSER)));
    person.setCountryId(countryId);
    person.setCityId(Dictionaries.places.getRandomCity(randomFarm.get(RandomGeneratorFarm.Aspect.CITY), countryId));
    person.setIpAddress(Dictionaries.ips.getIP(randomFarm.get(RandomGeneratorFarm.Aspect.IP), countryId));
    person.setMessageDeleter(randomFarm.get(RandomGeneratorFarm.Aspect.RANDOM).nextDouble() > 0.5);

    // the friend count is capped at DatagenParams.maxNumFriends
    long friendCap = Math.min(degreeDistribution.nextDegree(), DatagenParams.maxNumFriends);
    person.setMaxNumKnows(friendCap);

    // deletion date: a random one when the person is explicitly deleted, else the network collapse date
    boolean explicitlyDeleted = personDeleteDistribution.isDeleted(randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_PERSON), friendCap);
    person.setExplicitlyDeleted(explicitlyDeleted);
    long deletionDate;
    if (!explicitlyDeleted) {
        deletionDate = Dictionaries.dates.getNetworkCollapse();
    } else {
        long latestDeletion = Dictionaries.dates.getSimulationEnd();
        deletionDate = Dictionaries.dates.randomPersonDeletionDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), creationDate, person.getMaxNumKnows(), latestDeletion);
    }
    person.setDeletionDate(deletionDate);
    assert (person.getCreationDate() + DatagenParams.delta <= person.getDeletionDate()) : "Person creation date is larger than person deletion date";

    person.setAccountId(composePersonId(nextId++, creationDate));

    // interests: one main tag chosen by country, plus a set of further tags derived from it
    person.setMainInterest(Dictionaries.tags.getaTagByCountry(randomFarm.get(RandomGeneratorFarm.Aspect.TAG_OTHER_COUNTRY), randomFarm.get(RandomGeneratorFarm.Aspect.TAG), person.getCountryId()));
    short tagCount = (short) randomTagPowerLaw.getValue(randomFarm.get(RandomGeneratorFarm.Aspect.NUM_TAG));
    person.setInterests(new ArrayList<>(Dictionaries.tagMatrix.getSetofTags(randomFarm.get(RandomGeneratorFarm.Aspect.TOPIC), randomFarm.get(RandomGeneratorFarm.Aspect.TAG_OTHER_COUNTRY), person.getMainInterest(), tagCount)));

    person.setUniversityLocationId(Dictionaries.universities.getRandomUniversity(randomFarm, person.getCountryId()));
    person.setRandomId(randomFarm.get(RandomGeneratorFarm.Aspect.RANDOM).nextInt(Integer.MAX_VALUE) % 100);
    person.setFirstName(Dictionaries.names.getRandomGivenName(randomFarm.get(RandomGeneratorFarm.Aspect.NAME), person.getCountryId(), person.getGender() == 1, DateUtils.getYear(person.getBirthday())));
    person.setLastName(Dictionaries.names.getRandomSurname(randomFarm.get(RandomGeneratorFarm.Aspect.SURNAME), person.getCountryId()));

    // e-mail addresses: <normalised first name><account id>@<random e-mail domain>
    int emailCount = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextInt(DatagenParams.maxEmails) + 1;
    // strip diacritics, turn spaces into dots and collapse runs of dots
    String localPart = Normalizer.normalize(person.getFirstName(), Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .replaceAll(" ", ".")
            .replaceAll("[.]+", ".");
    String addressPrefix = localPart + person.getAccountId() + "@";
    for (int n = 0; n < emailCount; n++) {
        person.getEmails().add(addressPrefix + Dictionaries.emails.getRandomEmail(randomFarm.get(RandomGeneratorFarm.Aspect.TOP_EMAIL), randomFarm.get(RandomGeneratorFarm.Aspect.EMAIL)));
    }

    // Set class year
    double missingDraw = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextDouble();
    boolean classYearMissing = (missingDraw < DatagenParams.missingRatio) || person.getUniversityLocationId() == -1;
    if (classYearMissing) {
        person.setClassYear(-1);
    } else {
        person.setClassYear(Dictionaries.dates.randomClassYear(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), person.getBirthday()));
    }

    // Set company and workFrom; the company count is drawn even when no company ends up being assigned
    int companyCount = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextInt(DatagenParams.maxCompanies) + 1;
    missingDraw = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextDouble();
    if (missingDraw >= DatagenParams.missingRatio) {
        for (int n = 0; n < companyCount; n++) {
            long workFrom = Dictionaries.dates.getWorkFromYear(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), person.getClassYear(), person.getBirthday());
            long company = Dictionaries.companies.getRandomCompany(randomFarm, person.getCountryId());
            person.getCompanies().put(company, workFrom);
        }
    }

    // languages of the person's country, plus an international language unless it is -1 or already listed
    List<Integer> spoken = Dictionaries.languages.getLanguages(randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE), person.getCountryId());
    int international = Dictionaries.languages.getInternationlLanguage(randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE));
    if (international != -1 && !spoken.contains(international)) {
        spoken.add(international);
    }
    person.getLanguages().addAll(spoken);

    // Set activity characteristics
    person.setIsLargePoster(isLargePoster(person));
    return person;
}
use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.
the class RandomKnowsGenerator method generateKnows.
/**
 * Creates knows edges between random pairs of persons in a block.
 * <p>
 * Every person contributes as many "stubs" (copies of their index in {@code persons}) as
 * {@code Knows.targetEdges} returns for them. Stubs are shuffled, then repeatedly drawn two at
 * a time; a pair that refers to two different persons is handed to {@code Knows.createKnow}.
 * A pair referring to the same person, or a single leftover stub, is dropped.
 *
 * @param persons          persons of the block
 * @param blockId          block id, used to seed {@code rand}
 * @param percentages      passed through to {@code Knows.targetEdges}
 * @param step_index       passed through to {@code Knows.targetEdges}
 * @param personSimilarity passed through to {@code Knows.createKnow}
 */
public void generateKnows(List<Person> persons, int blockId, List<Float> percentages, int step_index, Person.PersonSimilarity personSimilarity) {
    rand.setSeed(blockId);

    // one stub per edge each person should receive
    List<Integer> stubs = new ArrayList<>();
    for (int personIdx = 0; personIdx < persons.size(); personIdx++) {
        long wanted = Knows.targetEdges(persons.get(personIdx), percentages, step_index);
        for (int k = 0; k < wanted; ++k) {
            stubs.add(personIdx);
        }
    }
    Collections.shuffle(stubs, rand);

    while (!stubs.isEmpty()) {
        // remove(int) deletes by position and returns the stub that was stored there
        int firstPerson = stubs.remove(rand.nextInt(stubs.size()));
        if (stubs.isEmpty()) {
            // odd stub left without a partner
            break;
        }
        int secondPerson = stubs.remove(rand.nextInt(stubs.size()));
        if (firstPerson == secondPerson) {
            // no self-edges
            continue;
        }
        Person source = persons.get(firstPerson);
        Person target = persons.get(secondPerson);
        Knows.createKnow(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_KNOWS), source, target, personSimilarity, true);
    }
}
Aggregations