Search in sources :

Example 1 with Person

use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.

the class FriendshipMerger method apply.

/**
 * Collapses the given Person instances into a single Person.
 *
 * <p>The result is a copy of the first element of {@code valueSet} whose knows collection is
 * replaced by the union of the knows edges of every element, sorted with
 * {@link Knows.FullComparator} and with consecutive edges that point to the same target
 * account id reduced to the first one. Every dropped edge increments {@code numRepeated},
 * a counter declared outside this method.
 *
 * @param valueSet the persons to merge; must yield at least one element, otherwise no result
 *                 is created and the method fails with a NullPointerException
 * @return the copy of the first person carrying the de-duplicated knows edges
 */
public Person apply(Iterable<Person> valueSet) {
    Person merged = null;
    List<Knows> allEdges = new ArrayList<>();
    for (Person candidate : valueSet) {
        if (merged == null) {
            // Only the first element is copied; later elements contribute edges only.
            merged = new Person(candidate);
        }
        allEdges.addAll(candidate.getKnows());
    }
    // Discard the edges inherited from the copy; they are re-added below after de-duplication.
    merged.getKnows().clear();
    allEdges.sort(new Knows.FullComparator());
    // Duplicates are detected by comparing each edge with its predecessor only, so this relies
    // on the sort placing edges with the same target next to each other
    // (presumably FullComparator orders by target account id first - TODO confirm).
    boolean isFirstEdge = true;
    long previousTarget = 0L;
    for (Knows edge : allEdges) {
        long target = edge.to().getAccountId();
        if (isFirstEdge || target != previousTarget) {
            merged.getKnows().add(edge);
            previousTarget = target;
            isFirstEdge = false;
        } else {
            numRepeated++;
        }
    }
    return merged;
}
Also used : ArrayList(java.util.ArrayList) Knows(ldbc.snb.datagen.entities.dynamic.relations.Knows) Person(ldbc.snb.datagen.entities.dynamic.person.Person)

Example 2 with Person

use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.

the class ForumGenerator method createGroup.

/**
 * Creates a Group with the Person as the moderator and fills it with members.
 *
 * <p>On every membership attempt there is a 30% chance (provided the moderator has at least one
 * knows edge) that the candidate is drawn from the moderator's friends; otherwise the candidate
 * is drawn from the person block and accepted with a further 10% probability. Attempts stop
 * when the randomly chosen group size is reached or when {@code DatagenParams.blockSize}
 * counted attempts have been made.
 *
 * @param randomFarm random number generator
 * @param forumId    forumID
 * @param moderator  moderator
 * @param block      person block
 * @param blockId    block identifier, combined with the forum id and the creation date to form the forum's id
 * @return Group
 */
Forum createGroup(RandomGeneratorFarm randomFarm, long forumId, Person moderator, List<Person> block, long blockId) {
    // creation date
    long groupMinCreationDate = moderator.getCreationDate() + DatagenParams.delta;
    long groupMaxCreationDate = Math.min(moderator.getDeletionDate(), Dictionaries.dates.getSimulationEnd());
    long groupCreationDate = Dictionaries.dates.randomDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), groupMinCreationDate, groupMaxCreationDate);
    // deletion date
    long groupDeletionDate;
    boolean isExplicitlyDeleted;
    if (randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_FORUM).nextDouble() < DatagenParams.probForumDeleted) {
        isExplicitlyDeleted = true;
        long groupMinDeletionDate = groupCreationDate + DatagenParams.delta;
        long groupMaxDeletionDate = Dictionaries.dates.getSimulationEnd();
        groupDeletionDate = Dictionaries.dates.randomDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), groupMinDeletionDate, groupMaxDeletionDate);
    } else {
        isExplicitlyDeleted = false;
        groupDeletionDate = Dictionaries.dates.getNetworkCollapse();
    }
    // the hasModerator edge is deleted if either the Forum (group) or the Person (moderator) is deleted
    long moderatorDeletionDate = Math.min(groupDeletionDate, moderator.getDeletionDate());
    int language = moderator.getLanguages().get(randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE).nextInt(moderator.getLanguages().size()));
    // pick one of the moderator's interests at random by advancing an iterator to a random position
    Iterator<Integer> iter = moderator.getInterests().iterator();
    int idx = randomFarm.get(RandomGeneratorFarm.Aspect.FORUM_INTEREST).nextInt(moderator.getInterests().size());
    for (int i = 0; i < idx; i++) {
        iter.next();
    }
    int interestId = iter.next();
    List<Integer> interest = new ArrayList<>();
    interest.add(interestId);
    // Create group
    Forum forum = new Forum(SN.formId(SN.composeId(forumId, groupCreationDate), blockId), groupCreationDate, groupDeletionDate, new PersonSummary(moderator), moderatorDeletionDate, StringUtils.clampString("Group for " + Dictionaries.tags.getName(interestId).replace("\"", "\\\"") + " in " + Dictionaries.places.getPlaceName(moderator.getCityId()), 256), moderator.getCityId(), language, Forum.ForumType.GROUP, isExplicitlyDeleted);
    // Set tags of this forum
    forum.setTags(interest);
    // Add members
    // account ids of the persons already added, used to avoid duplicate memberships
    TreeSet<Long> groupMembers = new TreeSet<>();
    List<Knows> moderatorKnows = new ArrayList<>(moderator.getKnows());
    int numModeratorKnows = moderatorKnows.size();
    int groupSize = randomFarm.get(RandomGeneratorFarm.Aspect.NUM_USERS_PER_FORUM).nextInt(DatagenParams.maxGroupSize);
    int numLoop = 0;
    while ((forum.getMemberships().size() < groupSize) && (numLoop < DatagenParams.blockSize)) {
        // controls the proportion of members that are friends
        double prob = randomFarm.get(RandomGeneratorFarm.Aspect.KNOWS_LEVEL).nextDouble();
        if (prob < 0.3 && numModeratorKnows > 0) {
            // pick random knows edge from friends
            int knowsIndex = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(numModeratorKnows);
            Knows knows = moderatorKnows.get(knowsIndex);
            if (!groupMembers.contains(knows.to().getAccountId())) {
                // if friend not already member of group
                long minCreationDate = Math.max(forum.getCreationDate(), knows.to().getCreationDate()) + DatagenParams.delta;
                long maxCreationDate = Collections.min(Arrays.asList(forum.getDeletionDate(), knows.to().getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                if (maxCreationDate - minCreationDate > 0) {
                    Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
                    long hasMemberCreationDate = Dictionaries.dates.randomDate(random, minCreationDate, maxCreationDate);
                    long hasMemberDeletionDate;
                    boolean isHasMemberExplicitlyDeleted;
                    if (randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_MEMB).nextDouble() < DatagenParams.probMembDeleted) {
                        isHasMemberExplicitlyDeleted = true;
                        long minDeletionDate = hasMemberCreationDate + DatagenParams.delta;
                        long maxDeletionDate = Collections.min(Arrays.asList(knows.to().getDeletionDate(), forum.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                        if (maxDeletionDate - minDeletionDate < 0) {
                            // NOTE(review): this continue skips the numLoop++ at the bottom of the loop, so the
                            // attempt is not counted against DatagenParams.blockSize - confirm this is intended.
                            continue;
                        }
                        hasMemberDeletionDate = Dictionaries.dates.randomDate(random, minDeletionDate, maxDeletionDate);
                    } else {
                        isHasMemberExplicitlyDeleted = false;
                        hasMemberDeletionDate = Collections.min(Arrays.asList(knows.to().getDeletionDate(), forum.getDeletionDate()));
                    }
                    ForumMembership hasMember = new ForumMembership(forum.getId(), hasMemberCreationDate, hasMemberDeletionDate, knows.to(), Forum.ForumType.GROUP, isHasMemberExplicitlyDeleted);
                    forum.addMember(hasMember);
                    groupMembers.add(knows.to().getAccountId());
                }
            }
        } else {
            // pick from the person block
            int candidateIndex = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(block.size());
            Person member = block.get(candidateIndex);
            prob = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP).nextDouble();
            if ((prob < 0.1) && !groupMembers.contains(member.getAccountId())) {
                long minHasMemberCreationDate = Math.max(forum.getCreationDate(), member.getCreationDate()) + DatagenParams.delta;
                long maxHasMemberCreationDate = Collections.min(Arrays.asList(forum.getDeletionDate(), member.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                if (maxHasMemberCreationDate - minHasMemberCreationDate > 0) {
                    Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
                    long hasMemberCreationDate = Dictionaries.dates.randomDate(random, minHasMemberCreationDate, maxHasMemberCreationDate);
                    long hasMemberDeletionDate;
                    boolean isHasMemberExplicitlyDeleted;
                    if (randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_MEMB).nextDouble() < DatagenParams.probMembDeleted) {
                        isHasMemberExplicitlyDeleted = true;
                        long minHasMemberDeletionDate = hasMemberCreationDate + DatagenParams.delta;
                        long maxHasMemberDeletionDate = Collections.min(Arrays.asList(member.getDeletionDate(), forum.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
                        // Validate the deletion window against its own upper bound, mirroring the friend branch.
                        // (This previously compared against maxHasMemberCreationDate; both bounds are the minimum
                        // of the same three values, so the outcome is unchanged.)
                        if (maxHasMemberDeletionDate - minHasMemberDeletionDate < 0) {
                            // NOTE(review): this continue skips the numLoop++ at the bottom of the loop, so the
                            // attempt is not counted against DatagenParams.blockSize - confirm this is intended.
                            continue;
                        }
                        hasMemberDeletionDate = Dictionaries.dates.randomDate(random, minHasMemberDeletionDate, maxHasMemberDeletionDate);
                    } else {
                        isHasMemberExplicitlyDeleted = false;
                        hasMemberDeletionDate = Collections.min(Arrays.asList(member.getDeletionDate(), forum.getDeletionDate()));
                    }
                    forum.addMember(new ForumMembership(forum.getId(), hasMemberCreationDate, hasMemberDeletionDate, new PersonSummary(member), Forum.ForumType.GROUP, isHasMemberExplicitlyDeleted));
                    groupMembers.add(member.getAccountId());
                }
            }
        }
        numLoop++;
    }
    return forum;
}
Also used : Forum(ldbc.snb.datagen.entities.dynamic.Forum) ForumMembership(ldbc.snb.datagen.entities.dynamic.relations.ForumMembership) PersonSummary(ldbc.snb.datagen.entities.dynamic.person.PersonSummary) Knows(ldbc.snb.datagen.entities.dynamic.relations.Knows) Person(ldbc.snb.datagen.entities.dynamic.person.Person)

Example 3 with Person

use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.

the class PersonGenerator method generatePerson.

/**
 * Generates a new Person and populates its attributes from the dictionaries and the
 * per-aspect random generators of {@code randomFarm}.
 *
 * <p>The statements are kept in a fixed order because several of them draw from the same
 * random aspect (DATE, RANDOM, EXTRA_INFO, LANGUAGE, TAG_OTHER_COUNTRY); reordering them would
 * change which value each attribute receives.
 *
 * @return the populated person; its account id is composed from the next sequential id and
 *         the creation date
 */
private Person generatePerson() {
    long creationDate = Dictionaries.dates.randomPersonCreationDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE));
    int countryId = Dictionaries.places.getCountryForPerson(randomFarm.get(RandomGeneratorFarm.Aspect.COUNTRY));

    Person person = new Person();
    person.setCreationDate(creationDate);

    // Gender is encoded as a byte: 1 when the draw exceeds 0.5, otherwise 0.
    byte gender;
    if (randomFarm.get(RandomGeneratorFarm.Aspect.GENDER).nextDouble() > 0.5) {
        gender = (byte) 1;
    } else {
        gender = (byte) 0;
    }
    person.setGender(gender);
    person.setBirthday(Dictionaries.dates.getBirthDay(randomFarm.get(RandomGeneratorFarm.Aspect.BIRTH_DAY)));
    person.setBrowserId(Dictionaries.browsers.getRandomBrowserId(randomFarm.get(RandomGeneratorFarm.Aspect.BROWSER)));
    person.setCountryId(countryId);
    person.setCityId(Dictionaries.places.getRandomCity(randomFarm.get(RandomGeneratorFarm.Aspect.CITY), countryId));
    person.setIpAddress(Dictionaries.ips.getIP(randomFarm.get(RandomGeneratorFarm.Aspect.IP), countryId));
    person.setMessageDeleter(randomFarm.get(RandomGeneratorFarm.Aspect.RANDOM).nextDouble() > 0.5);

    // The drawn degree is capped at the configured maximum number of friends.
    long friendCap = Math.min(degreeDistribution.nextDegree(), DatagenParams.maxNumFriends);
    person.setMaxNumKnows(friendCap);

    // Deletion: either an explicit random deletion date or the network-collapse date.
    boolean markedForDeletion = personDeleteDistribution.isDeleted(randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_PERSON), friendCap);
    long deletionDate;
    if (!markedForDeletion) {
        person.setExplicitlyDeleted(false);
        deletionDate = Dictionaries.dates.getNetworkCollapse();
    } else {
        person.setExplicitlyDeleted(true);
        long latestDeletion = Dictionaries.dates.getSimulationEnd();
        deletionDate = Dictionaries.dates.randomPersonDeletionDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), creationDate, person.getMaxNumKnows(), latestDeletion);
    }
    person.setDeletionDate(deletionDate);
    assert (person.getCreationDate() + DatagenParams.delta <= person.getDeletionDate()) : "Person creation date is larger than person deletion date";

    person.setAccountId(composePersonId(nextId++, creationDate));

    // Interests: one main tag chosen by country, plus a set of tags derived from it.
    person.setMainInterest(Dictionaries.tags.getaTagByCountry(randomFarm.get(RandomGeneratorFarm.Aspect.TAG_OTHER_COUNTRY), randomFarm.get(RandomGeneratorFarm.Aspect.TAG), person.getCountryId()));
    short tagCount = (short) randomTagPowerLaw.getValue(randomFarm.get(RandomGeneratorFarm.Aspect.NUM_TAG));
    person.setInterests(new ArrayList<>(Dictionaries.tagMatrix.getSetofTags(randomFarm.get(RandomGeneratorFarm.Aspect.TOPIC), randomFarm.get(RandomGeneratorFarm.Aspect.TAG_OTHER_COUNTRY), person.getMainInterest(), tagCount)));

    person.setUniversityLocationId(Dictionaries.universities.getRandomUniversity(randomFarm, person.getCountryId()));
    person.setRandomId(randomFarm.get(RandomGeneratorFarm.Aspect.RANDOM).nextInt(Integer.MAX_VALUE) % 100);
    person.setFirstName(Dictionaries.names.getRandomGivenName(randomFarm.get(RandomGeneratorFarm.Aspect.NAME), person.getCountryId(), person.getGender() == 1, DateUtils.getYear(person.getBirthday())));
    person.setLastName(Dictionaries.names.getRandomSurname(randomFarm.get(RandomGeneratorFarm.Aspect.SURNAME), person.getCountryId()));

    // Emails: between 1 and DatagenParams.maxEmails addresses of the form <prefix><accountId>@<domain>.
    int emailCount = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextInt(DatagenParams.maxEmails) + 1;
    // The prefix is the first name with combining diacritical marks stripped (after NFD
    // decomposition), spaces turned into dots and runs of dots collapsed into one.
    String emailPrefix = Normalizer.normalize(person.getFirstName(), Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .replaceAll(" ", ".")
            .replaceAll("[.]+", ".");
    for (int emailsLeft = emailCount; emailsLeft > 0; emailsLeft--) {
        String domain = Dictionaries.emails.getRandomEmail(randomFarm.get(RandomGeneratorFarm.Aspect.TOP_EMAIL), randomFarm.get(RandomGeneratorFarm.Aspect.EMAIL));
        person.getEmails().add(emailPrefix + "" + person.getAccountId() + "@" + domain);
    }

    // Set class year: -1 when the draw falls under the missing ratio or the university id is -1.
    double classYearDraw = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextDouble();
    boolean classYearMissing = (classYearDraw < DatagenParams.missingRatio) || person.getUniversityLocationId() == -1;
    if (classYearMissing) {
        person.setClassYear(-1);
    } else {
        person.setClassYear(Dictionaries.dates.randomClassYear(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), person.getBirthday()));
    }

    // Set company and workFrom. The company count is drawn even when no company ends up being
    // assigned, so the EXTRA_INFO draws stay in the same sequence.
    int companyCount = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextInt(DatagenParams.maxCompanies) + 1;
    double companyDraw = randomFarm.get(RandomGeneratorFarm.Aspect.EXTRA_INFO).nextDouble();
    if (companyDraw >= DatagenParams.missingRatio) {
        for (int companiesLeft = companyCount; companiesLeft > 0; companiesLeft--) {
            long workFrom = Dictionaries.dates.getWorkFromYear(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), person.getClassYear(), person.getBirthday());
            long company = Dictionaries.companies.getRandomCompany(randomFarm, person.getCountryId());
            person.getCompanies().put(company, workFrom);
        }
    }

    // Languages: those of the country, plus one international language unless it is -1 or
    // already in the list.
    List<Integer> spokenLanguages = Dictionaries.languages.getLanguages(randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE), person.getCountryId());
    int internationalLang = Dictionaries.languages.getInternationlLanguage(randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE));
    if (internationalLang != -1) {
        if (!spokenLanguages.contains(internationalLang)) {
            spokenLanguages.add(internationalLang);
        }
    }
    person.getLanguages().addAll(spokenLanguages);

    // Set activity characteristics
    person.setIsLargePoster(isLargePoster(person));
    return person;
}
Also used : Person(ldbc.snb.datagen.entities.dynamic.person.Person)

Example 4 with Person

use of ldbc.snb.datagen.entities.dynamic.person.Person in project ldbc_snb_datagen_spark by ldbc.

the class RandomKnowsGenerator method generateKnows.

/**
 * Creates knows edges between the given persons by randomly pairing "stubs".
 *
 * <p>Each person contributes as many stubs (copies of its position in {@code persons}) as
 * {@code Knows.targetEdges} returns for it. The stubs are shuffled and then removed two at a
 * time at random positions; a pair that refers to two different persons is handed to
 * {@code Knows.createKnow}, a pair that refers to the same person is discarded. If a single
 * stub remains at the end it is dropped.
 *
 * @param persons          the persons to connect
 * @param blockId          used as the seed of this generator's random source, so the pairing
 *                         is reproducible per block
 * @param percentages      passed through to {@code Knows.targetEdges}
 * @param step_index       passed through to {@code Knows.targetEdges}
 * @param personSimilarity passed through to {@code Knows.createKnow}
 */
public void generateKnows(List<Person> persons, int blockId, List<Float> percentages, int step_index, Person.PersonSimilarity personSimilarity) {
    rand.setSeed(blockId);

    // One stub per edge each person is supposed to receive.
    List<Integer> stubs = new ArrayList<>();
    int position = 0;
    for (Person current : persons) {
        long wantedEdges = Knows.targetEdges(current, percentages, step_index);
        for (int added = 0; added < wantedEdges; added++) {
            stubs.add(position);
        }
        position++;
    }
    Collections.shuffle(stubs, rand);

    while (!stubs.isEmpty()) {
        // remove(int) removes by position and returns the stub that was stored there.
        int firstPerson = stubs.remove(rand.nextInt(stubs.size()));
        if (stubs.isEmpty()) {
            // An odd stub is left without a partner.
            break;
        }
        int secondPerson = stubs.remove(rand.nextInt(stubs.size()));
        if (firstPerson == secondPerson) {
            // Both stubs belong to the same person: no self edge is created.
            continue;
        }
        Person source = persons.get(firstPerson);
        Person target = persons.get(secondPerson);
        Knows.createKnow(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_KNOWS), source, target, personSimilarity, true);
    }
}
Also used : ArrayList(java.util.ArrayList) Person(ldbc.snb.datagen.entities.dynamic.person.Person)

Aggregations

Person (ldbc.snb.datagen.entities.dynamic.person.Person): 4, ArrayList (java.util.ArrayList): 2, Knows (ldbc.snb.datagen.entities.dynamic.relations.Knows): 2, Forum (ldbc.snb.datagen.entities.dynamic.Forum): 1, PersonSummary (ldbc.snb.datagen.entities.dynamic.person.PersonSummary): 1, ForumMembership (ldbc.snb.datagen.entities.dynamic.relations.ForumMembership): 1