Search in sources :

Example 1 with HarvestedRecordFormatEnum

use of cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum in project RecordManager2 by moravianlibrary.

the class MetadataMarcRecord method getDetectedFormatList.

/**
 * Collects every format detected for this record.
 * <p>
 * Each format predicate is consulted in a fixed order and the matching formats are appended
 * to the result. Two conditions cut the evaluation short:
 * <ul>
 * <li>{@code isArticle773()} returns an immutable single-element list holding
 * {@code ARTICLES}, discarding anything collected so far;</li>
 * <li>{@code isBlindAudio()} (only consulted when an audio format was found) empties the
 * result and returns {@code BLIND_AUDIO} alone.</li>
 * </ul>
 * When nothing matched, the result contains {@code OTHER_OTHER} only.
 *
 * @return the detected formats, never empty
 */
@Override
public List<HarvestedRecordFormatEnum> getDetectedFormatList() {
    final List<HarvestedRecordFormatEnum> formats = new ArrayList<>();
    if (isBook()) {
        formats.add(HarvestedRecordFormatEnum.BOOKS);
    }
    if (isPeriodical()) {
        formats.add(HarvestedRecordFormatEnum.PERIODICALS);
    }
    if (isArticle()) {
        formats.add(HarvestedRecordFormatEnum.ARTICLES);
    }
    if (isArticle773()) {
        // this check wins over everything gathered above: the record is reported as an article only
        return Collections.singletonList(HarvestedRecordFormatEnum.ARTICLES);
    }
    if (isMap()) {
        formats.add(HarvestedRecordFormatEnum.MAPS);
    }
    if (isMusicalScores()) {
        formats.add(HarvestedRecordFormatEnum.MUSICAL_SCORES);
    }
    if (isVisualDocument()) {
        formats.add(HarvestedRecordFormatEnum.VISUAL_DOCUMENTS);
    }
    if (isMicroform()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_MICROFORMS);
    }
    if (isBlindBraille()) {
        formats.add(HarvestedRecordFormatEnum.BLIND_BRAILLE);
    }

    final HarvestedRecordFormatEnum audioFormat = getAudioFormat();
    if (audioFormat != null) {
        // an audio DVD replaces whatever concrete audio format was detected
        formats.add(isAudioDVD() ? HarvestedRecordFormatEnum.AUDIO_DVD : audioFormat);
        if (isBlindAudio()) {
            // blind audio is exclusive: drop all formats collected so far
            formats.clear();
            formats.add(HarvestedRecordFormatEnum.BLIND_AUDIO);
            return formats;
        }
    }

    final HarvestedRecordFormatEnum videoFormat = getVideoDocument();
    if (videoFormat != null) {
        if (!isVideoDVD()) {
            formats.add(videoFormat);
        } else if (!formats.contains(HarvestedRecordFormatEnum.VIDEO_DVD)) {
            // avoid listing VIDEO_DVD twice
            formats.add(HarvestedRecordFormatEnum.VIDEO_DVD);
        }
    }

    if (isComputerCarrier()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_COMPUTER_CARRIER);
    }
    if (isOthers()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_OTHER);
    }
    if (formats.isEmpty()) {
        // fallback so that callers always receive at least one format
        formats.add(HarvestedRecordFormatEnum.OTHER_OTHER);
    }
    return formats;
}
Also used : ArrayList(java.util.ArrayList) HarvestedRecordFormatEnum(cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum)

Example 2 with HarvestedRecordFormatEnum

use of cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum in project RecordManager2 by moravianlibrary.

the class HashingDedupKeyParser method parse.

/**
 * Refreshes the deduplication keys held by {@code record} from {@code metadataRecord}.
 * <p>
 * The filter result and the UPV application id are always copied. If the record's source has
 * dedup-key generation switched off, the record is returned right after that. Otherwise all
 * candidate keys are gathered into a {@code DedupKeysencapsulator}, hashed, and the hash is
 * compared with the hashes already stored on the record. Only when the hash is new are the
 * old keys dropped and the fresh ones written. Finally the next-dedup flag is adjusted.
 *
 * @param record         harvested record to update in place
 * @param metadataRecord source of the key values
 * @return the same {@code record} instance that was passed in
 * @throws DedupKeyParserException declared by the signature; no throw site is visible in this method
 */
@Override
public HarvestedRecord parse(HarvestedRecord record, MetadataRecord metadataRecord) throws DedupKeyParserException {
    record.setShouldBeProcessed(metadataRecord.matchFilter());
    // stored on the record, but not one of the dedup keys
    record.setUpvApplicationId(metadataRecord.getUpvApplicationId());
    if (!record.getHarvestedFrom().isGenerateDedupKeys()) {
        return record;
    }

    DedupKeysencapsulator keys = new DedupKeysencapsulator();

    // titles: normalize in place, skip empty ones, keep the first occurrence of each
    List<Title> uniqueTitles = new ArrayList<>();
    for (Title candidate : metadataRecord.getTitle()) {
        candidate.setTitleStr(MetadataUtils.normalizeAndShorten(candidate.getTitleStr(), EFFECTIVE_TITLE_LENGTH));
        if (!candidate.getTitleStr().isEmpty() && !uniqueTitles.contains(candidate)) {
            uniqueTitles.add(candidate);
        }
    }
    keys.setTitles(uniqueTitles);

    // short titles: same treatment as titles
    List<ShortTitle> uniqueShortTitles = new ArrayList<>();
    for (ShortTitle candidate : metadataRecord.getShortTitles()) {
        candidate.setShortTitleStr(MetadataUtils.normalizeAndShorten(candidate.getShortTitleStr(), EFFECTIVE_TITLE_LENGTH));
        if (!candidate.getShortTitleStr().isEmpty() && !uniqueShortTitles.contains(candidate)) {
            uniqueShortTitles.add(candidate);
        }
    }
    keys.setShortTitles(uniqueShortTitles);

    keys.setIsbns(metadataRecord.getISBNs());
    keys.setIssns(metadataRecord.getISSNs());
    keys.setIsmns(metadataRecord.getISMNs());
    keys.setCnbs(metadataRecord.getCNBs());
    keys.setPublicationYear(metadataRecord.getPublicationYear());
    List<HarvestedRecordFormatEnum> detectedFormats = metadataRecord.getDetectedFormatList();
    keys.setFormats(harvestedRecordFormatDAO.getFormatsFromEnums(detectedFormats));
    keys.setAuthorAuthKey(MetadataUtils.shorten(metadataRecord.getAuthorAuthKey(), EFFECTIVE_AUTHOR_AUTH_KEY_LENGTH));
    keys.setAuthorString(MetadataUtils.normalizeAndShorten(metadataRecord.getAuthorString(), EFFECTIVE_AUTHOR_LENGTH));
    keys.setScale(metadataRecord.getScale());
    keys.setUuid(metadataRecord.getUUId());
    keys.setPages(metadataRecord.getPageCount());
    keys.setIssnSeries(MetadataUtils.normalize(metadataRecord.getISSNSeries()));
    keys.setIssnSeriesOrder(MetadataUtils.normalize(metadataRecord.getISSNSeriesOrder()));
    keys.setOclcs(metadataRecord.getOclcs());
    keys.setClusterId(metadataRecord.getClusterId());
    keys.setRaw001Id(metadataRecord.getRaw001Id());
    keys.setSourceInfoT(MetadataUtils.normalizeAndShorten(metadataRecord.getSourceInfoT(), EFFECTIVE_SOURCE_INFO_LENGTH));
    keys.setSourceInfoG(MetadataUtils.normalizeAndShorten(metadataRecord.getSourceInfoG(), EFFECTIVE_SOURCE_INFO_LENGTH));
    keys.setSourceInfoX(MetadataUtils.normalizeAndShorten(metadataRecord.getSourceInfoX(), EFFECTIVE_LENGTH_30));
    keys.setEans(metadataRecord.getEANs());
    keys.setPublisherNumbers(metadataRecord.getPublisherNumber());
    keys.setLanguages(new HashSet<>(metadataRecord.getLanguages()));

    String computedHash = computeHashValue(keys);
    String storedHash = record.getDedupKeysHash();
    // a record without a temporal hash is compared against an all-zero placeholder
    String temporalHash = record.getTemporalDedupHash();
    if (temporalHash == null) {
        temporalHash = "0000000000000000000000000000000000000000";
    }

    // the keys count as unchanged when the new hash equals the temporal hash
    // or a non-empty stored hash
    boolean hashAlreadyKnown = temporalHash.equals(computedHash)
            || (storedHash != null && !storedHash.isEmpty() && computedHash.equals(storedHash));
    boolean keysChanged = !hashAlreadyKnown;
    if (keysChanged) {
        // discard the keys currently held for the record
        harvestedRecordDao.dropDedupKeys(record);
        if (record.getHarvestedFrom() != null) {
            record.setWeight(metadataRecord.getWeight(record.getHarvestedFrom().getBaseWeight()));
        }
        // write the freshly gathered keys onto the record
        record.setTitles(keys.getTitles());
        record.setIsbns(keys.getIsbns());
        record.setIssns(keys.getIssns());
        record.setIsmns(keys.getIsmns());
        record.setCnb(keys.getCnbs());
        record.setPublicationYear(keys.getPublicationYear());
        record.setPhysicalFormats(harvestedRecordFormatDAO.getFormatsFromEnums(detectedFormats));
        record.setAuthorAuthKey(keys.getAuthorAuthKey());
        record.setAuthorString(keys.getAuthorString());
        record.setScale(keys.getScale());
        record.setUuid(keys.getUuid());
        record.setPages(keys.getPages());
        record.setIssnSeries(keys.getIssnSeries());
        record.setIssnSeriesOrder(keys.getIssnSeriesOrder());
        record.setOclcs(keys.getOclcs());
        record.setLanguages(metadataRecord.getLanguages());
        record.setClusterId(keys.getClusterId());
        record.setRaw001Id(keys.getRaw001Id());
        record.setSourceInfoG(keys.getSourceInfoG());
        record.setSourceInfoX(keys.getSourceInfoX());
        record.setSourceInfoT(keys.getSourceInfoT());
        record.setEans(keys.getEans());
        record.setShortTitles(keys.getShortTitles());
        record.setPublisherNumbers(metadataRecord.getPublisherNumber());
        record.setTemporalDedupHash(computedHash);
    }
    record.setDedupKeysHash(computedHash);

    // the timestamp counts as changed only when both values are present and differ
    boolean oaiTimestampChanged = record.getOaiTimestamp() != null
            && record.getTemporalOldOaiTimestamp() != null
            && !record.getOaiTimestamp().equals(record.getTemporalOldOaiTimestamp());

    // decide whether the record should be deduplicated
    if (keysChanged) {
        // new record or change in keys
        record.setNextDedupFlag(true);
    } else if (oaiTimestampChanged) {
        // keys are the same but the OAI timestamp differs: clear the flag
        record.setNextDedupFlag(false);
    }
    // otherwise (same keys, timestamp not detected as changed) the previous flag is kept;
    // NOTE(review): presumably this covers repeated harvesting before deduplication ran - confirm
    return record;
}
Also used : ShortTitle(cz.mzk.recordmanager.server.model.ShortTitle) ArrayList(java.util.ArrayList) Title(cz.mzk.recordmanager.server.model.Title) ShortTitle(cz.mzk.recordmanager.server.model.ShortTitle) HarvestedRecordFormatEnum(cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum)

Example 3 with HarvestedRecordFormatEnum

use of cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum in project RecordManager2 by moravianlibrary.

the class MarcRecordImplTest method getDetectedFormatListTest.

/**
 * Checks format detection for a series of minimal MARC records, one scenario per call.
 * Every scenario expects exactly one detected format.
 */
@Test
public void getDetectedFormatListTest() throws Exception {
    // Books
    assertDetectedFormat(HarvestedRecordFormatEnum.BOOKS, "000 00000000", "006 a");
    // Periodicals
    assertDetectedFormat(HarvestedRecordFormatEnum.PERIODICALS, "000 0000000i");
    // Articles
    assertDetectedFormat(HarvestedRecordFormatEnum.ARTICLES, "000 0000000a");
    // Maps
    assertDetectedFormat(HarvestedRecordFormatEnum.MAPS, "000 000000000", "245 $hkartografický dokument");
    // Musical scores
    assertDetectedFormat(HarvestedRecordFormatEnum.MUSICAL_SCORES, "000 00000000", "336 $bntv");
    // Visual documents
    assertDetectedFormat(HarvestedRecordFormatEnum.VISUAL_DOCUMENTS, "000 00000000", "338 $bgasd");
    // Microforms
    assertDetectedFormat(HarvestedRecordFormatEnum.OTHER_MICROFORMS, "000 0000000", "337 $bh");
    // Braille
    assertDetectedFormat(HarvestedRecordFormatEnum.BLIND_BRAILLE, "000 00000000", "007 fb", "245 $hhmatové písmo");
    // Audio
    assertDetectedFormat(HarvestedRecordFormatEnum.AUDIO_CASSETTE, "000 00000000", "300 $fanaloaSg$amagnetofonová kazeta");
    // Video
    assertDetectedFormat(HarvestedRecordFormatEnum.VIDEO_DVD, "000 000000000", "007 vlllv");
    // Kit
    assertDetectedFormat(HarvestedRecordFormatEnum.OTHER_OTHER, "000 00000000", "006 o", "007 o");
    // Object
    assertDetectedFormat(HarvestedRecordFormatEnum.OTHER_OTHER, "000 00000000", "008 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzd");
    // Mixed document
    assertDetectedFormat(HarvestedRecordFormatEnum.OTHER_OTHER, "000 000000p");
    // Unspecified
    assertDetectedFormat(HarvestedRecordFormatEnum.OTHER_OTHER, "000 00000000", "337 $bx");
    // Nothing
    assertDetectedFormat(HarvestedRecordFormatEnum.OTHER_OTHER, "000 00000000");
}

/**
 * Builds a MARC record from the given field lines and asserts that the detected format list,
 * compared through its string form, consists of {@code expected} alone.
 *
 * @param expected  the single format the record must be detected as
 * @param marcLines field lines handed to {@code MarcRecordFactory.recordFactory}
 */
private void assertDetectedFormat(HarvestedRecordFormatEnum expected, String... marcLines) throws Exception {
    List<String> data = new ArrayList<>();
    for (String line : marcLines) {
        data.add(line);
    }
    List<HarvestedRecordFormatEnum> expectedFormats = new ArrayList<>();
    expectedFormats.add(expected);

    MarcRecordImpl mri = MarcRecordFactory.recordFactory(data);
    MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(mri);
    Assert.assertEquals(metadataRecord.getDetectedFormatList().toString(), expectedFormats.toString());
}
Also used : ArrayList(java.util.ArrayList) HarvestedRecordFormatEnum(cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Aggregations

HarvestedRecordFormatEnum (cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum)3 ArrayList (java.util.ArrayList)3 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)1 MetadataRecord (cz.mzk.recordmanager.server.metadata.MetadataRecord)1 ShortTitle (cz.mzk.recordmanager.server.model.ShortTitle)1 Title (cz.mzk.recordmanager.server.model.Title)1 Test (org.testng.annotations.Test)1