use of cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum in project RecordManager2 by moravianlibrary.
the class MetadataMarcRecord method getDetectedFormatList.
/**
 * Collects every format this record is detected as.
 *
 * <p>Two detections short-circuit the collection: a positive {@code isArticle773()}
 * returns a single-element list containing only {@code ARTICLES}, and a positive
 * {@code isBlindAudio()} (checked only when an audio format was found) discards
 * everything gathered so far and returns only {@code BLIND_AUDIO}.
 *
 * @return the detected formats; never empty, {@code OTHER_OTHER} is used when
 *         nothing else matched
 */
@Override
public List<HarvestedRecordFormatEnum> getDetectedFormatList() {
    final List<HarvestedRecordFormatEnum> formats = new ArrayList<>();

    if (isBook()) {
        formats.add(HarvestedRecordFormatEnum.BOOKS);
    }
    if (isPeriodical()) {
        formats.add(HarvestedRecordFormatEnum.PERIODICALS);
    }
    if (isArticle()) {
        formats.add(HarvestedRecordFormatEnum.ARTICLES);
    }
    // This article detection overrides everything collected so far.
    if (isArticle773()) {
        return Collections.singletonList(HarvestedRecordFormatEnum.ARTICLES);
    }
    if (isMap()) {
        formats.add(HarvestedRecordFormatEnum.MAPS);
    }
    if (isMusicalScores()) {
        formats.add(HarvestedRecordFormatEnum.MUSICAL_SCORES);
    }
    if (isVisualDocument()) {
        formats.add(HarvestedRecordFormatEnum.VISUAL_DOCUMENTS);
    }
    if (isMicroform()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_MICROFORMS);
    }
    if (isBlindBraille()) {
        formats.add(HarvestedRecordFormatEnum.BLIND_BRAILLE);
    }

    final HarvestedRecordFormatEnum audioFormat = getAudioFormat();
    if (audioFormat != null) {
        // The DVD check takes precedence over the generic audio format.
        formats.add(isAudioDVD() ? HarvestedRecordFormatEnum.AUDIO_DVD : audioFormat);
        if (isBlindAudio()) {
            // Blind audio replaces every format detected up to this point.
            formats.clear();
            formats.add(HarvestedRecordFormatEnum.BLIND_AUDIO);
            return formats;
        }
    }

    final HarvestedRecordFormatEnum videoFormat = getVideoDocument();
    if (videoFormat != null) {
        if (!isVideoDVD()) {
            formats.add(videoFormat);
        } else if (!formats.contains(HarvestedRecordFormatEnum.VIDEO_DVD)) {
            formats.add(HarvestedRecordFormatEnum.VIDEO_DVD);
        }
    }

    if (isComputerCarrier()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_COMPUTER_CARRIER);
    }
    if (isOthers()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_OTHER);
    }
    // Fallback so that callers always receive at least one format.
    if (formats.isEmpty()) {
        formats.add(HarvestedRecordFormatEnum.OTHER_OTHER);
    }
    return formats;
}
use of cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum in project RecordManager2 by moravianlibrary.
the class HashingDedupKeyParser method parse.
/**
 * Recomputes the deduplication keys of {@code record} from {@code metadataRecord}
 * and decides whether the record should be deduplicated again.
 *
 * <p>The filter result and the UPV application id are always copied to the record.
 * When the record's source does not generate dedup keys, nothing else is done.
 * Otherwise all keys are gathered in an encapsulator and hashed. When the hash
 * differs both from the temporal hash and from the stored hash, the old keys are
 * dropped and the new ones are assigned to the record.
 *
 * @param record         harvested record to update; it is modified in place
 * @param metadataRecord metadata the keys are extracted from; its titles and short
 *                       titles are normalized in place
 * @return the same {@code record} instance
 * @throws DedupKeyParserException declared by the overridden method; within this
 *         method only {@code computeHashValue} could raise it
 */
@Override
public HarvestedRecord parse(HarvestedRecord record, MetadataRecord metadataRecord) throws DedupKeyParserException {
    record.setShouldBeProcessed(metadataRecord.matchFilter());
    // not a dedup key
    record.setUpvApplicationId(metadataRecord.getUpvApplicationId());
    if (!record.getHarvestedFrom().isGenerateDedupKeys()) {
        return record;
    }

    // Gather every value that takes part in the hash.
    DedupKeysencapsulator encapsulator = new DedupKeysencapsulator();
    encapsulator.setTitles(normalizedTitles(metadataRecord));
    encapsulator.setShortTitles(normalizedShortTitles(metadataRecord));
    encapsulator.setIsbns(metadataRecord.getISBNs());
    encapsulator.setIssns(metadataRecord.getISSNs());
    encapsulator.setIsmns(metadataRecord.getISMNs());
    encapsulator.setCnbs(metadataRecord.getCNBs());
    encapsulator.setPublicationYear(metadataRecord.getPublicationYear());
    List<HarvestedRecordFormatEnum> formatEnums = metadataRecord.getDetectedFormatList();
    encapsulator.setFormats(harvestedRecordFormatDAO.getFormatsFromEnums(formatEnums));
    encapsulator.setAuthorAuthKey(MetadataUtils.shorten(metadataRecord.getAuthorAuthKey(), EFFECTIVE_AUTHOR_AUTH_KEY_LENGTH));
    encapsulator.setAuthorString(MetadataUtils.normalizeAndShorten(metadataRecord.getAuthorString(), EFFECTIVE_AUTHOR_LENGTH));
    encapsulator.setScale(metadataRecord.getScale());
    encapsulator.setUuid(metadataRecord.getUUId());
    encapsulator.setPages(metadataRecord.getPageCount());
    encapsulator.setIssnSeries(MetadataUtils.normalize(metadataRecord.getISSNSeries()));
    encapsulator.setIssnSeriesOrder(MetadataUtils.normalize(metadataRecord.getISSNSeriesOrder()));
    encapsulator.setOclcs(metadataRecord.getOclcs());
    encapsulator.setClusterId(metadataRecord.getClusterId());
    encapsulator.setRaw001Id(metadataRecord.getRaw001Id());
    encapsulator.setSourceInfoT(MetadataUtils.normalizeAndShorten(metadataRecord.getSourceInfoT(), EFFECTIVE_SOURCE_INFO_LENGTH));
    encapsulator.setSourceInfoG(MetadataUtils.normalizeAndShorten(metadataRecord.getSourceInfoG(), EFFECTIVE_SOURCE_INFO_LENGTH));
    encapsulator.setSourceInfoX(MetadataUtils.normalizeAndShorten(metadataRecord.getSourceInfoX(), EFFECTIVE_LENGTH_30));
    encapsulator.setEans(metadataRecord.getEANs());
    encapsulator.setPublisherNumbers(metadataRecord.getPublisherNumber());
    encapsulator.setLanguages(new HashSet<>(metadataRecord.getLanguages()));

    String computedHash = computeHashValue(encapsulator);
    String oldHash = record.getDedupKeysHash();
    // A missing temporal hash is replaced by a placeholder of forty zeros.
    String temporalHash = record.getTemporalDedupHash() == null
            ? "0000000000000000000000000000000000000000"
            : record.getTemporalDedupHash();

    // The temporal hash guards against reprocessing within one batch; the stored
    // hash tells whether the keys differ from what was computed before.
    boolean dedupKeysChanged = !temporalHash.equals(computedHash)
            && (oldHash == null || oldHash.isEmpty() || !computedHash.equals(oldHash));
    if (dedupKeysChanged) {
        // keys changed: drop the old ones and store the new ones
        harvestedRecordDao.dropDedupKeys(record);
        assignDedupKeys(record, metadataRecord, encapsulator, formatEnums);
        record.setTemporalDedupHash(computedHash);
    }
    record.setDedupKeysHash(computedHash);

    // Counts as changed only when both timestamps are present and differ.
    boolean oaiTimestampChanged = record.getOaiTimestamp() != null
            && record.getTemporalOldOaiTimestamp() != null
            && !record.getOaiTimestamp().equals(record.getTemporalOldOaiTimestamp());

    // decide whether the record should be deduplicated
    if (dedupKeysChanged) {
        // new record or change in keys
        record.setNextDedupFlag(true);
    } else if (oaiTimestampChanged) {
        // keys are the same but the OAI timestamp changed: clear the dedup flag
        // NOTE(review): the original comments on this branch and on the implicit
        // else below were swapped relative to the condition; the comments now
        // describe what the code does - confirm the condition itself is intended.
        record.setNextDedupFlag(false);
    }
    // Otherwise neither the keys nor the timestamp changed (or a timestamp is
    // missing): the previous dedup flag is kept. This may happen during repeated
    // harvesting before deduplication.
    return record;
}

/** Normalizes and shortens the record's titles in place; returns the non-empty ones without duplicates. */
private List<Title> normalizedTitles(MetadataRecord metadataRecord) {
    List<Title> titles = new ArrayList<>();
    for (Title title : metadataRecord.getTitle()) {
        title.setTitleStr(MetadataUtils.normalizeAndShorten(title.getTitleStr(), EFFECTIVE_TITLE_LENGTH));
        if (title.getTitleStr().isEmpty()) {
            continue;
        }
        if (!titles.contains(title)) {
            titles.add(title);
        }
    }
    return titles;
}

/** Normalizes and shortens the record's short titles in place; returns the non-empty ones without duplicates. */
private List<ShortTitle> normalizedShortTitles(MetadataRecord metadataRecord) {
    List<ShortTitle> shortTitles = new ArrayList<>();
    for (ShortTitle shortTitle : metadataRecord.getShortTitles()) {
        shortTitle.setShortTitleStr(MetadataUtils.normalizeAndShorten(shortTitle.getShortTitleStr(), EFFECTIVE_TITLE_LENGTH));
        if (shortTitle.getShortTitleStr().isEmpty()) {
            continue;
        }
        if (!shortTitles.contains(shortTitle)) {
            shortTitles.add(shortTitle);
        }
    }
    return shortTitles;
}

/** Recomputes the record's weight and copies the freshly computed dedup keys onto it. */
private void assignDedupKeys(HarvestedRecord record, MetadataRecord metadataRecord,
        DedupKeysencapsulator encapsulator, List<HarvestedRecordFormatEnum> formatEnums) {
    // parse() has already dereferenced getHarvestedFrom(); the guard is kept as in
    // the original code.
    if (record.getHarvestedFrom() != null) {
        record.setWeight(metadataRecord.getWeight(record.getHarvestedFrom().getBaseWeight()));
    }
    record.setTitles(encapsulator.getTitles());
    record.setIsbns(encapsulator.getIsbns());
    record.setIssns(encapsulator.getIssns());
    record.setIsmns(encapsulator.getIsmns());
    record.setCnb(encapsulator.getCnbs());
    record.setPublicationYear(encapsulator.getPublicationYear());
    record.setPhysicalFormats(harvestedRecordFormatDAO.getFormatsFromEnums(formatEnums));
    record.setAuthorAuthKey(encapsulator.getAuthorAuthKey());
    record.setAuthorString(encapsulator.getAuthorString());
    record.setScale(encapsulator.getScale());
    record.setUuid(encapsulator.getUuid());
    record.setPages(encapsulator.getPages());
    record.setIssnSeries(encapsulator.getIssnSeries());
    record.setIssnSeriesOrder(encapsulator.getIssnSeriesOrder());
    record.setOclcs(encapsulator.getOclcs());
    record.setLanguages(metadataRecord.getLanguages());
    record.setClusterId(encapsulator.getClusterId());
    record.setRaw001Id(encapsulator.getRaw001Id());
    record.setSourceInfoG(encapsulator.getSourceInfoG());
    record.setSourceInfoX(encapsulator.getSourceInfoX());
    record.setSourceInfoT(encapsulator.getSourceInfoT());
    record.setEans(encapsulator.getEans());
    record.setShortTitles(encapsulator.getShortTitles());
    record.setPublisherNumbers(metadataRecord.getPublisherNumber());
}
use of cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum in project RecordManager2 by moravianlibrary.
the class MarcRecordImplTest method getDetectedFormatListTest.
/**
 * Feeds a series of minimal MARC records to the metadata factory and checks that
 * {@code getDetectedFormatList()} reports exactly the one expected format for each.
 */
@Test
public void getDetectedFormatListTest() throws Exception {
    List<String> marcLines = new ArrayList<>();

    // Books
    marcLines.add("000 00000000");
    marcLines.add("006 a");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.BOOKS);

    // Periodicals
    marcLines.add("000 0000000i");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.PERIODICALS);

    // Articles
    marcLines.add("000 0000000a");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.ARTICLES);

    // Maps
    marcLines.add("000 000000000");
    marcLines.add("245 $hkartografický dokument");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.MAPS);

    // Musical scores
    marcLines.add("000 00000000");
    marcLines.add("336 $bntv");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.MUSICAL_SCORES);

    // Visual documents
    marcLines.add("000 00000000");
    marcLines.add("338 $bgasd");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.VISUAL_DOCUMENTS);

    // Microforms
    marcLines.add("000 0000000");
    marcLines.add("337 $bh");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.OTHER_MICROFORMS);

    // Braille
    marcLines.add("000 00000000");
    marcLines.add("007 fb");
    marcLines.add("245 $hhmatové písmo");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.BLIND_BRAILLE);

    // Audio
    marcLines.add("000 00000000");
    marcLines.add("300 $fanaloaSg$amagnetofonová kazeta");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.AUDIO_CASSETTE);

    // Video
    marcLines.add("000 000000000");
    marcLines.add("007 vlllv");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.VIDEO_DVD);

    // Kit
    marcLines.add("000 00000000");
    marcLines.add("006 o");
    marcLines.add("007 o");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.OTHER_OTHER);

    // Object
    marcLines.add("000 00000000");
    marcLines.add("008 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzd");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.OTHER_OTHER);

    // Mixed document
    marcLines.add("000 000000p");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.OTHER_OTHER);

    // Unspecified
    marcLines.add("000 00000000");
    marcLines.add("337 $bx");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.OTHER_OTHER);

    // Nothing
    marcLines.add("000 00000000");
    assertSingleDetectedFormat(marcLines, HarvestedRecordFormatEnum.OTHER_OTHER);
}

/**
 * Builds a record from {@code marcLines}, asserts that its detected format list
 * prints the same as a list holding only {@code expectedFormat}, and then empties
 * {@code marcLines} so the caller can reuse it for the next case.
 */
private void assertSingleDetectedFormat(List<String> marcLines, HarvestedRecordFormatEnum expectedFormat) throws Exception {
    List<HarvestedRecordFormatEnum> expected = new ArrayList<>();
    expected.add(expectedFormat);
    MarcRecordImpl marcRecord = MarcRecordFactory.recordFactory(marcLines);
    MetadataRecord metadata = metadataFactory.getMetadataRecord(marcRecord);
    // The lists are compared through their string form, as in the original test.
    Assert.assertEquals(metadata.getDetectedFormatList().toString(), expected.toString());
    marcLines.clear();
}
Aggregations