Use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.
Class HashingDedupKeyParser, method computeHashValue.
/**
 * Computes the SHA-1 hash of the deduplication keys held by the given
 * {@link DedupKeysencapsulator} and returns it as a lowercase hexadecimal string.
 *
 * <p>Keys are fed into the digest in a fixed order. Every string key is encoded
 * as UTF-8 so that the result does not depend on the platform default charset.
 * Collections returned by the encapsulator are iterated without a null check;
 * scalar keys are skipped when they are {@code null}.
 *
 * @param encapsulator holder of the deduplication keys to be hashed
 * @return 40-character lowercase hexadecimal SHA-1 digest of the keys
 * @throws DedupKeyParserException if SHA-1 or UTF-8 is not available in the running JVM
 */
protected String computeHashValue(final DedupKeysencapsulator encapsulator) {
    // One explicit charset for all string keys; the no-arg getBytes() would use
    // the platform default and make the hash differ between machines.
    final String charset = "utf-8";
    try {
        // change of hash function also requires changes in database row
        MessageDigest md = MessageDigest.getInstance("SHA-1");
        for (Title t : encapsulator.getTitles()) {
            md.update(t.getTitleStr().getBytes(charset));
        }
        // NOTE(review): numeric keys (ISBN, ISMN, EAN, publication year, scale, pages)
        // contribute only their lowest 8 bits via byteValue(), so values that differ
        // by a multiple of 256 hash identically. Left unchanged on purpose: widening
        // it would change every computed hash - presumably these are compared with
        // previously stored values, confirm before altering.
        for (Isbn i : encapsulator.getIsbns()) {
            md.update(i.getIsbn().byteValue());
        }
        for (Issn i : encapsulator.getIssns()) {
            md.update(i.getIssn().getBytes(charset));
        }
        for (Ismn i : encapsulator.getIsmns()) {
            md.update(i.getIsmn().byteValue());
        }
        for (Cnb c : encapsulator.getCnbs()) {
            md.update(c.getCnb().getBytes(charset));
        }
        if (encapsulator.getPublicationYear() != null) {
            md.update(encapsulator.getPublicationYear().byteValue());
        }
        for (HarvestedRecordFormat hrfe : encapsulator.getFormats()) {
            md.update(hrfe.getName().getBytes(charset));
        }
        if (encapsulator.getAuthorAuthKey() != null) {
            md.update(encapsulator.getAuthorAuthKey().getBytes(charset));
        }
        if (encapsulator.getAuthorString() != null) {
            md.update(encapsulator.getAuthorString().getBytes(charset));
        }
        if (encapsulator.getScale() != null) {
            md.update(encapsulator.getScale().byteValue());
        }
        if (encapsulator.getUuid() != null) {
            md.update(encapsulator.getUuid().getBytes(charset));
        }
        if (encapsulator.getPages() != null) {
            md.update(encapsulator.getPages().byteValue());
        }
        if (encapsulator.getIssnSeries() != null) {
            md.update(encapsulator.getIssnSeries().getBytes(charset));
        }
        if (encapsulator.getIssnSeriesOrder() != null) {
            md.update(encapsulator.getIssnSeriesOrder().getBytes(charset));
        }
        for (Oclc o : encapsulator.getOclcs()) {
            md.update(o.getOclcStr().getBytes(charset));
        }
        for (String l : encapsulator.getLanguages()) {
            md.update(l.getBytes(charset));
        }
        if (encapsulator.getClusterId() != null) {
            md.update(encapsulator.getClusterId().getBytes(charset));
        }
        if (encapsulator.getRaw001Id() != null) {
            md.update(encapsulator.getRaw001Id().getBytes(charset));
        }
        if (encapsulator.getSourceInfoT() != null) {
            md.update(encapsulator.getSourceInfoT().getBytes(charset));
        }
        if (encapsulator.getSourceInfoX() != null) {
            md.update(encapsulator.getSourceInfoX().getBytes(charset));
        }
        if (encapsulator.getSourceInfoG() != null) {
            md.update(encapsulator.getSourceInfoG().getBytes(charset));
        }
        for (Ean ean : encapsulator.getEans()) {
            md.update(ean.getEan().byteValue());
        }
        for (PublisherNumber publisherNumber : encapsulator.getPublisherNumbers()) {
            md.update(publisherNumber.getPublisherNumber().getBytes(charset));
        }
        for (ShortTitle st : encapsulator.getShortTitles()) {
            md.update(st.getShortTitleStr().getBytes(charset));
        }
        // Render the raw digest as two lowercase hex digits per byte.
        byte[] hash = md.digest();
        StringBuilder sb = new StringBuilder(hash.length * 2);
        for (byte b : hash) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    } catch (NoSuchAlgorithmException e) {
        // should never be thrown, SHA-1 is required by Java specification;
        // fail loudly instead of returning an empty string that looks like a hash
        throw new DedupKeyParserException("SHA-1 is not available for hash computation", e);
    } catch (UnsupportedEncodingException uee) {
        throw new DedupKeyParserException("Uncoding problems in hash computation", uee);
    }
}
Use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.
Class MarcXmlParserTest, method testRecordNLK.
/**
 * Tests a record with alphanumeric field names: parses
 * {@code /records/marcxml/NLK-192047.xml} and checks selected MARC fields,
 * the publication year, the detected format and the first title.
 *
 * @throws Exception if closing the resource stream fails
 */
@Test
public void testRecordNLK() throws Exception {
    // try-with-resources closes the classpath stream even when an assertion fails
    try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/NLK-192047.xml")) {
        // fail with a clear message-less assertion rather than deep inside the parser
        Assert.assertNotNull(is);
        MarcRecord marc = parser.parseRecord(is);
        // verify the parse result before it is handed to the metadata factory
        Assert.assertNotNull(marc);
        MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(marc);
        Assert.assertEquals(marc.getField("020", 'a'), "3-540-08474-6");
        Assert.assertEquals(marc.getField("HGS", 'a'), "0");
        Assert.assertEquals(metadataRecord.getPublicationYear(), Long.valueOf(1978L));
        Assert.assertEquals(metadataRecord.getDetectedFormatList().size(), 1);
        Assert.assertEquals(metadataRecord.getDetectedFormatList().get(0), HarvestedRecordFormatEnum.BOOKS);
        Title expectedTitle = new Title();
        expectedTitle.setTitleStr("Cardiomyopathy and myocardial biopsy /");
        expectedTitle.setOrderInRecord(1L);
        Assert.assertEquals(metadataRecord.getTitle().get(0), expectedTitle);
    }
}
Use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.
Class MarcXmlParserTest, method testRecordKFBZ.
/**
 * Parses {@code /records/marcxml/KFBZ-kpw0120405.xml} and checks the ISBN field,
 * the first title, the publication year and the detected format.
 *
 * @throws Exception if closing the resource stream fails
 */
@Test
public void testRecordKFBZ() throws Exception {
    // try-with-resources closes the classpath stream even when an assertion fails
    try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/KFBZ-kpw0120405.xml")) {
        Assert.assertNotNull(is);
        MarcRecord marc = parser.parseRecord(is);
        // verify the parse result before it is handed to the metadata factory
        Assert.assertNotNull(marc);
        MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(marc);
        Assert.assertEquals(marc.getField("020", 'a'), "80-200-0358-4");
        Title expectedTitle = new Title();
        expectedTitle.setTitleStr("Česká nedělní postila = Postilla "
                + "de tempore Bohemica : vyloženie svatých čtení nedělních /");
        expectedTitle.setOrderInRecord(1L);
        Assert.assertEquals(metadataRecord.getTitle().get(0), expectedTitle);
        Assert.assertEquals(metadataRecord.getPublicationYear(), Long.valueOf(1992L));
        Assert.assertEquals(metadataRecord.getDetectedFormatList().size(), 1);
        Assert.assertEquals(metadataRecord.getDetectedFormatList().get(0), HarvestedRecordFormatEnum.BOOKS);
    }
}
Use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.
Class MarcXmlParserTest, method testMZKRecordMZK.
/**
 * Parses {@code /records/marcxml/MZK01-001439241.xml} and checks field 040$a,
 * the first title, the 650 subject fields, the publication year and the
 * detected format. Finally calls {@code getFields} with a
 * {@link DataFieldMatcher}; the result of that call is not asserted.
 *
 * @throws Exception if closing the resource stream fails
 */
@Test
public void testMZKRecordMZK() throws Exception {
    // try-with-resources closes the classpath stream even when an assertion fails
    try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/MZK01-001439241.xml")) {
        Assert.assertNotNull(is);
        MarcRecord marc = parser.parseRecord(is);
        // verify the parse result before it is handed to the metadata factory
        Assert.assertNotNull(marc);
        MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(marc);
        Assert.assertEquals(marc.getField("040", 'a'), "BOA001");
        Title expectedTitle = new Title();
        expectedTitle.setTitleStr("Česká republika : města a obce "
                + "České republiky : tradice, historie, památky, "
                + "turistika, současnost /");
        expectedTitle.setOrderInRecord(1L);
        Assert.assertEquals(metadataRecord.getTitle().get(0), expectedTitle);
        List<String> fields650 = marc.getFields("650", " ", 'a', 'z');
        Assert.assertEquals(fields650.size(), 4);
        Assert.assertTrue(fields650.contains("obce Česko"));
        Assert.assertEquals(metadataRecord.getPublicationYear(), Long.valueOf(2014L));
        Assert.assertEquals(metadataRecord.getDetectedFormatList().size(), 1);
        Assert.assertEquals(metadataRecord.getDetectedFormatList().get(0), HarvestedRecordFormatEnum.BOOKS);
        DataFieldMatcher matcher = field -> field.getIndicator1() == ' ' && field.getIndicator2() == '7';
        // NOTE(review): only exercises the matcher overload; the returned value is
        // not checked - add an assertion once the expected 072 content is known.
        marc.getFields("072", matcher, " ", 'x');
    }
}
Use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.
Class DedupSkatKeysProcessor, method process.
/**
 * Reduces a group of record ids to those whose titles are similar to the titles
 * of the first record in the list (the SKAT record) and passes the reduced id
 * list, headed by the SKAT record id, to the parent processor.
 *
 * @param item record ids; the first id identifies the SKAT record, the rest are
 *             candidates to be merged with it
 * @return result of the parent {@code process} call for the SKAT record and the
 *         matching candidates, or an empty list when {@code item} is {@code null},
 *         has fewer than two ids, the SKAT record cannot be loaded, or no
 *         candidate has a similar title
 * @throws Exception propagated from the parent processor
 */
@Override
public List<HarvestedRecord> process(List<Long> item) throws Exception {
    if (item == null || item.size() < 2) {
        return Collections.emptyList();
    }
    // get skatRecord from list
    HarvestedRecord skatRec = harvestedRecordDao.get(item.get(0));
    if (skatRec == null) {
        // NOTE(review): assumes the DAO returns null for an id that no longer
        // exists; without the anchor record there is nothing to merge against
        return Collections.emptyList();
    }
    List<Title> expectedTitles = skatRec.getTitles();
    // a Set keeps each matching record once even if its id is listed repeatedly
    Set<HarvestedRecord> toBeMerged = new HashSet<>();
    // decision is based on similarity of titles
    for (Long candidateId : item.subList(1, item.size())) {
        HarvestedRecord currentRec = harvestedRecordDao.get(candidateId);
        if (currentRec != null && matchesAnyExpectedTitle(currentRec, expectedTitles)) {
            toBeMerged.add(currentRec);
        }
    }
    if (toBeMerged.isEmpty()) {
        return Collections.emptyList();
    }
    List<Long> tobeMergedIds = new ArrayList<>(toBeMerged.size() + 1);
    tobeMergedIds.add(skatRec.getId());
    for (HarvestedRecord rec : toBeMerged) {
        tobeMergedIds.add(rec.getId());
    }
    // pass ids to be merged into parent
    return super.process(tobeMergedIds);
}

/**
 * Tells whether at least one title of the candidate record is similar to at
 * least one of the expected titles; stops at the first match.
 */
private boolean matchesAnyExpectedTitle(HarvestedRecord candidate, List<Title> expectedTitles) {
    for (Title currentTitle : candidate.getTitles()) {
        for (Title expectedTitle : expectedTitles) {
            // 70 and 8 are the thresholds passed to the similarity check;
            // their exact meaning is defined by StringUtils.simmilarTitleMatch
            if (StringUtils.simmilarTitleMatch(currentTitle, expectedTitle, 70, 8)) {
                return true;
            }
        }
    }
    return false;
}
Aggregations