Search in sources :

Example 6 with Title

use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.

the class HashingDedupKeyParser method computeHashValue.

/**
 * Computes the SHA-1 hash of the deduplication keys held by the given
 * {@link DedupKeysencapsulator} and returns it as a lowercase hexadecimal string.
 *
 * <p>The keys are fed into the digest in a fixed order (titles, ISBNs, ISSNs, ...,
 * short titles). Collection-valued keys are iterated without a null check; optional
 * scalar keys are skipped when they are {@code null}.
 *
 * <p>All textual keys are encoded as UTF-8 so that the resulting hash does not depend
 * on the platform default charset of the JVM that computes it.
 *
 * <p>Numeric keys contribute only the single byte returned by {@code byteValue()}.
 * NOTE(review): this looks lossy for values that do not fit into one byte, but it is
 * intentionally left untouched here because it changes the produced hash values.
 *
 * @param encapsulator holder of the deduplication keys to be hashed
 * @return hexadecimal representation of the SHA-1 digest
 * @throws DedupKeyParserException if the digest algorithm or the UTF-8 encoding
 *         is not available
 */
protected String computeHashValue(final DedupKeysencapsulator encapsulator) {
    try {
        // change of hash function also requires changes in database row
        MessageDigest md = MessageDigest.getInstance("SHA-1");
        for (Title t : encapsulator.getTitles()) {
            md.update(t.getTitleStr().getBytes("utf-8"));
        }
        for (Isbn i : encapsulator.getIsbns()) {
            md.update(i.getIsbn().byteValue());
        }
        for (Issn i : encapsulator.getIssns()) {
            md.update(i.getIssn().getBytes("utf-8"));
        }
        for (Ismn i : encapsulator.getIsmns()) {
            md.update(i.getIsmn().byteValue());
        }
        for (Cnb c : encapsulator.getCnbs()) {
            md.update(c.getCnb().getBytes("utf-8"));
        }
        if (encapsulator.getPublicationYear() != null) {
            md.update(encapsulator.getPublicationYear().byteValue());
        }
        for (HarvestedRecordFormat hrfe : encapsulator.getFormats()) {
            md.update(hrfe.getName().getBytes("utf-8"));
        }
        if (encapsulator.getAuthorAuthKey() != null) {
            md.update(encapsulator.getAuthorAuthKey().getBytes("utf-8"));
        }
        if (encapsulator.getAuthorString() != null) {
            md.update(encapsulator.getAuthorString().getBytes("utf-8"));
        }
        if (encapsulator.getScale() != null) {
            md.update(encapsulator.getScale().byteValue());
        }
        if (encapsulator.getUuid() != null) {
            md.update(encapsulator.getUuid().getBytes("utf-8"));
        }
        if (encapsulator.getPages() != null) {
            md.update(encapsulator.getPages().byteValue());
        }
        if (encapsulator.getIssnSeries() != null) {
            md.update(encapsulator.getIssnSeries().getBytes("utf-8"));
        }
        if (encapsulator.getIssnSeriesOrder() != null) {
            md.update(encapsulator.getIssnSeriesOrder().getBytes("utf-8"));
        }
        for (Oclc o : encapsulator.getOclcs()) {
            md.update(o.getOclcStr().getBytes("utf-8"));
        }
        for (String l : encapsulator.getLanguages()) {
            md.update(l.getBytes("utf-8"));
        }
        if (encapsulator.getClusterId() != null) {
            md.update(encapsulator.getClusterId().getBytes("utf-8"));
        }
        if (encapsulator.getRaw001Id() != null) {
            md.update(encapsulator.getRaw001Id().getBytes("utf-8"));
        }
        if (encapsulator.getSourceInfoT() != null) {
            md.update(encapsulator.getSourceInfoT().getBytes("utf-8"));
        }
        if (encapsulator.getSourceInfoX() != null) {
            md.update(encapsulator.getSourceInfoX().getBytes("utf-8"));
        }
        if (encapsulator.getSourceInfoG() != null) {
            md.update(encapsulator.getSourceInfoG().getBytes("utf-8"));
        }
        for (Ean ean : encapsulator.getEans()) {
            md.update(ean.getEan().byteValue());
        }
        for (PublisherNumber publisherNumber : encapsulator.getPublisherNumbers()) {
            md.update(publisherNumber.getPublisherNumber().getBytes("utf-8"));
        }
        for (ShortTitle st : encapsulator.getShortTitles()) {
            md.update(st.getShortTitleStr().getBytes("utf-8"));
        }
        byte[] hash = md.digest();
        // two hex characters per digest byte
        StringBuilder sb = new StringBuilder(hash.length * 2);
        for (byte b : hash) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    } catch (NoSuchAlgorithmException e) {
        // should never be thrown, SHA-1 is required by Java specification;
        // fail loudly instead of handing out an empty string as a "hash"
        throw new DedupKeyParserException("SHA-1 digest is not available", e);
    } catch (UnsupportedEncodingException uee) {
        throw new DedupKeyParserException("Encoding problems in hash computation", uee);
    }
}
Also used : Issn(cz.mzk.recordmanager.server.model.Issn) Title(cz.mzk.recordmanager.server.model.Title) ShortTitle(cz.mzk.recordmanager.server.model.ShortTitle) Ismn(cz.mzk.recordmanager.server.model.Ismn) UnsupportedEncodingException(java.io.UnsupportedEncodingException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) Oclc(cz.mzk.recordmanager.server.model.Oclc) Ean(cz.mzk.recordmanager.server.model.Ean) Isbn(cz.mzk.recordmanager.server.model.Isbn) ShortTitle(cz.mzk.recordmanager.server.model.ShortTitle) Cnb(cz.mzk.recordmanager.server.model.Cnb) HarvestedRecordFormat(cz.mzk.recordmanager.server.model.HarvestedRecordFormat) MessageDigest(java.security.MessageDigest) PublisherNumber(cz.mzk.recordmanager.server.model.PublisherNumber)

Example 7 with Title

use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.

the class MarcXmlParserTest method testRecordNLK.

/**
 * Tests a record with alphanumeric field names.
 *
 * <p>Parses the NLK-192047 fixture and verifies selected MARC fields, the
 * publication year, the detected format and the first title of the derived
 * metadata record.
 *
 * @throws Exception if closing the fixture stream fails
 */
@Test
public void testRecordNLK() throws Exception {
    // try-with-resources closes the fixture stream even when an assertion fails
    try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/NLK-192047.xml")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message
        Assert.assertNotNull(is, "test resource /records/marcxml/NLK-192047.xml not found");
        MarcRecord marc = parser.parseRecord(is);
        // check the parse result before it is used
        Assert.assertNotNull(marc);
        MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(marc);
        Assert.assertEquals(marc.getField("020", 'a'), "3-540-08474-6");
        Assert.assertEquals(marc.getField("HGS", 'a'), "0");
        Assert.assertEquals(metadataRecord.getPublicationYear(), Long.valueOf(1978L));
        Assert.assertEquals(metadataRecord.getDetectedFormatList().size(), 1);
        Assert.assertEquals(metadataRecord.getDetectedFormatList().get(0), HarvestedRecordFormatEnum.BOOKS);
        Title expectedTitle = new Title();
        expectedTitle.setTitleStr("Cardiomyopathy and myocardial biopsy /");
        expectedTitle.setOrderInRecord(1L);
        Assert.assertEquals(metadataRecord.getTitle().get(0), expectedTitle);
    }
}
Also used : InputStream(java.io.InputStream) Title(cz.mzk.recordmanager.server.model.Title) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 8 with Title

use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.

the class MarcXmlParserTest method testRecordKFBZ.

/**
 * Parses the KFBZ-kpw0120405 fixture and verifies the ISBN field, the first
 * title, the publication year and the detected format of the derived
 * metadata record.
 *
 * @throws Exception if closing the fixture stream fails
 */
@Test
public void testRecordKFBZ() throws Exception {
    // try-with-resources closes the fixture stream even when an assertion fails
    try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/KFBZ-kpw0120405.xml")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message
        Assert.assertNotNull(is, "test resource /records/marcxml/KFBZ-kpw0120405.xml not found");
        MarcRecord marc = parser.parseRecord(is);
        // check the parse result before it is used
        Assert.assertNotNull(marc);
        MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(marc);
        Assert.assertEquals(marc.getField("020", 'a'), "80-200-0358-4");
        Title expectedTitle = new Title();
        expectedTitle.setTitleStr("Česká nedělní postila = Postilla " + "de tempore Bohemica : vyloženie svatých čtení nedělních /");
        expectedTitle.setOrderInRecord(1L);
        Assert.assertEquals(metadataRecord.getTitle().get(0), expectedTitle);
        Assert.assertEquals(metadataRecord.getPublicationYear(), Long.valueOf(1992L));
        Assert.assertEquals(metadataRecord.getDetectedFormatList().size(), 1);
        Assert.assertEquals(metadataRecord.getDetectedFormatList().get(0), HarvestedRecordFormatEnum.BOOKS);
    }
}
Also used : InputStream(java.io.InputStream) Title(cz.mzk.recordmanager.server.model.Title) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 9 with Title

use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.

the class MarcXmlParserTest method testMZKRecordMZK.

/**
 * Parses the MZK01-001439241 fixture and verifies field 040, the first title,
 * the 650 subject fields, the publication year and the detected format of the
 * derived metadata record.
 *
 * @throws Exception if closing the fixture stream fails
 */
@Test
public void testMZKRecordMZK() throws Exception {
    // try-with-resources closes the fixture stream even when an assertion fails
    try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/MZK01-001439241.xml")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message
        Assert.assertNotNull(is, "test resource /records/marcxml/MZK01-001439241.xml not found");
        MarcRecord marc = parser.parseRecord(is);
        // check the parse result before it is used
        Assert.assertNotNull(marc);
        MetadataRecord metadataRecord = metadataFactory.getMetadataRecord(marc);
        Assert.assertEquals(marc.getField("040", 'a'), "BOA001");
        Title expectedTitle = new Title();
        expectedTitle.setTitleStr("Česká republika : města a obce " + "České republiky : tradice, historie, památky, " + "turistika, současnost /");
        expectedTitle.setOrderInRecord(1L);
        Assert.assertEquals(metadataRecord.getTitle().get(0), expectedTitle);
        List<String> fields650 = marc.getFields("650", " ", 'a', 'z');
        Assert.assertEquals(fields650.size(), 4);
        Assert.assertTrue(fields650.contains("obce Česko"));
        Assert.assertEquals(metadataRecord.getPublicationYear(), Long.valueOf(2014L));
        Assert.assertEquals(metadataRecord.getDetectedFormatList().size(), 1);
        Assert.assertEquals(metadataRecord.getDetectedFormatList().get(0), HarvestedRecordFormatEnum.BOOKS);
        DataFieldMatcher matcher = field -> field.getIndicator1() == ' ' && field.getIndicator2() == '7';
        // result is not inspected: this only exercises the matcher-based overload
        marc.getFields("072", matcher, " ", 'x');
    }
}
Also used : MetadataRecordFactory(cz.mzk.recordmanager.server.metadata.MetadataRecordFactory) List(java.util.List) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) Assert(org.testng.Assert) Autowired(org.springframework.beans.factory.annotation.Autowired) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest) HarvestedRecordFormatEnum(cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum) Title(cz.mzk.recordmanager.server.model.Title) InputStream(java.io.InputStream) InputStream(java.io.InputStream) Title(cz.mzk.recordmanager.server.model.Title) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 10 with Title

use of cz.mzk.recordmanager.server.model.Title in project RecordManager2 by moravianlibrary.

the class DedupSkatKeysProcessor method process.

/**
 * Selects the records whose titles are similar to the titles of the leading
 * (SKAT) record and passes them, together with the SKAT record, to the parent
 * processor.
 *
 * <p>The first id in {@code item} identifies the SKAT record, the remaining ids
 * identify the candidate records. A candidate is selected as soon as any of its
 * titles matches any SKAT title according to
 * {@code StringUtils.simmilarTitleMatch(..., 70, 8)}.
 *
 * @param item ids of the records to examine; the SKAT record id comes first
 * @return result of the parent processor for the SKAT record and the matching
 *         candidates, or an empty list when {@code item} is {@code null}, has
 *         fewer than two ids, or no candidate matches
 * @throws Exception declared by the overridden method
 */
@Override
public List<HarvestedRecord> process(List<Long> item) throws Exception {
    if (item == null || item.size() < 2) {
        return Collections.emptyList();
    }
    // get skatRecord from list
    HarvestedRecord skatRec = harvestedRecordDao.get(item.get(0));
    // get other records from list
    Set<HarvestedRecord> ordinaryRecords = new HashSet<>();
    for (Long recordId : item.subList(1, item.size())) {
        ordinaryRecords.add(harvestedRecordDao.get(recordId));
    }
    List<Title> expectedTitles = skatRec.getTitles();
    Set<HarvestedRecord> toBeMerged = new HashSet<>();
    // decision is based on similarity of titles
    for (HarvestedRecord currentRec : ordinaryRecords) {
        // anyMatch short-circuits: one matching title pair is enough to select the record
        boolean similar = currentRec.getTitles().stream()
                .anyMatch(currentTitle -> expectedTitles.stream()
                        .anyMatch(expectedTitle -> StringUtils.simmilarTitleMatch(currentTitle, expectedTitle, 70, 8)));
        if (similar) {
            toBeMerged.add(currentRec);
        }
    }
    if (toBeMerged.isEmpty()) {
        return Collections.emptyList();
    }
    // the SKAT record id stays first, followed by the ids of the matching records
    List<Long> tobeMergedIds = new ArrayList<>(toBeMerged.size() + 1);
    tobeMergedIds.add(skatRec.getId());
    for (HarvestedRecord rec : toBeMerged) {
        tobeMergedIds.add(rec.getId());
    }
    // pass ids to be merged into parent
    return super.process(tobeMergedIds);
}
Also used : ArrayList(java.util.ArrayList) Title(cz.mzk.recordmanager.server.model.Title) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) HashSet(java.util.HashSet)

Aggregations

Title (cz.mzk.recordmanager.server.model.Title)11 ShortTitle (cz.mzk.recordmanager.server.model.ShortTitle)4 Test (org.testng.annotations.Test)4 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)3 MetadataRecord (cz.mzk.recordmanager.server.metadata.MetadataRecord)3 InputStream (java.io.InputStream)3 ArrayList (java.util.ArrayList)3 Cnb (cz.mzk.recordmanager.server.model.Cnb)2 Ean (cz.mzk.recordmanager.server.model.Ean)2 HarvestedRecordFormat (cz.mzk.recordmanager.server.model.HarvestedRecordFormat)2 HarvestedRecordFormatEnum (cz.mzk.recordmanager.server.model.HarvestedRecordFormat.HarvestedRecordFormatEnum)2 Isbn (cz.mzk.recordmanager.server.model.Isbn)2 Ismn (cz.mzk.recordmanager.server.model.Ismn)2 Issn (cz.mzk.recordmanager.server.model.Issn)2 Oclc (cz.mzk.recordmanager.server.model.Oclc)2 DataField (org.marc4j.marc.DataField)2 MetadataRecordFactory (cz.mzk.recordmanager.server.metadata.MetadataRecordFactory)1 HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)1 PublisherNumber (cz.mzk.recordmanager.server.model.PublisherNumber)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1