Search in sources :

Example 36 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class RegenerateDedupKeysTest method simpleTest.

/**
 * Launches the {@code Constants.JOB_ID_REGEN_DEDUP_KEYS} job with an empty
 * parameter set and then checks the publication year of the two harvested
 * records fetched through {@code harvestedRecordDao.get(21L)} and
 * {@code harvestedRecordDao.get(22L)}.
 *
 * @throws Exception if the job lookup or the job launch fails
 */
@Test
public void simpleTest() throws Exception {
    Job job = jobRegistry.getJob(Constants.JOB_ID_REGEN_DEDUP_KEYS);
    JobParameters jobParams = new JobParameters();
    jobLauncher.run(job, jobParams);
    HarvestedRecord rec1 = harvestedRecordDao.get(21L);
    HarvestedRecord rec2 = harvestedRecordDao.get(22L);
    // both records must be present before their fields are inspected
    Assert.assertNotNull(rec1);
    Assert.assertNotNull(rec2);
    // Long.valueOf replaces the deprecated Long boxing constructor
    Assert.assertEquals(rec1.getPublicationYear(), Long.valueOf(1993L));
    Assert.assertEquals(rec2.getPublicationYear(), Long.valueOf(1929L));
}
Also used : JobParameters(org.springframework.batch.core.JobParameters) Job(org.springframework.batch.core.Job) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 37 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class ImportRecordsJobTest method testSimpleImportISO2709.

/**
 * Launches the {@code Constants.JOB_ID_IMPORT} job for the file referenced by
 * {@code testFileISO1} using format {@code "iso"} and configuration id 300,
 * then verifies that record {@code "000000146"} can be looked up for that
 * configuration.
 *
 * @throws Exception if the job lookup or the job launch fails
 */
@Test
public void testSimpleImportISO2709() throws Exception {
    final Map<String, JobParameter> importArgs = new HashMap<>();
    importArgs.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(300L));
    importArgs.put(Constants.JOB_PARAM_IN_FILE, new JobParameter(testFileISO1));
    importArgs.put(Constants.JOB_PARAM_FORMAT, new JobParameter("iso"));

    final Job importJob = jobRegistry.getJob(Constants.JOB_ID_IMPORT);
    jobLauncher.run(importJob, new JobParameters(importArgs));

    // the imported record is expected under the same configuration id
    final HarvestedRecord imported =
            harvestedRecordDao.findByIdAndHarvestConfiguration("000000146", 300L);
    Assert.assertNotNull(imported);
}
Also used : HashMap(java.util.HashMap) JobParameters(org.springframework.batch.core.JobParameters) Job(org.springframework.batch.core.Job) JobParameter(org.springframework.batch.core.JobParameter) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 38 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class OAIHarvestJobTest method testDeleteRecord.

/**
 * Runs the {@code Constants.JOB_ID_HARVEST} job for configuration 301 against
 * a mocked HTTP client that serves three canned XML responses, and checks
 * that record {@code "111111"} is stored with a non-null deleted value.
 *
 * @throws Exception if the harvest job or any lookup fails
 */
@Test
public void testDeleteRecord() throws Exception {
    final Long confID = 301L;
    final Map<String, JobParameter> harvestArgs = new HashMap<>();
    harvestArgs.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(confID));

    // canned responses served by the mocked client
    final InputStream identifyXml = getClass().getResourceAsStream("/sample/IdentifyNonstandardGranularity.xml");
    final InputStream firstPageXml = getClass().getResourceAsStream("/sample/ListRecordsNLKdeleted1.xml");
    final InputStream secondPageXml = getClass().getResourceAsStream("/sample/ListRecordsNLKdeleted2.xml");

    reset(httpClient);
    expect(httpClient.executeGet("http://oai.medvik.cz/medvik2cpk/oai?verb=Identify"))
            .andReturn(identifyXml);
    expect(httpClient.executeGet("http://oai.medvik.cz/medvik2cpk/oai?verb=ListRecords&metadataPrefix=marc21"))
            .andReturn(firstPageXml);
    expect(httpClient.executeGet("http://oai.medvik.cz/medvik2cpk/oai?verb=ListRecords&resumptionToken=xaiutmvy00003"))
            .andReturn(secondPageXml);
    replay(httpClient);

    final JobParameters harvestParams = new JobParameters(harvestArgs);
    final JobExecution execution = jobExecutor.execute(Constants.JOB_ID_HARVEST, harvestParams);
    Assert.assertEquals(execution.getExitStatus(), ExitStatus.COMPLETED);

    final OAIHarvestConfiguration harvestConfig = configDao.get(confID);
    final HarvestedRecord stored = recordDao.findByIdAndHarvestConfiguration("111111", harvestConfig);
    Assert.assertNotNull(stored, "Record not stored.");
    Assert.assertNotNull(stored.getDeleted());
}
Also used : JobExecution(org.springframework.batch.core.JobExecution) OAIHarvestConfiguration(cz.mzk.recordmanager.server.model.OAIHarvestConfiguration) HashMap(java.util.HashMap) InputStream(java.io.InputStream) JobParameters(org.springframework.batch.core.JobParameters) JobParameter(org.springframework.batch.core.JobParameter) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 39 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class ImportRecordsWriter method writeInner.

/**
 * Serializes every incoming MARC record to MARC XML, optionally runs it
 * through an interceptor, and stores it as a {@code HarvestedRecord}.
 *
 * <p>An existing record is looked up by record id and {@code configurationId};
 * when none is found a new one is created with format {@code "marc21-xml"}.
 * The record is persisted, handed to {@code dedupKeysParser}, marked as
 * deleted when filtering is enabled and the record should not be processed,
 * and persisted again.
 *
 * @param items batches of MARC records to write
 * @throws Exception any failure while processing a record; it is logged as a
 *         warning and rethrown unchanged
 */
protected void writeInner(List<? extends List<Record>> items) throws Exception {
    for (List<Record> batch : items) {
        for (Record marc : batch) {
            try {
                // serialize the record into an in-memory MARC XML document
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                MarcWriter xmlWriter = new MarcXmlWriter(buffer, true);
                xmlWriter.setConverter(ISOCharConvertor.INSTANCE);
                xmlWriter.write(marc);
                xmlWriter.close();
                byte[] rawXml = buffer.toByteArray();

                // the record id has to be known before an interceptor is requested
                MetadataRecord parsed = parseMetadata(rawXml);
                String id = parsed.getUniqueId();
                if (regexpExtractor != null) {
                    id = regexpExtractor.extract(id);
                }

                MarcRecordInterceptor interceptor = harvestConfiguration.isInterceptionEnabled()
                        ? marcInterceptorFactory.getInterceptor(harvestConfiguration, id, rawXml)
                        : null;
                if (interceptor != null) {
                    byte[] intercepted = interceptor.intercept();
                    boolean contentChanged = !Arrays.equals(rawXml, intercepted);
                    if (contentChanged) {
                        // changed content: re-parse the metadata and keep the new bytes
                        parsed = parseMetadata(intercepted);
                        rawXml = intercepted;
                    }
                }

                HarvestedRecord stored = harvestedRecordDao.findByIdAndHarvestConfiguration(id, configurationId);
                if (stored == null) {
                    stored = new HarvestedRecord(new HarvestedRecordUniqueId(harvestConfiguration, id));
                    // TODO detect format
                    stored.setFormat("marc21-xml");
                    stored.setHarvestedFrom(harvestConfiguration);
                }
                stored.setUpdated(new Date());
                stored.setDeleted(null);
                stored.setRawRecord(rawXml);
                harvestedRecordDao.persist(stored);

                dedupKeysParser.parse(stored, parsed);
                if (harvestConfiguration.isFilteringEnabled()) {
                    if (!stored.getShouldBeProcessed()) {
                        logger.debug("Filtered record: " + stored.getUniqueId());
                        stored.setDeleted(new Date());
                    }
                }
                harvestedRecordDao.persist(stored);
                progress.incrementAndLogProgress();
            } catch (Exception failure) {
                logger.warn("Error occured in processing record");
                throw failure;
            }
        }
    }
}
Also used : MarcRecordInterceptor(cz.mzk.recordmanager.server.marc.intercepting.MarcRecordInterceptor) HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) ByteArrayOutputStream(java.io.ByteArrayOutputStream) MarcXmlWriter(org.marc4j.MarcXmlWriter) Date(java.util.Date) MarcWriter(org.marc4j.MarcWriter) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) Record(org.marc4j.marc.Record) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 40 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class DedupSkatKeysProcessor method process.

/**
 * Selects the records whose titles are similar to those of the first record
 * in the list and passes their ids on to the parent processor.
 *
 * <p>The first id in {@code item} is loaded as the reference record; all
 * remaining ids are loaded as candidates. A candidate is kept when at least
 * one of its titles matches at least one title of the reference record
 * according to {@code StringUtils.simmilarTitleMatch(..., 70, 8)}.
 *
 * @param item ids of the records to consider, reference record first
 * @return an empty list when {@code item} is {@code null}, has fewer than two
 *         elements, or no candidate matches; otherwise the result of
 *         {@code super.process} for the reference id plus the matching ids
 * @throws Exception propagated from the parent processor
 */
@Override
public List<HarvestedRecord> process(List<Long> item) throws Exception {
    if (item == null || item.size() < 2) {
        return Collections.emptyList();
    }
    // get skatRecord from list
    HarvestedRecord skatRec = harvestedRecordDao.get(item.get(0));
    // get other records from list; the set drops records that are listed twice
    Set<HarvestedRecord> ordinaryRecords = new HashSet<>();
    for (Long recordId : item.subList(1, item.size())) {
        ordinaryRecords.add(harvestedRecordDao.get(recordId));
    }
    List<Title> expectedTitles = skatRec.getTitles();
    Set<HarvestedRecord> toBeMerged = new HashSet<>();
    // decision is based on similarity of titles
    candidates:
    for (HarvestedRecord currentRec : ordinaryRecords) {
        for (Title currentTitle : currentRec.getTitles()) {
            for (Title expectedTitle : expectedTitles) {
                if (StringUtils.simmilarTitleMatch(currentTitle, expectedTitle, 70, 8)) {
                    toBeMerged.add(currentRec);
                    // one matching title pair is enough; skip the remaining comparisons
                    // NOTE(review): assumes simmilarTitleMatch has no side effects - confirm
                    continue candidates;
                }
            }
        }
    }
    if (toBeMerged.isEmpty()) {
        return Collections.emptyList();
    }
    List<Long> tobeMergedIds = new ArrayList<>(toBeMerged.size() + 1);
    tobeMergedIds.add(skatRec.getId());
    for (HarvestedRecord matched : toBeMerged) {
        tobeMergedIds.add(matched.getId());
    }
    // pass ids to be merged into parent
    return super.process(tobeMergedIds);
}
Also used : ArrayList(java.util.ArrayList) Title(cz.mzk.recordmanager.server.model.Title) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) HashSet(java.util.HashSet)

Aggregations

HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)60 Test (org.testng.annotations.Test)20 InputStream (java.io.InputStream)19 ByteArrayInputStream (java.io.ByteArrayInputStream)18 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)16 HashMap (java.util.HashMap)16 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)15 Date (java.util.Date)14 JobParameters (org.springframework.batch.core.JobParameters)14 HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId)12 JobParameter (org.springframework.batch.core.JobParameter)12 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)9 Record (org.marc4j.marc.Record)9 JobExecution (org.springframework.batch.core.JobExecution)8 Job (org.springframework.batch.core.Job)6 OAIHarvestConfiguration (cz.mzk.recordmanager.server.model.OAIHarvestConfiguration)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 DedupRecord (cz.mzk.recordmanager.server.model.DedupRecord)4 FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius)3