Source code for aleph.datastructures.semanticinfo
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
"""
Definition of structures which are used to hold information about the
cataloguing process.
"""
# Imports =====================================================================
from collections import namedtuple
from remove_hairs import remove_hairs
from marcxml_parser import MARCXMLRecord
# Functions ===================================================================
def _parse_summaryRecordSysNumber(summaryRecordSysNumber):
"""
Try to parse vague, not likely machine-readable description and return
first token, which contains enough numbers in it.
"""
def number_of_digits(token):
digits = filter(lambda x: x.isdigit(), token)
return len(digits)
tokens = map(
lambda x: remove_hairs(x, r" .,:;<>(){}[]\/"),
summaryRecordSysNumber.split()
)
# pick only tokens that contains 3 digits
contains_digits = filter(lambda x: number_of_digits(x) > 3, tokens)
if not contains_digits:
return ""
return contains_digits[0]
# Structures ==================================================================
class SemanticInfo(namedtuple("SemanticInfo", ["hasAcquisitionFields",
                                               "hasISBNAgencyFields",
                                               "hasDescriptiveCatFields",
                                               "hasDescriptiveCatReviewFields",
                                               "hasSubjectCatFields",
                                               "hasSubjectCatReviewFields",
                                               "isClosed",
                                               "isSummaryRecord",
                                               "contentOfFMT",
                                               "parsedSummaryRecordSysNumber",
                                               "summaryRecordSysNumber"])):
    """
    This structure is used to represent information about export progress in
    Aleph.

    It contains information about the state of the record, so it can be
    tracked from the edeposit project.

    See :meth:`from_xml` for details of parsing of those attributes.

    Attributes:
        hasAcquisitionFields (bool): Was the record approved by acquisition?
            Set when the record contains ``HLD`` field.
        hasISBNAgencyFields (bool): Was the record approved by ISBN agency?
            Set when some ``ISTa`` status starts with ``ii2``.
        hasDescriptiveCatFields (bool): Did the record get through name
            description (jmenný popis)? Set when some ``ISTa`` status starts
            with ``jp2``.
        hasDescriptiveCatReviewFields (bool): Did the record get through name
            revision (jmenná revize)? Set when some ``ISTa`` status starts
            with ``jr2``.
        hasSubjectCatFields (bool): Did the record get through subject
            description (věcný popis)? Set when some ``ISTa`` status starts
            with ``vp``.
        hasSubjectCatReviewFields (bool): Did the record get through subject
            revision (věcná revize)? Set when some ``ISTa`` status starts
            with ``vr``.
        isClosed (bool): Was the record closed? This sometimes happens when
            bad ISBN is given by creator of the record, but different is in
            the book. Set when some ``BASa`` value is ``"90"``.
        isSummaryRecord (bool): Is the content of FMT == "SE"?
        contentOfFMT (str, default ""): Content of FMT subrecord.
        parsedSummaryRecordSysNumber (str): Same as
            :attr:`summaryRecordSysNumber` but without natural language
            details.
        summaryRecordSysNumber (str): Identifier of the new record if
            :attr:`isClosed` is True. Format of the string is not specified
            and can be different for each record. Taken from first non-blank
            ``PJMa`` value.
    """
    @staticmethod
    def from_xml(xml):
        """
        Pick information from :class:`.MARCXMLRecord` object and use it to
        build :class:`.SemanticInfo` structure.

        Args:
            xml (str/MARCXMLRecord): MarcXML which will be converted to
                SemanticInfo. In case of str, ``<record>`` tag is required.
                Anything that is not a :class:`.MARCXMLRecord` instance is
                converted with ``str()`` and parsed.

        Returns:
            SemanticInfo: Structure filled with values found in the record. \
                          Flags default to ``False`` and strings to ``""``.
        """
        hasAcquisitionFields = False
        hasISBNAgencyFields = False
        hasDescriptiveCatFields = False
        hasDescriptiveCatReviewFields = False
        hasSubjectCatFields = False
        hasSubjectCatReviewFields = False
        isClosed = False
        summaryRecordSysNumber = ""
        parsedSummaryRecordSysNumber = ""
        isSummaryRecord = False
        contentOfFMT = ""

        parsed = xml
        if not isinstance(xml, MARCXMLRecord):
            parsed = MARCXMLRecord(str(xml))

        # handle FMT record
        if "FMT" in parsed.controlfields:
            contentOfFMT = parsed["FMT"]

            if contentOfFMT == "SE":
                isSummaryRecord = True

        # HLD may be stored among datafields or controlfields
        if "HLD" in parsed.datafields or "HLD" in parsed.controlfields:
            hasAcquisitionFields = True

        # look for catalogization fields
        for status in parsed["ISTa"]:
            status = status.replace(" ", "")  # remove spaces

            if status.startswith("jp2"):
                hasDescriptiveCatFields = True
            elif status.startswith("jr2"):
                hasDescriptiveCatReviewFields = True
            elif status.startswith("vp"):
                hasSubjectCatFields = True
            elif status.startswith("vr"):
                hasSubjectCatReviewFields = True
            elif status.startswith("ii2"):
                hasISBNAgencyFields = True

        # look whether the record was 'closed' by catalogizators
        for status in parsed["BASa"]:
            if status == "90":
                isClosed = True

        # detect link to 'new' record, if the old one was 'closed'; only the
        # first non-blank value is used
        for status in parsed["PJMa"]:
            if status:
                summaryRecordSysNumber = status
                parsedSummaryRecordSysNumber = _parse_summaryRecordSysNumber(
                    summaryRecordSysNumber
                )
                break

        return SemanticInfo(
            hasAcquisitionFields=hasAcquisitionFields,
            hasISBNAgencyFields=hasISBNAgencyFields,
            hasDescriptiveCatFields=hasDescriptiveCatFields,
            hasDescriptiveCatReviewFields=hasDescriptiveCatReviewFields,
            hasSubjectCatFields=hasSubjectCatFields,
            hasSubjectCatReviewFields=hasSubjectCatReviewFields,
            isClosed=isClosed,
            isSummaryRecord=isSummaryRecord,
            contentOfFMT=contentOfFMT,
            parsedSummaryRecordSysNumber=parsedSummaryRecordSysNumber,
            summaryRecordSysNumber=summaryRecordSysNumber,
        )