|
1 | 1 | """Model classes for the dds_glossary package.""" |
2 | 2 |
|
3 | 3 | from abc import abstractmethod |
4 | | -from collections import defaultdict |
5 | 4 | from typing import ClassVar |
6 | 5 |
|
7 | 6 | from pydantic import BaseModel |
|
10 | 9 | from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship |
11 | 10 |
|
12 | 11 | from .enums import MemberType, SemanticRelationType |
| 12 | +from .xml import ( |
| 13 | + get_element_attribute, |
| 14 | + get_sub_element_as_str, |
| 15 | + get_sub_element_attributes, |
| 16 | + get_sub_elements_as_dict, |
| 17 | + get_sub_elements_as_dict_of_lists, |
| 18 | +) |
13 | 19 |
|
14 | 20 |
|
15 | 21 | class Dataset(BaseModel): |
@@ -43,28 +49,10 @@ class Base(DeclarativeBase): |
43 | 49 | dict[str, str]: JSONB, |
44 | 50 | dict[str, list[str]]: JSONB, |
45 | 51 | } |
46 | | - xml_namespace: ClassVar[str] = "{http://www.w3.org/XML/1998/namespace}" |
47 | 52 |
|
48 | 53 | def __eq__(self, other: object) -> bool: |
49 | 54 | return self.to_dict() == other.to_dict() # type: ignore |
50 | 55 |
|
51 | | - @staticmethod |
52 | | - def get_sub_element_text(element, tag: str, default_value: str = "") -> str: |
53 | | - """ |
54 | | - Get a sub element text from the XML element if tag exists, else return |
55 | | - default_value. |
56 | | -
|
57 | | - Args: |
58 | | - element (ElementBase): The XML element to parse. |
59 | | - tag (str): The tag to search for. |
60 | | - default_value (str): The default value to return if the tag does not exist. |
61 | | -
|
62 | | - Returns: |
63 | | - str: The sub element text if the tag exists, else the default value. |
64 | | - """ |
65 | | - sub_element = element.find(tag, namespaces=element.nsmap) |
66 | | - return sub_element.text if sub_element is not None else default_value |
67 | | - |
68 | 56 | @staticmethod |
69 | 57 | def get_in_language(attribute: dict, lang: str = "en") -> str: |
70 | 58 | """ |
@@ -158,13 +146,10 @@ def from_xml_element(cls, element) -> "ConceptScheme": |
158 | 146 | ConceptScheme: The parsed ConceptScheme instance. |
159 | 147 | """ |
160 | 148 | return ConceptScheme( |
161 | | - iri=element.get(f"{{{element.nsmap['rdf']}}}about"), |
162 | | - notation=cls.get_sub_element_text(element, "core:notation"), |
163 | | - scopeNote=cls.get_sub_element_text(element, "core:scopeNote"), |
164 | | - prefLabels={ |
165 | | - label.get(f"{cls.xml_namespace}lang"): label.text |
166 | | - for label in element.findall("core:prefLabel", namespaces=element.nsmap) |
167 | | - }, |
| 149 | + iri=get_element_attribute(element, "about"), |
| 150 | + notation=get_sub_element_as_str(element, "core:notation"), |
| 151 | + scopeNote=get_sub_element_as_str(element, "core:scopeNote"), |
| 152 | + prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), |
168 | 153 | ) |
169 | 154 |
|
170 | 155 | def to_dict(self, lang: str = "en") -> dict: |
@@ -242,12 +227,7 @@ def get_concept_schemes( |
242 | 227 | Returns: |
243 | 228 | list[ConceptScheme]: The concept schemes to which the member belongs. |
244 | 229 | """ |
245 | | - scheme_iris = [ |
246 | | - scheme_element.get(f"{{{element.nsmap['rdf']}}}resource") |
247 | | - for scheme_element in element.findall( |
248 | | - "core:inScheme", namespaces=element.nsmap |
249 | | - ) |
250 | | - ] |
| 230 | + scheme_iris = get_sub_element_attributes(element, "core:inScheme", "resource") |
251 | 231 | return [ |
252 | 232 | concept_scheme |
253 | 233 | for concept_scheme in concept_schemes |
@@ -316,17 +296,11 @@ def from_xml_element( |
316 | 296 | Collection: The parsed Collection instance. |
317 | 297 | """ |
318 | 298 | return Collection( |
319 | | - iri=element.get(f"{{{element.nsmap['rdf']}}}about"), |
320 | | - notation=cls.get_sub_element_text(element, "core:notation"), |
321 | | - prefLabels={ |
322 | | - label.get(f"{cls.xml_namespace}lang"): label.text |
323 | | - for label in element.findall("core:prefLabel", namespaces=element.nsmap) |
324 | | - }, |
| 299 | + iri=get_element_attribute(element, "about"), |
| 300 | + notation=get_sub_element_as_str(element, "core:notation"), |
| 301 | + prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), |
325 | 302 | concept_schemes=cls.get_concept_schemes(element, concept_schemes), |
326 | | - member_iris=[ |
327 | | - member.get(f"{{{element.nsmap['rdf']}}}resource") |
328 | | - for member in element.findall("core:member", namespaces=element.nsmap) |
329 | | - ], |
| 303 | + member_iris=get_sub_element_attributes(element, "core:member", "resource"), |
330 | 304 | ) |
331 | 305 |
|
332 | 306 | def resolve_members_from_xml(self, members: list[Member]) -> None: |
@@ -399,22 +373,13 @@ def from_xml_element( |
399 | 373 | Returns: |
400 | 374 | Concept: The parsed Concept instance. |
401 | 375 | """ |
402 | | - alt_labels = defaultdict(list) |
403 | | - for label in element.findall("core:altLabel", namespaces=element.nsmap): |
404 | | - alt_labels[label.get(f"{cls.xml_namespace}lang")].append(label.text) |
405 | 376 | return Concept( |
406 | | - iri=element.get(f"{{{element.nsmap['rdf']}}}about"), |
407 | | - identifier=cls.get_sub_element_text(element, "x_1.1:identifier"), |
408 | | - notation=cls.get_sub_element_text(element, "core:notation"), |
409 | | - prefLabels={ |
410 | | - label.get(f"{cls.xml_namespace}lang"): label.text |
411 | | - for label in element.findall("core:prefLabel", namespaces=element.nsmap) |
412 | | - }, |
413 | | - altLabels=alt_labels, |
414 | | - scopeNotes={ |
415 | | - note.get(f"{cls.xml_namespace}lang"): note.text |
416 | | - for note in element.findall("core:scopeNote", namespaces=element.nsmap) |
417 | | - }, |
| 377 | + iri=get_element_attribute(element, "about"), |
| 378 | + identifier=get_sub_element_as_str(element, "x_1.1:identifier"), |
| 379 | + notation=get_sub_element_as_str(element, "core:notation"), |
| 380 | + prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), |
| 381 | + altLabels=get_sub_elements_as_dict_of_lists(element, "core:altLabel"), |
| 382 | + scopeNotes=get_sub_elements_as_dict(element, "core:scopeNote"), |
418 | 383 | concept_schemes=cls.get_concept_schemes(element, concept_schemes), |
419 | 384 | ) |
420 | 385 |
|
@@ -518,16 +483,19 @@ def from_xml_element(cls, element) -> list["SemanticRelation"]: |
518 | 483 | Returns: |
519 | 484 | list[SemanticRelation]: The parsed list of SemanticRelation instances. |
520 | 485 | """ |
| 486 | + relations: dict[SemanticRelationType, list[str]] = {} |
| 487 | + for relation_type in SemanticRelationType: |
| 488 | + relations[relation_type] = get_sub_element_attributes( |
| 489 | + element, f"core:{relation_type.value}", "resource" |
| 490 | + ) |
521 | 491 | return [ |
522 | 492 | SemanticRelation( |
523 | 493 | type=relation_type, |
524 | | - source_concept_iri=element.get(f"{{{element.nsmap['rdf']}}}about"), |
525 | | - target_concept_iri=relation.get(f"{{{element.nsmap['rdf']}}}resource"), |
526 | | - ) |
527 | | - for relation_type in SemanticRelationType |
528 | | - for relation in element.findall( |
529 | | - f"core:{relation_type.value}", namespaces=element.nsmap |
| 494 | + source_concept_iri=get_element_attribute(element, "about"), |
| 495 | + target_concept_iri=target_concept_iri, |
530 | 496 | ) |
| 497 | + for relation_type, target_concept_iris in relations.items() |
| 498 | + for target_concept_iri in target_concept_iris |
531 | 499 | ] |
532 | 500 |
|
533 | 501 | def to_dict(self) -> dict: |
|
0 commit comments