diff --git a/dds_glossary/database.py b/dds_glossary/database.py index 4150116..327b809 100644 --- a/dds_glossary/database.py +++ b/dds_glossary/database.py @@ -7,7 +7,15 @@ from sqlalchemy.orm import Session, joinedload, with_polymorphic from sqlalchemy_utils import create_database, database_exists, drop_database -from .model import Base, Collection, Concept, ConceptScheme, Member, SemanticRelation +from .model import ( + Base, + Collection, + Concept, + ConceptScheme, + Member, + ParsedDataset, + SemanticRelation, +) def init_engine( @@ -47,34 +55,23 @@ def init_engine( return engine -def save_dataset( - engine: Engine, - concept_schemes: list[ConceptScheme], - concepts: list[Concept], - collections: list[Collection], - semantic_relations: list[SemanticRelation], -) -> None: +def save_dataset(engine: Engine, parsed_dataset: ParsedDataset) -> None: """ Save a dataset in the database. Args: engine (Engine): The database engine. - concept_schemes (list[ConceptScheme]): The concept schemes. - concepts (list[Concept]): The concepts. - collections (list[Collection]): The collections. - semantic_relations (list[SemanticRelation]): The semantic relations. + parsed_dataset (ParsedDataset): The parsed dataset. 
""" with Session(engine) as session: - session.add_all(concept_schemes) - session.add_all(concepts) - session.add_all(collections) - session.add_all(semantic_relations) - - members: list[Member] = [] - members.extend(concepts) - members.extend(collections) - for collection in collections: - collection.resolve_members_from_xml(members) + session.add_all(parsed_dataset.concept_schemes) + session.add_all(parsed_dataset.concepts) + session.add_all(parsed_dataset.collections) + session.add_all(parsed_dataset.semantic_relations) + session.commit() + + session.add_all(parsed_dataset.in_schemes) + session.add_all(parsed_dataset.in_collections) session.commit() diff --git a/dds_glossary/model.py b/dds_glossary/model.py index 75a6225..b07528d 100644 --- a/dds_glossary/model.py +++ b/dds_glossary/model.py @@ -1,24 +1,17 @@ """Model classes for the dds_glossary package.""" from abc import abstractmethod +from dataclasses import dataclass, field from typing import ClassVar -from pydantic import BaseModel -from sqlalchemy import Column, ForeignKey, String, Table from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship +from sqlalchemy.orm import DeclarativeBase, Mapped, Relationship, mapped_column +from sqlmodel import ForeignKey, SQLModel from .enums import MemberType, SemanticRelationType -from .xml import ( - get_element_attribute, - get_sub_element_as_str, - get_sub_element_attributes, - get_sub_elements_as_dict, - get_sub_elements_as_dict_of_lists, -) -class Dataset(BaseModel): +class Dataset(SQLModel): """ Base class for the datasets. @@ -42,6 +35,29 @@ class FailedDataset(Dataset): error: str +@dataclass +class ParsedDataset: + """ + Class for the parsed datasets. + + Attributes: + concept_schemes (list[ConceptScheme]): The concept schemes of the dataset. + concepts (list[Concept]): The concepts of the dataset. + collections (list[Collection]): The collections of the dataset. 
+ semantic_relations (list[SemanticRelation]): The semantic relations of the + dataset. + in_schemes (list[InScheme]): The in schemes of the dataset. + in_collections (list[InCollection]): The in collections of the dataset. + """ + + concept_schemes: list["ConceptScheme"] = field(default_factory=list) + concepts: list["Concept"] = field(default_factory=list) + collections: list["Collection"] = field(default_factory=list) + semantic_relations: list["SemanticRelation"] = field(default_factory=list) + in_schemes: list["InScheme"] = field(default_factory=list) + in_collections: list["InCollection"] = field(default_factory=list) + + class Base(DeclarativeBase): """Base class for all models.""" @@ -124,34 +140,15 @@ class ConceptScheme(Base): __tablename__ = "concept_schemes" iri: Mapped[str] = mapped_column(primary_key=True) - notation: Mapped[str] = mapped_column() - scopeNote: Mapped[str] = mapped_column() - prefLabels: Mapped[dict[str, str]] = mapped_column() + notation: Mapped[str] + scopeNote: Mapped[str] + prefLabels: Mapped[dict[str, str]] - members: Mapped[list["Member"]] = relationship( - "Member", + members: Mapped[list["Member"]] = Relationship( secondary="in_scheme", back_populates="concept_schemes", ) - @classmethod - def from_xml_element(cls, element) -> "ConceptScheme": - """ - Return a ConceptScheme instance from an XML element. - - Args: - element (ElementBase): The XML element to parse. - - Returns: - ConceptScheme: The parsed ConceptScheme instance. - """ - return ConceptScheme( - iri=get_element_attribute(element, "about"), - notation=get_sub_element_as_str(element, "core:notation"), - scopeNote=get_sub_element_as_str(element, "core:scopeNote"), - prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), - ) - def to_dict(self, lang: str = "en") -> dict: """ Return the ConceptScheme instance as a dictionary. 
@@ -190,50 +187,24 @@ class Member(Base): __tablename__ = "collection_members" iri: Mapped[str] = mapped_column(primary_key=True) - notation: Mapped[str] = mapped_column() - prefLabels: Mapped[dict[str, str]] = mapped_column() - member_type: Mapped[MemberType] = mapped_column() + notation: Mapped[str] + prefLabels: Mapped[dict[str, str]] + member_type: Mapped[MemberType] __mapper_args__ = { "polymorphic_identity": MemberType.COLLECTION_MEMBER, "polymorphic_on": "member_type", } - concept_schemes: Mapped[list[ConceptScheme]] = relationship( - "ConceptScheme", + concept_schemes: Mapped[list[ConceptScheme]] = Relationship( secondary="in_scheme", back_populates="members", ) - collections: Mapped[list["Collection"]] = relationship( - "Collection", + collections: Mapped[list["Collection"]] = Relationship( secondary="in_collection", back_populates="members", ) - @classmethod - def get_concept_schemes( - cls, - element, - concept_schemes: list[ConceptScheme], - ) -> list[ConceptScheme]: - """ - Get the concept schemes to which the member belongs. - - Args: - element (ElementBase): The XML element to parse. - concept_schemes (list[ConceptScheme]): The concept schemes to which the - member belongs. - - Returns: - list[ConceptScheme]: The concept schemes to which the member belongs. - """ - scheme_iris = get_sub_element_attributes(element, "core:inScheme", "resource") - return [ - concept_scheme - for concept_scheme in concept_schemes - if concept_scheme.iri in scheme_iris - ] - def to_dict(self, lang: str = "en") -> dict: """ Return the Member instance as a dictionary. 
@@ -268,10 +239,8 @@ class Collection(Member): __tablename__ = "collections" iri: Mapped[str] = mapped_column(ForeignKey(Member.iri), primary_key=True) - member_iris: list[str] = [] - members: Mapped[list[Member]] = relationship( - "Member", + members: Mapped[list[Member]] = Relationship( secondary="in_collection", back_populates="collections", ) @@ -280,41 +249,6 @@ class Collection(Member): "polymorphic_identity": MemberType.COLLECTION, } - @classmethod - def from_xml_element( - cls, - element, - concept_schemes: list[ConceptScheme], - ) -> "Collection": - """ - Return a Collection instance from an XML element. - - Args: - element (ElementBase): The XML element to parse. - - Returns: - Collection: The parsed Collection instance. - """ - return Collection( - iri=get_element_attribute(element, "about"), - notation=get_sub_element_as_str(element, "core:notation"), - prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), - concept_schemes=cls.get_concept_schemes(element, concept_schemes), - member_iris=get_sub_element_attributes(element, "core:member", "resource"), - ) - - def resolve_members_from_xml(self, members: list[Member]) -> None: - """ - Resolve the collections members from an xml element. - - Args: - members (list[Member]): The list of all available members. 
- - Returns: - None - """ - self.members = [member for member in members if member.iri in self.member_iris] - class Concept(Member): """ @@ -348,41 +282,14 @@ class Concept(Member): __tablename__ = "concepts" iri: Mapped[str] = mapped_column(ForeignKey(Member.iri), primary_key=True) - identifier: Mapped[str] = mapped_column() - altLabels: Mapped[dict[str, list[str]]] = mapped_column() - scopeNotes: Mapped[dict[str, str]] = mapped_column() + identifier: Mapped[str] + altLabels: Mapped[dict[str, list[str]]] + scopeNotes: Mapped[dict[str, str]] __mapper_args__ = { "polymorphic_identity": MemberType.CONCEPT, } - @classmethod - def from_xml_element( - cls, - element, - concept_schemes: list[ConceptScheme], - ) -> "Concept": - """ - Return a Concept instance from an XML element. - - Args: - element (ElementBase): The XML element to parse. - concept_schemes (ConceptScheme): The concept schemes to which the concept - belongs. - - Returns: - Concept: The parsed Concept instance. - """ - return Concept( - iri=get_element_attribute(element, "about"), - identifier=get_sub_element_as_str(element, "x_1.1:identifier"), - notation=get_sub_element_as_str(element, "core:notation"), - prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), - altLabels=get_sub_elements_as_dict_of_lists(element, "core:altLabel"), - scopeNotes=get_sub_elements_as_dict(element, "core:scopeNote"), - concept_schemes=cls.get_concept_schemes(element, concept_schemes), - ) - def to_dict(self, lang: str = "en") -> dict: """ Return the Concept instance as a dictionary. 
@@ -459,7 +366,7 @@ class SemanticRelation(Base): __tablename__ = "semantic_relations" - type: Mapped[SemanticRelationType] = mapped_column() + type: Mapped[SemanticRelationType] source_concept_iri: Mapped[str] = mapped_column( ForeignKey(Concept.iri), @@ -469,34 +376,8 @@ class SemanticRelation(Base): ForeignKey(Concept.iri), primary_key=True, ) - source_concept: Mapped["Concept"] = relationship(foreign_keys=[source_concept_iri]) - target_concept: Mapped["Concept"] = relationship(foreign_keys=[target_concept_iri]) - - @classmethod - def from_xml_element(cls, element) -> list["SemanticRelation"]: - """ - Return a list of SemanticRelation instances from an XML element. - - Args: - element (ElementBase): The XML element to parse. - - Returns: - list[SemanticRelation]: The parsed list of SemanticRelation instances. - """ - relations: dict[SemanticRelationType, list[str]] = {} - for relation_type in SemanticRelationType: - relations[relation_type] = get_sub_element_attributes( - element, f"core:{relation_type.value}", "resource" - ) - return [ - SemanticRelation( - type=relation_type, - source_concept_iri=get_element_attribute(element, "about"), - target_concept_iri=target_concept_iri, - ) - for relation_type, target_concept_iris in relations.items() - for target_concept_iri in target_concept_iris - ] + source_concept: Mapped[Concept] = Relationship(foreign_keys=[source_concept_iri]) + target_concept: Mapped[Concept] = Relationship(foreign_keys=[target_concept_iri]) def to_dict(self) -> dict: """ @@ -512,17 +393,69 @@ def to_dict(self) -> dict: } -in_scheme = Table( - "in_scheme", - Base.metadata, - Column("scheme_iri", String, ForeignKey(ConceptScheme.iri), primary_key=True), - Column("member_iri", String, ForeignKey(Member.iri), primary_key=True), -) +class InScheme(Base): + """ + Association table for the concept schemes and the members. + + Attributes: + scheme_iri (str): The Internationalized Resource Identifier of the concept + scheme. 
+ member_iri (str): The Internationalized Resource Identifier of the member. + """ + + __tablename__ = "in_scheme" + + scheme_iri: Mapped[str] = mapped_column( + ForeignKey(ConceptScheme.iri), + primary_key=True, + ) + member_iri: Mapped[str] = mapped_column( + ForeignKey(Member.iri), + primary_key=True, + ) + + def to_dict(self) -> dict: + """ + Return the InScheme instance as a dictionary. + + Returns: + dict: The InScheme instance as a dictionary. + """ + return { + "scheme_iri": self.scheme_iri, + "member_iri": self.member_iri, + } + + +class InCollection(Base): + """ + Association table for the collections and the members. + Attributes: + collection_iri (str): The Internationalized Resource Identifier of the + collection. + member_iri (str): The Internationalized Resource Identifier of the member. + """ + + __tablename__ = "in_collection" -in_collection = Table( - "in_collection", - Base.metadata, - Column("collection_iri", String, ForeignKey(Collection.iri), primary_key=True), - Column("member_iri", String, ForeignKey(Member.iri), primary_key=True), -) + collection_iri: Mapped[str] = mapped_column( + ForeignKey(Collection.iri), + primary_key=True, + ) + member_iri: Mapped[str] = mapped_column( + ForeignKey(Member.iri), + primary_key=True, + ) + + def to_dict(self) -> dict: + """ + Return the InCollection instance as a dictionary. + + Returns: + dict: The InCollection instance as a dictionary. 
+ """ + return { + "collection_iri": self.collection_iri, + "member_iri": self.member_iri, + } diff --git a/dds_glossary/services.py b/dds_glossary/services.py index e90a65c..a35e638 100644 --- a/dds_glossary/services.py +++ b/dds_glossary/services.py @@ -4,7 +4,6 @@ from typing import ClassVar from appdirs import user_data_dir -from defusedxml.lxml import parse as parse_xml from fastapi.templating import Jinja2Templates from owlready2 import get_ontology, onto_path from sqlalchemy import Engine @@ -26,15 +25,7 @@ ConceptNotFoundException, ConceptSchemeNotFoundException, ) -from .model import ( - Collection, - Concept, - ConceptScheme, - Dataset, - FailedDataset, - Member, - SemanticRelation, -) +from .model import Dataset, FailedDataset, Member from .schema import ( CollectionResponse, ConceptResponse, @@ -45,6 +36,7 @@ InitDatasetsResponse, RelationResponse, ) +from .xml import parse_dataset class GlossaryController: @@ -118,49 +110,6 @@ def get_scheme_members( """ return [member for member in members if member.member_type == member_type] - def parse_dataset( - self, - dataset_path: Path, - ) -> tuple[ - list[ConceptScheme], - list[Concept], - list[Collection], - list[SemanticRelation], - ]: - """ - Parse a dataset. - - Args: - dataset_path (Path): The dataset path. - - Returns: - tuple[list[ConceptScheme], list[Concept], list[Collection], - list[SemanticRelation]]: The concept schemes, concepts, collections, - and semantic relations. 
- """ - root = parse_xml(dataset_path).getroot() - concept_scheme_elements = root.findall("core:ConceptScheme", root.nsmap) - collection_elements = root.findall("core:Collection", root.nsmap) - concept_elements = root.findall("core:Concept", root.nsmap) - concept_schemes = [ - ConceptScheme.from_xml_element(concept_scheme_element) - for concept_scheme_element in concept_scheme_elements - ] - concepts = [ - Concept.from_xml_element(concept_element, concept_schemes) - for concept_element in concept_elements - ] - collections = [ - Collection.from_xml_element(collection_element, concept_schemes) - for collection_element in collection_elements - ] - semantic_relations: list[SemanticRelation] = [] - for concept_element in concept_elements: - semantic_relations.extend( - SemanticRelation.from_xml_element(concept_element) - ) - return concept_schemes, concepts, collections, semantic_relations - def init_datasets( self, reload: bool = False, @@ -183,7 +132,7 @@ def init_datasets( try: ontology = get_ontology(dataset.url).load(reload=reload) ontology.save(file=str(dataset_path), format="rdfxml") - save_dataset(self.engine, *self.parse_dataset(dataset_path)) + save_dataset(self.engine, parse_dataset(dataset_path)) saved_datasets.append( Dataset( name=dataset.name, diff --git a/dds_glossary/xml.py b/dds_glossary/xml.py index 607f8a2..877021b 100644 --- a/dds_glossary/xml.py +++ b/dds_glossary/xml.py @@ -1,8 +1,22 @@ """XML utilities for the dds_glossary package.""" from collections import defaultdict +from pathlib import Path from typing import Final +from defusedxml.lxml import parse as parse_xml + +from .enums import SemanticRelationType +from .model import ( + Collection, + Concept, + ConceptScheme, + InCollection, + InScheme, + ParsedDataset, + SemanticRelation, +) + XML_NAMESPACE: Final[str] = "{http://www.w3.org/XML/1998/namespace}" @@ -97,3 +111,169 @@ def get_sub_elements_as_dict_of_lists(element, tag: str) -> dict[str, list[str]] sub_element.text ) return 
sub_element_dict + + +def concept_scheme_from_xml(element) -> ConceptScheme: + """ + Create a ConceptScheme object from the XML element. + + Args: + element (ElementBase): The XML element to parse. + + Returns: + ConceptScheme: The concept scheme. + """ + return ConceptScheme( + iri=get_element_attribute(element, "about"), + notation=get_sub_element_as_str(element, "core:notation"), + scopeNote=get_sub_element_as_str(element, "core:scopeNote"), + prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), + ) + + +def collection_from_xml(element) -> Collection: + """ + Return a Collection instance from an XML element. + + Args: + element (ElementBase): The XML element to parse. + + Returns: + Collection: The parsed Collection instance. + """ + return Collection( + iri=get_element_attribute(element, "about"), + notation=get_sub_element_as_str(element, "core:notation"), + prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), + ) + + +def concept_from_xml(element) -> Concept: + """ + Return a Concept instance from an XML element. + + Args: + element (ElementBase): The XML element to parse. Scheme membership is + not read here; in_scheme_from_xml parses it into InScheme rows + separately. + + Returns: + Concept: The parsed Concept instance. + """ + return Concept( + iri=get_element_attribute(element, "about"), + identifier=get_sub_element_as_str(element, "x_1.1:identifier"), + notation=get_sub_element_as_str(element, "core:notation"), + prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"), + altLabels=get_sub_elements_as_dict_of_lists(element, "core:altLabel"), + scopeNotes=get_sub_elements_as_dict(element, "core:scopeNote"), + ) + + +def semantic_relations_from_xml(element) -> list["SemanticRelation"]: + """ + Return a list of SemanticRelation instances from an XML element. + + Args: + element (ElementBase): The XML element to parse. + + Returns: + list[SemanticRelation]: The parsed list of SemanticRelation instances. 
+ """ + relations: dict[SemanticRelationType, list[str]] = {} + for relation_type in SemanticRelationType: + relations[relation_type] = get_sub_element_attributes( + element, f"core:{relation_type.value}", "resource" + ) + return [ + SemanticRelation( + type=relation_type, + source_concept_iri=get_element_attribute(element, "about"), + target_concept_iri=target_concept_iri, + ) + for relation_type, target_concept_iris in relations.items() + for target_concept_iri in target_concept_iris + ] + + +def in_scheme_from_xml(element) -> list[InScheme]: + """ + Return a list of InScheme instances from an XML element. + + Args: + element (ElementBase): The XML element to parse. + + Returns: + list[InScheme]: The parsed list of InScheme instances. + """ + concept_iri = get_element_attribute(element, "about") + scheme_iris = get_sub_element_attributes(element, "core:inScheme", "resource") + return [ + InScheme(member_iri=concept_iri, scheme_iri=scheme_iri) + for scheme_iri in scheme_iris + ] + + +def in_collection_from_xml(element) -> list[InCollection]: + """ + Return a list of InCollection instances from an XML element. + + Args: + element (ElementBase): The XML element to parse. + + Returns: + list[InCollection]: The parsed list of InCollection instances. + """ + collection_iri = get_element_attribute(element, "about") + member_iris = get_sub_element_attributes(element, "core:member", "resource") + return [ + InCollection(collection_iri=collection_iri, member_iri=member_iri) + for member_iri in member_iris + ] + + +def parse_dataset(dataset_path: Path) -> ParsedDataset: + """ + Parse a dataset. + + Args: + dataset_path (Path): The dataset path. + + Returns: + ParsedDataset: The parsed concept schemes, concepts, collections, + semantic relations, and the InScheme and InCollection association + rows. 
+ """ + root = parse_xml(dataset_path).getroot() + concept_scheme_elements = root.findall("core:ConceptScheme", root.nsmap) + collection_elements = root.findall("core:Collection", root.nsmap) + concept_elements = root.findall("core:Concept", root.nsmap) + concept_schemes = [ + concept_scheme_from_xml(concept_scheme_element) + for concept_scheme_element in concept_scheme_elements + ] + concepts = [ + concept_from_xml(concept_element) for concept_element in concept_elements + ] + collections = [ + collection_from_xml(collection_element) + for collection_element in collection_elements + ] + semantic_relations: list[SemanticRelation] = [] + for concept_element in concept_elements: + semantic_relations.extend(semantic_relations_from_xml(concept_element)) + member_elements = collection_elements + concept_elements + in_schemes: list[InScheme] = [] + for member_element in member_elements: + in_schemes.extend(in_scheme_from_xml(member_element)) + in_collections: list[InCollection] = [] + for member_element in member_elements: + in_collections.extend(in_collection_from_xml(member_element)) + return ParsedDataset( + concept_schemes=concept_schemes, + concepts=concepts, + collections=collections, + semantic_relations=semantic_relations, + in_schemes=in_schemes, + in_collections=in_collections, + ) diff --git a/pyproject.toml b/pyproject.toml index b916a01..7ad164a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "psycopg", "python-dotenv", "sentry-sdk[fastapi]", - "SQLAlchemy>=2.0.0", + "sqlmodel>=0.0.12", "sqlalchemy-utils", "uvicorn[standard]", ] diff --git a/tests/common.py b/tests/common.py index 7bd5260..c46f1d0 100644 --- a/tests/common.py +++ b/tests/common.py @@ -68,7 +68,6 @@ def add_collections( iri=f"collection_iri{i}", notation=f"notation{i}", prefLabels={"en": f"prefLabel{i}"}, - member_iris=member_iri_lists[i], concept_schemes=[ ( session.query(ConceptScheme) @@ -80,8 +79,12 @@ def add_collections( for i in 
range(len(member_iri_lists)) ] session.add_all(collections) - for collection in collections: - collection.resolve_members_from_xml(session.query(Member).all()) + session.commit() + for i, member_iri_list in enumerate(member_iri_lists): + collections[i].members = [ + session.query(Member).where(Member.iri == member_iri).one() + for member_iri in member_iri_list + ] session.commit() return [collection.to_dict() for collection in collections] diff --git a/tests/conftest.py b/tests/conftest.py index a5d5540..6da0a7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,9 +14,24 @@ from dds_glossary import __version__ from dds_glossary.database import init_engine from dds_glossary.main import create_app -from dds_glossary.model import Collection, Concept, ConceptScheme, SemanticRelation +from dds_glossary.model import ( + Collection, + Concept, + ConceptScheme, + InCollection, + InScheme, + SemanticRelation, +) from dds_glossary.schema import VersionResponse from dds_glossary.services import GlossaryController +from dds_glossary.xml import ( + collection_from_xml, + concept_from_xml, + concept_scheme_from_xml, + in_collection_from_xml, + in_scheme_from_xml, + semantic_relations_from_xml, +) @fixture(name="dir_data") @@ -39,7 +54,7 @@ def _root_element(file_rdf: Path): def _concept_scheme( root_element, # pylint: disable=redefined-outer-name ) -> ConceptScheme: - return ConceptScheme.from_xml_element( + return concept_scheme_from_xml( root_element.find("core:ConceptScheme", namespaces=root_element.nsmap) ) @@ -49,9 +64,8 @@ def _collection( root_element, # pylint: disable=redefined-outer-name concept_scheme: ConceptScheme, # pylint: disable=redefined-outer-name ) -> Collection: - return Collection.from_xml_element( - root_element.find("core:Collection", namespaces=root_element.nsmap), - [concept_scheme], + return collection_from_xml( + root_element.find("core:Collection", namespaces=root_element.nsmap) ) @@ -60,9 +74,8 @@ def _concept( root_element, # pylint: 
disable=redefined-outer-name concept_scheme: ConceptScheme, # pylint: disable=redefined-outer-name ) -> Concept: - return Concept.from_xml_element( - root_element.find("core:Concept", namespaces=root_element.nsmap), - [concept_scheme], + return concept_from_xml( + root_element.find("core:Concept", namespaces=root_element.nsmap) ) @@ -70,11 +83,29 @@ def _concept( def _semantic_relation( root_element, # pylint: disable=redefined-outer-name ) -> SemanticRelation: - return SemanticRelation.from_xml_element( + return semantic_relations_from_xml( + root_element.find("core:Concept", namespaces=root_element.nsmap) + )[0] + + +@fixture(name="in_scheme") +def _in_scheme( + root_element, # pylint: disable=redefined-outer-name +) -> InScheme: + return in_scheme_from_xml( root_element.find("core:Concept", namespaces=root_element.nsmap) )[0] +@fixture(name="in_collection") +def _in_collection( + root_element, # pylint: disable=redefined-outer-name +) -> InCollection: + return in_collection_from_xml( + root_element.find("core:Collection", namespaces=root_element.nsmap) + )[0] + + @fixture(name="version_response") def _version_response() -> VersionResponse: return VersionResponse(version=__version__) diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py index 6cc2ed8..72471c2 100644 --- a/tests/unit/test_database.py +++ b/tests/unit/test_database.py @@ -16,7 +16,15 @@ search_database, ) from dds_glossary.enums import SemanticRelationType -from dds_glossary.model import Collection, Concept, ConceptScheme, SemanticRelation +from dds_glossary.model import ( + Collection, + Concept, + ConceptScheme, + InCollection, + InScheme, + ParsedDataset, + SemanticRelation, +) from ..common import add_collections, add_concept_schemes, add_concepts, add_relations @@ -82,7 +90,7 @@ def test_init_engine_database_exists_drop() -> None: def test_save_dataset_with_no_data(engine: Engine) -> None: """Test the save_dataset function with empty data.""" - save_dataset(engine, [], [], [], 
[]) + save_dataset(engine, ParsedDataset()) with Session(engine) as session: assert session.query(ConceptScheme).count() == 0 assert session.query(Concept).count() == 0 @@ -95,6 +103,7 @@ def test_save_dataset_with_data(engine: Engine) -> None: concept_scheme_iri = "http://example.org/concept_scheme" concept1_iri = "http://example.org/concept1" concept2_iri = "http://example.org/concept2" + collection_iri = "http://example.org/collection1" concept_schemes = [ ConceptScheme( iri=concept_scheme_iri, @@ -123,10 +132,9 @@ def test_save_dataset_with_data(engine: Engine) -> None: ] collections = [ Collection( - iri="collection_iri0", + iri=collection_iri, notation="Collection Notation", prefLabels=[{"en": "Collection Pref Label 0"}], - member_iris=[concept1_iri, concept2_iri], ), ] semantic_relations = [ @@ -136,17 +144,38 @@ def test_save_dataset_with_data(engine: Engine) -> None: target_concept_iri=concepts[1].iri, ) ] + in_schemes = [ + InScheme(member_iri=concepts[0].iri, scheme_iri=concept_scheme_iri), + ] + in_collections = [ + InCollection(collection_iri=collections[0].iri, member_iri=concepts[0].iri), + ] - save_dataset(engine, concept_schemes, concepts, collections, semantic_relations) + save_dataset( + engine, + ParsedDataset( + concept_schemes=concept_schemes, + concepts=concepts, + collections=collections, + semantic_relations=semantic_relations, + in_schemes=in_schemes, + in_collections=in_collections, + ), + ) with Session(engine) as session: assert session.query(ConceptScheme).count() == 1 assert session.query(Concept).count() == 2 assert session.query(SemanticRelation).count() == 1 + assert session.query(Collection).count() == 1 + assert session.query(InScheme).count() == 1 assert session.query(ConceptScheme).one().iri == concept_scheme_iri assert session.query(Concept).all()[0].iri == concept1_iri assert session.query(SemanticRelation).one().source_concept_iri == concept1_iri assert session.query(SemanticRelation).one().target_concept_iri == concept2_iri 
+ assert session.query(Collection).one().iri == collection_iri + assert session.query(InScheme).one().member_iri == concept1_iri + assert session.query(InCollection).one().collection_iri == collection_iri def test_get_concept_schemes(engine: Engine) -> None: diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index 800873e..c4a6f09 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -1,13 +1,6 @@ """Tests for dds_glossary.model module.""" -from dds_glossary.enums import SemanticRelationType -from dds_glossary.model import ( - Base, - Collection, - Concept, - ConceptScheme, - SemanticRelation, -) +from dds_glossary.model import Base, Concept, ConceptScheme, SemanticRelation def test_base_eq_true(concept_scheme: ConceptScheme) -> None: @@ -111,22 +104,6 @@ def test_base_get_in_language_list_no_language_specified_no_english( assert Base.get_in_language_list(concept.altLabels) == [] -def test_concept_scheme_from_xml_element(concept_scheme: ConceptScheme) -> None: - """It should return a ConceptScheme instance from an XML element.""" - assert concept_scheme.iri == "http://data.europa.eu/xsp/cn2024/cn2024" - assert concept_scheme.notation == "CN 2024" - assert ( - concept_scheme.scopeNote - == "http://publications.europa.eu/resource/oj/JOC_2019_119_R_0001" - ) - assert concept_scheme.prefLabels == { - "en": "Combined Nomenclature, 2024 (CN 2024)", - "sk": "Kombinovaná Nomenklatúra, 2024 (KN 2024)", - "et": "Kombineeritud Nomenklatuur, 2024 (KN 2024)", - "mt": "Nomenklatura Magħquda, 2024 (NM 2024)", - } - - def test_concept_scheme_to_dict(concept_scheme: ConceptScheme) -> None: """It should return a dictionary representation of the ConceptScheme instance.""" assert concept_scheme.to_dict("sk") == { @@ -137,61 +114,6 @@ def test_concept_scheme_to_dict(concept_scheme: ConceptScheme) -> None: } -def test_collection_from_xml_element(collection: Collection) -> None: - """It should return a Collection instance from an XML element.""" - assert 
collection.iri == "https://example.org/collection1" - assert collection.notation == "Collection1Notation" - assert collection.prefLabels == { - "en": "Collection1PrefLabel", - } - - -def test_collection_resolve_members_from_xml( - collection: Collection, - concept: Concept, -) -> None: - """It should resolve the members of the collection from the XML file.""" - nested_collection = Collection( - iri="https://example.org/collection2", - notation="Collection2Notation", - prefLabels={ - "en": "Collection2PrefLabel", - }, - ) - collection.resolve_members_from_xml([concept, nested_collection]) - assert len(collection.members) == 2 - assert collection.members[0].to_dict() == concept.to_dict() - assert collection.members[1].to_dict() == nested_collection.to_dict() - - -def test_concept_from_xml_element(concept: Concept) -> None: - """It should return a Concept instance from an XML element.""" - assert concept.iri == "http://data.europa.eu/xsp/cn2024/020321000080" - assert concept.identifier == "020321000080" - assert concept.notation == "0203 21" - assert concept.prefLabels == { - "en": "0203 21 -- Carcases and half-carcases", - "sk": "0203 21 -- Trupy a polovičky trupov", - "et": "0203 21 -- rümbad ja poolrümbad", - "mt": "0203 21 -- Karkassi u nofs karkassi", - } - assert concept.altLabels == { - "en": [ - "-- Carcases and half-carcases", - "0203 21 -- Carcases and half-carcases", - ], - "sk": ["-- Trupy a polovičky trupov"], - "et": ["-- rümbad ja poolrümbad"], - "mt": ["-- Karkassi u nofs karkassi"], - } - assert concept.scopeNotes == { - "en": "Frozen carcases and half-carcases of swine", - "fr": "Carcasses ou demi-carcasses, de porcins, congelées", - "de": "Tierkörper oder halbe Tierkörper, von Schweinen, gefroren", - "es": "Canales o medias canales de porcinos, congeladas", - } - - def test_concept_to_dict(concept: Concept) -> None: """It should return a dictionary representation of the Concept instance.""" assert concept.to_dict("sk") == { @@ -204,21 +126,6 @@ def 
test_concept_to_dict(concept: Concept) -> None: } -def test_semantic_relation_from_xml_element( - semantic_relation: SemanticRelation, -) -> None: - """It should return a SemanticRelation instance from an XML element.""" - assert ( - semantic_relation.source_concept_iri - == "http://data.europa.eu/xsp/cn2024/020321000080" - ) - assert ( - semantic_relation.target_concept_iri - == "http://data.europa.eu/xsp/cn2024/020321000010" - ) - assert semantic_relation.type == SemanticRelationType.BROADER - - def test_semantic_relation_to_dict(semantic_relation: SemanticRelation) -> None: """It should return a dictionary representation of the SemanticRelation instance.""" assert semantic_relation.to_dict() == { diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index 52c6298..bd0afd7 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -3,7 +3,6 @@ from http import HTTPStatus from pathlib import Path -from pytest import MonkeyPatch from pytest import raises as pytest_raises from dds_glossary.exceptions import ( @@ -21,51 +20,16 @@ RelationResponse, ) from dds_glossary.services import GlossaryController +from dds_glossary.xml import parse_dataset from ..common import add_collections, add_concept_schemes, add_concepts, add_relations -def _init_datasets(_monkeypatch: MonkeyPatch) -> None: - _monkeypatch.setattr("dds_glossary.database.save_dataset", lambda *_, **__: None) - _monkeypatch.setattr( - GlossaryController, "parse_dataset", lambda *_, **__: ([], [], [], []) - ) - - -def test_glossary_controller_parse_dataset( - controller: GlossaryController, - file_rdf: Path, -) -> None: - """Test the GlossaryController parse_dataset method.""" - concept_scheme_iri = "http://data.europa.eu/xsp/cn2024/cn2024" - concept1_iri = "http://data.europa.eu/xsp/cn2024/020321000080" - concept2_iri = "http://data.europa.eu/xsp/cn2024/020321000010" - collection1_iri = "https://example.org/collection1" - collection2_iri = "https://example.org/collection2" 
- concept_schemes, concepts, collections, semantic_relations = ( - controller.parse_dataset(dataset_path=file_rdf) - ) - - assert len(concept_schemes) == 1 - assert len(concepts) == 2 - assert len(collections) == 2 - assert len(semantic_relations) == 1 - assert concept_schemes[0].iri == concept_scheme_iri - assert concepts[0].iri == concept1_iri - assert concepts[1].iri == concept2_iri - assert collections[0].iri == collection1_iri - assert collections[1].iri == collection2_iri - assert semantic_relations[0].source_concept_iri == concept1_iri - assert semantic_relations[0].target_concept_iri == concept2_iri - - def test_init_dataset_with_failed_datasets( controller: GlossaryController, - monkeypatch: MonkeyPatch, file_rdf: Path, ) -> None: """Test the GlossaryController init_datasets method with an exception.""" - _init_datasets(monkeypatch) GlossaryController.datasets = [ Dataset(name="sample.rdf", url=str(file_rdf)), Dataset(name="test.rdf", url="test.rdf"), @@ -74,18 +38,16 @@ def test_init_dataset_with_failed_datasets( response = controller.init_datasets() files = list(controller.data_dir.iterdir()) - e_schemes, e_concepts, e_collections, e_relations = controller.parse_dataset( - file_rdf - ) - a_schemes, a_concepts, a_collections, a_relations = controller.parse_dataset( - files[0] - ) + exp_parsed_dataset = parse_dataset(file_rdf) + act_parsed_dataset = parse_dataset(files[0]) assert len(files) == 1 - assert e_schemes == a_schemes - assert e_concepts == a_concepts - assert e_collections == a_collections - assert e_relations == a_relations + assert exp_parsed_dataset.concept_schemes == act_parsed_dataset.concept_schemes + assert exp_parsed_dataset.concepts == act_parsed_dataset.concepts + assert exp_parsed_dataset.collections == act_parsed_dataset.collections + assert ( + exp_parsed_dataset.semantic_relations == act_parsed_dataset.semantic_relations + ) assert response.failed_datasets == [ FailedDataset( name="test.rdf", diff --git a/tests/unit/test_xml.py 
b/tests/unit/test_xml.py new file mode 100644 index 0000000..eb68823 --- /dev/null +++ b/tests/unit/test_xml.py @@ -0,0 +1,126 @@ +"""Tests for dds_glossary.xml module.""" + +from pathlib import Path + +from dds_glossary.enums import SemanticRelationType +from dds_glossary.model import ( + Collection, + Concept, + ConceptScheme, + InCollection, + InScheme, + SemanticRelation, +) +from dds_glossary.xml import parse_dataset + + +def test_concept_scheme_from_xml(concept_scheme: ConceptScheme) -> None: + """It should return a ConceptScheme instance from an XML element.""" + assert concept_scheme.iri == "http://data.europa.eu/xsp/cn2024/cn2024" + assert concept_scheme.notation == "CN 2024" + assert ( + concept_scheme.scopeNote + == "http://publications.europa.eu/resource/oj/JOC_2019_119_R_0001" + ) + assert concept_scheme.prefLabels == { + "en": "Combined Nomenclature, 2024 (CN 2024)", + "sk": "Kombinovaná Nomenklatúra, 2024 (KN 2024)", + "et": "Kombineeritud Nomenklatuur, 2024 (KN 2024)", + "mt": "Nomenklatura Magħquda, 2024 (NM 2024)", + } + + +def test_collection_from_xml(collection: Collection) -> None: + """It should return a Collection instance from an XML element.""" + assert collection.iri == "https://example.org/collection1" + assert collection.notation == "Collection1Notation" + assert collection.prefLabels == { + "en": "Collection1PrefLabel", + } + + +def test_concept_from_xml(concept: Concept) -> None: + """It should return a Concept instance from an XML element.""" + assert concept.iri == "http://data.europa.eu/xsp/cn2024/020321000080" + assert concept.identifier == "020321000080" + assert concept.notation == "0203 21" + assert concept.prefLabels == { + "en": "0203 21 -- Carcases and half-carcases", + "sk": "0203 21 -- Trupy a polovičky trupov", + "et": "0203 21 -- rümbad ja poolrümbad", + "mt": "0203 21 -- Karkassi u nofs karkassi", + } + assert concept.altLabels == { + "en": [ + "-- Carcases and half-carcases", + "0203 21 -- Carcases and half-carcases", + 
], + "sk": ["-- Trupy a polovičky trupov"], + "et": ["-- rümbad ja poolrümbad"], + "mt": ["-- Karkassi u nofs karkassi"], + } + assert concept.scopeNotes == { + "en": "Frozen carcases and half-carcases of swine", + "fr": "Carcasses ou demi-carcasses, de porcins, congelées", + "de": "Tierkörper oder halbe Tierkörper, von Schweinen, gefroren", + "es": "Canales o medias canales de porcinos, congeladas", + } + + +def test_semantic_relations_from_xml( + semantic_relation: SemanticRelation, +) -> None: + """It should return a SemanticRelation instance from an XML element.""" + assert ( + semantic_relation.source_concept_iri + == "http://data.europa.eu/xsp/cn2024/020321000080" + ) + assert ( + semantic_relation.target_concept_iri + == "http://data.europa.eu/xsp/cn2024/020321000010" + ) + assert semantic_relation.type == SemanticRelationType.BROADER + + +def test_in_scheme_from_xml(in_scheme: InScheme) -> None: + """It should return an InScheme instance from an XML element.""" + assert in_scheme.member_iri == "http://data.europa.eu/xsp/cn2024/020321000080" + assert in_scheme.scheme_iri == "http://data.europa.eu/xsp/cn2024/cn2024" + + +def test_in_collection_from_xml(in_collection: InCollection) -> None: + """It should return an InCollection instance from an XML element.""" + assert in_collection.collection_iri == "https://example.org/collection1" + assert in_collection.member_iri == "http://data.europa.eu/xsp/cn2024/020321000080" + + +def test_glossary_controller_parse_dataset(file_rdf: Path) -> None: + """Test the parse_dataset function.""" + concept_scheme_iri = "http://data.europa.eu/xsp/cn2024/cn2024" + concept1_iri = "http://data.europa.eu/xsp/cn2024/020321000080" + concept2_iri = "http://data.europa.eu/xsp/cn2024/020321000010" + collection1_iri = "https://example.org/collection1" + collection2_iri = "https://example.org/collection2" + parsed_dataset = parse_dataset(dataset_path=file_rdf) + + assert len(parsed_dataset.concept_schemes) == 1 + assert
len(parsed_dataset.concepts) == 2 + assert len(parsed_dataset.collections) == 2 + assert len(parsed_dataset.semantic_relations) == 1 + assert len(parsed_dataset.in_schemes) == 4 + assert len(parsed_dataset.in_collections) == 2 + assert parsed_dataset.concept_schemes[0].iri == concept_scheme_iri + assert parsed_dataset.concepts[0].iri == concept1_iri + assert parsed_dataset.concepts[1].iri == concept2_iri + assert parsed_dataset.collections[0].iri == collection1_iri + assert parsed_dataset.collections[1].iri == collection2_iri + assert parsed_dataset.semantic_relations[0].source_concept_iri == concept1_iri + assert parsed_dataset.semantic_relations[0].target_concept_iri == concept2_iri + assert parsed_dataset.in_schemes[0].member_iri == collection1_iri + assert parsed_dataset.in_schemes[0].scheme_iri == concept_scheme_iri + assert parsed_dataset.in_schemes[2].member_iri == concept1_iri + assert parsed_dataset.in_schemes[2].scheme_iri == concept_scheme_iri + assert parsed_dataset.in_collections[0].collection_iri == collection1_iri + assert parsed_dataset.in_collections[0].member_iri == concept1_iri + assert parsed_dataset.in_collections[1].collection_iri == collection1_iri + assert parsed_dataset.in_collections[1].member_iri == collection2_iri