Source code for ezgpx.parsers.xml_parser

"""
This module contains the XMLParser class.
"""

import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path
from typing import IO

import dateutil
import dateutil.parser

from ..utils import check_xml_extensions_schemas, check_xml_schema
from .parser import Parser


class XMLParser(Parser):
    """
    XML file parser.

    Stores the XML-specific parsing state (namespace map, schema locations,
    extension fields, parsed tree and root) and offers typed look-up helpers
    that resolve sub-elements through the namespace map ``self.xmlns``.
    """

    def __init__(
        self,
        source: str | Path | IO[str] | IO[bytes] | bytes,
        xml_schema: bool = True,
        xml_extensions_schemas: bool = False,
    ) -> None:
        """
        Initialise XML Parser instance.

        Args:
            source (str | Path | IO[str] | IO[bytes] | bytes): Path to a file
                or a file-like object to parse.
            xml_schema (bool, optional): Toggle schema verification during
                parsing. Defaults to True.
            xml_extensions_schemas (bool, optional): Toggle extensions schema
                verification during parsing. Requires internet connection and
                is not guaranteed to work. Defaults to False.
        """
        super().__init__(source)

        # The namespace map starts empty: per the original TODO, collecting
        # it from the source ("start-ns" events) was moved to parse_gpx.
        self.xmlns: dict = {}
        self.xsi_schema_location: dict = {}
        self.extensions_fields: dict = {}

        # Validation toggles read by xml_schemas().
        self.xml_schema: bool = xml_schema
        self.xml_extensions_schemas: bool = xml_extensions_schemas

        # Not set in this constructor; they stay None until assigned later.
        self.xml_tree: ET.ElementTree | None = None
        self.xml_root: ET.Element | None = None

    def find_sub_element(
        self, element: ET.Element, sub_element: str
    ) -> ET.Element | None:
        """
        Find sub-element.

        Thin wrapper around ``Element.find`` that always passes the parser's
        namespace map, so callers can use prefixed names.

        Args:
            element (ET.Element): Parsed element from GPX file.
            sub_element (str): Sub-element name.

        Returns:
            ET.Element | None: First matching sub-element, or None if there
            is no match.
        """
        return element.find(sub_element, self.xmlns)

    def _find_stripped_text(
        self, element: ET.Element, sub_element: str
    ) -> str | None:
        """
        Return the stripped text of a sub-element, or None if unusable.

        None is returned when the sub-element is missing, has no text
        (e.g. ``<ele/>``), or contains only whitespace.
        """
        found = self.find_sub_element(element, sub_element)
        if found is None or found.text is None:
            return None
        # An empty string after stripping is treated like a missing value.
        return found.text.strip() or None

    def find_text(self, element: ET.Element, sub_element: str) -> str | None:
        """
        Find text from sub-element.

        Args:
            element (ET.Element): Parsed element from GPX file.
            sub_element (str): Sub-element name.

        Returns:
            str | None: Raw (unstripped) text of the sub-element, or None if
            the sub-element is missing or has no text.
        """
        found = self.find_sub_element(element, sub_element)
        if found is None:
            return None
        return found.text

    def find_int(self, element: ET.Element, sub_element: str) -> int | None:
        """
        Find integer value from sub-element.

        Args:
            element (ET.Element): Parsed element from GPX file.
            sub_element (str): Sub-element name.

        Returns:
            int | None: Integer value from sub-element, or None if the
            sub-element is missing or empty.

        Raises:
            ValueError: If the text is not a valid integer literal.
        """
        text = self._find_stripped_text(element, sub_element)
        return None if text is None else int(text)

    def find_float(self, element: ET.Element, sub_element: str) -> float | None:
        """
        Find floating point value from sub-element.

        Args:
            element (ET.Element): Parsed element from GPX file.
            sub_element (str): Sub-element name.

        Returns:
            float | None: Floating point value from sub-element, or None if
            the sub-element is missing or empty.

        Raises:
            ValueError: If the text is not a valid float literal.
        """
        text = self._find_stripped_text(element, sub_element)
        return None if text is None else float(text)

    def find_time(self, element: ET.Element, sub_element: str) -> datetime | None:
        """
        Find time value from sub-element.

        Args:
            element (ET.Element): Parsed element from GPX file.
            sub_element (str): Sub-element name.

        Returns:
            datetime | None: Date-time value parsed from the sub-element
            text, or None if the sub-element is missing or empty.

        Note:
            Text that cannot be parsed propagates the error raised by
            ``dateutil.parser.parse``.
        """
        text = self._find_stripped_text(element, sub_element)
        return None if text is None else dateutil.parser.parse(text)

    def xml_schemas(self) -> None:
        """
        Check XML schemas during parsing.

        Each check only runs when its toggle (``xml_schema`` /
        ``xml_extensions_schemas``) is enabled.

        Raises:
            ValueError: If an enabled schema check reports the file as
                invalid.
        """
        # NOTE(review): self.source and self.gpx are not set in this class;
        # presumably provided by the Parser base class -- confirm.

        # Check XML schema
        if self.xml_schema and not check_xml_schema(self.source, self.gpx.version):
            raise ValueError("Invalid GPX file (does not follow XML schema).")

        # Check XML extension schemas
        if self.xml_extensions_schemas and not check_xml_extensions_schemas(
            self.source
        ):
            raise ValueError(
                "Invalid GPX file (does not follow XML extensions schemas)."
            )