Source code for ezgpx.parsers.gpx_parser

"""
This module contains the GPXParser class.
"""

import xml.etree.ElementTree as ET
from itertools import pairwise
from pathlib import Path
from typing import IO

from ..complex_types import (
    Bounds,
    Copyright,
    Email,
    Extensions,
    Link,
    Metadata,
    Person,
    Pt,
    Ptseg,
    Rte,
    Trk,
    Trkseg,
    Wpt,
)
from .xml_parser import XMLParser


class GPXParser(XMLParser):
    """
    GPX file parser.

    Walks the ElementTree of a GPX document and builds the corresponding
    complex-type objects (Metadata, Wpt, Rte, Trk, ...).
    """

    def __init__(
        self,
        source: str | Path | IO[str] | IO[bytes] | bytes,
        xml_schemas: bool = True,
        xml_extensions_schemas: bool = False,
    ) -> None:
        """
        Initialise GPXParser instance.

        Args:
            source (str | Path | IO[str] | IO[bytes] | bytes): Path to a file
                or a file-like object to parse.
            xml_schemas (bool, optional): Toggle schema verification during
                parsing. Defaults to True.
            xml_extensions_schemas (bool, optional): Toggle extensions schema
                verification during parsing. Requires an internet connection
                and is not guaranteed to work. Defaults to False.
        """
        # All set-up is delegated to XMLParser
        super().__init__(source, xml_schemas, xml_extensions_schemas)

    def _first_track_point(self) -> ET.Element | None:
        """
        Find the first track point of the first segment of the first track.

        Returns:
            ET.Element | None: First "trkpt" element, or None if the file has
                no track, no track segment or no track point.
        """
        tracks = self.xml_root.findall("trk", self.xmlns)
        segments = tracks[0].findall("trkseg", self.xmlns) if tracks else []
        points = segments[0].findall("trkpt", self.xmlns) if segments else []
        return points[0] if points else None

    def _find_precisions(self) -> None:
        """
        Find decimal precision of any type of value in a GPX file (latitude,
        elevation...).
        Also find if the GPX file contains elevation data.

        Only the first track point is inspected; nothing is updated when the
        file has no track point.
        """
        # TODO use lat/lon from waypoints, points, track points?
        # TODO store coordinates as string to avoid precision?
        point = self._first_track_point()
        if point is None:
            return

        ele_text = point.findtext("ele", namespaces=self.xmlns)
        self.ele_data = ele_text is not None
        self.precisions["lat_lon"] = self._find_precision(point.get("lat"))
        # ele_text may be None here; it is passed through unchanged
        self.precisions["elevation"] = self._find_precision(ele_text)

    def _find_time_element(self) -> str | None:
        """
        Find a time element in GPX file.

        The metadata time is preferred; the time of the first track point is
        used as a fallback.

        Returns:
            str | None: Text of the time element, or None if there is none.
        """
        # TODO use time from metadata, waypoints, points, track points, other?
        # TODO store times as string to avoid format?

        # Use time from metadata
        metadata = self.xml_root.find("metadata", self.xmlns)
        if metadata is not None:
            time_str = metadata.findtext("time", namespaces=self.xmlns)
            if time_str is not None:
                return time_str

        # Use time from track point
        point = self._first_track_point()
        if point is not None:
            time_str = point.findtext("time", namespaces=self.xmlns)
            if time_str is not None:
                return time_str

        # No time element at all...
        return None

    def _parse_bounds(
        self, bounds: ET.Element | None, tag: str = "bounds"
    ) -> Bounds | None:
        """
        Parse boundsType element.

        Args:
            bounds (ET.Element | None): Parsed bounds element.
            tag (str, optional): XML tag. Defaults to "bounds".

        Returns:
            Bounds | None: Bounds instance, or None if bounds is None.
        """
        if bounds is None:
            return None
        return Bounds(
            bounds.get("minlat"),
            bounds.get("minlon"),
            bounds.get("maxlat"),
            bounds.get("maxlon"),
            tag,
        )

    def _parse_copyright(
        self,
        copyright: ET.Element | None,  # pylint: disable=redefined-builtin
        tag: str = "copyright",
    ) -> Copyright | None:
        """
        Parse copyrightType element.

        Args:
            copyright (ET.Element | None): Parsed copyright element.
            tag (str, optional): XML tag. Defaults to "copyright".

        Returns:
            Copyright | None: Copyright instance, or None if copyright is None.
        """
        if copyright is None:
            return None
        return Copyright(
            copyright.get("author"),
            copyright.findtext("year", namespaces=self.xmlns),
            copyright.findtext("license", namespaces=self.xmlns),
            tag,
        )

    def _parse_email(
        self, email: ET.Element | None, tag: str = "email"
    ) -> Email | None:
        """
        Parse emailType element.

        Args:
            email (ET.Element | None): Parsed email element.
            tag (str, optional): XML tag. Defaults to "email".

        Returns:
            Email | None: Email instance, or None if email is None.
        """
        if email is None:
            return None
        return Email(email.get("id"), email.get("domain"), tag)

    def _parse_extensions(
        self,
        extensions: ET.Element | None,
        element_type: str,
        tag: str = "extensions",
    ) -> Extensions | None:
        """
        Parse extensionsType element.

        Only the first child of the extensions element is parsed.

        Args:
            extensions (ET.Element | None): Parsed extensions element.
            element_type (str): Key under which the parsed values are recorded
                in self.extensions_fields (callers pass the tag of the element
                owning the extensions, e.g. "trkpt" or "gpx").
            tag (str, optional): XML tag. Defaults to "extensions".

        Returns:
            Extensions | None: Extensions instance, or None if extensions is
                None or has no child element.
        """
        if extensions is None:
            return None

        # An empty <extensions/> element has nothing to parse; indexing the
        # first descendant unconditionally would raise IndexError.
        ext = next(iter(extensions), None)
        if ext is None:
            return None

        def construct_dict(e0: ET.Element) -> dict:
            # Element.iter() yields e0 itself first, then every descendant in
            # document order, hence the [1:].
            # NOTE(review): because all descendants (not only direct children)
            # are visited, deeper elements appear both nested and at this
            # level. Kept as is: consumers of this structure are not visible
            # from this module - confirm before changing.
            e1s = list(e0.iter())[1:]
            if e1s:
                d = {"attrib": dict(e0.items()), "elmts": {}}
                for e1 in e1s:
                    d["elmts"][e1.tag] = construct_dict(e1)
                return d
            return {"attrib": {}, "elmts": e0.text}

        values = {ext.tag: construct_dict(ext)}

        # Extensions fields are based on the first occurrence of a type
        # encountered in the file
        if self.extensions_fields.get(element_type) is None:
            self.extensions_fields[element_type] = values

        return Extensions(values, tag)

    def _parse_link(self, link: ET.Element | None, tag: str = "link") -> Link | None:
        """
        Parse linkType element.

        Args:
            link (ET.Element | None): Parsed link element.
            tag (str, optional): XML tag. Defaults to "link".

        Returns:
            Link | None: Link instance, or None if link is None.
        """
        if link is None:
            return None
        return Link(
            link.get("href"),
            link.findtext("text", namespaces=self.xmlns),
            link.findtext("type", namespaces=self.xmlns),
            tag,
        )

    def _parse_metadata(
        self, metadata: ET.Element | None, tag: str = "metadata"
    ) -> Metadata | None:
        """
        Parse metadataType element.

        Args:
            metadata (ET.Element | None): Parsed metadata element.
            tag (str, optional): XML tag. Defaults to "metadata".

        Returns:
            Metadata | None: Metadata instance, or None if metadata is None.
        """
        if metadata is None:
            return None
        return Metadata(
            metadata.findtext("name", namespaces=self.xmlns),
            metadata.findtext("desc", namespaces=self.xmlns),
            self._parse_person(metadata.find("author", self.xmlns)),
            self._parse_copyright(metadata.find("copyright", self.xmlns)),
            [self._parse_link(ll) for ll in metadata.findall("link", self.xmlns)],
            self.find_time(metadata, "time"),
            metadata.findtext("keywords", namespaces=self.xmlns),
            self._parse_bounds(metadata.find("bounds", self.xmlns)),
            self._parse_extensions(metadata.find("extensions", self.xmlns), tag),
            tag,
        )

    def _parse_person(
        self, person: ET.Element | None, tag: str = "author"
    ) -> Person | None:
        """
        Parse personType element.

        Args:
            person (ET.Element | None): Parsed person element.
            tag (str, optional): XML tag. Defaults to "author".

        Returns:
            Person | None: Person instance, or None if person is None.
        """
        if person is None:
            return None
        return Person(
            person.findtext("name", namespaces=self.xmlns),
            self._parse_email(person.find("email", self.xmlns)),
            self._parse_link(person.find("link", self.xmlns)),
            tag,
        )

    def _parse_point_segment(
        self, point_segment: ET.Element | None, tag: str = "ptseg"
    ) -> Ptseg | None:
        """
        Parse ptsegType element.

        Args:
            point_segment (ET.Element | None): Parsed point segment element.
            tag (str, optional): XML tag. Defaults to "ptseg".

        Returns:
            Ptseg | None: Ptseg instance, or None if point_segment is None.
        """
        if point_segment is None:
            return None
        # The namespace map is required here as in every other lookup:
        # without it "pt" never matches elements of a namespaced document.
        return Ptseg(
            [self._parse_point(p) for p in point_segment.findall("pt", self.xmlns)],
            tag,
        )

    def _parse_point(self, point: ET.Element | None, tag: str = "pt") -> Pt | None:
        """
        Parse ptType element.

        Args:
            point (ET.Element | None): Parsed point element.
            tag (str, optional): XML tag. Defaults to "pt".

        Returns:
            Pt | None: Pt instance, or None if point is None.
        """
        if point is None:
            return None
        return Pt(
            point.get("lat"),
            point.get("lon"),
            self.find_float(point, "ele"),
            self.find_time(point, "time"),
            tag,
        )

    def _parse_route(self, route: ET.Element | None, tag: str = "rte") -> Rte | None:
        """
        Parse rteType element.

        Args:
            route (ET.Element | None): Parsed route element.
            tag (str, optional): XML tag. Defaults to "rte".

        Returns:
            Rte | None: Rte instance, or None if route is None.
        """
        if route is None:
            return None
        return Rte(
            route.findtext("name", namespaces=self.xmlns),
            route.findtext("cmt", namespaces=self.xmlns),
            route.findtext("desc", namespaces=self.xmlns),
            route.findtext("src", namespaces=self.xmlns),
            [self._parse_link(ll) for ll in route.findall("link", self.xmlns)],
            self.find_int(route, "number"),
            route.findtext("type", namespaces=self.xmlns),
            self._parse_extensions(route.find("extensions", self.xmlns), tag),
            [
                self._parse_waypoint(w, "rtept")
                for w in route.findall("rtept", self.xmlns)
            ],
            tag,
        )

    def _parse_track_segment(
        self, track_segment: ET.Element | None, tag: str = "trkseg"
    ) -> Trkseg | None:
        """
        Parse trksegType element.

        Args:
            track_segment (ET.Element | None): Parsed track segment element.
            tag (str, optional): XML tag. Defaults to "trkseg".

        Returns:
            Trkseg | None: Trkseg instance, or None if track_segment is None.
        """
        if track_segment is None:
            return None
        return Trkseg(
            [
                self._parse_waypoint(track_point, "trkpt")
                for track_point in track_segment.findall("trkpt", self.xmlns)
            ],
            self._parse_extensions(track_segment.find("extensions", self.xmlns), tag),
            tag,
        )

    def _parse_track(self, track: ET.Element | None, tag: str = "trk") -> Trk | None:
        """
        Parse trkType element.

        Args:
            track (ET.Element | None): Parsed track element.
            tag (str, optional): XML tag. Defaults to "trk".

        Returns:
            Trk | None: Trk instance, or None if track is None.
        """
        if track is None:
            return None
        return Trk(
            track.findtext("name", namespaces=self.xmlns),
            track.findtext("cmt", namespaces=self.xmlns),
            track.findtext("desc", namespaces=self.xmlns),
            track.findtext("src", namespaces=self.xmlns),
            [self._parse_link(ll) for ll in track.findall("link", self.xmlns)],
            self.find_int(track, "number"),
            track.findtext("type", namespaces=self.xmlns),
            self._parse_extensions(track.find("extensions", self.xmlns), tag),
            [self._parse_track_segment(t) for t in track.findall("trkseg", self.xmlns)],
            tag,
        )

    def _parse_waypoint(
        self, waypoint: ET.Element | None, tag: str = "wpt"
    ) -> Wpt | None:
        """
        Parse wptType element.

        Also used for route points ("rtept") and track points ("trkpt"),
        which are passed in with their own tag.

        Args:
            waypoint (ET.Element | None): Parsed waypoint element.
            tag (str, optional): XML tag. Defaults to "wpt".

        Returns:
            Wpt | None: Wpt instance, or None if waypoint is None.
        """
        if waypoint is None:
            return None
        return Wpt(
            waypoint.get("lat"),
            waypoint.get("lon"),
            self.find_float(waypoint, "ele"),
            self.find_time(waypoint, "time"),
            self.find_float(waypoint, "magvar"),
            self.find_float(waypoint, "geoidheight"),
            waypoint.findtext("name", namespaces=self.xmlns),
            waypoint.findtext("cmt", namespaces=self.xmlns),
            waypoint.findtext("desc", namespaces=self.xmlns),
            waypoint.findtext("src", namespaces=self.xmlns),
            [self._parse_link(ll) for ll in waypoint.findall("link", self.xmlns)],
            waypoint.findtext("sym", namespaces=self.xmlns),
            waypoint.findtext("type", namespaces=self.xmlns),
            waypoint.findtext("fix", namespaces=self.xmlns),
            self.find_int(waypoint, "sat"),
            self.find_float(waypoint, "hdop"),
            self.find_float(waypoint, "vdop"),
            self.find_float(waypoint, "pdop"),
            self.find_float(waypoint, "ageofdgpsdata"),
            # NOTE(review): GPX 1.1 declares dgpsid as an integer station id;
            # find_float is kept because the Wpt field type is not visible
            # from this module - confirm.
            self.find_float(waypoint, "dgpsid"),
            self._parse_extensions(waypoint.find("extensions", self.xmlns), tag),
            tag,
        )

    def _parse_gpx(self) -> None:
        """
        Parse XML properties.

        Fills self.xmlns, self.xsi_schema_location and the attributes of
        self.gpx (creator, version, metadata, wpt, rte, trk, extensions).

        Raises:
            KeyError: If the root element has no "creator" or "version"
                attribute.
        """
        # Namespace declarations are collected in a dedicated pass: each
        # "start-ns" event carries a (prefix, uri) tuple.
        # NOTE(review): parse() has already read self.source through ET.parse
        # before this second read; if self.source can be a file-like object
        # that is not rewound, this pass would start at EOF - confirm how
        # XMLParser stores source.
        self.xmlns = {
            prefix: uri
            for _, (prefix, uri) in ET.iterparse(self.source, events=["start-ns"])
        }

        # xsi:schemaLocation is an optional, whitespace-separated list of
        # "namespace location" pairs; split() copes with spaces, tabs and
        # newlines alike, and an absent attribute yields an empty mapping.
        schema_loc = self.xml_root.get(
            "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation", ""
        ).split()
        # Consecutive, non-overlapping pairs: namespace -> schema location
        self.xsi_schema_location = dict(zip(schema_loc[::2], schema_loc[1::2]))

        self.gpx.creator = self.xml_root.attrib["creator"]
        self.gpx.version = self.xml_root.attrib["version"]
        self.gpx.metadata = self._parse_metadata(
            self.xml_root.find("metadata", self.xmlns)
        )
        self.gpx.wpt = [
            self._parse_waypoint(w) for w in self.xml_root.findall("wpt", self.xmlns)
        ]
        self.gpx.rte = [
            self._parse_route(r) for r in self.xml_root.findall("rte", self.xmlns)
        ]
        self.gpx.trk = [
            self._parse_track(t) for t in self.xml_root.findall("trk", self.xmlns)
        ]
        self.gpx.extensions = self._parse_extensions(
            self.xml_root.find("extensions", self.xmlns), "gpx"
        )

    def parse(self) -> dict:
        """
        Parse GPX file.

        Returns:
            dict: Parsing results under the keys "gpx", "xmlns",
                "xsi_schema_location", "ele_data", "time_data", "precisions",
                "time_format" and "extensions_fields".

        Raises:
            Exception: If the file cannot be read or parsed; the original
                error is chained as the cause.
        """
        # Parse GPX file
        try:
            self.xml_tree = ET.parse(self.source)
            self.xml_root = self.xml_tree.getroot()
            self._parse_gpx()
        except Exception as e:
            raise Exception("Unable to parse GPX file.") from e

        # Check XML schemas
        self.xml_schemas()

        # Find precisions
        self._find_precisions()

        # Find time format
        self._find_time_format(self._find_time_element())

        return {
            "gpx": self.gpx,
            "xmlns": self.xmlns,
            "xsi_schema_location": self.xsi_schema_location,
            "ele_data": self.ele_data,
            "time_data": self.time_data,
            "precisions": self.precisions,
            "time_format": self.time_format,
            "extensions_fields": self.extensions_fields,
        }