Source code for ezgpx.parsers.kml_parser

"""
This module contains the KMLParser class.
"""

import io
import warnings
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import IO

from ..complex_types import Trk, Trkseg, Wpt
from .xml_parser import XMLParser


class KMLParser(XMLParser):
    """
    KML file parser.
    """

    def __init__(
        self,
        source: str | Path | IO[str] | IO[bytes] | bytes,
        xml_schemas: bool = True,
        xml_extensions_schemas: bool = False,
    ) -> None:
        """
        KML file parser.

        Args:
            source (str | Path | IO[str] | IO[bytes] | bytes): Path to a file
                or a file-like object to parse. A raw ``bytes`` object is
                wrapped in an in-memory binary stream first.
            xml_schemas (bool, optional): Toggle schema verification during
                parsing. Defaults to True.
            xml_extensions_schemas (bool, optional): Toggle extensions schema
                verification during parsing. Requires an internet connection
                and is not guaranteed to work. Defaults to False.

        Raises:
            ValueError: Source is not valid (not raised in this method
                directly; presumably raised by the XMLParser initialiser).
        """
        # Bytes object
        if isinstance(source, bytes):
            source = io.BytesIO(source)

        # Initialise XMLParser and parse KML file
        super().__init__(source, xml_schemas, xml_extensions_schemas)

    @staticmethod
    def _split_coordinates(coordinates: str | None) -> list[str]:
        """
        Split the text of a coordinates element into one string per tuple.

        Tuples are separated on any run of whitespace (spaces, tabs, line
        breaks), so leading/trailing indentation and tuples written one per
        line are handled the same way as space-separated tuples.

        Args:
            coordinates (str | None): Raw text of a coordinates element.

        Returns:
            list[str]: One "lon,lat,alt"-style string per tuple. Empty if
            the text is None, empty or only whitespace.
        """
        if not coordinates:
            return []
        return coordinates.split()

    def find_precisions(self) -> None:
        """
        Find decimal precision of any type of value in a KML file (latitude,
        elevation...).

        Only the first coordinate tuple of the first LineString of the first
        Placemark of the first Document is inspected. The results are stored
        in ``self.precisions`` under the keys "lat_lon" and "elevation".

        Raises:
            IndexError: The file has no Document, Placemark or LineString
                directly below the expected parent, the LineString has no
                coordinates, or the first tuple has fewer than three
                comma-separated values.
        """
        # Point
        documents = self.xml_root.findall("Document", self.xmlns)
        placemarks = documents[0].findall("Placemark", self.xmlns)
        linestrings = placemarks[0].findall("LineString", self.xmlns)

        points = self._split_coordinates(
            self.find_text(linestrings[0], "coordinates")
        )
        first_point = points[0].split(",")

        # The first value of the tuple drives the lat/lon precision,
        # the third one the elevation precision.
        self.precisions["lat_lon"] = self._find_precision(first_point[0])
        self.precisions["elevation"] = self._find_precision(first_point[2])

    # Legacy, disabled implementation kept for reference.
    # def parse_linestring(self, linestring) -> list[str]:
    #     """
    #     Parse LineString element from KML file.
    #
    #     Args:
    #         placemark (ET.Element): Parsed LineString element.
    #
    #     Returns:
    #         list[str]: Informations contained in the LineString element
    #             (strings of coordinates).
    #     """
    #     if linestring is None:
    #         return None
    #
    #     linestrings_data = []
    #     coordinatess = linestring.findall("opengis:coordinates", self.name_space)
    #     for coordinates in coordinatess:
    #         linestrings_data.append(coordinates.text)
    #
    #     return linestrings_data

    def parse_placemark(self, placemark: ET.Element | None) -> dict | None:
        """
        Parse Placemark element from KML file.

        Args:
            placemark (ET.Element | None): Parsed Placemark element.

        Returns:
            dict | None: Information contained in the Placemark element:
            "name" holds the result of looking up the Placemark name and
            "linestrings_data" holds one coordinates lookup result per
            LineString child. None if ``placemark`` is None.
        """
        if placemark is None:
            return None

        linestrings = placemark.findall("LineString", self.xmlns)
        return {
            "name": self.find_text(placemark, "name"),
            "linestrings_data": [
                self.find_text(linestring, "coordinates")
                for linestring in linestrings
            ],
        }

    def parse_document(self, document: ET.Element | None) -> list[dict] | None:
        """
        Parse Document element from KML file.

        Args:
            document (ET.Element | None): Parsed Document element.

        Returns:
            list[dict] | None: Information related to the Placemark elements
            contained in the Document element, one entry per Placemark (see
            ``parse_placemark``). None if ``document`` is None.
        """
        if document is None:
            return None

        # name = self.find_text(document, "name")
        placemarks = document.findall("Placemark", self.xmlns)
        return [self.parse_placemark(placemark) for placemark in placemarks]

    def parse_root_document(self) -> None:
        """
        Parse Document elements from KML file.

        Every Document holding exactly one Placemark is converted to a single
        track with one segment per non-empty LineString, and that track is
        assigned to ``self.gpx.trk``. Documents with any other number of
        Placemarks only trigger a warning.

        Raises:
            IndexError: A coordinate tuple has fewer than three
                comma-separated values.
            ValueError: A coordinate value is not a valid float.
        """
        documents = self.xml_root.findall("Document", self.xmlns)
        for document in documents:
            placemarks_data = self.parse_document(document)
            if len(placemarks_data) != 1:
                warnings.warn("Oops, not yet implemented...")
                continue

            placemark_data = placemarks_data[0]
            trkseg = []
            for coordinates in placemark_data["linestrings_data"]:
                points = self._split_coordinates(coordinates)
                if not points:
                    # LineString without usable coordinates: nothing to add.
                    continue

                trkpt = []
                for point in points:
                    # Tuple order in the file is first value -> lon,
                    # second -> lat, third -> elevation.
                    values = point.split(",")
                    trkpt.append(
                        Wpt(
                            tag="trkpt",
                            lat=float(values[1]),
                            lon=float(values[0]),
                            ele=float(values[2]),
                        )
                    )
                trkseg.append(Trkseg(trkpt=trkpt))

            # NOTE(review): this assignment replaces any track set by a
            # previous Document; confirm that is intended for multi-Document
            # files.
            self.gpx.trk = [Trk(name=placemark_data["name"], trkseg=trkseg)]

    def add_properties(self) -> None:
        """
        Add missing properties (properties that do not exist in KML/KMZ files
        but mandatory in GPX files).
        """
        self.gpx.creator = "ezGPX"
        self.gpx.xmlns = "http://www.topografix.com/GPX/1/1"
        self.gpx.version = "1.1"
        self.gpx.xmlns_xsi = "http://www.w3.org/2001/XMLSchema-instance"
        self.gpx.xsi_schema_location = [
            "http://www.topografix.com/GPX/1/1",
            "http://www.topografix.com/GPX/1/1/gpx.xsd",
        ]

    def parse(self) -> dict:
        """
        Parse KML file.

        Returns:
            dict: Gpx, precisions and time_format, under the keys "gpx",
            "precisions" and "time_format".

        Raises:
            Exception: Any error raised while reading the XML or while
                converting the Document elements is re-raised unchanged
                after a warning has been issued.

        Example:
            >>> # TODO
        """
        # Parse KML file
        try:
            self.xml_tree = ET.parse(self.source)
            self.xml_root = self.xml_tree.getroot()
        except Exception as err:
            # warnings.warn() does not apply %-formatting and its second
            # positional argument is the warning category, so the message
            # has to be built before the call.
            warnings.warn(
                f"Unexpected {err}, {type(err)}.\nUnable to parse KML file."
            )
            raise

        # Add properties
        self.add_properties()

        # Find precisions
        self.find_precisions()

        # Check XML schemas
        # NOTE(review): `xml_schemas` is also the name of a constructor
        # argument; confirm XMLParser exposes it as a callable.
        self.xml_schemas()

        # Parse Document
        try:
            self.parse_root_document()
        except Exception:
            warnings.warn("Unable to parse tracks in KML file.")
            raise

        return {
            "gpx": self.gpx,
            "precisions": self.precisions,
            "time_format": self.time_format,
        }