"""
This module contains the KMLParser class.
"""
import io
import warnings
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import IO
from ..complex_types import Trk, Trkseg, Wpt
from .xml_parser import XMLParser
class KMLParser(XMLParser):
    """
    KML file parser.

    Reads the ``LineString`` coordinates of the ``Placemark`` elements found
    in the ``Document`` elements of a KML file and stores them as tracks on
    ``self.gpx``.

    NOTE(review): ``self.gpx``, ``self.precisions``, ``self.time_format``,
    ``self.xmlns``, ``self.source``, ``self.find_text``,
    ``self._find_precision`` and ``self.xml_schemas`` are not defined in this
    class; they are assumed to be provided by ``XMLParser`` -- confirm there.
    """

    def __init__(
        self,
        source: str | Path | IO[str] | IO[bytes] | bytes,
        xml_schemas: bool = True,
        xml_extensions_schemas: bool = False,
    ) -> None:
        """
        KML file parser.

        Args:
            source (str | Path | IO[str] | IO[bytes] | bytes): Path to a
                file, a file-like object or the raw bytes to parse.
            xml_schemas (bool, optional): Toggle schema verification
                during parsing. Defaults to True.
            xml_extensions_schemas (bool, optional): Toggle extensions
                schema verification during parsing. Requires an internet
                connection and is not guaranteed to work. Defaults to
                False.

        Raises:
            ValueError: Source is not valid (presumably raised by
                ``XMLParser.__init__``; nothing in this method raises it).
        """
        # Raw bytes are wrapped in a file-like object before being handed over.
        if isinstance(source, bytes):
            source = io.BytesIO(source)

        # The rest of the initialisation is delegated to XMLParser.
        super().__init__(source, xml_schemas, xml_extensions_schemas)

    @staticmethod
    def _split_coordinates(coordinates: str | None) -> list[list[str]]:
        """
        Split the text of a coordinates element into tuples of raw fields.

        Tuples are separated by any run of whitespace (spaces, tabs, line
        breaks) and the fields of a tuple by commas.

        Args:
            coordinates (str | None): Text of a coordinates element.

        Returns:
            list[list[str]]: One list of field strings per coordinate
                tuple; empty when there is no text.
        """
        if not coordinates:
            return []
        # str.split() without argument drops leading/trailing/repeated
        # whitespace, so no empty tuple is ever produced.
        return [fields.split(",") for fields in coordinates.split()]

    @staticmethod
    def _build_trkpt(fields: list[str]) -> Wpt:
        """
        Build a track point from the fields of one coordinate tuple.

        Args:
            fields (list[str]): Fields in KML order (longitude, latitude
                and optionally altitude).

        Returns:
            Wpt: Track point; ``ele`` is only passed when an altitude is
                present in the tuple.
        """
        attributes = {
            "tag": "trkpt",
            "lat": float(fields[1]),
            "lon": float(fields[0]),
        }
        # The altitude is the optional third field of a tuple.
        if len(fields) > 2 and fields[2]:
            attributes["ele"] = float(fields[2])
        return Wpt(**attributes)

    def find_precisions(self) -> None:
        """
        Find decimal precision of any type of value in a KML file (latitude,
        elevation...).

        The first coordinate tuple of the first LineString that contains
        coordinates is used. ``self.precisions`` is left untouched when no
        such tuple exists, and the elevation precision is only updated when
        the tuple has an altitude field.
        """
        linestrings = (
            linestring
            for document in self.xml_root.findall("Document", self.xmlns)
            for placemark in document.findall("Placemark", self.xmlns)
            for linestring in placemark.findall("LineString", self.xmlns)
        )
        for linestring in linestrings:
            points = self._split_coordinates(
                self.find_text(linestring, "coordinates")
            )
            if not points:
                continue
            first_point = points[0]
            self.precisions["lat_lon"] = self._find_precision(first_point[0])
            if len(first_point) > 2 and first_point[2]:
                self.precisions["elevation"] = self._find_precision(
                    first_point[2]
                )
            return

    # Disabled helper, kept for reference.
    # def parse_linestring(self, linestring) -> list[str]:
    #     """
    #     Parse LineString element from KML file.
    #     Args:
    #         linestring (ET.Element): Parsed LineString element.
    #     Returns:
    #         list[str]: Information contained in the LineString element
    #             (strings of coordinates).
    #     """
    #     if linestring is None:
    #         return None
    #     linestrings_data = []
    #     coordinatess = linestring.findall("opengis:coordinates", self.name_space)
    #     for coordinates in coordinatess:
    #         linestrings_data.append(coordinates.text)
    #     return linestrings_data

    def parse_placemark(self, placemark: ET.Element | None) -> dict | None:
        """
        Parse Placemark element from KML file.

        Args:
            placemark (ET.Element | None): Parsed Placemark element.

        Returns:
            dict | None: Information contained in the Placemark element:
                ``"name"`` (the name of the Placemark) and
                ``"linestrings_data"`` (one entry per LineString, holding
                whatever ``find_text`` returns for its coordinates). None
                when ``placemark`` is None.
        """
        if placemark is None:
            return None

        return {
            "name": self.find_text(placemark, "name"),
            "linestrings_data": [
                self.find_text(linestring, "coordinates")
                for linestring in placemark.findall("LineString", self.xmlns)
            ],
        }

    def parse_document(self, document: ET.Element | None) -> list[dict] | None:
        """
        Parse Document element from KML file.

        Args:
            document (ET.Element | None): Parsed Document element.

        Returns:
            list[dict] | None: Information related to the Placemark
                elements contained in the Document element (see
                ``parse_placemark``). None when ``document`` is None.
        """
        if document is None:
            return None

        # NOTE: the name of the Document itself is currently not read.
        return [
            self.parse_placemark(placemark)
            for placemark in document.findall("Placemark", self.xmlns)
        ]

    def parse_root_document(self) -> None:
        """
        Parse Document elements from KML file.

        Every Document holding exactly one Placemark is turned into a track
        with one segment per LineString that has coordinates. Any other
        number of Placemarks is not supported yet and only triggers a
        warning. ``self.gpx.trk`` is assigned once, with the tracks of all
        Documents, and only when at least one track was built.
        """
        tracks = []
        for document in self.xml_root.findall("Document", self.xmlns):
            placemarks_data = self.parse_document(document)
            if len(placemarks_data) != 1:
                warnings.warn("Oops, not yet implemented...")
                continue

            placemark_data = placemarks_data[0]
            segments = []
            for coordinates in placemark_data["linestrings_data"]:
                points = [
                    self._build_trkpt(fields)
                    for fields in self._split_coordinates(coordinates)
                ]
                # A LineString without coordinates yields no segment.
                if points:
                    segments.append(Trkseg(trkpt=points))
            tracks.append(Trk(name=placemark_data["name"], trkseg=segments))

        # Assigning after the loop keeps the tracks of every Document rather
        # than only those of the last one.
        if tracks:
            self.gpx.trk = tracks

    def add_properties(self) -> None:
        """
        Add missing properties (properties that do not exist in KML/KMZ
        files but are mandatory in GPX files).

        Sets the creator, namespace, version and schema location fields of
        ``self.gpx`` to fixed GPX 1.1 values.
        """
        self.gpx.creator = "ezGPX"
        self.gpx.xmlns = "http://www.topografix.com/GPX/1/1"
        self.gpx.version = "1.1"
        self.gpx.xmlns_xsi = "http://www.w3.org/2001/XMLSchema-instance"
        self.gpx.xsi_schema_location = [
            "http://www.topografix.com/GPX/1/1",
            "http://www.topografix.com/GPX/1/1/gpx.xsd",
        ]

    def parse(self) -> dict:
        """
        Parse KML file.

        Returns:
            dict: Gpx, precisions and time_format (keys ``"gpx"``,
                ``"precisions"`` and ``"time_format"``).

        Raises:
            Exception: Any error raised while reading the XML tree or while
                building the tracks is re-raised after a warning has been
                emitted.
        """
        # TODO: add a usage example to the docstring.

        # Parse KML file
        try:
            self.xml_tree = ET.parse(self.source)
            self.xml_root = self.xml_tree.getroot()
        except Exception as err:
            # The message must be fully formatted here: the second positional
            # argument of warnings.warn is the warning category, not a
            # formatting argument.
            warnings.warn(
                f"Unexpected {err}, {type(err)}.\nUnable to parse KML file."
            )
            raise

        # Add properties
        self.add_properties()

        # Find precisions
        self.find_precisions()

        # Check XML schemas
        # NOTE(review): the constructor also takes a flag named
        # ``xml_schemas``; confirm XMLParser does not store it under the same
        # attribute name, which would shadow this method.
        self.xml_schemas()

        # Parse Document
        try:
            self.parse_root_document()
        except Exception:
            warnings.warn("Unable to parse tracks in KML file.")
            raise

        return {
            "gpx": self.gpx,
            "precisions": self.precisions,
            "time_format": self.time_format,
        }