@timer.timer
def _read_nodes(osm_file, found_nodes, parser_kind: str = "lxml") -> Dict[int, OSMNode]:
    """Parse the OSM XML input and return the nodes whose ids are in *found_nodes*.

    Args:
        osm_file: Either a path to an OSM XML file, an already opened
            ``PercentageFile``, or the XML document held in memory as
            ``bytes``/``bytearray``.
            NOTE(review): ``read_file`` passes ``decompressed_content`` on one
            branch; this assumes that value is bytes -- confirm against the
            decompression code.
        found_nodes: Ids of the nodes to keep (collected while reading ways).
        parser_kind: Which backend to use: ``"sax"`` (``NodeHandler``),
            ``"et"`` (``NodeHandlerET``) or ``"lxml"`` (``NodeHandlerLXML``).
            Defaults to ``"lxml"``, the backend that was previously hard-coded.

    Returns:
        The ``nodes`` mapping built by the selected handler.

    Raises:
        ValueError: If *parser_kind* names no known backend.
    """
    import io  # function-scope import: only needed to wrap in-memory content

    in_memory = isinstance(osm_file, (bytes, bytearray))

    def open_stream():
        # File-like source for the SAX and ElementTree backends.
        if in_memory:
            return io.BytesIO(osm_file)
        if isinstance(osm_file, PercentageFile):
            return osm_file
        return PercentageFile(osm_file)

    if parser_kind == "sax":
        n_handler = NodeHandler(found_nodes)
        if in_memory:
            xml.sax.parseString(osm_file, n_handler)
        else:
            parser = xml.sax.make_parser()
            parser.setContentHandler(n_handler)
            parser.parse(open_stream())
    elif parser_kind == "et":
        n_handler = NodeHandlerET(found_nodes)
        n_handler.parse(open_stream())
    elif parser_kind == "lxml":
        n_handler = NodeHandlerLXML(found_nodes)
        # A path is handed over unchanged, as before; in-memory content must be
        # wrapped, otherwise it would be interpreted as a filename.
        n_handler.parse(io.BytesIO(osm_file) if in_memory else osm_file)
    else:
        # Fail loudly instead of hitting an unbound ``n_handler`` below.
        raise ValueError(
            f"unknown parser_kind {parser_kind!r}; expected 'sax', 'et' or 'lxml'"
        )

    return n_handler.nodes
# osm/xml_handler_et.py
class NodeHandlerET:
    """Collect wanted OSM nodes using ``xml.etree.ElementTree.iterparse``."""

    def __init__(self, found_nodes: Set[int]) -> None:
        # Ids of the nodes that should be kept.
        self.found_nodes: Set[int] = found_nodes
        # Result mapping: node id -> OSMNode(id, lat, lon).
        self.nodes: Dict[int, OSMNode] = {}

    def parse(self, osm_file) -> None:
        """Stream *osm_file* and store every ``node`` whose id is wanted.

        Args:
            osm_file: Anything ``ET.iterparse`` accepts (a filename or a file
                object opened for reading).

        Raises:
            KeyError: If a ``node`` element lacks ``id``, or a wanted one
                lacks ``lat`` or ``lon``.
            ValueError: If one of those attributes is not numeric.
        """
        # Only "end" events are requested, so no per-event check is needed.
        for _, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node":
                osm_id = int(elem.attrib["id"])
                if osm_id in self.found_nodes:
                    self.nodes[osm_id] = OSMNode(
                        osm_id, float(elem.attrib["lat"]), float(elem.attrib["lon"])
                    )
            # Release attributes and children of every finished element (not
            # just nodes) so way/relation subtrees do not pile up in memory.
            elem.clear()


# osm/xml_handler_lxml.py
class NodeHandlerLXML:
    """Collect wanted OSM nodes using ``lxml.etree.iterparse``."""

    def __init__(self, found_nodes: Set[int]) -> None:
        # Ids of the nodes that should be kept.
        self.found_nodes: Set[int] = found_nodes
        # Result mapping: node id -> OSMNode(id, lat, lon).
        self.nodes: Dict[int, OSMNode] = {}

    def parse(self, osm_file) -> None:
        """Stream *osm_file* and store every ``node`` whose id is wanted.

        Args:
            osm_file: Anything ``etree.iterparse`` accepts (a filename or a
                file-like object).

        Raises:
            KeyError: If a ``node`` element lacks ``id``, or a wanted one
                lacks ``lat`` or ``lon``.
            ValueError: If one of those attributes is not numeric.
        """
        for _, elem in etree.iterparse(osm_file, events=("end",)):
            if elem.tag == "node":
                osm_id = int(elem.attrib["id"])
                if osm_id in self.found_nodes:
                    self.nodes[osm_id] = OSMNode(
                        osm_id, float(elem.attrib["lat"]), float(elem.attrib["lon"])
                    )
            # Without this cleanup iterparse keeps the whole document tree
            # alive. Clear the finished element, then detach the siblings that
            # were already processed so the parent does not retain them.
            elem.clear()
            parent = elem.getparent()
            if parent is not None:
                while elem.getprevious() is not None:
                    del parent[0]