1 | import re |
---|
2 | import datetime |
---|
3 | from unidecode import unidecode |
---|
4 | from xml.dom import minidom |
---|
5 | from decimal import Decimal |
---|
6 | |
---|
7 | |
---|
def slugify(text, delim=u'-'):
    """
    Build an ASCII-only slug out of ``text``.

    The text is transliterated to ASCII with ``unidecode``, lower-cased and
    cut on runs of punctuation/whitespace; the resulting words are glued
    back together with ``delim``.

    Adapted from http://flask.pocoo.org/snippets/5/
    """
    splitter = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
    lowered = unidecode(text).lower()
    # The inner split() drops the empty strings the regex split leaves at the
    # edges and breaks on any whitespace the pattern itself does not cover.
    words = [
        token
        for chunk in splitter.split(lowered)
        for token in chunk.split()
    ]
    return delim.join(words)
---|
19 | |
---|
20 | |
---|
class GPXMinidomParser(object):
    """
    GPX parser, using minidom from the base library.

    We need this as a workaround, as gpxpy does not handle GPX 1.1 extensions
    correctly right now (and we have not been able to fix it).

    This method is inspired by this blog post:

    http://castfortwo.blogspot.com.au/2014/06/
    parsing-strava-gpx-file-with-python.html

    Usage: build the parser with the path of a gpx file, call parse_tracks()
    and read the results from self.tracks (track name -> list of point
    dicts with the keys lat, lon, ele, time, hr, cad and atemp).
    """

    def __init__(self, gpx_path):
        # Handed over untouched to minidom.parse() by load_gpx()
        self.gpx_path = gpx_path
        # Document root element, set by load_gpx()
        self.gpx = None
        # Track name -> list of point dicts, filled in by parse_track()
        self.tracks = {}

    def load_gpx(self):
        """
        Load the given gpx file into a minidom doc, normalize it and set
        self.gpx to the document root so we can reuse it later on
        """
        doc = minidom.parse(self.gpx_path)
        doc.normalize()
        self.gpx = doc.documentElement

    def parse_tracks(self):
        """
        Loop over all the tracks found in the gpx, parsing them.

        The gpx file is loaded first if load_gpx() has not been called yet.
        """
        if self.gpx is None:
            self.load_gpx()
        for trk in self.gpx.getElementsByTagName('trk'):
            self.parse_track(trk)

    def parse_track(self, trk):
        """
        Parse the given track, extracting all the information and putting it
        into a dict where the key is the track name and the value is a list
        of data for the different segments and points in the track.

        Points of tracks sharing the same name are appended to the same list.
        Elevation and the hr/cad/atemp extension values are optional and set
        to None when they are missing.

        Raises IndexError if the track has no name element or a point has no
        time element, and ValueError if a time value matches neither of the
        supported formats.

        All the data is saved in self.tracks
        """
        name = trk.getElementsByTagName('name')[0].firstChild.data
        points = self.tracks.setdefault(name, [])

        for trkseg in trk.getElementsByTagName('trkseg'):
            for trkpt in trkseg.getElementsByTagName('trkpt'):
                point = {
                    'lat': Decimal(trkpt.getAttribute('lat')),
                    'lon': Decimal(trkpt.getAttribute('lon')),
                    # There could happen there is no elevation data
                    'ele': self._first_decimal(
                        trkpt.getElementsByTagName('ele')),
                    'time': self._parse_time(
                        trkpt.getElementsByTagName('time')[0].firstChild.data),
                }
                point.update(self._parse_extensions(trkpt))
                points.append(point)

    @staticmethod
    def _first_decimal(elements):
        """
        Return the text of the first element in the given list as a Decimal,
        or None if the list is empty or that element has no content.
        """
        if not elements:
            return None
        content = elements[0].firstChild
        if content is None:
            return None
        return Decimal(content.data)

    @staticmethod
    def _parse_time(rfc3339):
        """
        Convert a UTC ("Z") timestamp, with or without fractional seconds,
        into a naive datetime.
        """
        try:
            return datetime.datetime.strptime(
                rfc3339, '%Y-%m-%dT%H:%M:%S.%fZ')
        except ValueError:
            return datetime.datetime.strptime(
                rfc3339, '%Y-%m-%dT%H:%M:%SZ')

    def _parse_extensions(self, trkpt):
        """
        Return a dict with the hr, cad and atemp values found in the
        TrackPointExtension of the given point (None for each missing one).
        """
        values = {'hr': None, 'cad': None, 'atemp': None}
        extensions = trkpt.getElementsByTagName('extensions')
        if not extensions:
            return values

        # Match on the local name only, so the extension is found whatever
        # namespace prefix the file uses for it (gpxtpx:, ns3:, ...).
        trk_pt_exts = extensions[0].getElementsByTagNameNS(
            '*', 'TrackPointExtension')
        if not trk_pt_exts:
            # <extensions> is there, but it holds some other kind of data
            return values

        trk_pt_ext = trk_pt_exts[0]
        for key in values:
            values[key] = self._first_decimal(
                trk_pt_ext.getElementsByTagNameNS('*', key))
        return values
---|