source: OpenWorkouts-current/ow/models/bulk.py @ 715671f

current
Last change on this file since 715671f was 715671f, checked in by Borja Lopez <borja@…>, 5 years ago

(#77) Bulk workouts upload:

  • Added methods to extract files from compressed bulk files, then load workouts from those files.
  • Added a task to process/load workouts from "not loaded" bulk files
  • Added full tests coverage
  • Property mode set to 100644
File size: 7.0 KB
Line 
1import os
2import logging
3from shutil import unpack_archive, rmtree
4from uuid import uuid1
5from datetime import datetime, timezone
6
7import pytz
8from repoze.folder import Folder
9from pyramid.security import Allow, Deny, Everyone, ALL_PERMISSIONS
10from fitparse.utils import FitHeaderError, FitEOFError
11from gpxpy.gpx import GPXXMLSyntaxException
12
13from ow.utilities import create_blob
14from ow.models.workout import Workout
15
16log = logging.getLogger(__name__)
17
18
class BulkFile(Folder):

    """
    Object that maps to a compressed file uploaded by a user to upload several
    workout tracking files at once.
    """

    __parent__ = __name__ = None

    def __acl__(self):
        """
        Build the ACL for this bulk file: the owner (self.uid) is granted
        view/edit/delete, the final Deny ACE blocks everybody else.

        NOTE(review): the previous docstring claimed system admins also had
        full permissions, but no ACE here grants anything to admins (the
        closing Deny blocks them) — confirm whether an
        (Allow, 'admins', ...) entry is missing or the claim was stale.
        """
        permissions = [
            (Allow, str(self.uid), 'view'),
            (Allow, str(self.uid), 'edit'),
            (Allow, str(self.uid), 'delete'),
            (Deny, Everyone, ALL_PERMISSIONS)
        ]
        return permissions

    def __init__(self, **kw):
        """
        Create a bulk file object.

        Keyword arguments:
        - uid: (required) uid of the owner; a KeyError is raised if missing.
        - uploaded: upload datetime, defaults to "now" in UTC.
        - compressed_file: Blob with the compressed file contents.
        """
        super(BulkFile, self).__init__()
        self.bfid = uuid1()  # unique id for this bulk file
        self.uid = kw['uid']  # required, so let it blow if none is given
        self.uploaded = kw.get('uploaded', datetime.now(timezone.utc))
        self.compressed_file = kw.get('compressed_file', None)  # Blob
        self.file_name = ''  # unicode string
        self.file_type = ''  # unicode string
        self.loaded = False  # datetime when workouts have been loaded
        self.workout_ids = []  # ids of the workouts loaded from this file
        self.loaded_info = {}  # per-file information (loaded or not, errors..)

    def _in_timezone(self, timezone, value):
        """
        Return a string representation of the given value date and time,
        localized into the given timezone (a pytz timezone name).
        """
        _value = value.astimezone(pytz.timezone(timezone))
        return _value.strftime('%d/%m/%Y %H:%M (%Z)')

    def uploaded_in_timezone(self, timezone):
        """Return the upload date/time formatted in the given timezone."""
        return self._in_timezone(timezone, self.uploaded)

    def loaded_in_timezone(self, timezone):
        """
        Return the load date/time formatted in the given timezone, or an
        empty string if workouts have not been loaded from this file yet.
        """
        if self.loaded:
            return self._in_timezone(timezone, self.loaded)
        return ''

    def extract(self, tmp_path, path):
        """
        Extract the files contained in this bulk/compressed file into the
        given path, returning the full paths of the extracted files.

        :param tmp_path: directory where the blob is temporarily written
            before unpacking (the temporary copy is removed afterwards).
        :param path: directory where the archive contents are extracted.
        """
        if self.compressed_file is None:
            return []

        # save the blob into a temporal file
        tmp_file_path = os.path.join(tmp_path, self.file_name)
        with open(tmp_file_path, 'wb') as tmp_file:
            with self.compressed_file.open() as blob:
                tmp_file.write(blob.read())
        # extract
        unpack_archive(tmp_file_path, path)
        # remove temporary file
        os.remove(tmp_file_path)
        # analyze the extracted contents, return some data.
        # (an empty archive may leave no directory behind, hence the check)
        extracted = []
        if os.path.exists(path):
            for extracted_file in os.listdir(path):
                extracted.append(os.path.join(path, extracted_file))
        return extracted

    def _read_tracking_blob(self, extracted_file):
        """
        Read the given extracted file and return a (blob, file_extension)
        tuple. gpx files are text but fit files are binary, so the open
        mode depends on the extension.
        """
        base_file_name = os.path.basename(extracted_file)
        file_extension = os.path.splitext(base_file_name)[1].strip('.')
        binary = file_extension == 'fit'
        open_mode = 'rb' if binary else 'r'
        with open(extracted_file, open_mode) as f_obj:
            blob = create_blob(
                f_obj.read(), file_extension=file_extension, binary=binary)
        return blob, file_extension

    def _load_extracted_file(self, root, user, extracted_file):
        """
        Create a workout for the given user from one extracted tracking
        file, recording the outcome (loaded or not, error, workout id) in
        self.loaded_info under the file's base name.

        :param root: root object of the application (used to check for
            duplicate workouts by hash)
        :param user: user object owning the new workout
        :param extracted_file: full path of the tracking file to load
        """
        base_file_name = os.path.basename(extracted_file)
        log_header = self.file_name + '/' + base_file_name
        log.info('%s loading file', log_header)

        blob, file_extension = self._read_tracking_blob(extracted_file)

        workout = Workout()
        workout.tracking_file = blob
        workout.tracking_filetype = file_extension

        try:
            workout.load_from_file()
        except (FitHeaderError, FitEOFError, GPXXMLSyntaxException) as e:
            log.error('%s error loading tracking file', log_header)
            log.error(e)
            self.loaded_info[base_file_name] = {
                'loaded': False,
                'error': 'tracking file load error',
                'workout': None,
            }
            return

        # check for duplicates. hashed is not "complete" for a workout that
        # has not been added yet, as it does not have the owner set, so we
        # have to "build it"
        hashed = str(self.uid) + workout.hashed
        duplicate = root.get_workout_by_hash(hashed)
        if duplicate:
            log.warning('%s cannot create workout, possible duplicate',
                        log_header)
            self.loaded_info[base_file_name] = {
                'loaded': False,
                'error': 'Possible duplicate workout',
                'workout': None,
            }
            return

        # add the workout only if no errors happened
        user.add_workout(workout)
        log.info('%s workout added', log_header)
        self.loaded_info[base_file_name] = {
            'loaded': True,
            'error': None,
            'workout': workout.workout_id,
        }
        self.workout_ids.append(workout.workout_id)

    def load(self, root, tmp_path):
        """
        Extract all tracking files from this bulk file and load a workout
        from each of them for the owner user.

        Per-file results are collected in self.loaded_info, ids of the
        created workouts in self.workout_ids and, when done, self.loaded is
        set to the current UTC datetime.

        :param root: root object of the application (used to look up the
            owner user and to detect duplicate workouts)
        :param tmp_path: directory where files can be temporarily extracted
        """
        user = root.get_user_by_uid(self.uid)
        # extract into a directory unique to this bulk file
        tmp_extract_path = os.path.join(tmp_path, str(self.bfid))
        log.info('%s extracting to %s', self.file_name, tmp_extract_path)
        extracted = self.extract(tmp_path, tmp_extract_path)
        log.info('%s %d files extracted', self.file_name, len(extracted))

        # loop over extracted files and create the workouts, taking
        # care of duplicates. Store some stats/info in a dict, so we can
        # keep that somewhere, to show to the user later on
        for extracted_file in extracted:
            self._load_extracted_file(root, user, extracted_file)

        # clean-up, we have to check if the temporary directory exists,
        # as extract() won't create such directory if the compressed file
        # is empty
        if os.path.exists(tmp_extract_path):
            rmtree(tmp_extract_path)

        # mark this bulk file as loaded
        self.loaded = datetime.now(timezone.utc)
172
173
class BulkFiles(Folder):

    """
    Container for bulk upload compressed files
    """

    __parent__ = __name__ = None

    def __acl__(self):
        """
        Everybody can view, super users can edit
        """
        return [
            (Allow, Everyone, 'view'),
            (Allow, 'admins', 'edit'),
            (Deny, Everyone, ALL_PERMISSIONS)
        ]

    def add_bulk_file(self, bulk_file):
        """Store the given bulk file in this container, keyed by its bfid."""
        key = str(bulk_file.bfid)
        self[key] = bulk_file

    def get_by_uid(self, uid):
        """
        Return bulk files owned by the given uid
        """
        owner = str(uid)
        owned = []
        for key in self:
            bulk_file = self[key]
            if bulk_file.uid == owner:
                owned.append(bulk_file)
        return owned
Note: See TracBrowser for help on using the repository browser.