Changeset 715671f in OpenWorkouts-current


Ignore:
Timestamp:
Oct 13, 2019, 4:40:50 PM (4 years ago)
Author:
Borja Lopez <borja@…>
Branches:
current
Children:
93b23a6
Parents:
fcf0ef3
Message:

(#77) Bulk workouts upload:

  • Added methods to extract files from compressed bulk files, then load workouts from those files.
  • Added a task to process/load workouts from "not loaded" bulk files
  • Added full test coverage
Files:
10 added
9 edited

Legend:

Unmodified
Added
Removed
  • .gitignore

    rfcf0ef3 r715671f  
    4747var/spool
    4848var/run/mail-queue-processor.lock
     49var/run/workout-bulk-loading.lock
    4950
    5051# log files
  • development.ini

    rfcf0ef3 r715671f  
    3939mail.username = noreply@openworkouts.org
    4040mail.password = PASSWORD
     41
     42workouts.bulk_loading_lock = %(here)s/var/run/workout-bulk-loading.lock
     43workouts.bulk_tmp_path = %(here)s/var/tmp/bulk
    4144
    4245
  • ow/models/bulk.py

    rfcf0ef3 r715671f  
     1import os
     2import logging
     3from shutil import unpack_archive, rmtree
    14from uuid import uuid1
    2 from datetime import datetime, timedelta, timezone
    3 
     5from datetime import datetime, timezone
     6
     7import pytz
    48from repoze.folder import Folder
    59from pyramid.security import Allow, Deny, Everyone, ALL_PERMISSIONS
    6 
    7 import pytz
     10from fitparse.utils import FitHeaderError, FitEOFError
     11from gpxpy.gpx import GPXXMLSyntaxException
     12
     13from ow.utilities import create_blob
     14from ow.models.workout import Workout
     15
     16log = logging.getLogger(__name__)
    817
    918
     
    4049        self.loaded = False  # datetime when workouts have been loaded
    4150        self.workout_ids = []  # ids of the workouts loaded from this file
     51        self.loaded_info = {}  # per-file information (loaded or not, errors..)
    4252
    4353    def _in_timezone(self, timezone, value):
     
    5666            return self._in_timezone(timezone, self.loaded)
    5767        return ''
     68
     69    def extract(self, tmp_path, path):
     70        """
     71        Extract the files contained in this bulk/compressed file into the
     72        given path.
     73        """
     74        if self.compressed_file is None:
     75            return []
     76
     77        # save the blob into a temporal file
     78        tmp_file_path = os.path.join(tmp_path, self.file_name)
     79        with open(tmp_file_path, 'wb') as tmp_file:
     80            with self.compressed_file.open() as blob:
     81                tmp_file.write(blob.read())
     82        # extract
     83        unpack_archive(tmp_file_path, path)
     84        # remove temporary file
     85        os.remove(tmp_file_path)
     86        # analyze the extracted contents, return some data
     87        extracted = []
     88        if os.path.exists(path):
     89            for extracted_file in os.listdir(path):
     90                extracted.append(os.path.join(path, extracted_file))
     91        return extracted
     92
     93    def load(self, root, tmp_path):
     94        user = root.get_user_by_uid(self.uid)
     95        # extract
     96        tmp_extract_path = os.path.join(tmp_path, str(self.bfid))
     97        log.info(self.file_name + ' extracting to ' + tmp_extract_path)
     98        extracted = self.extract(tmp_path, tmp_extract_path)
     99        log.info(self.file_name + ' ' + str(len(extracted)) +
     100                 ' files extracted')
     101
     102        # loop over extracted files and create the workouts, taking
     103        # care of duplicates. Store some stats/info in a dict, so we can
     104        # keep that somewhere, to show to the user later on
     105        for extracted_file in extracted:
     106            base_file_name = os.path.basename(extracted_file)
     107            log_header = self.file_name + '/' + base_file_name
     108            log.info(log_header + ' loading file')
     109
     110            file_extension = os.path.splitext(base_file_name)[1].strip('.')
     111
     112            # gpx files are text, but fit files are binary files
     113            open_mode = 'r'
     114            binary = False
     115            if file_extension == 'fit':
     116                open_mode = 'rb'
     117                binary = True
     118
     119            with open(extracted_file, open_mode) as f_obj:
     120                blob = create_blob(
     121                    f_obj.read(), file_extension=file_extension, binary=binary)
     122
     123            workout = Workout()
     124            workout.tracking_file = blob
     125            workout.tracking_filetype = file_extension
     126
     127            try:
     128                workout.load_from_file()
     129            except (FitHeaderError, FitEOFError, GPXXMLSyntaxException) as e:
     130                log.error(log_header + ' error loading tracking file ')
     131                log.error(e)
     132                self.loaded_info[base_file_name] = {
     133                    'loaded': False,
     134                    'error': 'tracking file load error',
     135                    'workout': None,
     136                }
     137            else:
     138                # check for duplicates
     139                # hashed is not "complete" for a workout that has not been
     140                # added yet, as it does not have the owner set, so we have to
     141                # "build it"
     142                hashed = str(self.uid) + workout.hashed
     143                duplicate = root.get_workout_by_hash(hashed)
     144                if duplicate:
     145                    log.warning(
     146                        log_header +
     147                        ' cannot create workout, possible duplicate')
     148                    self.loaded_info[base_file_name] = {
     149                        'loaded': False,
     150                        'error': 'Possible duplicate workout',
     151                        'workout': None,
     152                    }
     153                else:
     154                    # add the workout only if no errors happened
     155                    user.add_workout(workout)
     156                    log.info(log_header + ' workout added')
     157                    self.loaded_info[base_file_name] = {
     158                        'loaded': True,
     159                        'error': None,
     160                        'workout': workout.workout_id,
     161                    }
     162                    self.workout_ids.append(workout.workout_id)
     163
     164        # clean-up, we have to check if the temporary directory exists,
     165        # as extract() won't create such directory if the compressed file
     166        # is empty
     167        if os.path.exists(tmp_extract_path):
     168            rmtree(tmp_extract_path)
     169
     170        # mark this bulk file as loaded
     171        self.loaded = datetime.now(timezone.utc)
    58172
    59173
  • ow/schemas/bulk.py

    rfcf0ef3 r715671f  
    1 from formencode import Schema, validators
     1from formencode import Schema
    22
    33from ow.schemas.blob import FieldStorageBlob
  • ow/tasks/run.py

    rfcf0ef3 r715671f  
    33from ow.tasks.manager import TasksManager
    44from ow.tasks.mail import queue_processor
     5from ow.tasks.bulk import process_compressed_files
    56
    67
     
    1011    # "register" the tasks
    1112    tasks_manager.add_task('send_emails', queue_processor)
     13    tasks_manager.add_task('bulk_import', process_compressed_files)
    1214
    1315    if len(sys.argv) != 3:
  • ow/templates/bulk_files.pt

    rfcf0ef3 r715671f  
    2020    <div class="bulk-files ow-forms">
    2121      <a href="" class="back"
    22          tal:attributes="href request.resource_url(context)"
     22         tal:attributes="href request.resource_url(context, 'add-bulk-file')"
    2323         i18n:translate="">Back</a>
    2424      <h2 i18n:translate="">Multiple file uploads</h2>
     
    3636          <p>
    3737            <tal:loaded tal:condition="bulk_file.loaded">
    38               <span i18n:translate="">File loaded on></span>
     38              <span i18n:translate="">File loaded on</span>
    3939              <span tal:content="bulk_file.loaded_in_timezone(context.timezone)"></span>
     40              <ul>
     41                <tal:loaded_files tal:repeat="file_name bulk_file.loaded_info.keys()">
     42                  <li tal:define="file_info bulk_file.loaded_info[file_name]">
     43                    <span tal:content="file_name"></span>
     44                    <tal:file_loaded tal:condition="file_info['loaded']">
     45                      <span i18n:translate="">Loaded correctly</span>
     46                      <a href="" tal:attributes="href request.resource_url(context, file_info['workout'])"
     47                         i18n:translate="">See workout</a>
     48                    </tal:file_loaded>
     49                    <tal:file_not_loaded tal:condition="not file_info['loaded']">
     50                      <span i18n:translate="">Couldn't be loaded:</span>
     51                      <span tal:content="file_info['error']"></span>
     52                    </tal:file_not_loaded>
     53                  </li>
     54                </tal:loaded_files>
     55              </ul>
    4056            </tal:loaded>
    4157            <tal:not_loaded tal:condition="not bulk_file.loaded">
     
    4460          </p>
    4561
    46           <tal:workouts tal:condition="bulk_file.loaded and bulk_file.workout_ids">
    47             <ul class="bulk-loaded-workouts">
    48               <tal:workout tal:repeat="workout bulk_file.workouts">
    49                 <li>
    50                   <span tal:content="workout.start_in_timezone(context.timezone)"></span>,
    51                   <span tal:content="workout.sport"></span>,
    52                   <span tal:content="workout.title"></span>
    53                 </li>
    54               </tal:workout>
    55             </ul>
    56           </tal:workouts>
    57 
     62          <!--!
     63               <tal:workouts tal:condition="bulk_file.loaded and bulk_file.workout_ids">
     64               <ul class="bulk-loaded-workouts">
     65               <tal:workout tal:repeat="workout bulk_file.workouts">
     66               <li>
     67               <span tal:content="workout.start_in_timezone(context.timezone)"></span>,
     68               <span tal:content="workout.sport"></span>,
     69               <span tal:content="workout.title"></span>
     70               </li>
     71               </tal:workout>
     72               </ul>
     73               </tal:workouts>
     74          -->
    5875          <tal:no_workouts tal:condition="bulk_file.loaded and not bulk_file.workout_ids">
    5976            <p><span i18n:translate>No workouts have been loaded from this file</span></p>
  • ow/views/bulk.py

    rfcf0ef3 r715671f  
    1 from pyramid.httpexceptions import HTTPFound, HTTPNotFound
     1from pyramid.httpexceptions import HTTPFound
    22from pyramid.view import view_config
    3 from pyramid.response import Response
    43from pyramid_simpleform import Form
    54from pyramid_simpleform.renderers import FormRenderer
     
    2827    # our blob storage validator.
    2928    # dirty fix until formencode fixes its api.is_empty method
    30     if isinstance(request.POST.get('tracking_file', None), bytes):
    31         request.POST['tracking_file'] = ''
     29    if isinstance(request.POST.get('compressed_file', None), bytes):
     30        request.POST['compressed_file'] = ''
    3231
    3332    form = Form(request, schema=BulkFileSchema())
     
    5453
    5554
    56 
    5755@view_config(
    5856    context=User,
  • production.ini

    rfcf0ef3 r715671f  
    2929mail.username = USERNAME
    3030mail.password = PASSWORD
     31
     32# workout bulk loading tasks
     33workouts.bulk_loading_lock = %(here)s/var/run/workout-bulk-loading.lock
     34workouts.bulk_tmp_path = %(here)s/var/tmp/bulk
    3135
    3236
  • staging.ini

    rfcf0ef3 r715671f  
    2929mail.username = USERNAME
    3030mail.password = PASSWORD
     31
     32# workout bulk loading tasks
     33workouts.bulk_loading_lock = %(here)s/var/run/workout-bulk-loading.lock
     34workouts.bulk_tmp_path = %(here)s/var/tmp/bulk
    3135
    3236
Note: See TracChangeset for help on using the changeset viewer.