From 39062f35eb06c40e9ba03a38734b58b4c5bd16eb Mon Sep 17 00:00:00 2001
From: Nathaniel Kofalt <nathaniel@kofalt.com>
Date: Fri, 20 May 2016 15:56:40 -0500
Subject: [PATCH] Add /jobs/x/retry

---
 api/api.py           |  1 +
 api/jobs/handlers.py | 40 ++++++++++++++++++++++++++++++++++++++++
 api/jobs/jobs.py     |  3 +++
 api/jobs/queue.py    | 13 +++++++++++++
 4 files changed, 57 insertions(+)

diff --git a/api/api.py b/api/api.py
index 74b1acba..b1959619 100644
--- a/api/api.py
+++ b/api/api.py
@@ -209,6 +209,7 @@ routes = [
         webapp2.Route(r'/reap',             JobsHandler, handler_method='reap_stale', methods=['POST']),
         webapp2.Route(r'/add',              JobsHandler, handler_method='add', methods=['POST']),
         webapp2.Route(r'/<:[^/]+>',         JobHandler,  name='job'),
+        webapp2.Route(r'/<:[^/]+>/retry',   JobHandler,  name='job', handler_method='retry', methods=['POST']),
     ]),
     webapp2.Route(r'/api/gears',             GearsHandler),
     webapp2_extras.routes.PathPrefixRoute(r'/api/gears', [
diff --git a/api/jobs/handlers.py b/api/jobs/handlers.py
index 1309845f..35eb8f3c 100644
--- a/api/jobs/handlers.py
+++ b/api/jobs/handlers.py
@@ -267,3 +267,43 @@ class JobHandler(base.RequestHandler):
 
         j = Job.get(_id)
         Queue.mutate(j, self.request.json)
+
+    def retry(self, _id):
+        """
+        .. http:post:: /api/jobs/(jid)/retry
+
+            Retry a job.
+
+            The job must have a state of 'failed', and must not have already been retried.
+            Returns the id of the new, generated job.
+
+            :statuscode 200: no error
+
+            **Example request**:
+
+            .. sourcecode:: http
+
+                POST /api/jobs/3/retry HTTP/1.1
+
+            **Example response**:
+
+            .. sourcecode:: http
+
+                HTTP/1.1 200 OK
+                Vary: Accept-Encoding
+                Content-Type: application/json; charset=utf-8
+                {
+                    "_id": "573cb66b135d87002660597c"
+                }
+        """
+
+        j = Job.get(_id)
+
+        # Permission check
+        if not self.superuser_request:
+            for x in j.inputs:
+                j.inputs[x].check_access(self.uid, 'ro')
+            j.destination.check_access(self.uid, 'rw')
+
+        new_id = Queue.retry(j, force=True)
+        return { "_id": new_id }
diff --git a/api/jobs/jobs.py b/api/jobs/jobs.py
index 22163c7a..4d4cb72d 100644
--- a/api/jobs/jobs.py
+++ b/api/jobs/jobs.py
@@ -94,6 +94,9 @@ class Job(object):
     @classmethod
     def get(cls, _id):
         doc = config.db.jobs.find_one({'_id': bson.ObjectId(_id)})
+        if doc is None:
+            raise Exception('Job not found')
+
         return cls.load(doc)
 
     def map(self):
diff --git a/api/jobs/queue.py b/api/jobs/queue.py
index d63b97e2..a989a812 100644
--- a/api/jobs/queue.py
+++ b/api/jobs/queue.py
@@ -66,6 +66,7 @@ class Queue(object):
 
         # If the job did not succeed, check to see if job should be retried.
         if 'state' in mutation and mutation['state'] == 'failed':
+            job.state = 'failed'
             Queue.retry(job)
 
     @staticmethod
@@ -79,6 +80,16 @@ class Queue(object):
             log.info('Permanently failed job %s (after %d attempts)' % (job._id, job.attempt))
             return
 
+        if job.state != 'failed':
+            raise Exception('Can only retry a job that is failed')
+
+        # Race condition: jobs should only be marked as failed once a new job has been spawned for it (if any).
+        # No transactions in our database, so we can't do that.
+        # Instead, make a best-hope attempt.
+        check = config.db.jobs.find_one({'previous_job_id': job._id })
+        if check is not None:
+            found = Job.load(check)
+            raise Exception('Job ' + job._id + ' has already been retried as ' + str(found._id))
 
         new_job = copy.deepcopy(job)
         new_job._id = None
@@ -94,6 +105,8 @@ class Queue(object):
         new_id = new_job.insert()
         log.info('respawned job %s as %s (attempt %d)' % (job._id, new_id, new_job.attempt))
 
+        return new_id
+
     @staticmethod
     def start_job(tags=None):
         """
-- 
GitLab