tracing.py 14.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Copyright 2019, The TensorFlow Federated Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for instrumenting code with timing and tracing data.

This module provides several functions for preserving trace context across
various boundaries, namely between asyncio and regular python code:

19
  * wrap_coroutine_in_trace_context wraps a coroutine such that it
20
    inherits the ambient trace context. It should be used when executing a
21
22
    coroutine that should inherit trace context from the current thread or
    task.
23
24
25
26
27
28
29
30
31
32
33
  * EventLoops should use the Task factory provided by
    propagate_trace_context_task_factory by calling
    `set_task_factory(propagate_trace_context_task_factory)`.
"""

import abc
import asyncio
import contextlib
import functools
import inspect
import random
34
35
import sys
import threading
36
import time
37
from typing import Any, ContextManager, Dict, Generator, Generic, Iterator, List, Optional, Tuple, TypeVar, Union
38
39
40
41
42
43

from absl import logging

from tensorflow_federated.python.common_libs import py_typecheck


44
45
class TracedSpan():
  """The trace was wrapping a non-function span.
46

47
48
  This value will be given back from `TracingProvider::span`'s first `yield`
  if the trace was being used to wrap a `span` rather than a whole function.
49
  """
50
  pass
51
52


53
54
class TracedFunctionReturned():
  """The traced function returned successfully.
55

56
57
58
59
  This value will be given back from `TracingProvider::span`'s first `yield`
  if the function being traced returned normally. The return value will be kept
  in the `value` field.
  """
60

61
62
  def __init__(self, value):
    self.value = value
63
64


65
66
class TracedFunctionThrew():
  """The traced function threw an exception.
67

68
69
70
  This value will be given back from `TracingProvider::span`'s first `yield`
  if the function being traced threw an exception.
  """
71

72
73
74
75
  def __init__(self, error_type, error_value, traceback):
    self.error_type = error_type
    self.error_value = error_value
    self.traceback = traceback
76
77


78
TraceResult = Union[TracedSpan, TracedFunctionReturned, TracedFunctionThrew]
79

80
T = TypeVar('T')
81
82


83
84
class TracingProvider(Generic[T], metaclass=abc.ABCMeta):
  """Abstract base class for tracers."""
Taylor Cramer's avatar
Taylor Cramer 已提交
85

86
  @abc.abstractmethod
87
88
89
90
91
92
93
94
95
96
97
  def span(
      self,
      scope: str,
      sub_scope: str,
      nonce: int,
      parent_span_yield: Optional[T],
      fn_args: Optional[Tuple[Any, ...]],
      fn_kwargs: Optional[Dict[str, Any]],
      trace_opts: Dict[str, Any],
  ) -> Generator[T, TraceResult, None]:
    """Create a new tracing span.
98
99

    Args:
100
101
102
103
104
105
106
107
108
109
110
111
112
      scope: String name of the scope, often the class name.
      sub_scope: String name of the sub-scope, often the function name.
      nonce: Number used to correlate tracing messages relating to the same
        function invocation.
      parent_span_yield: The value yielded by the most recently started (and not
        exited) call to `span` on this `TracingProvider` on the current
        `asyncio.Task` or thread (when running outside of an async context).
      fn_args: When this tracing provider wraps a function, this will be a tuple
        containing all of the non-keyword function arguments.
      fn_kwargs: When this tracing provider wraps a function, this will be a
        dict containing all of the keyword function arguments.
      trace_opts: User-provided options to the span constructor.
        `TracingProvider`s should ignore unknown options.
113
114

    Returns:
115
116
117
118
      A `Generator` which will be immediately started and run up until it
      yields for the first time. The value yielded by this `Generator`
      will be passed on to nested calls to `span`. When the spanned code ends,
      a `TraceResult` will be passed back through the `yield`.
119
    """
120
121
122
123
124
125
126
127
128
129
    raise NotImplementedError

  def wrap_rpc(self, parent_span_yield: Optional[T]) -> ContextManager[None]:
    """Wrap an RPC call so that it can carry over the `parent_span_yield`."""
    del parent_span_yield
    return _null_context()

  def receive_rpc(self) -> Optional[T]:
    """Unpack `parent_span_yield` from the receiving end of an RPC."""
    return None
130
131
132
133
134
135
136
137
138


class LoggingTracingProvider(TracingProvider):
  """Implements TracingProvider and outputs the results via logging.

  This implementation does not require storing additional trace context state,
  so most methods are no-ops.
  """

139
140
  def __init__(self):  # pylint: disable=super-init-not-called
    pass
141

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
  def span(
      self,
      scope: str,
      sub_scope: str,
      nonce: int,
      parent_span_yield: Optional[None],
      fn_args: Optional[Tuple[Any, ...]],
      fn_kwargs: Optional[Dict[str, Any]],
      trace_opts: Dict[str, Any],
  ) -> Generator[None, TraceResult, None]:
    assert parent_span_yield is None
    del parent_span_yield, fn_args, fn_kwargs, trace_opts
    start_time = time.time()
    logging.debug('(%s) Entering %s.%s', nonce, scope, sub_scope)
    yield None
    logging.debug('(%s) Exiting %s.%s. Elapsed time %f', nonce, scope,
                  sub_scope,
                  time.time() - start_time)
160
161


162
_global_tracing_providers = [LoggingTracingProvider()]
163
164


165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def trace(fn=None, **trace_kwargs):
  """Delegates to the current global `TracingProvider`.

  Note that this function adds a layer of indirection so that the decoration
  happens when the method is executed. This is necessary so that the current
  TracingProvider is used.

  Args:
    fn: Function to decorate.
    **trace_kwargs: Tracing options. Supported options differ by tracing
      provider.

  Returns:
    Decorated instance of fn.
  """
  if fn is None:
    return functools.partial(trace, **trace_kwargs)

183
  scope, sub_scope = _func_to_class_and_method(fn)
184

185
186
187
  # Note: in a classic "what color is your function" situation,
  # we unfortunately have to duplicate the wrapping of the
  # underlying function in order to cover both the sync and async cases.
188
  if inspect.iscoroutinefunction(fn):
189

190
    @functools.wraps(fn)
191
    async def async_trace(*fn_args, **fn_kwargs):
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
      # Produce the span generator
      span_gen = _span_generator(
          scope, sub_scope, trace_kwargs, fn_args=fn_args, fn_kwargs=fn_kwargs)
      # Run up until the first yield
      next(span_gen)
      completed = False
      # Run the underlying function, recording the resulting value or exception
      # and passing it back to the span generator
      try:
        result = await fn(*fn_args, **fn_kwargs)
        completed = True
        try:
          span_gen.send(TracedFunctionReturned(result))
        except StopIteration:
          pass
        return result
      except:
        if not completed:
          error_type, error_value, traceback = sys.exc_info()
          try:
            span_gen.send(
                TracedFunctionThrew(error_type, error_value, traceback))
          except StopIteration:
            pass
        raise
217
218
219

    return async_trace
  else:
220

221
    @functools.wraps(fn)
222
    def sync_trace(*fn_args, **fn_kwargs):
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
      span_gen = _span_generator(
          scope, sub_scope, trace_kwargs, fn_args=fn_args, fn_kwargs=fn_kwargs)
      next(span_gen)
      completed = False
      try:
        result = fn(*fn_args, **fn_kwargs)
        completed = True
        try:
          span_gen.send(TracedFunctionReturned(result))
        except StopIteration:
          pass
        return result
      except:
        if not completed:
          error_type, error_value, traceback = sys.exc_info()
          try:
            span_gen.send(
                TracedFunctionThrew(error_type, error_value, traceback))
          except StopIteration:
            pass
        raise
244
245

    return sync_trace
246
247


248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# The code below manages the active "span yields" for a task or thread.
# Here's a quick summary of how that works.
#
# A "span yield" is a value `yield`ed by the `TracingProvider.span` function.
# The span yields for the current encompassing span need to be tracked so that
# they can be passed to new calls to `span` as the `parent_span_yield`.
#
# Typically, these would be tracked with a thread-local. However, async tasks
# can interleave on a single thread, so it makes more sense for them to track
# "task locals".
#
# `_current_span_yields` and `_set_span_yields` below handle the logic of
# tracking these spans. If we're in an async context, they'll read and write
# to the current async tasks, but fall back to using a thread local if we're
# in a synchronous context.
263

264
265
# A single yielded value for each currently-active TracingProvider.
SpanYields = List[Any]
266

267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288

class ThreadLocalSpanYields(threading.local):
  """The span set for the current thread.

  This is only used when outside of an async context.
  """

  def __init__(self):
    super().__init__()
    self._span_yields: Optional[SpanYields] = None

  def set(self, span_yields: Optional[SpanYields]):
    self._span_yields = span_yields

  def get(self) -> Optional[SpanYields]:
    return self._span_yields


_non_async_span_yields = ThreadLocalSpanYields()


def _current_task() -> Optional[asyncio.Task]:
289
290
291
  """Get the current running task, or `None` if no task is running."""
  # Note: `current_task` returns `None` if there is no current task, but it
  # throws if no currently running async loop.
292
  try:
293
294
295
296
297
298
    # asyncio.Task.current_task was moved to asyncio.current_task() starting
    # with 3.9, but previoulsy was asyncio.Task.current() until Python 3.6.
    if sys.version_info[1] >= 7:
      return asyncio.current_task()
    else:
      return asyncio.Task.current_task()
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  except RuntimeError:
    return None


def _current_span_yields() -> SpanYields:
  """Returns the current parent span yield list."""
  task = _current_task()
  if task is None:
    # There is no current task, so we're not running in an async context.
    # Grab the spans from the current thread.
    spans = _non_async_span_yields.get()
  else:
    spans = getattr(task, 'trace_span_yields', None)
  if spans is None:
    spans = [None for _ in range(len(_global_tracing_providers))]
  assert len(_global_tracing_providers) == len(spans)
  return spans


def _set_span_yields(span_yields: Optional[SpanYields]):
  """Sets the current parent span list."""
  task = _current_task()
  if task is None:
    # There is no current task, so we're not running in an async context.
    # Set the spans for the current thread.
    _non_async_span_yields.set(span_yields)
  else:
    setattr(task, 'trace_span_yields', span_yields)


@contextlib.contextmanager
def _with_span_yields(span_yields: Optional[SpanYields]):
  """Context manager which sets and unsets the current parent span list."""
  old_span_yields = _current_span_yields()
  _set_span_yields(span_yields)
  yield None
  _set_span_yields(old_span_yields)


@contextlib.contextmanager
def span(scope, sub_scope, **trace_opts):
  """Creates a `ContextManager` that wraps the code in question with a span."""
  span_gen = _span_generator(scope, sub_scope, trace_opts)
  next(span_gen)
  yield
344
345
346
347
  try:
    span_gen.send(TracedSpan())
  except StopIteration:
    pass
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379


def _span_generator(scope,
                    sub_scope,
                    trace_opts,
                    fn_args=None,
                    fn_kwargs=None) -> Generator[None, TraceResult, None]:
  """Wraps up all the `TracingProvider.span` generators into one."""
  # Create a nonce so that all of the traces from this span can be associated
  # with one another.
  nonce = random.randrange(1000000000)
  # Call `span` on all the global `TraceProvider`s and run it up until `yield`.
  span_generators = []
  new_span_yields: SpanYields = []
  for tp, parent_span_yield in zip(_global_tracing_providers,
                                   _current_span_yields()):
    new_span_gen = tp.span(scope, sub_scope, nonce, parent_span_yield, fn_args,
                           fn_kwargs, trace_opts)
    new_span_yield = next(new_span_gen)
    span_generators.append(new_span_gen)
    new_span_yields.append(new_span_yield)
  # Set the values yielded by the `span` calls above to be the current span
  # yields, and yield so that the function can be run to completion.
  with _with_span_yields(new_span_yields):
    result = yield None
  # Send the result of the function to all of the generators so that they can
  # complete.
  for span_gen in reversed(span_generators):
    try:
      span_gen.send(result)
    except StopIteration:
      pass
Taylor Cramer's avatar
Taylor Cramer 已提交
380

381

382
def propagate_trace_context_task_factory(loop, coro):
383
384
385
386
387
388
  """Creates a new task on `loop` to run `coro`, inheriting current spans."""
  child_task = asyncio.tasks.Task(coro, loop=loop)
  trace_span_yields = _current_span_yields()
  setattr(child_task, 'trace_span_yields', trace_span_yields)
  return child_task

389

390
391
392
def wrap_coroutine_in_current_trace_context(coro):
  """Wraps the coroutine in the currently active span."""
  trace_span_yields = _current_span_yields()
393

394
395
396
  async def _wrapped():
    with _with_span_yields(trace_span_yields):
      return await coro
397

398
  return _wrapped()
399
400


401
402
403
404
405
406
407
408
@contextlib.contextmanager
def wrap_rpc_in_trace_context():
  """Attempts to record the trace context into the enclosed RPC call."""
  with contextlib.ExitStack() as stack:
    for tp, parent_span_yield in zip(_global_tracing_providers,
                                     _current_span_yields()):
      stack.enter_context(tp.wrap_rpc(parent_span_yield))
    yield None
409
410


411
412
413
414
415
416
@contextlib.contextmanager
def with_trace_context_from_rpc():
  """Attempts to pick up the trace context from the receiving RPC call."""
  span_yields_from_rpc = [tp.receive_rpc() for tp in _global_tracing_providers]
  with _with_span_yields(span_yields_from_rpc):
    yield None
Taylor Cramer's avatar
Taylor Cramer 已提交
417

418
419
420

def add_tracing_provider(tracing_provider: TracingProvider):
  """Add to the global list of tracing providers."""
421
  py_typecheck.check_type(tracing_provider, TracingProvider)
422
423
424
425
426
427
428
429
430
431
432
  global _global_tracing_providers
  _global_tracing_providers.append(tracing_provider)


def set_tracing_providers(tracing_providers: List[TracingProvider]):
  """Set the global list of tracing providers, replacing any existing."""
  py_typecheck.check_type(tracing_providers, list)
  for tp in tracing_providers:
    py_typecheck.check_type(tp, TracingProvider)
  global _global_tracing_providers
  _global_tracing_providers = tracing_providers
433
434


435
436
def _func_to_class_and_method(fn) -> Tuple[str, str]:
  """Returns the names of the function's class and method."""
437
438
439
440
441
  split = fn.__qualname__.split('.')
  if len(split) >= 2:
    class_name = split[-2]
    method_name = split[-1]
  else:
442
    module_name = fn.__module__
443
444
445
    class_name = module_name.split('.')[-1]
    method_name = fn.__name__
  return class_name, method_name
446
447
448
449
450
451
452
453


@contextlib.contextmanager
def _null_context() -> Iterator[None]:
  # TODO(b/154533346)
  # This should move to `contextlib.nullcontext` once TFF's minimum
  # Python version moves up to 3.7,
  yield None