Skip to content
37 changes: 26 additions & 11 deletions Objects/genobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "pycore_gc.h" // _PyGC_CLEAR_FINALIZED()
#include "pycore_modsupport.h" // _PyArg_CheckPositional()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_opcode_metadata.h" // _PyOpcode_Deopt
#include "pycore_opcode_utils.h" // RESUME_AFTER_YIELD_FROM
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_*
#include "pycore_pyerrors.h" // _PyErr_ClearExcState()
Expand Down Expand Up @@ -327,23 +328,37 @@ gen_close_iter(PyObject *yf)
}

/* Report whether an instruction is a RESUME.
 *
 * NOTE(review): this span comes from a rendered diff, so two variants of the
 * function are interleaved below. Presumably the `_Py_CODEUNIT *instr` form is
 * the removed one and the `(frame, oparg_p)` form is the added one -- confirm
 * against the real file before relying on either.
 *
 * Frame-based variant:
 *   frame    -- frame whose instr_ptr is inspected.
 *   oparg_p  -- written with the instruction's oparg only when true is
 *               returned; left untouched otherwise.
 *   returns  -- true if the opcode found for frame->instr_ptr is RESUME.
 */
static inline bool
is_resume(_Py_CODEUNIT *instr)
is_resume(_PyInterpreterFrame *frame, uint8_t *oparg_p)
{
/* Pointer-based variant: read the opcode byte through the FT_ATOMIC relaxed
 * load wrapper and accept any of the three RESUME opcodes listed below. */
uint8_t code = FT_ATOMIC_LOAD_UINT8_RELAXED(instr->op.code);
return (
code == RESUME ||
code == RESUME_CHECK ||
code == INSTRUMENTED_RESUME
);
/* Frame-based variant: locate the current instruction inside its code object.
 * `offset` is the distance from the start of the code's instructions to
 * frame->instr_ptr; it is scaled by sizeof(_Py_CODEUNIT) further down, so it
 * is presumably counted in code units -- confirm.
 * NOTE(review): the pointer difference is stored in an `int`. */
PyCodeObject *code = _PyFrame_GetCode(frame);
int offset = frame->instr_ptr - _PyCode_CODE(code);
Comment thread
brandtbucher marked this conversation as resolved.
Outdated
/* Presumably _Py_GetBaseOpcode maps specialized/instrumented opcodes back to
 * their base form, which would be why only RESUME is tested at the end --
 * verify against its definition. The oparg is read straight from the
 * instruction. */
uint8_t opcode = _Py_GetBaseOpcode(code, offset);
uint8_t oparg = frame->instr_ptr->op.arg;
/* Only compiled when _Py_TIER2 is defined: if the instruction is
 * ENTER_EXECUTOR, take the opcode (passed through _PyOpcode_Deopt) and oparg
 * recorded in the executor's vm_data instead. */
#ifdef _Py_TIER2
if (opcode == ENTER_EXECUTOR) {
/* _Py_GetExecutor is called with a byte offset here. */
_PyExecutorObject *executor = _Py_GetExecutor(code, sizeof(_Py_CODEUNIT) * offset);
opcode = _PyOpcode_Deopt[executor->vm_data.opcode];
oparg = executor->vm_data.oparg;
/* Presumably releases the reference returned by _Py_GetExecutor -- confirm. */
Py_DECREF(executor);
}
#endif
/* Hand the oparg back to the caller only on a positive match. */
if (opcode == RESUME) {
*oparg_p = oparg;
return true;
}
return false;
}

PyObject *
_PyGen_yf(PyGenObject *gen)
{
if (gen->gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) {
_PyInterpreterFrame *frame = &gen->gi_iframe;
assert(is_resume(frame->instr_ptr));
Comment thread
brandtbucher marked this conversation as resolved.
Comment thread
brandtbucher marked this conversation as resolved.
assert((frame->instr_ptr->op.arg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM);
#ifndef NDEBUG
uint8_t oparg;
assert(is_resume(frame, &oparg));
assert((oparg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM);
#endif
return PyStackRef_AsPyObjectNew(_PyFrame_StackPeek(frame));
}
return NULL;
Expand Down Expand Up @@ -372,11 +387,11 @@ gen_close(PyGenObject *gen, PyObject *args)
Py_DECREF(yf);
}
_PyInterpreterFrame *frame = &gen->gi_iframe;
if (is_resume(frame->instr_ptr)) {
uint8_t oparg;
if (is_resume(frame, &oparg)) {
/* We can safely ignore the outermost try block
* as it is automatically generated to handle
* StopIteration. */
int oparg = frame->instr_ptr->op.arg;
if (oparg & RESUME_OPARG_DEPTH1_MASK) {
// RESUME after YIELD_VALUE and exception depth is 1
assert((oparg & RESUME_OPARG_LOCATION_MASK) != RESUME_AT_FUNC_START);
Expand Down
64 changes: 31 additions & 33 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,7 @@ add_to_trace(
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
OPT_STAT_INC(trace_stack_overflow); \
trace_length = 0; \
goto done; \
return 0; \
} \
assert(func == NULL || func->func_code == (PyObject *)code); \
trace_stack[trace_stack_depth].func = func; \
Expand Down Expand Up @@ -550,6 +549,7 @@ translate_bytecode_to_trace(
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
bool jump_seen = false;

#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
Expand All @@ -568,7 +568,6 @@ translate_bytecode_to_trace(
ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
uint32_t target = 0;

top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
// Need space for _DEOPT
Expand All @@ -577,6 +576,13 @@ translate_bytecode_to_trace(
uint32_t opcode = instr->op.code;
uint32_t oparg = instr->op.arg;

if (!progress_needed && instr == initial_instr) {
// We have looped around to the start:
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
goto done;
}

DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);

if (opcode == ENTER_EXECUTOR) {
Expand All @@ -603,30 +609,21 @@ translate_bytecode_to_trace(
/* Special case the first instruction,
* so that we can guarantee forward progress */
if (progress_needed) {
progress_needed = false;
if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
initial_instr = instr;
if (opcode == JUMP_BACKWARD) {
ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
}
continue;
}
else {
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_EXIT(opcode));
assert(!OPCODE_HAS_DEOPT(opcode));
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_EXIT(opcode));
assert(!OPCODE_HAS_DEOPT(opcode));
}

if (OPCODE_HAS_EXIT(opcode)) {
// Make space for exit code
// Make space for side exit and final _EXIT_TRACE:
RESERVE_RAW(2, "_EXIT_TRACE");
max_length--;
}
if (OPCODE_HAS_ERROR(opcode)) {
// Make space for error code
// Make space for error stub and final _EXIT_TRACE:
RESERVE_RAW(2, "_ERROR_POP_N");
max_length--;
}
switch (opcode) {
Expand Down Expand Up @@ -672,19 +669,18 @@ translate_bytecode_to_trace(
}

case JUMP_BACKWARD:
ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
_Py_FALLTHROUGH;
case JUMP_BACKWARD_NO_INTERRUPT:
{
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
if (target == initial_instr) {
/* We have looped round to the start */
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
}
else {
instr += 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] - (int)oparg;
if (jump_seen) {
OPT_STAT_INC(inner_loop);
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
goto done;
}
goto done;
jump_seen = true;
goto top;
}

case JUMP_FORWARD:
Expand Down Expand Up @@ -892,23 +888,25 @@ translate_bytecode_to_trace(
assert(instr->op.code == POP_TOP);
instr++;
}
top:
// Jump here after _PUSH_FRAME or likely branches.
progress_needed = false;
} // End for (;;)

done:
while (trace_stack_depth > 0) {
TRACE_STACK_POP();
}
assert(code == initial_code);
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
if (progress_needed || trace_length < 5) {
// Skip short traces where we can't even translate a single instruction:
if (progress_needed) {
OPT_STAT_INC(trace_too_short);
DPRINTF(2,
"No trace for %s (%s:%d) at byte offset %d (%s)\n",
"No trace for %s (%s:%d) at byte offset %d (no progress)\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
progress_needed ? "no progress" : "too short");
2 * INSTR_IP(initial_instr, code));
return 0;
}
if (trace[trace_length-1].opcode != _JUMP_TO_TOP) {
Expand Down