Skip to content
Merged
2,536 changes: 1,280 additions & 1,256 deletions Include/internal/pycore_uop_ids.h

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

148 changes: 148 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,6 +3390,154 @@ def testfunc(args):
uops = get_opnames(ex)
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)

def test_int_add_inplace_unique_lhs(self):
    """Tier-2 emits the in-place add uop when the LHS is unique.

    `a * b` produces a fresh compact int (no other references), so the
    optimizer can mutate it in place for the following `+ c` instead of
    allocating a new int object.
    """
    # a * b produces a unique compact int; adding c reuses it in place
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += a * b + c
        return total

    # 2000 * 3 + 4000 == 10000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 10000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE", uops)

def test_int_add_inplace_unique_rhs(self):
    """Tier-2 emits the RIGHT-variant in-place add uop for a unique RHS.

    Here the fresh compact int from `a * b` appears on the right-hand
    side of `+`, so the _RIGHT variant of the in-place uop is expected.
    """
    # a * b produces a unique compact int on the right side of +
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += c + a * b
        return total

    # 4000 + 2000 * 3 == 10000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 10000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", uops)

def test_int_add_no_inplace_non_unique(self):
    """No in-place uop when neither operand is uniquely referenced.

    `a + b` reads two locals (both externally referenced), so it must
    use the regular _BINARY_OP_ADD_INT.  Its result, however, IS a
    fresh value on the RHS of `total += ...`, so that second add may
    use the _RIGHT in-place variant.
    """
    # Both operands of a + b are locals — neither is unique,
    # so the first add uses the regular op. But total += (a+b)
    # has a unique RHS (result of a+b), so it uses _INPLACE_RIGHT.
    def testfunc(args):
        a, b, n = args
        total = 0
        for _ in range(n):
            total += a + b
        return total

    # 2000 + 3000 == 5000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 5000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    # a + b: both are locals, no inplace
    self.assertIn("_BINARY_OP_ADD_INT", uops)
    # total += result: result is unique RHS
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", uops)
    # No LHS inplace variant for the first add
    self.assertNotIn("_BINARY_OP_ADD_INT_INPLACE", uops)

def test_int_add_inplace_small_int_result(self):
    """In-place add must not corrupt the interned small-int singletons.

    When the result lands in the small-int cache range, the in-place
    path falls back to _PyCompactLong_Add rather than mutating a
    cached singleton.  Only correctness is checked here — no uop
    assertion — since the point is the fallback, not the trace shape.
    """
    # When the result is a small int, the inplace path falls back
    # to _PyCompactLong_Add. Verify correctness (no singleton corruption).
    def testfunc(args):
        a, b, n = args
        total = 0
        for _ in range(n):
            total += a * b + 1  # a*b=6, +1=7, small int
        return total

    res, ex = self._run_with_optimizer(testfunc, (2, 3, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 7)
    # Verify small int singletons are not corrupted: if the cached 7
    # had been mutated in place, 3 + 4 would no longer equal 7.
    self.assertEqual(7, 3 + 4)

def test_int_subtract_inplace_unique_lhs(self):
    """Tier-2 emits the in-place subtract uop when the LHS is unique.

    Mirrors test_int_add_inplace_unique_lhs for the `-` operator.
    """
    # a * b produces a unique compact int; subtracting c reuses it
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += a * b - c
        return total

    # 2000 * 3 - 1000 == 5000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 1000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 5000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE", uops)

def test_int_subtract_inplace_unique_rhs(self):
    """Tier-2 emits the RIGHT-variant in-place subtract for a unique RHS.

    Mirrors test_int_add_inplace_unique_rhs for the `-` operator.
    """
    # a * b produces a unique compact int on the right of -
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += c - a * b
        return total

    # 10000 - 2000 * 3 == 4000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 10000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 4000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT", uops)

def test_int_multiply_inplace_unique_lhs(self):
    """Tier-2 emits the in-place multiply uop when the LHS is unique.

    `(a + b)` produces the fresh compact int here (multiplication is
    the op under test, so the unique value must come from an add).
    """
    # (a + b) produces a unique compact int; multiplying by c reuses it
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += (a + b) * c
        return total

    # (2000 + 3000) * 4 == 20000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 20000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE", uops)

def test_int_multiply_inplace_unique_rhs(self):
    """Tier-2 emits the RIGHT-variant in-place multiply for a unique RHS.

    Mirrors test_int_multiply_inplace_unique_lhs with the fresh value
    on the right-hand side of `*`.
    """
    # (a + b) produces a unique compact int on the right side of *
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += c * (a + b)
        return total

    # 4 * (2000 + 3000) == 20000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 20000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT", uops)

def test_int_inplace_chain_propagation(self):
    """Uniqueness propagates through a chain of int operations.

    In `a * b + c * d` both products are fresh compact ints, so the
    middle `+` can reuse one of them; which side the optimizer picks
    is not pinned, so either the LHS or the RIGHT in-place add variant
    is accepted.
    """
    # a * b + c * d: both products are unique, the + reuses one;
    # result of + is also unique for the subsequent +=
    def testfunc(args):
        a, b, c, d, n = args
        total = 0
        for _ in range(n):
            total += a * b + c * d
        return total

    # 2000 * 3 + 4000 * 5 == 26000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, 5, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 26000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    # Accept either variant: the spec does not fix which unique
    # operand the optimizer chooses to reuse.
    inplace_add = (
        "_BINARY_OP_ADD_INT_INPLACE" in uops
        or "_BINARY_OP_ADD_INT_INPLACE_RIGHT" in uops
    )
    self.assertTrue(inplace_add,
                    "Expected an inplace add for unique intermediate results")

def test_load_attr_instance_value(self):
def testfunc(n):
class C():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimize compact integer arithmetic in the JIT by mutating
uniquely-referenced operands in place, avoiding allocation of a new int
object. Speeds up the pyperformance ``spectral_norm`` benchmark by ~10%.
57 changes: 57 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,63 @@ dummy_func(
macro(BINARY_OP_SUBTRACT_INT) =
_GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT + _POP_TOP_INT + _POP_TOP_INT;

// Inplace compact int ops: mutate the uniquely-referenced operand
// when possible. The op handles decref of TARGET internally so
// the following _POP_TOP_INT becomes _POP_TOP_NOP. Tier 2 only.
// In-place add: mutate the uniquely-referenced `left` operand via
// _PyCompactLong_Add instead of allocating a fresh int.
tier2 op(_BINARY_OP_ADD_INT_INPLACE, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, left, +, _PyCompactLong_Add);
    // NULL result => _PyCompactLong_Add failed (OOM, or the sum is not
    // representable as a compact int); exit to tier 1, matching the
    // EXIT_IF used by the non-inplace _BINARY_OP_ADD_INT.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
// NOTE(review): the lines below are a GitHub review-comment thread that
// was pasted into this source; they are not code and should be removed
// when the file is cleaned up.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be ERROR_IF instead? The only way this can be null after the compactlong_add operation is that it fails?

Same for below.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The non-inplace _BINARY_OP_ADD_INT uses EXIT_IF as well. The _PyCompactLong_Add can error for two reasons: OOM and the result of the add being non-compact (e.g. requiring more than one digit). I think for the latter we want the EXIT_IF?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh yeah that's fine then ok. thanks!

    res = _int_inplace_res;
    // Forward the original operands so the following _POP_TOP_INT uops
    // (reduced to no-ops — TARGET's decref happened inside the macro)
    // still see their inputs.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// In-place subtract: mutate the uniquely-referenced `left` operand via
// _PyCompactLong_Subtract instead of allocating a fresh int.
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, left, -, _PyCompactLong_Subtract);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// In-place multiply: mutate the uniquely-referenced `left` operand via
// _PyCompactLong_Multiply instead of allocating a fresh int.
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, left, *, _PyCompactLong_Multiply);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// RIGHT variant of the in-place add: the uniquely-referenced operand is
// `right` (third INT_INPLACE_OP argument selects the mutation target).
tier2 op(_BINARY_OP_ADD_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, right, +, _PyCompactLong_Add);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// RIGHT variant of the in-place subtract: mutates `right` in place.
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, right, -, _PyCompactLong_Subtract);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// RIGHT variant of the in-place multiply: mutates `right` in place.
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, right, *, _PyCompactLong_Multiply);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
EXIT_IF(!PyFloat_CheckExact(left_o));
Expand Down
Loading
Loading