Skip to content
Merged
2,536 changes: 1,280 additions & 1,256 deletions Include/internal/pycore_uop_ids.h

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

148 changes: 148 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,6 +3390,154 @@ def testfunc(args):
uops = get_opnames(ex)
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)

def test_int_add_inplace_unique_lhs(self):
    """Tier-2 emits the in-place add uop when the LHS is unique.

    `a * b` produces a fresh compact int (no other references), so the
    optimizer can mutate it in place for the following `+ c` instead of
    allocating a new int object.
    """
    # a * b produces a unique compact int; adding c reuses it in place
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += a * b + c
        return total

    # 2000 * 3 + 4000 == 10000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 10000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE", uops)

def test_int_add_inplace_unique_rhs(self):
    """Tier-2 emits the RIGHT-variant in-place add uop for a unique RHS.

    Here the fresh compact int from `a * b` appears on the right-hand
    side of `+`, so the _RIGHT variant of the in-place uop is expected.
    """
    # a * b produces a unique compact int on the right side of +
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += c + a * b
        return total

    # 4000 + 2000 * 3 == 10000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 10000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", uops)

def test_int_add_no_inplace_non_unique(self):
    """No in-place uop when neither operand is uniquely referenced.

    `a + b` reads two locals (both externally referenced), so it must
    use the regular _BINARY_OP_ADD_INT.  Its result, however, IS a
    fresh value on the RHS of `total += ...`, so that second add may
    use the _RIGHT in-place variant.
    """
    # Both operands of a + b are locals — neither is unique,
    # so the first add uses the regular op. But total += (a+b)
    # has a unique RHS (result of a+b), so it uses _INPLACE_RIGHT.
    def testfunc(args):
        a, b, n = args
        total = 0
        for _ in range(n):
            total += a + b
        return total

    # 2000 + 3000 == 5000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 5000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    # a + b: both are locals, no inplace
    self.assertIn("_BINARY_OP_ADD_INT", uops)
    # total += result: result is unique RHS
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", uops)
    # No LHS inplace variant for the first add
    self.assertNotIn("_BINARY_OP_ADD_INT_INPLACE", uops)

def test_int_add_inplace_small_int_result(self):
    """In-place add must not corrupt the interned small-int singletons.

    When the result lands in the small-int cache range, the in-place
    path falls back to _PyCompactLong_Add rather than mutating a
    cached singleton.  Only correctness is checked here — no uop
    assertion — since the point is the fallback, not the trace shape.
    """
    # When the result is a small int, the inplace path falls back
    # to _PyCompactLong_Add. Verify correctness (no singleton corruption).
    def testfunc(args):
        a, b, n = args
        total = 0
        for _ in range(n):
            total += a * b + 1  # a*b=6, +1=7, small int
        return total

    res, ex = self._run_with_optimizer(testfunc, (2, 3, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 7)
    # Verify small int singletons are not corrupted: if the cached 7
    # had been mutated in place, 3 + 4 would no longer equal 7.
    self.assertEqual(7, 3 + 4)

def test_int_subtract_inplace_unique_lhs(self):
    """Tier-2 emits the in-place subtract uop when the LHS is unique.

    Mirrors test_int_add_inplace_unique_lhs for the `-` operator.
    """
    # a * b produces a unique compact int; subtracting c reuses it
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += a * b - c
        return total

    # 2000 * 3 - 1000 == 5000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 1000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 5000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE", uops)

def test_int_subtract_inplace_unique_rhs(self):
    """Tier-2 emits the RIGHT-variant in-place subtract for a unique RHS.

    Mirrors test_int_add_inplace_unique_rhs for the `-` operator.
    """
    # a * b produces a unique compact int on the right of -
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += c - a * b
        return total

    # 10000 - 2000 * 3 == 4000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 10000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 4000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT", uops)

def test_int_multiply_inplace_unique_lhs(self):
    """Tier-2 emits the in-place multiply uop when the LHS is unique.

    `(a + b)` produces the fresh compact int here (multiplication is
    the op under test, so the unique value must come from an add).
    """
    # (a + b) produces a unique compact int; multiplying by c reuses it
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += (a + b) * c
        return total

    # (2000 + 3000) * 4 == 20000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 20000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE", uops)

def test_int_multiply_inplace_unique_rhs(self):
    """Tier-2 emits the RIGHT-variant in-place multiply for a unique RHS.

    Mirrors test_int_multiply_inplace_unique_lhs with the fresh value
    on the right-hand side of `*`.
    """
    # (a + b) produces a unique compact int on the right side of *
    def testfunc(args):
        a, b, c, n = args
        total = 0
        for _ in range(n):
            total += c * (a + b)
        return total

    # 4 * (2000 + 3000) == 20000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 20000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT", uops)

def test_int_inplace_chain_propagation(self):
    """Uniqueness propagates through a chain of int operations.

    In `a * b + c * d` both products are fresh compact ints, so the
    middle `+` can reuse one of them; which side the optimizer picks
    is not pinned, so either the LHS or the RIGHT in-place add variant
    is accepted.
    """
    # a * b + c * d: both products are unique, the + reuses one;
    # result of + is also unique for the subsequent +=
    def testfunc(args):
        a, b, c, d, n = args
        total = 0
        for _ in range(n):
            total += a * b + c * d
        return total

    # 2000 * 3 + 4000 * 5 == 26000 per iteration
    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, 5, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 26000)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    # Accept either variant: the spec does not fix which unique
    # operand the optimizer chooses to reuse.
    inplace_add = (
        "_BINARY_OP_ADD_INT_INPLACE" in uops
        or "_BINARY_OP_ADD_INT_INPLACE_RIGHT" in uops
    )
    self.assertTrue(inplace_add,
                    "Expected an inplace add for unique intermediate results")

def test_load_attr_instance_value(self):
def testfunc(n):
class C():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimize compact integer arithmetic in the JIT by mutating
uniquely-referenced operands in place, avoiding allocation of a new int
object. Speeds up the pyperformance ``spectral_norm`` benchmark by ~10%.
57 changes: 57 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,63 @@ dummy_func(
macro(BINARY_OP_SUBTRACT_INT) =
_GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT + _POP_TOP_INT + _POP_TOP_INT;

// Inplace compact int ops: mutate the uniquely-referenced operand
// when possible. The op handles decref of TARGET internally so
// the following _POP_TOP_INT becomes _POP_TOP_NOP. Tier 2 only.
// In-place add: mutate the uniquely-referenced `left` operand via
// _PyCompactLong_Add instead of allocating a fresh int.
tier2 op(_BINARY_OP_ADD_INT_INPLACE, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, left, +, _PyCompactLong_Add);
    // NULL result => _PyCompactLong_Add failed (OOM, or the sum is not
    // representable as a compact int); exit to tier 1, matching the
    // EXIT_IF used by the non-inplace _BINARY_OP_ADD_INT.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
// NOTE(review): the lines below are a GitHub review-comment thread that
// was pasted into this source; they are not code and should be removed
// when the file is cleaned up.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be ERROR_IF instead? The only way this can be null after the compactlong_add operation is that it fails?

Same for below.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The non-inplace _BINARY_OP_ADD_INT uses EXIT_IF as well. The _PyCompactLong_Add can error for two reasons: OOM and the result of the add being non-compact (e.g. requiring more than one digit). I think for the latter we want the EXIT_IF?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh yeah that's fine then ok. thanks!

    res = _int_inplace_res;
    // Forward the original operands so the following _POP_TOP_INT uops
    // (reduced to no-ops — TARGET's decref happened inside the macro)
    // still see their inputs.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// In-place subtract: mutate the uniquely-referenced `left` operand via
// _PyCompactLong_Subtract instead of allocating a fresh int.
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, left, -, _PyCompactLong_Subtract);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// In-place multiply: mutate the uniquely-referenced `left` operand via
// _PyCompactLong_Multiply instead of allocating a fresh int.
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, left, *, _PyCompactLong_Multiply);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// RIGHT variant of the in-place add: the uniquely-referenced operand is
// `right` (third INT_INPLACE_OP argument selects the mutation target).
tier2 op(_BINARY_OP_ADD_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, right, +, _PyCompactLong_Add);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// RIGHT variant of the in-place subtract: mutates `right` in place.
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, right, -, _PyCompactLong_Subtract);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

// RIGHT variant of the in-place multiply: mutates `right` in place.
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    INT_INPLACE_OP(left, right, right, *, _PyCompactLong_Multiply);
    // NULL => OOM or non-compact result; deopt to tier 1.
    EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
    res = _int_inplace_res;
    // Forward operands for the following (no-op) _POP_TOP_INT uops.
    l = left;
    r = right;
    INPUTS_DEAD();
}

op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
EXIT_IF(!PyFloat_CheckExact(left_o));
Expand Down
Loading
Loading