diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 748309b54593a1..86a8acbc29b08f 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2181,12 +2181,37 @@ def test_family_member_needs_transform_only_when_shape_changes(self): output = self.generate_tables(input) self.assert_slot_map_lines( output, - "[OP_RAW] = {1, 1, {0}}", - "[OP_RAW_SPECIALIZED] = {1, 0, {0}}", + "[OP_RAW] = {1, 0, {0}}", + "[OP_RAW_SPECIALIZED] = {1, 1, {0}}", "[OP_TYPED] = {1, 0, {0}}", "[OP_TYPED_SPECIALIZED] = {1, 0, {0}}", ) + def test_record_transform_generated_from_recording_uop(self): + input = """ + tier2 op(_RECORD_TOS, (tos -- tos)) { + RECORD_VALUE(PyStackRef_AsPyObjectBorrow(tos)); + } + tier2 op(_RECORD_TOS_TYPE, (tos -- tos)) { + RECORD_VALUE(Py_TYPE(PyStackRef_AsPyObjectBorrow(tos))); + } + op(_DO_STUFF, (tos -- res)) { + res = tos; + } + macro(OP) = _RECORD_TOS + _DO_STUFF; + macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assertIn("_PyOpcode_RecordTransform_TOS_TYPE", output) + self.assertIn("tos = PyStackRef_FromPyObjectBorrow(recorded_value);", output) + self.assertIn( + "transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(tos));", + output, + ) + self.assertIn("return _PyOpcode_RecordTransform_TOS_TYPE(value);", output) + self.assertNotIn("record_trace_transform_to_type", output) + def test_family_member_maps_positional_recorders_to_family_slots(self): input = """ tier2 op(_RECORD_TOS, (sub -- sub)) { @@ -2227,8 +2252,8 @@ def test_family_member_maps_non_positional_recorders_by_stack_shape(self): output = self.generate_tables(input) self.assert_slot_map_lines( output, - "[OP] = {1, 1, {0}}", - "[OP_SPECIALIZED] = {1, 0, {0}}", + "[OP] = {1, 0, {0}}", + "[OP_SPECIALIZED] = {1, 1, {0}}", ) def test_family_head_records_union_of_member_recorders(self): @@ -2243,7 +2268,12 @@ def test_family_head_records_union_of_member_recorders(self): macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF; family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; """ + analysis = self.analyze_input(input) output = self.generate_tables(input) + self.assertEqual( + analysis.families["OP"].member_record_names, + ("_RECORD_TOS",), + ) self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output) self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output) self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}") @@ -2277,12 +2307,12 @@ def test_family_detects_base_and_specialized_recording_difference(self): ), ["_RECORD_TOS_TYPE"], ) - self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output) - self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output) + self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output) + self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output) self.assert_slot_map_lines( output, - "[OP] = {1, 1, {0}}", - "[OP_SPECIALIZED] = {1, 0, {0}}", + "[OP] = {1, 0, {0}}", + "[OP_SPECIALIZED] = {1, 1, {0}}", ) def test_family_head_falls_back_for_missing_member_slots(self): diff --git a/Python/optimizer.c b/Python/optimizer.c index 11658fca0da595..8e709da4bbb45e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -660,44 +660,6 @@ is_terminator(const _PyUOpInstruction *uop) ); } -static PyObject * -record_trace_transform_to_type(PyObject *value) -{ - PyObject *tp = Py_NewRef((PyObject *)Py_TYPE(value)); - Py_DECREF(value); - return tp; -} - -/* _RECORD_NOS_GEN_FUNC and _RECORD_3OS_GEN_FUNC record the raw receiver. - * If it is a generator, return its function object; otherwise return NULL. - */ -static PyObject * -record_trace_transform_gen_func(PyObject *value) -{ - PyObject *func = NULL; - if (PyGen_Check(value)) { - _PyStackRef f = ((PyGenObject *)value)->gi_iframe.f_funcobj; - if (!PyStackRef_IsNull(f)) { - func = Py_NewRef(PyStackRef_AsPyObjectBorrow(f)); - } - } - Py_DECREF(value); - return func; -} - -/* _RECORD_BOUND_METHOD records the raw callable. - * Keep it only for bound methods; otherwise return NULL. - */ -static PyObject * -record_trace_transform_bound_method(PyObject *value) -{ - if (Py_TYPE(value) == &PyMethod_Type) { - return value; - } - Py_DECREF(value); - return NULL; -} - /* Returns 1 on success (added to trace), 0 on trace end. */ // gh-142543: inlining this function causes stack overflows diff --git a/Python/record_functions.c.h b/Python/record_functions.c.h index 13996b30b9c03c..6384b35de168a7 100644 --- a/Python/record_functions.c.h +++ b/Python/record_functions.c.h @@ -258,18 +258,55 @@ const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[10] = { [_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS, }; +static PyObject * +_PyOpcode_RecordTransform_NOS_TYPE(PyObject *recorded_value) +{ + PyObject *transformed_value = NULL; + _PyStackRef nos; + nos = PyStackRef_FromPyObjectBorrow(recorded_value); + transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(nos)); + Py_XINCREF(transformed_value); + Py_DECREF(recorded_value); + return transformed_value; +} + +static PyObject * +_PyOpcode_RecordTransform_TOS_TYPE(PyObject *recorded_value) +{ + PyObject *transformed_value = NULL; + _PyStackRef tos; + tos = PyStackRef_FromPyObjectBorrow(recorded_value); + transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(tos)); + Py_XINCREF(transformed_value); + Py_DECREF(recorded_value); + return transformed_value; +} + +static PyObject * +_PyOpcode_RecordTransform_BOUND_METHOD(PyObject *recorded_value) +{ + PyObject *transformed_value = NULL; + _PyStackRef callable; + callable = PyStackRef_FromPyObjectBorrow(recorded_value); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + if (Py_TYPE(callable_o) == &PyMethod_Type) { + transformed_value = (PyObject *)callable_o; + Py_XINCREF(transformed_value); + } + Py_DECREF(recorded_value); + return transformed_value; +} + PyObject * _PyOpcode_RecordTransformValue(int uop, PyObject *value) { switch (uop) { - case _RECORD_TOS_TYPE: case _RECORD_NOS_TYPE: - return record_trace_transform_to_type(value); - case _RECORD_NOS_GEN_FUNC: - case _RECORD_3OS_GEN_FUNC: - return record_trace_transform_gen_func(value); + return _PyOpcode_RecordTransform_NOS_TYPE(value); + case _RECORD_TOS_TYPE: + return _PyOpcode_RecordTransform_TOS_TYPE(value); case _RECORD_BOUND_METHOD: - return record_trace_transform_bound_method(value); + return _PyOpcode_RecordTransform_BOUND_METHOD(value); default: return value; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 414ca18be4654c..139f6c346d27ea 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -322,6 +322,17 @@ class Family: size: str members: list[Instruction] + @property + def member_record_names(self) -> tuple[str, ...]: + seen: set[str] = set() + names: list[str] = [] + for member in self.members: + for part in member.parts: + if part.properties.records_value and part.name not in seen: + seen.add(part.name) + names.append(part.name) + return tuple(names) + def dump(self, indent: str) -> None: print(indent, self.name, "= ", ", ".join([m.name for m in self.members])) diff --git a/Tools/cases_generator/record_function_generator.py b/Tools/cases_generator/record_function_generator.py index 6f518ffdcf2ac2..bb6980cbf24fe1 100644 --- a/Tools/cases_generator/record_function_generator.py +++ b/Tools/cases_generator/record_function_generator.py @@ -3,7 +3,9 @@ from analyzer import ( Analysis, + Family, Instruction, + Uop, analyze_files, CodeSection, ) @@ -19,7 +21,6 @@ from cwriter import CWriter from tier1_generator import write_uop, Emitter, declare_variable -from typing import TextIO from lexer import Token from stack import Stack, Storage @@ -28,26 +29,18 @@ # Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h. MAX_RECORDED_VALUES = 3 -# Map `_RECORD_*` uops to the helper that converts a raw family-recorded -# value to the form the specialized member consumes. -_RECORD_TRANSFORM_HELPERS: dict[str, str] = { - "_RECORD_TOS_TYPE": "record_trace_transform_to_type", - "_RECORD_NOS_TYPE": "record_trace_transform_to_type", - "_RECORD_NOS_GEN_FUNC": "record_trace_transform_gen_func", - "_RECORD_3OS_GEN_FUNC": "record_trace_transform_gen_func", - "_RECORD_BOUND_METHOD": "record_trace_transform_bound_method", -} - # Recorder uops whose slot kind differs from the leading word of their name. _RECORD_SLOT_KIND_OVERRIDES: dict[str, str] = { "_RECORD_BOUND_METHOD": "CALLABLE", } -class RecorderEmitter(Emitter): - def __init__(self, out: CWriter): +class RecordValueEmitter(Emitter): + def __init__(self, out: CWriter, target: str, incref: str): super().__init__(out, {}) self._replacers["RECORD_VALUE"] = self.record_value + self.target = target + self.incref = incref def record_value( self, @@ -57,13 +50,13 @@ def record_value( storage: Storage, inst: Instruction | None, ) -> bool: - lparen = next(tkn_iter) + next(tkn_iter) self.out.start_line() - self.emit("*recorded_value = (PyObject *)") + self.emit(f"{self.target} = (PyObject *)") emit_to(self.out, tkn_iter, "RPAREN") next(tkn_iter) # Semi colon self.emit(";\n") - self.emit("Py_INCREF(*recorded_value);\n") + self.emit(f"{self.incref}({self.target});\n") return True @@ -80,36 +73,35 @@ def get_instruction_record_names(inst: Instruction) -> list[str]: def get_family_record_names( - family_head: Instruction, - family_members: list[Instruction], + family: Family, instruction_records: dict[str, list[str]], record_slot_keys: dict[str, str], ) -> list[str]: - member_records = [instruction_records[m.name] for m in family_members] - all_member_names = {n for names in member_records for n in names} + family_record_names = set(family.member_record_names) + family_record_names.update(instruction_records[family.name]) records: list[str] = [] slot_index: dict[str, int] = {} def add(name: str) -> None: kind = record_slot_keys[name] - # Prefer the raw recorder if any member uses it; otherwise the given form. + # Prefer the raw recorder if any family instruction uses it. raw = f"_RECORD_{kind}" - source = raw if raw in all_member_names else name + source = raw if raw in family_record_names else name existing = slot_index.get(kind) if existing is None: slot_index[kind] = len(records) records.append(source) elif records[existing] != source: raise ValueError( - f"Family {family_head.name} has incompatible recorders for " + f"Family {family.name} has incompatible recorders for " f"slot {kind}: {records[existing]} and {source}" ) - for names in member_records: - for name in names: + for member in family.members: + for name in instruction_records[member.name]: add(name) # Family head supplies any slots no member exercises. - for name in instruction_records[family_head.name]: + for name in instruction_records[family.name]: if record_slot_keys[name] not in slot_index: slot_index[record_slot_keys[name]] = len(records) records.append(name) @@ -121,10 +113,11 @@ def get_record_consumer_layout( source_records: list[str], own_records: list[str], record_slot_keys: dict[str, str], -) -> tuple[list[int], int]: +) -> tuple[list[int], int, list[str]]: used = [False] * len(source_records) slot_map: list[int] = [] transform_mask = 0 + transform_names: list[str] = [] for i, own in enumerate(own_records): own_kind = record_slot_keys[own] for j, src in enumerate(source_records): @@ -133,13 +126,43 @@ def get_record_consumer_layout( slot_map.append(j) if src != own: transform_mask |= 1 << i + if own not in transform_names: + transform_names.append(own) break else: raise ValueError( f"Instruction {inst_name} has no compatible family slot for " f"{own} in {source_records}" ) - return slot_map, transform_mask + return slot_map, transform_mask, transform_names + + +def get_record_transform_input(uop: Uop) -> str: + inputs = [var for var in uop.stack.inputs if var.used] + if len(inputs) != 1 or inputs[0].is_array(): + raise ValueError( + f"Recorder transform for {uop.name} needs exactly one scalar input" + ) + return inputs[0].name + + +def generate_record_transform_function(uop: Uop, out: CWriter) -> None: + input_name = get_record_transform_input(uop) + out.emit("static PyObject *\n") + out.emit(f"_PyOpcode_RecordTransform{uop.name[7:]}(PyObject *recorded_value)\n") + out.emit("{\n") + out.emit("PyObject *transformed_value = NULL;\n") + for var in uop.stack.inputs: + if var.used: + declare_variable(var, out) + out.emit(f"{input_name} = PyStackRef_FromPyObjectBorrow(recorded_value);\n") + emitter = RecordValueEmitter(out, "transformed_value", "Py_XINCREF") + emitter.emit_tokens(uop, Storage(Stack(), [], [], 0, False), None, False) + out.start_line() + out.emit("Py_DECREF(recorded_value);\n") + out.emit("return transformed_value;\n") + out.emit("}\n\n") + def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: CWriter) -> None: write_header(__file__, filenames, out.out) @@ -151,7 +174,7 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C """ ) args = "_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, int oparg, PyObject **recorded_value" - emitter = RecorderEmitter(out) + emitter = RecordValueEmitter(out, "*recorded_value", "Py_INCREF") nop = analysis.instructions["NOP"] for uop in analysis.uops.values(): if not uop.properties.records_value: @@ -179,8 +202,7 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None: record_slot_keys = {name: get_record_slot_kind(name) for name in record_uop_names} family_record_table = { family.name: get_family_record_names( - analysis.instructions[family.name], - family.members, + family, instruction_records, record_slot_keys, ) @@ -190,12 +212,12 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None: record_table: dict[str, list[str]] = {} record_consumer_table: dict[str, tuple[list[int], int]] = {} record_function_indexes: dict[str, int] = {} + record_transform_names: list[str] = [] for inst in analysis.instructions.values(): own_records = instruction_records[inst.name] # TRACE_RECORD runs before execution, but specialization may rewrite - # the opcode before translation. Record the raw family shape (union - # of head + members) so any opcode in the family can be translated - # from the same recorded layout. + # the opcode before translation. Use the shared family recording shape + # so any opcode in the family can be translated from the same layout. family = inst.family or analysis.families.get(inst.name) records = family_record_table[family.name] if family is not None else own_records if not records: @@ -210,9 +232,13 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None: if name not in record_function_indexes: record_function_indexes[name] = len(record_function_indexes) + 1 if own_records: - record_consumer_table[inst.name] = get_record_consumer_layout( + slots, mask, transform_names = get_record_consumer_layout( inst.name, records, own_records, record_slot_keys ) + record_consumer_table[inst.name] = (slots, mask) + for name in transform_names: + if name not in record_transform_names: + record_transform_names.append(name) for name, index in record_function_indexes.items(): out.emit(f"#define {name}_INDEX {index}\n") @@ -240,34 +266,29 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None: for name in record_function_indexes: out.emit(f" [{name}_INDEX] = _PyOpcode_RecordFunction{name[7:]},\n") out.emit("};\n") - generate_record_transform_dispatcher(record_uop_names, out) + out.emit("\n") + for name in record_transform_names: + generate_record_transform_function(analysis.uops[name], out) + generate_record_transform_dispatcher(record_transform_names, out) def generate_record_transform_dispatcher( - record_uop_names: list[str], out: CWriter + transform_names: list[str], out: CWriter ) -> None: """Emit a switch that converts a family-recorded value for a recorder uop. - Only `_RECORD_*` uops that need conversion get a case; the default - returns the input value unchanged. Helpers live in Python/optimizer.c. + Only `_RECORD_*` uops that need conversion get a case; the default returns + the input value unchanged. """ - cases: dict[str, list[str]] = {} - for record_name in record_uop_names: - helper = _RECORD_TRANSFORM_HELPERS.get(record_name) - if helper is None: - continue - cases.setdefault(helper, []).append(record_name) - out.emit("\n") out.emit( "PyObject *\n" "_PyOpcode_RecordTransformValue(int uop, PyObject *value)\n" "{\n" ) out.emit(" switch (uop) {\n") - for helper, names in cases.items(): - for name in names: - out.emit(f" case {name}:\n") - out.emit(f" return {helper}(value);\n") + for name in transform_names: + out.emit(f" case {name}:\n") + out.emit(f" return _PyOpcode_RecordTransform{name[7:]}(value);\n") out.emit(" default:\n") out.emit(" return value;\n") out.emit(" }\n")