tools/gen-cpydiff.py: Improve stdout vs stderr interleaving.

In the syntax_space cpydiff, all the warnings were shown after the other output. This is because the output always showed all of stdout first and all of stderr second. By running Python in unbuffered mode and using `stderr=STDOUT`, the two streams are interleaved in exactly the order they're printed, so the SyntaxWarnings are interleaved with the other output. By using the `encoding=` argument of Popen, the need to explicitly convert to utf-8 is avoided. The encoding of the input also becomes utf-8 in this case, which all the test cases are (well, they're all ASCII, I think). As in `run-tests.py`, setting PYTHONIOENCODING ensures the Python interpreter's input and output are in utf-8, which is not always the case, especially on Windows systems. I spot-checked the generated doc pages and they all seemed to make sense still. Signed-off-by: Jeff Epler <jepler@gmail.com>
2025-07-21 21:11:12 +02:00 · 2025-05-09 21:26:26 +02:00
parent 2f97d1dd28
commit 605eda158d
1 changed files with 16 additions and 8 deletions
--- a/tools/gen-cpydiff.py
+++ b/tools/gen-cpydiff.py
@@ -45,6 +45,12 @@ else:
    CPYTHON3 = os.getenv("MICROPY_CPYTHON3", "python3")
    MICROPYTHON = os.getenv("MICROPY_MICROPYTHON", "../ports/unix/build-standard/micropython")

+# Set PYTHONIOENCODING so that CPython will use utf-8 on systems which set another encoding in the locale
+os.environ["PYTHONIOENCODING"] = "utf-8"
+
+# Set PYTHONUNBUFFERED so that CPython will interleave stdout & stderr without buffering
+os.environ["PYTHONUNBUFFERED"] = "a non-empty string"
+
 TESTPATH = "../tests/cpydiff"
 DOCPATH = "../docs/genrst"
 SRCDIR = "../docs/differences"
@@ -111,7 +117,7 @@ def run_tests(tests):
    results = []
    for test in tests:
        test_fullpath = os.path.join(TESTPATH, test.name)
-        with open(test_fullpath, "rb") as f:
+        with open(test_fullpath, "r") as f:
            input_py = f.read()

        process = subprocess.Popen(
@@ -119,20 +125,22 @@ def run_tests(tests):
            shell=True,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
-            stderr=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            encoding="utf-8",
        )
-        output_cpy = [com.decode("utf8") for com in process.communicate(input_py)]
+        output_cpy = process.communicate(input_py)[0]

        process = subprocess.Popen(
            MICROPYTHON,
            shell=True,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
-            stderr=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            encoding="utf-8",
        )
-        output_upy = [com.decode("utf8") for com in process.communicate(input_py)]
+        output_upy = process.communicate(input_py)[0]

-        if output_cpy[0] == output_upy[0] and output_cpy[1] == output_upy[1]:
+        if output_cpy == output_upy:
            print("Error: Test has same output in CPython vs MicroPython: " + test_fullpath)
            same_results = True
        else:
@@ -246,9 +254,9 @@ def gen_rst(results):
            rst.write("**Workaround:** " + output.workaround + "\n\n")

        rst.write("Sample code::\n\n" + indent(output.code, TAB) + "\n")
-        output_cpy = indent("".join(output.output_cpy[0:2]), TAB).rstrip()
+        output_cpy = indent(output.output_cpy, TAB).rstrip()
        output_cpy = ("::\n\n" if output_cpy != "" else "") + output_cpy
-        output_upy = indent("".join(output.output_upy[0:2]), TAB).rstrip()
+        output_upy = indent(output.output_upy, TAB).rstrip()
        output_upy = ("::\n\n" if output_upy != "" else "") + output_upy
        table = gen_table([["CPy output:", output_cpy], ["uPy output:", output_upy]])
        rst.write(table)