flake8 formatting

This commit is contained in:
JanLJL
2025-03-05 10:19:10 +01:00
parent 5cd6b2cf9d
commit 02716e7b41
8 changed files with 80 additions and 83 deletions

View File

@@ -11,7 +11,7 @@ from ruamel.yaml import YAML
from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64, ParserX86, ParserX86ATT, ParserX86Intel
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT, ParserX86Intel
from osaca.semantics import (
INSTR_FLAGS,
ArchSemantics,
@@ -355,7 +355,7 @@ def inspect(args, output_file=sys.stdout):
(arch, syntax)
for arch in archs_to_try
for syntax in syntaxes_to_try
if (syntax != None) == (MachineModel.get_isa_for_arch(arch) == "x86")
if (syntax is not None) == (MachineModel.get_isa_for_arch(arch) == "x86")
]
# Parse file.

View File

@@ -1,8 +1,5 @@
#!/usr/bin/env python3
import string
import re
import pyparsing as pp
from osaca.parser import ParserX86
@@ -34,11 +31,11 @@ class ParserX86ATT(ParserX86):
InstructionForm(
mnemonic="mov",
operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
),
InstructionForm(
mnemonic="movl",
operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
)
),
InstructionForm(
mnemonic="movl",
operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
)
],
InstructionForm(
directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])
@@ -51,11 +48,11 @@ class ParserX86ATT(ParserX86):
InstructionForm(
mnemonic="mov",
operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
),
InstructionForm(
mnemonic="movl",
operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
)
),
InstructionForm(
mnemonic="movl",
operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
)
],
InstructionForm(
directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python3
import pyparsing as pp
import re
import string
import unicodedata
from osaca.parser import ParserX86
@@ -25,6 +23,7 @@ NON_ASCII_PRINTABLE_CHARACTERS = "".join(
if unicodedata.category(chr(cp)) not in ("Cc", "Zl", "Zp", "Cs", "Cn")
)
# References:
# ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf.
# Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170.
@@ -146,16 +145,18 @@ class ParserX86Intel(ParserX86):
# A hack to help with comparison instruction: if the instruction is in the model, and has
# exactly two sources, swap its operands.
if (model and
not has_destination and
len(instruction_form.operands) == 2
if (
model
and not has_destination
and len(instruction_form.operands) == 2
and not isa_model.get_instruction(
mnemonic,
instruction_form.operands
) and not arch_model.get_instruction(
mnemonic,
instruction_form.operands
)):
)
):
instruction_form.operands.reverse()
# If the instruction has a well-known data type, append a suffix.
@@ -175,7 +176,6 @@ class ParserX86Intel(ParserX86):
instruction_form.mnemonic = suffixed_mnemonic
break
def construct_parser(self):
"""Create parser for x86 Intel ISA."""
# Numeric literal.
@@ -353,12 +353,15 @@ class ParserX86Intel(ParserX86):
(pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign")
+ integer_number | identifier
).setResultsName(self.immediate_id)
pre_displacement = pp.Group(integer_number + pp.Literal("+")
pre_displacement = pp.Group(
integer_number + pp.Literal("+")
).setResultsName(self.immediate_id)
indexed = pp.Group(
index_register.setResultsName("index")
+ pp.Optional(pp.Literal("*")
+ scale.setResultsName("scale"))
+ pp.Optional(
pp.Literal("*")
+ scale.setResultsName("scale")
)
).setResultsName("indexed")
register_expression = pp.Group(
pp.Literal("[")
@@ -370,7 +373,7 @@ class ParserX86Intel(ParserX86):
+ pp.Literal("+")
+ indexed).setResultsName("base_and_indexed")
^ indexed
).setResultsName("non_displacement")
).setResultsName("non_displacement")
+ pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement"))
+ pp.Literal("]")
).setResultsName("register_expression")
@@ -472,7 +475,7 @@ class ParserX86Intel(ParserX86):
pp.CaselessKeyword("ALIAS")
| pp.CaselessKeyword("ALIGN")
| pp.CaselessKeyword("ASSUME")
#| pp.CaselessKeyword("BYTE")
# | pp.CaselessKeyword("BYTE")
| pp.CaselessKeyword("CATSTR")
| pp.CaselessKeyword("COMM")
| pp.CaselessKeyword("COMMENT")
@@ -482,7 +485,7 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("DQ")
| pp.CaselessKeyword("DT")
| pp.CaselessKeyword("DW")
#| pp.CaselessKeyword("DWORD")
# | pp.CaselessKeyword("DWORD")
| pp.CaselessKeyword("ECHO")
| pp.CaselessKeyword("END")
| pp.CaselessKeyword("ENDP")
@@ -491,14 +494,14 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("EVEN")
| pp.CaselessKeyword("EXTRN")
| pp.CaselessKeyword("EXTERNDEF")
#| pp.CaselessKeyword("FWORD")
# | pp.CaselessKeyword("FWORD")
| pp.CaselessKeyword("GROUP")
| pp.CaselessKeyword("INCLUDE")
| pp.CaselessKeyword("INCLUDELIB")
| pp.CaselessKeyword("INSTR")
| pp.CaselessKeyword("INVOKE")
| pp.CaselessKeyword("LABEL")
#| pp.CaselessKeyword("MMWORD")
# | pp.CaselessKeyword("MMWORD")
| pp.CaselessKeyword("OPTION")
| pp.CaselessKeyword("ORG")
| pp.CaselessKeyword("PAGE")
@@ -507,27 +510,27 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("PROTO")
| pp.CaselessKeyword("PUBLIC")
| pp.CaselessKeyword("PUSHCONTEXT")
#| pp.CaselessKeyword("QWORD")
#| pp.CaselessKeyword("REAL10")
#| pp.CaselessKeyword("REAL4")
#| pp.CaselessKeyword("REAL8")
# | pp.CaselessKeyword("QWORD")
# | pp.CaselessKeyword("REAL10")
# | pp.CaselessKeyword("REAL4")
# | pp.CaselessKeyword("REAL8")
| pp.CaselessKeyword("RECORD")
#| pp.CaselessKeyword("SBYTE")
#| pp.CaselessKeyword("SDWORD")
# | pp.CaselessKeyword("SBYTE")
# | pp.CaselessKeyword("SDWORD")
| pp.CaselessKeyword("SEGMENT")
| pp.CaselessKeyword("SIZESTR")
| pp.CaselessKeyword("STRUCT")
| pp.CaselessKeyword("SUBSTR")
| pp.CaselessKeyword("SUBTITLE")
#| pp.CaselessKeyword("SWORD")
#| pp.CaselessKeyword("TBYTE")
# | pp.CaselessKeyword("SWORD")
# | pp.CaselessKeyword("TBYTE")
| pp.CaselessKeyword("TEXTEQU")
| pp.CaselessKeyword("TITLE")
| pp.CaselessKeyword("TYPEDEF")
| pp.CaselessKeyword("UNION")
#| pp.CaselessKeyword("WORD")
#| pp.CaselessKeyword("XMMWORD")
#| pp.CaselessKeyword("YMMWORD")
# | pp.CaselessKeyword("WORD")
# | pp.CaselessKeyword("XMMWORD")
# | pp.CaselessKeyword("YMMWORD")
)
self.directive = pp.Group(
pp.Optional(~directive_keywords + directive_identifier)

View File

@@ -1,7 +1,5 @@
#!/usr/bin/env python3
"""Semantics object responsible for architecture-specific semantic operations"""
from dis import Instruction
import sys
import warnings
from itertools import chain

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python3
import copy
import os
import signal
import time
from itertools import chain
from multiprocessing import Manager, Process, cpu_count

View File

@@ -1,11 +1,8 @@
#!/usr/bin/env python3
from collections import OrderedDict
from enum import Enum
from functools import partial
from osaca.parser import get_parser
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.directive import DirectiveOperand
from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand
from osaca.parser.memory import MemoryOperand
@@ -13,6 +10,7 @@ from osaca.parser.register import RegisterOperand
COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"}
# State of marker matching.
# No: we have determined that the code doesn't match the marker.
# Partial: so far the code matches the marker, but we have not reached the end of the marker yet.
@@ -173,6 +171,7 @@ def get_marker(isa, syntax="ATT", comment=""):
return start_marker, end_marker
def match_line(parser, line, marker_line):
"""
Returns whether `line` matches `marker_line`.
@@ -198,6 +197,7 @@ def match_line(parser, line, marker_line):
else:
return Matching.No
def match_operands(line_operands, marker_line_operands):
if len(line_operands) != len(marker_line_operands):
return False
@@ -207,6 +207,7 @@ def match_operands(line_operands, marker_line_operands):
zip(line_operands, marker_line_operands)
)
def match_operand(line_operand, marker_line_operand):
if (
isinstance(line_operand, ImmediateOperand)
@@ -221,14 +222,15 @@ def match_operand(line_operand, marker_line_operand):
):
return True
if (
isinstance(line_operand, MemoryOperand)
and isinstance(marker_line_operand, MemoryOperand)
and match_operand(line_operand.base, marker_line_operand.base)
and match_operand(line_operand.offset, line_operand.offset)
):
isinstance(line_operand, MemoryOperand)
and isinstance(marker_line_operand, MemoryOperand)
and match_operand(line_operand.base, marker_line_operand.base)
and match_operand(line_operand.offset, line_operand.offset)
):
return True
return False
def match_parameters(parser, line_parameters, marker_line_parameters):
"""
Returns whether `line_parameters` matches `marker_line_parameters`.
@@ -238,13 +240,10 @@ def match_parameters(parser, line_parameters, marker_line_parameters):
:return: Matching. In case of partial match, `marker_line_parameters` is modified and should be
reused for matching the next line in the parsed assembly code.
"""
line_parameter_count = len(line_parameters)
marker_line_parameter_count = len(marker_line_parameters)
# The elements of `marker_line_parameters` are consumed as they are matched.
for line_parameter in line_parameters:
if not marker_line_parameters:
break;
break
marker_line_parameter = marker_line_parameters[0]
if not match_parameter(parser, line_parameter, marker_line_parameter):
return Matching.No
@@ -254,6 +253,7 @@ def match_parameters(parser, line_parameters, marker_line_parameters):
else:
return Matching.Full
def match_parameter(parser, line_parameter, marker_line_parameter):
if line_parameter.lower() == marker_line_parameter.lower():
return True

View File

@@ -6,8 +6,6 @@ Unit tests for x86 Intel assembly parser
import os
import unittest
from pyparsing import ParseException
from osaca.parser import ParserX86Intel, InstructionForm
from osaca.parser.directive import DirectiveOperand
from osaca.parser.identifier import IdentifierOperand
@@ -134,13 +132,13 @@ class TestParserX86Intel(unittest.TestCase):
self.assertEqual(parsed_4.mnemonic, "mov")
self.assertEqual(parsed_4.operands[0],
RegisterOperand(name="EAX"))
self.assertEqual(parsed_4.operands[1],
MemoryOperand(offset=ImmediateOperand(
identifier="cur_elements$",
value=104
),
base=RegisterOperand(name="RBP")))
self.assertEqual(
parsed_4.operands[1],
MemoryOperand(
offset=ImmediateOperand(identifier="cur_elements$", value=104),
base=RegisterOperand(name="RBP")
)
)
self.assertEqual(parsed_5.mnemonic, "mov")
self.assertEqual(parsed_5.operands[0],
MemoryOperand(offset=ImmediateOperand(value=24),
@@ -252,18 +250,21 @@ class TestParserX86Intel(unittest.TestCase):
self.assertEqual(parsed[0].line_number, 1)
# Check specifically that the values of the symbols defined by "=" were correctly
# propagated.
self.assertEqual(parsed[69],
InstructionForm(mnemonic="mov",
operands=[MemoryOperand(
base=RegisterOperand("RBP"),
offset=ImmediateOperand(
value=4,
identifier="r$1"
)
),
ImmediateOperand(value=0)],
line="\tmov\tDWORD PTR r$1[rbp], 0",
line_number=73))
self.assertEqual(
parsed[69],
InstructionForm(
mnemonic="mov",
operands=[
MemoryOperand(
base=RegisterOperand("RBP"),
offset=ImmediateOperand(value=4, identifier="r$1")
),
ImmediateOperand(value=0)
],
line="\tmov\tDWORD PTR r$1[rbp], 0",
line_number=73
)
)
# Check a few lines to make sure that we produced something reasonable.
self.assertEqual(parsed[60],
InstructionForm(mnemonic="mov",

View File

@@ -314,11 +314,11 @@ class TestSemanticTools(unittest.TestCase):
def test_src_dst_assignment_x86_intel(self):
for instruction_form in self.kernel_x86_intel:
with self.subTest(instruction_form=instruction_form):
if instruction_form.semantic_operands is not None:
self.assertTrue("source" in instruction_form.semantic_operands)
self.assertTrue("destination" in instruction_form.semantic_operands)
self.assertTrue("src_dst" in instruction_form.semantic_operands)
with self.subTest(instruction_form=instruction_form):
if instruction_form.semantic_operands is not None:
self.assertTrue("source" in instruction_form.semantic_operands)
self.assertTrue("destination" in instruction_form.semantic_operands)
self.assertTrue("src_dst" in instruction_form.semantic_operands)
def test_src_dst_assignment_AArch64(self):
for instruction_form in self.kernel_AArch64: