mirror of
https://github.com/micropython/micropython.git
synced 2025-07-21 04:51:12 +02:00
tools/mpy_ld.py: Resolve fixed-address symbols if requested.
This commit lets mpy_ld.py resolve symbols not only from the object files involved in the linking process, or from compiler-supplied static libraries, but also from a list of symbols referenced by an absolute address (usually provided by the system's ROM). This is needed for ESP8266 targets as some C stdlib functions are provided by the MCU's own ROM code to reduce the final code footprint, and therefore those functions' implementation was removed from the compiler's support libraries. This means that unless `LINK_RUNTIME` is set (which lets tooling look at more libraries to resolve symbols) the build process will fail as tooling is unaware of the ROM symbols' existence. With this change, fixed-address symbols can be exposed to the symbol resolution step when performing natmod linking. If a symbol is provided both by a fixed-address symbols list and by internal code or external libraries, the fixed-address symbol's address will take precedence in all cases. Although this should - in theory - also work for the whole range of ESP32 MCUs, testing is currently limited to Xtensa processors and the example natmods' makefiles only make use of this commit's changes for the ESP8266 target. Natmod builds can set the MPY_EXTERN_SYM_FILE variable pointing to a linkerscript file containing a series of symbols (weak or strong) at a fixed address; these symbols will then be used by the MicroPython linker when packaging the natmod. If a different natmod build method is used (e.g. custom CMake scripts), `tools/mpy_ld.py` can now accept a command line parameter called `--externs` (or its short variant `-e`) that contains the path of a linkerscript file with the fixed-address symbols to use when performing the linking process. The linkerscript file parser can handle a very limited subset of binutils's linkerscript syntax, namely just block comments, strong symbols, and weak symbols. Each symbol must be on its own line for the parser to succeed; empty lines and comment blocks are skipped. 
For an example of what this parser was meant to handle, you can look at `ports/esp8266/boards/eagle.rom.addr.v6.ld` and follow its format. The natmod developer documentation is also updated to reflect the new command line argument accepted by `mpy_ld.py` and the use cases for the changes introduced by this commit. Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
committed by
Damien George
parent
9174cffc47
commit
bf2005de9e
@@ -81,7 +81,14 @@ Linker limitation: the native module is not linked against the symbol table of t
|
||||
full MicroPython firmware. Rather, it is linked against an explicit table of exported
|
||||
symbols found in ``mp_fun_table`` (in ``py/nativeglue.h``), that is fixed at firmware
|
||||
build time. It is thus not possible to simply call some arbitrary HAL/OS/RTOS/system
|
||||
function, for example.
|
||||
function, for example, unless that function resides at a fixed address. In that case, the path
|
||||
of a linkerscript containing a series of symbol names and their fixed address can be
|
||||
passed to ``mpy_ld.py`` via the ``--externs`` command line argument. That way symbols
|
||||
appearing in the linkerscript will take precedence over what is provided from object
|
||||
files, but at the moment the object files' implementation will still reside in the
|
||||
final MPY file. The linkerscript parser is limited in its capabilities, and is
|
||||
currently used only for parsing the ESP8266 port ROM symbols list (see
|
||||
``ports/esp8266/boards/eagle.rom.addr.v6.ld``).
|
||||
|
||||
New symbols can be added to the end of the table and the firmware rebuilt.
|
||||
The symbols also need to be added to ``tools/mpy_ld.py``'s ``fun_table`` dict in the
|
||||
|
@@ -172,6 +172,9 @@ endif
|
||||
endif
|
||||
MPY_LD_FLAGS += $(addprefix -l, $(LIBGCC_PATH) $(LIBM_PATH))
|
||||
endif
|
||||
ifneq ($(MPY_EXTERN_SYM_FILE),)
|
||||
MPY_LD_FLAGS += --externs "$(realpath $(MPY_EXTERN_SYM_FILE))"
|
||||
endif
|
||||
|
||||
CFLAGS += $(CFLAGS_EXTRA)
|
||||
|
||||
|
@@ -402,6 +402,7 @@ class LinkEnv:
|
||||
self.known_syms = {} # dict of symbols that are defined
|
||||
self.unresolved_syms = [] # list of unresolved symbols
|
||||
self.mpy_relocs = [] # list of relocations needed in the output .mpy file
|
||||
self.externs = {} # dict of externally-defined symbols
|
||||
|
||||
def check_arch(self, arch_name):
|
||||
if arch_name != self.arch.name:
|
||||
@@ -491,10 +492,14 @@ def populate_got(env):
|
||||
sym = got_entry.sym
|
||||
if hasattr(sym, "resolved"):
|
||||
sym = sym.resolved
|
||||
sec = sym.section
|
||||
addr = sym["st_value"]
|
||||
got_entry.sec_name = sec.name
|
||||
got_entry.link_addr += sec.addr + addr
|
||||
if sym.name in env.externs:
|
||||
got_entry.sec_name = ".external.fixed_addr"
|
||||
got_entry.link_addr = env.externs[sym.name]
|
||||
else:
|
||||
sec = sym.section
|
||||
addr = sym["st_value"]
|
||||
got_entry.sec_name = sec.name
|
||||
got_entry.link_addr += sec.addr + addr
|
||||
|
||||
# Get sorted GOT, sorted by external, text, rodata, bss so relocations can be combined
|
||||
got_list = sorted(
|
||||
@@ -520,6 +525,9 @@ def populate_got(env):
|
||||
dest = int(got_entry.name.split("+")[1], 16) // env.arch.word_size
|
||||
elif got_entry.sec_name == ".external.mp_fun_table":
|
||||
dest = got_entry.sym.mp_fun_table_offset
|
||||
elif got_entry.sec_name == ".external.fixed_addr":
|
||||
# Fixed-address symbols should not be relocated.
|
||||
continue
|
||||
elif got_entry.sec_name.startswith(".text"):
|
||||
dest = ".text"
|
||||
elif got_entry.sec_name.startswith(".rodata"):
|
||||
@@ -1207,6 +1215,9 @@ def link_objects(env, native_qstr_vals_len):
|
||||
sym.section = env.obj_table_section
|
||||
elif sym.name in env.known_syms:
|
||||
sym.resolved = env.known_syms[sym.name]
|
||||
elif sym.name in env.externs:
|
||||
# Fixed-address symbols do not need pre-processing.
|
||||
continue
|
||||
else:
|
||||
if sym.name in fun_table:
|
||||
sym.section = mp_fun_table_sec
|
||||
@@ -1214,6 +1225,15 @@ def link_objects(env, native_qstr_vals_len):
|
||||
else:
|
||||
undef_errors.append("{}: undefined symbol: {}".format(sym.filename, sym.name))
|
||||
|
||||
for sym in env.externs:
|
||||
if sym in env.known_syms:
|
||||
log(
|
||||
LOG_LEVEL_1,
|
||||
"Symbol {} is a fixed-address symbol at {:08x} and is also provided from an object file".format(
|
||||
sym, env.externs[sym]
|
||||
),
|
||||
)
|
||||
|
||||
if undef_errors:
|
||||
raise LinkError("\n".join(undef_errors))
|
||||
|
||||
@@ -1456,6 +1476,9 @@ def do_link(args):
|
||||
log(LOG_LEVEL_2, "qstr vals: " + ", ".join(native_qstr_vals))
|
||||
env = LinkEnv(args.arch)
|
||||
try:
|
||||
if args.externs:
|
||||
env.externs = parse_linkerscript(args.externs)
|
||||
|
||||
# Load object files
|
||||
for fn in args.files:
|
||||
with open(fn, "rb") as f:
|
||||
@@ -1484,6 +1507,50 @@ def do_link(args):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def parse_linkerscript(source):
    """Extract fixed-address symbols from a linkerscript.

    Only a small subset of the linkerscript syntax is understood: block
    comments (``/* ... */``, possibly spanning several lines, as long as they
    start at the beginning of a line), ``//`` comment lines, strong symbols
    (``name = 0xADDRESS;``) and weak symbols
    (``PROVIDE ( name = 0xADDRESS );``).  Each symbol must be on its own
    line; any line that does not match one of these forms is skipped.

    Args:
        source: an open text file (or any iterable of text lines) containing
            the linkerscript.

    Returns:
        A dict mapping each symbol name to its address as an int.

    Raises:
        ValueError: if the same symbol is defined more than once.
    """
    # This extracts fixed-address symbol lists from linkerscripts, only parsing
    # a small subset of all possible directives.  Right now the only
    # linkerscript file this is really tested against is the ESP8266's builtin
    # ROM functions list ($SDK/ld/eagle.rom.addr.v6.ld).
    #
    # The parser should be able to handle symbol entries inside ESP-IDF's ROM
    # symbol lists for the ESP32 range of MCUs as well (see *.ld files in
    # $SDK/components/esp_rom/<name>/).

    # The regex is applied to lines with all whitespace removed.  The address
    # group must match exactly once: making it optional or repeatable would
    # accept "sym=0x;" (no address at all) or silently keep only the trailing
    # digits of an address longer than 8 hex digits.
    line_regex = re.compile(
        r"^(?P<weak>PROVIDE\()?"  # optional weak marker start
        r"(?P<symbol>[a-zA-Z_]\w*)"  # symbol name
        r"=0x(?P<address>[\da-fA-F]{1,8})"  # symbol address
        r"(?(weak)\));$",  # optional weak marker end and line terminator
        re.ASCII,
    )

    symbols = {}
    inside_comment = False
    for raw_line in source:
        line = raw_line.strip()

        # Start of a block comment; it may or may not end on the same line.
        if line.startswith("/*") and not inside_comment:
            if not line.endswith("*/"):
                inside_comment = True
            continue

        # Skip everything up to and including the line closing the comment.
        if inside_comment:
            if line.endswith("*/"):
                inside_comment = False
            continue

        if line.startswith("//"):
            continue

        match = line_regex.match("".join(line.split()))
        if not match:
            # Empty line or unsupported directive.
            continue

        symbol = match.group("symbol")
        if symbol in symbols:
            raise ValueError(f"Symbol {symbol} already defined")
        symbols[symbol] = int(match.group("address"), 16)

    return symbols
|
||||
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
|
||||
@@ -1500,6 +1567,13 @@ def main():
|
||||
cmd_parser.add_argument(
|
||||
"--output", "-o", default=None, help="output .mpy file (default to input with .o->.mpy)"
|
||||
)
|
||||
cmd_parser.add_argument(
|
||||
"--externs",
|
||||
"-e",
|
||||
type=argparse.FileType("rt"),
|
||||
default=None,
|
||||
help="linkerscript providing fixed-address symbols to augment symbol resolution",
|
||||
)
|
||||
cmd_parser.add_argument("files", nargs="+", help="input files")
|
||||
args = cmd_parser.parse_args()
|
||||
|
||||
|
Reference in New Issue
Block a user