contrib: Parse ELF directly for symbol and security checks

Instead of the ever-messier text parsing of the output of the readelf
tool (which is clearly meant for human consumption not to be machine
parseable), parse the ELF binaries directly.

Add a small dependency-less ELF parser specific to the checks.

This is slightly more secure, too, because it removes potential
ambiguity due to misparsing and changes in the output format of `elfread`. It
also allows for stricter and more specific ELF format checks in the future.

This removes the build-time dependency for `readelf`.

It passes the test-security-check for me locally, though I haven't
checked on all platforms.
This commit is contained in:
Wladimir J. van der Laan
2020-11-20 09:15:44 +01:00
parent fdd068507d
commit 634f6ec4eb
6 changed files with 413 additions and 169 deletions

View File

@@ -11,10 +11,11 @@ Example usage:
find ../gitian-builder/build -type f -executable | xargs python3 contrib/devtools/symbol-check.py
'''
import subprocess
import re
import sys
import os
from typing import List, Optional, Tuple
from typing import List, Optional
import pixie
# Debian 8 (Jessie) EOL: 2020. https://wiki.debian.org/DebianReleases#Production_Releases
#
@@ -50,7 +51,6 @@ IGNORE_EXPORTS = {
'_edata', '_end', '__end__', '_init', '__bss_start', '__bss_start__', '_bss_end__', '__bss_end__', '_fini', '_IO_stdin_used', 'stdin', 'stdout', 'stderr',
'environ', '_environ', '__environ',
}
READELF_CMD = os.getenv('READELF', '/usr/bin/readelf')
CPPFILT_CMD = os.getenv('CPPFILT', '/usr/bin/c++filt')
OBJDUMP_CMD = os.getenv('OBJDUMP', '/usr/bin/objdump')
OTOOL_CMD = os.getenv('OTOOL', '/usr/bin/otool')
@@ -76,11 +76,11 @@ ELF_ALLOWED_LIBRARIES = {
'libdl.so.2' # programming interface to dynamic linker
}
ARCH_MIN_GLIBC_VER = {
'80386': (2,1),
'X86-64': (2,2,5),
'ARM': (2,4),
'AArch64':(2,17),
'RISC-V': (2,27)
pixie.EM_386: (2,1),
pixie.EM_X86_64: (2,2,5),
pixie.EM_ARM: (2,4),
pixie.EM_AARCH64:(2,17),
pixie.EM_RISCV: (2,27)
}
MACHO_ALLOWED_LIBRARIES = {
@@ -140,29 +140,6 @@ class CPPFilt(object):
self.proc.stdout.close()
self.proc.wait()
def read_symbols(executable, imports=True) -> List[Tuple[str, str, str]]:
'''
Parse an ELF executable and return a list of (symbol,version, arch) tuples
for dynamic, imported symbols.
'''
p = subprocess.Popen([READELF_CMD, '--dyn-syms', '-W', '-h', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
(stdout, stderr) = p.communicate()
if p.returncode:
raise IOError('Could not read symbols for {}: {}'.format(executable, stderr.strip()))
syms = []
for line in stdout.splitlines():
line = line.split()
if 'Machine:' in line:
arch = line[-1]
if len(line)>7 and re.match('[0-9]+:$', line[0]):
(sym, _, version) = line[7].partition('@')
is_import = line[6] == 'UND'
if version.startswith('@'):
version = version[1:]
if is_import == imports:
syms.append((sym, version, arch))
return syms
def check_version(max_versions, version, arch) -> bool:
if '_' in version:
(lib, _, ver) = version.rpartition('_')
@@ -174,36 +151,30 @@ def check_version(max_versions, version, arch) -> bool:
return False
return ver <= max_versions[lib] or lib == 'GLIBC' and ver <= ARCH_MIN_GLIBC_VER[arch]
def elf_read_libraries(filename) -> List[str]:
p = subprocess.Popen([READELF_CMD, '-d', '-W', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
(stdout, stderr) = p.communicate()
if p.returncode:
raise IOError('Error opening file')
libraries = []
for line in stdout.splitlines():
tokens = line.split()
if len(tokens)>2 and tokens[1] == '(NEEDED)':
match = re.match(r'^Shared library: \[(.*)\]$', ' '.join(tokens[2:]))
if match:
libraries.append(match.group(1))
else:
raise ValueError('Unparseable (NEEDED) specification')
return libraries
def check_imported_symbols(filename) -> bool:
elf = pixie.load(filename)
cppfilt = CPPFilt()
ok = True
for sym, version, arch in read_symbols(filename, True):
if version and not check_version(MAX_VERSIONS, version, arch):
for symbol in elf.dyn_symbols:
if not symbol.is_import:
continue
sym = symbol.name.decode()
version = symbol.version.decode() if symbol.version is not None else None
if version and not check_version(MAX_VERSIONS, version, elf.hdr.e_machine):
print('{}: symbol {} from unsupported version {}'.format(filename, cppfilt(sym), version))
ok = False
return ok
def check_exported_symbols(filename) -> bool:
elf = pixie.load(filename)
cppfilt = CPPFilt()
ok = True
for sym,version,arch in read_symbols(filename, False):
if arch == 'RISC-V' or sym in IGNORE_EXPORTS:
for symbol in elf.dyn_symbols:
if not symbol.is_export:
continue
sym = symbol.name.decode()
if elf.hdr.e_machine == pixie.EM_RISCV or sym in IGNORE_EXPORTS:
continue
print('{}: export of symbol {} not allowed'.format(filename, cppfilt(sym)))
ok = False
@@ -211,9 +182,11 @@ def check_exported_symbols(filename) -> bool:
def check_ELF_libraries(filename) -> bool:
ok = True
for library_name in elf_read_libraries(filename):
if library_name not in ELF_ALLOWED_LIBRARIES:
print('{}: NEEDED library {} is not allowed'.format(filename, library_name))
elf = pixie.load(filename)
for library_name in elf.query_dyn_tags(pixie.DT_NEEDED):
assert(isinstance(library_name, bytes))
if library_name.decode() not in ELF_ALLOWED_LIBRARIES:
print('{}: NEEDED library {} is not allowed'.format(filename, library_name.decode()))
ok = False
return ok