_elffile.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. """
  2. ELF file parser.
  3. This provides a class ``ELFFile`` that parses an ELF executable through an
  4. interface similar to ``ZipFile``. Only the read interface is implemented.
  5. Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
  6. ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
  7. """
  8. import enum
  9. import os
  10. import struct
  11. from typing import IO, Optional, Tuple
  12. class ELFInvalid(ValueError):
  13. pass
  14. class EIClass(enum.IntEnum):
  15. C32 = 1
  16. C64 = 2
  17. class EIData(enum.IntEnum):
  18. Lsb = 1
  19. Msb = 2
  20. class EMachine(enum.IntEnum):
  21. I386 = 3
  22. S390 = 22
  23. Arm = 40
  24. X8664 = 62
  25. AArc64 = 183
  26. class ELFFile:
  27. """
  28. Representation of an ELF executable.
  29. """
  30. def __init__(self, f: IO[bytes]) -> None:
  31. self._f = f
  32. try:
  33. ident = self._read("16B")
  34. except struct.error:
  35. raise ELFInvalid("unable to parse identification")
  36. magic = bytes(ident[:4])
  37. if magic != b"\x7fELF":
  38. raise ELFInvalid(f"invalid magic: {magic!r}")
  39. self.capacity = ident[4] # Format for program header (bitness).
  40. self.encoding = ident[5] # Data structure encoding (endianness).
  41. try:
  42. # e_fmt: Format for program header.
  43. # p_fmt: Format for section header.
  44. # p_idx: Indexes to find p_type, p_offset, and p_filesz.
  45. e_fmt, self._p_fmt, self._p_idx = {
  46. (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
  47. (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
  48. (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
  49. (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
  50. }[(self.capacity, self.encoding)]
  51. except KeyError:
  52. raise ELFInvalid(
  53. f"unrecognized capacity ({self.capacity}) or "
  54. f"encoding ({self.encoding})"
  55. )
  56. try:
  57. (
  58. _,
  59. self.machine, # Architecture type.
  60. _,
  61. _,
  62. self._e_phoff, # Offset of program header.
  63. _,
  64. self.flags, # Processor-specific flags.
  65. _,
  66. self._e_phentsize, # Size of section.
  67. self._e_phnum, # Number of sections.
  68. ) = self._read(e_fmt)
  69. except struct.error as e:
  70. raise ELFInvalid("unable to parse machine and section information") from e
  71. def _read(self, fmt: str) -> Tuple[int, ...]:
  72. return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
  73. @property
  74. def interpreter(self) -> Optional[str]:
  75. """
  76. The path recorded in the ``PT_INTERP`` section header.
  77. """
  78. for index in range(self._e_phnum):
  79. self._f.seek(self._e_phoff + self._e_phentsize * index)
  80. try:
  81. data = self._read(self._p_fmt)
  82. except struct.error:
  83. continue
  84. if data[self._p_idx[0]] != 3: # Not PT_INTERP.
  85. continue
  86. self._f.seek(data[self._p_idx[1]])
  87. return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
  88. return None