diff options
Diffstat (limited to 'src/plugins/elf_extractor.c')
-rw-r--r-- | src/plugins/elf_extractor.c | 802 |
1 files changed, 802 insertions, 0 deletions
diff --git a/src/plugins/elf_extractor.c b/src/plugins/elf_extractor.c new file mode 100644 index 0000000..379f615 --- /dev/null +++ b/src/plugins/elf_extractor.c @@ -0,0 +1,802 @@ +/* + This file is part of libextractor. + Copyright (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. + */ +/** + * @file plugins/gif_extractor.c + * @brief plugin to support GIF files + * @author Christian Grothoff + */ +#include "platform.h" +#include "extractor.h" +#include "pack.h" +#include <stdint.h> + + +typedef uint32_t Elf32_Addr; +typedef uint16_t Elf32_Half; +typedef uint32_t Elf32_Off; +typedef int32_t Elf32_Sword; +typedef uint32_t Elf32_Word; + +typedef uint16_t Elf64_Half; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; + +/* first 4 bytes of the ELF header */ +static char elfMagic[] = { 0x7f, 'E', 'L', 'F' }; + +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_NIDENT 16 + +typedef struct +{ + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; + Elf32_Off e_shoff; /* offset of the section header table */ + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phensize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; /* size of each entry in SH table */ + Elf32_Half e_shnum; /* how many entries in section header table */ + Elf32_Half e_shstrndx; /* section header's sh_name member is index into this string table! */ +} Elf32_Ehdr; + +/* elf-header minus e_ident */ +#define ELF_HEADER_SIZE sizeof (Elf32_Ehdr) + +#define ELF_HEADER_FIELDS(p) \ + & (p)->e_type, \ + &(p)->e_machine, \ + &(p)->e_version, \ + &(p)->e_entry, \ + &(p)->e_phoff, \ + &(p)->e_shoff, \ + &(p)->e_flags, \ + &(p)->e_ehsize, \ + &(p)->e_phensize, \ + &(p)->e_phnum, \ + &(p)->e_shentsize, \ + &(p)->e_shnum, \ + &(p)->e_shstrndx +static char *ELF_HEADER_SPECS[] = { + "hhwwwwwhhhhhh", + "HHWWWWWHHHHHH", +}; + +typedef struct +{ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phensize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* elf-header minus e_ident */ +#define ELF64_HEADER_SIZE sizeof (Elf64_Ehdr) + +#define ELF64_HEADER_FIELDS(p) \ + & (p)->e_type, \ + &(p)->e_machine, \ + &(p)->e_version, \ + &(p)->e_entry, \ + &(p)->e_phoff, \ + &(p)->e_shoff, \ + &(p)->e_flags, \ + &(p)->e_ehsize, \ + &(p)->e_phensize, \ + &(p)->e_phnum, \ + &(p)->e_shentsize, \ + &(p)->e_shnum, \ + &(p)->e_shstrndx +static char *ELF64_HEADER_SPECS[] = { + "hhwxxxwhhhhhh", + "HHWXXXWHHHHHH", +}; + + +typedef struct +{ + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; /* where loaded */ + Elf32_Off sh_offset; /* where in image (! sh_type==SHT_NOBITS) */ + Elf32_Word sh_size; /* section size in bytes */ + Elf32_Word sh_link; /* for symbol table: section header index of the associated string table! */ + Elf32_Word sh_info; /* "one greater than the symbol table index of the last local symbol _STB_LOCAL_" */ + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; +#define ELF_SECTION_SIZE 40 + +#define ELF_SECTION_FIELDS(p) \ + & (p)->sh_name, \ + &(p)->sh_type, \ + &(p)->sh_flags, \ + &(p)->sh_addr, \ + &(p)->sh_offset, \ + &(p)->sh_size, \ + &(p)->sh_link, \ + &(p)->sh_info, \ + &(p)->sh_addralign, \ + &(p)->sh_entsize +static char *ELF_SECTION_SPECS[] = { + "wwwwwwwwww", + "WWWWWWWWWW", +}; + +typedef struct +{ + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; +#define ELF_PDHR_SIZE 32 +#define ELF_PHDR_FIELDS(p) \ + & (p)->p_type, \ + &(p)->p_offset, \ + &(p)->p_vaddr, \ + &(p)->p_paddr, \ + &(p)->p_filesz, \ + &(p)->p_memsz, \ + &(p)->p_flags, \ + &(p)->p_align +static char *ELF_PHDR_SPECS[] = { + "wwwwwwww", + "WWWWWWWW", +}; + +typedef struct +{ + Elf32_Sword d_tag; + union + { + Elf32_Word d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; +#define ELF_DYN_SIZE 8 +#define ELF_DYN_FIELDS(p) \ + & (p)->d_tag, \ + &(p)->d_un +static char *ELF_DYN_SPECS[] = { + "ww", + "WW", +}; + +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +#define EM_NONE 0 +#define EM_M32 1 +#define EM_SPARC 2 +#define EM_386 3 +#define EM_68K 4 +#define EM_88K 5 +#define EM_860 7 +#define EM_MIPS 8 +#define EM_PPC 20 +#define EM_PPC64 21 +#define EM_S390 22 +#define EM_ARM 40 +#define EM_ALPHA 41 +#define EM_IA_64 50 +#define EM_X86_64 62 +#define EM_CUDA 190 + +#define ELFOSABI_NETBSD 2 +#define ELFOSABI_LINUX 3 +#define ELFOSABI_IRIX 8 +#define ELFOSABI_FREEBSD 9 +#define ELFOSABI_OPENBSD 12 + +#define EV_NONE 0 +#define EV_CURRENT 1 + +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +/* string table! */ +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +/* dynamic linking info! */ +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff + +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_MASKPROC 0xf000000 + +#define DT_NULL 0 +/* name of a needed library, offset into table + recorded in DT_STRTAB entry */ +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +/* address of the string table from where symbol + names, library names, etc for this DT come from */ +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_SYMENT 7 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +/* size of the string-table in bytes */ +#define DT_STRSZ 10 +/* fixme 11 */ +#define DT_INIT 12 +#define DT_FINI 13 +/* string-table offset giving the name of the shared object */ +#define DT_SONAME 14 +/* string-table offset of a null-terminated library search path */ +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 + + +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff + + +#define ELFCLASSNONE 0 +#define ELFCLASS32 1 +#define ELFCLASS64 2 + +#define ELFDATANONE 0 +/* little endian */ +#define ELFDATA2LSB 1 +/* big endian */ +#define ELFDATA2MSB 2 + +/** + * @param ei_data ELFDATA2LSB or ELFDATA2MSB + * @return 1 if we need to convert, 0 if not + */ +static int +getByteorder (char ei_data) +{ + if (ei_data == ELFDATA2LSB) + { +#if __BYTE_ORDER == __BIG_ENDIAN + return 1; +#else + return 0; +#endif + } + else + { +#if __BYTE_ORDER == __BIG_ENDIAN + return 0; +#else + return 1; +#endif + } +} + + +/** + * + * @return 0 on success, -1 on error + */ +static int +getSectionHdr (struct EXTRACTOR_ExtractContext *ec, + unsigned int bo, + const Elf32_Ehdr *ehdr, + Elf32_Half idx, + Elf32_Shdr *ret) +{ + ssize_t size; + uint64_t max; + int64_t off; + void *data; + + if (ehdr->e_shnum <= idx) + return -1; + max = ec->get_size (ec->cls); + if (ehdr->e_shoff + ehdr->e_shentsize * idx + sizeof (*ret) > max) + return -1; + off = ec->seek (ec->cls, + ehdr->e_shoff + ehdr->e_shentsize * idx, + SEEK_SET); + if (-1 == off) + return -1; + size = ec->read (ec->cls, + &data, + sizeof (*ret)); + if (size < sizeof (*ret)) + return -1; + EXTRACTOR_common_cat_unpack (data, + ELF_SECTION_SPECS[bo], + ELF_SECTION_FIELDS (ret)); + return 0; +} + + +/** + * + * @return 0 on success, -1 on error + */ +static int +getDynTag (struct EXTRACTOR_ExtractContext *ec, + unsigned int bo, + const Elf32_Ehdr *ehdr, + Elf32_Off off, + Elf32_Word osize, + unsigned int idx, + Elf32_Dyn *ret) +{ + ssize_t size; + uint64_t max; + int64_t soff; + void *data; + + max = ec->get_size (ec->cls); + if ( (off + osize > max) || + ((idx + 1) * ELF_DYN_SIZE > osize) ) + return -1; + if (off + idx * ELF_DYN_SIZE + sizeof (*ret) > max) + return -1; + soff = ec->seek (ec->cls, + off + idx * ELF_DYN_SIZE, + SEEK_SET); + if (-1 == soff) + return -1; + size = ec->read (ec->cls, + &data, + sizeof (*ret)); + if (size < sizeof (*ret)) + return -1; + EXTRACTOR_common_cat_unpack (data, + ELF_DYN_SPECS[bo], + ELF_DYN_FIELDS (ret)); + return 0; +} + + +/** + * + * @return 0 on success, -1 on error + */ +static int +getProgramHdr (struct EXTRACTOR_ExtractContext *ec, + unsigned int bo, + const Elf32_Ehdr *ehdr, + Elf32_Half idx, + Elf32_Phdr *ret) +{ + void *data; + ssize_t size; + int64_t off; + + if (ehdr->e_phnum <= idx) + return -1; + off = ec->seek (ec->cls, + ehdr->e_phoff + ehdr->e_phensize * idx, + SEEK_SET); + if (-1 == off) + return -1; + size = ec->read (ec->cls, + &data, + sizeof (*ret)); + if (size < sizeof (*ret)) + return -1; + EXTRACTOR_common_cat_unpack (data, + ELF_PHDR_SPECS[bo], + ELF_PHDR_FIELDS (ret)); + return 0; +} + + +/** + * @return the string (offset into data, do NOT free), NULL on error + */ +static char * +readStringTable (struct EXTRACTOR_ExtractContext *ec, + unsigned int bo, + const Elf32_Ehdr *ehdr, + Elf32_Half strTableOffset, + Elf32_Word sh_name) +{ + Elf32_Shdr shrd; + char *data; + ssize_t size; + int64_t off; + + if (-1 == getSectionHdr (ec, + bo, + ehdr, + strTableOffset, + &shrd)) + return NULL; + if ((shrd.sh_type != SHT_STRTAB) || + (shrd.sh_size <= sh_name) ) + return NULL; + off = ec->seek (ec->cls, + shrd.sh_offset, + SEEK_SET); + if (-1 == off) + return NULL; + size = ec->read (ec->cls, + (void **) &data, + shrd.sh_size); + if (size < shrd.sh_size) + return NULL; + if (data[shrd.sh_size - 1] != '\0') + return NULL; + return strdup (&data[sh_name]); +} + + +#define ADD(s, type) do { \ + if (0!=ec->proc (ec->cls, "elf", type, \ + EXTRACTOR_METAFORMAT_UTF8, "text/plain", \ + s, strlen (s) + 1)) \ + { \ + return; \ + } \ +} while (0) + + +/** + * Main entry method for the 'application/x-executable' extraction plugin. + * + * @param ec extraction context provided to the plugin + */ +void +EXTRACTOR_elf_extract_method (struct EXTRACTOR_ExtractContext *ec) +{ + Elf32_Ehdr ehdr; + Elf64_Ehdr ehdr64; + int ret; + unsigned int bo; + char *data; + ssize_t size; + uint64_t max; + size_t want; + + max = ec->get_size (ec->cls); + want = sizeof (ehdr); + if (sizeof (ehdr64) > want) + want = sizeof (ehdr64); + want += EI_NIDENT; + if (max < want) + return; + size = ec->read (ec->cls, + (void**) &data, + max); + if (size < EI_NIDENT) + return; + if (0 != memcmp (data, + elfMagic, + sizeof (elfMagic))) + return; /* not an elf */ + switch (data[EI_CLASS]) + { + case ELFCLASS32: + if (size < sizeof (Elf32_Ehdr) + EI_NIDENT) + return; + bo = getByteorder (data[EI_DATA]); + EXTRACTOR_common_cat_unpack (&data[EI_NIDENT], + ELF_HEADER_SPECS[bo], + ELF_HEADER_FIELDS (&ehdr)); + if (ehdr.e_shoff + ehdr.e_shentsize * ehdr.e_shnum > max) + return; /* invalid offsets... */ + if (ehdr.e_shentsize < ELF_SECTION_SIZE) + return; /* huh? */ + if (ehdr.e_phoff + ehdr.e_phensize * ehdr.e_phnum > max) + return; + ret = 0; + bo = getByteorder (data[EI_CLASS]); + break; + case ELFCLASS64: + if (size < sizeof (Elf64_Ehdr) + EI_NIDENT) + return; + bo = getByteorder (data[EI_DATA]); + EXTRACTOR_common_cat_unpack (&data[EI_NIDENT], + ELF64_HEADER_SPECS[bo], + ELF64_HEADER_FIELDS (&ehdr64)); + if (ehdr64.e_shoff + ((uint32_t) ehdr64.e_shentsize * ehdr64.e_shnum) > + max) + return; /* invalid offsets... */ + if (ehdr64.e_phoff + ((uint32_t) ehdr64.e_phensize * ehdr64.e_phnum) > + max) + return; + bo = getByteorder (data[EI_CLASS]); + ret = 1; + break; + default: + return; + } + + ADD ("application/x-executable", + EXTRACTOR_METATYPE_MIMETYPE); + switch ( ((unsigned char*) data)[EI_OSABI]) + { + case ELFOSABI_LINUX: + ADD ("Linux", + EXTRACTOR_METATYPE_TARGET_OS); + break; + case ELFOSABI_FREEBSD: + ADD ("FreeBSD", + EXTRACTOR_METATYPE_TARGET_OS); + break; + case ELFOSABI_NETBSD: + ADD ("NetBSD", + EXTRACTOR_METATYPE_TARGET_OS); + break; + case ELFOSABI_OPENBSD: + ADD ("OpenBSD", + EXTRACTOR_METATYPE_TARGET_OS); + break; + case ELFOSABI_IRIX: + ADD ("IRIX", + EXTRACTOR_METATYPE_TARGET_OS); + break; + default: + break; + } + switch ( (0 == ret) ? ehdr.e_type : ehdr64.e_type) + { + case ET_REL: + ADD ("Relocatable file", + EXTRACTOR_METATYPE_RESOURCE_TYPE); + break; + case ET_EXEC: + ADD ("Executable file", + EXTRACTOR_METATYPE_RESOURCE_TYPE); + break; + case ET_DYN: + ADD ("Shared object file", + EXTRACTOR_METATYPE_RESOURCE_TYPE); + break; + case ET_CORE: + ADD ("Core file", + EXTRACTOR_METATYPE_RESOURCE_TYPE); + break; + default: + break; /* unknown */ + } + switch ( (0 == ret) ? ehdr.e_machine : ehdr64.e_machine) + { + case EM_M32: + ADD ("M32", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_386: + ADD ("i386", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_68K: + ADD ("68K", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_88K: + ADD ("88K", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_SPARC: + ADD ("Sparc", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_860: + ADD ("960", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_MIPS: + ADD ("MIPS", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_PPC: + ADD ("PPC", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_PPC64: + ADD ("PPC64", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_S390: + ADD ("S390", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_ARM: + ADD ("ARM", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_ALPHA: + ADD ("ALPHA", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_IA_64: + ADD ("IA-64", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_X86_64: + ADD ("x86_64", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + case EM_CUDA: + ADD ("NVIDIA CUDA", + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + break; + default: + break; /* oops */ + } + + if (0 != ret) + return; /* FIXME: full support for 64-bit ELF... */ + for (Elf32_Half idx = 0; idx < ehdr.e_phnum; idx++) + { + Elf32_Phdr phdr; + + if (0 != getProgramHdr (ec, + bo, + &ehdr, + idx, + &phdr)) + return; + if (phdr.p_type == PT_DYNAMIC) + { + unsigned int dc = phdr.p_filesz / ELF_DYN_SIZE; + Elf32_Addr stringPtr; + Elf32_Half stringIdx; + Elf32_Half six; + + stringPtr = 0; + for (unsigned int id = 0; id < dc; id++) + { + Elf32_Dyn dyn; + + if (0 != getDynTag (ec, + bo, + &ehdr, + phdr.p_offset, + phdr.p_filesz, + id, + &dyn)) + return; + if (DT_STRTAB == dyn.d_tag) + { + stringPtr = dyn.d_un.d_ptr; + break; + } + } + if (0 == stringPtr) + return; + for (six = 0; six < ehdr.e_shnum; six++) + { + Elf32_Shdr sec; + + if (-1 == getSectionHdr (ec, + bo, + &ehdr, + six, + &sec)) + return; + if ( (sec.sh_addr == stringPtr) && + (sec.sh_type == SHT_STRTAB) ) + { + stringIdx = six; + break; + } + } + if (six == ehdr.e_shnum) + return; /* stringIdx not found */ + + for (unsigned int id = 0; id < dc; id++) + { + Elf32_Dyn dyn; + + if (0 != getDynTag (ec, + bo, + &ehdr, + phdr.p_offset, + phdr.p_filesz, + id, + &dyn)) + return; + switch (dyn.d_tag) + { + case DT_RPATH: + { + char *rpath; + + rpath = readStringTable (ec, + bo, + &ehdr, + stringIdx, + dyn.d_un.d_val); + /* "source" of the dependencies: path + to dynamic libraries */ + if (NULL != rpath) + { + ADD (rpath, + EXTRACTOR_METATYPE_LIBRARY_SEARCH_PATH); + free (rpath); + } + break; + } + case DT_NEEDED: + { + char *needed; + + needed = readStringTable (ec, + bo, + &ehdr, + stringIdx, + dyn.d_un.d_val); + if (NULL != needed) + { + ADD (needed, + EXTRACTOR_METATYPE_LIBRARY_DEPENDENCY); + free (needed); + } + break; + } + } + } + } + } +} + + +/* end of gif_extractor.c */ |