[Git][ghc/ghc][wip/andreask/linker_fix] rts: LoadArchive/LoadObj - refactor object verification.

Andreas Klebinger pushed to branch wip/andreask/linker_fix at Glasgow Haskell Compiler / GHC Commits: ae4264e7 by Andreas Klebinger at 2025-08-08T16:39:09+02:00 rts: LoadArchive/LoadObj - refactor object verification. If we have a potential image produced by mkOc in hand we now always call `verifyAndInitOc` to check for valid object code. This in turn calls out to the platform-specific verification code. This means we no longer rely on the ad hoc checking of magic numbers in loadArchive, instead using the verifyImage_* functions to recognize object code. On Windows this adds loadArchive support for AArch64/32-bit COFF bigobj files. - - - - - 6 changed files: - rts/Linker.c - rts/LinkerInternals.h - rts/linker/LoadArchive.c - rts/linker/MachO.c - rts/linker/MachO.h - rts/linker/PEi386.c Changes: ===================================== rts/Linker.c ===================================== @@ -1415,7 +1415,7 @@ preloadObjectFile (pathchar *path) // We calculate the correct alignment from the header before // reading the file, and then we misalign image on purpose so // that the actual sections end up aligned again. - misalignment = machoGetMisalignment(f); + machoGetMisalignment(f, &misalignment); image = stgMallocBytes(fileSize + misalignment, "loadObj(image)"); image += misalignment; @@ -1441,14 +1441,7 @@ preloadObjectFile (pathchar *path) /* FIXME (AP): =mapped= parameter unconditionally set to true */ oc = mkOc(STATIC_OBJECT, path, image, fileSize, true, NULL, misalignment); -#if defined(OBJFORMAT_MACHO) - if (ocVerifyImage_MachO( oc )) - ocInit_MachO( oc ); -#endif -#if defined(OBJFORMAT_ELF) - if(ocVerifyImage_ELF( oc )) - ocInit_ELF( oc ); -#endif + verifyAndInitOc(oc); return oc; } @@ -1505,27 +1498,44 @@ HsInt loadObj (pathchar *path) return r; } +// Call the relevant VeriffyImage_* and ocInit_* functions. +// Return 1 on success. 
+HsInt verifyAndInitOc (ObjectCode* oc) +{ + int r; + + IF_DEBUG(linker, ocDebugBelch(oc, "start\n")); + + /* verify the in-memory image */ +#if defined(OBJFORMAT_ELF) + r = ocVerifyImage_ELF ( oc ); + if(r) { + ocInit_ELF( oc ); + } +#elif defined(OBJFORMAT_PEi386) + r = ocVerifyImage_PEi386 ( oc ); +#elif defined(OBJFORMAT_MACHO) + r = ocVerifyImage_MachO ( oc ); + if(r) { + ocInit_MachO( oc ); + } +#else + barf("loadObj: no verify method"); +#endif + if (!r) { + IF_DEBUG(linker, ocDebugBelch(oc, "ocVerifyImage_* failed\n")); + return r; + } + return 1; +} + +// Precondition: oc already verified. HsInt loadOc (ObjectCode* oc) { int r; IF_DEBUG(linker, ocDebugBelch(oc, "start\n")); - /* verify the in-memory image */ -# if defined(OBJFORMAT_ELF) - r = ocVerifyImage_ELF ( oc ); -# elif defined(OBJFORMAT_PEi386) - r = ocVerifyImage_PEi386 ( oc ); -# elif defined(OBJFORMAT_MACHO) - r = ocVerifyImage_MachO ( oc ); -# else - barf("loadObj: no verify method"); -# endif - if (!r) { - IF_DEBUG(linker, ocDebugBelch(oc, "ocVerifyImage_* failed\n")); - return r; - } - /* Note [loadOc orderings] ~~~~~~~~~~~~~~~~~~~~~~~ The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO ===================================== rts/LinkerInternals.h ===================================== @@ -485,12 +485,19 @@ HsInt loadArchive_ (pathchar *path); HsInt isAlreadyLoaded( pathchar *path ); OStatus getObjectLoadStatus_ (pathchar *path); ObjectCode *lookupObjectByPath(pathchar *path); + +/* Verify an objects is an a format that can be loaded and initialize the oc struct if required. */ +HsInt verifyAndInitOc( ObjectCode *oc ); + +//Expects the oc to be verified already. 
HsInt loadOc( ObjectCode* oc ); ObjectCode* mkOc( ObjectType type, pathchar *path, char *image, int imageSize, bool mapped, pathchar *archiveMemberName, int misalignment ); + + void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections); void freeSegments(ObjectCode *oc); ===================================== rts/linker/LoadArchive.c ===================================== @@ -110,56 +110,50 @@ static bool loadFatArchive(char input[static 20], FILE* f, pathchar* path) } #endif -enum ObjectFileFormat { - NotObject, - COFFAmd64, - COFFI386, - COFFAArch64, - ELF, - MachO32, - MachO64, -}; - -static enum ObjectFileFormat identifyObjectFile_(char* buf, size_t sz) -{ - if (sz > 2 && ((uint16_t*)buf)[0] == 0x8664) { - return COFFAmd64; - } - if (sz > 2 && ((uint16_t*)buf)[0] == 0x014c) { - return COFFI386; - } - if (sz > 2 && ((uint16_t*)buf)[0] == 0xaa64) { - return COFFAArch64; - } - if (sz > 4 && memcmp(buf, "\x7f" "ELF", 4) == 0) { - return ELF; - } - if (sz > 4 && ((uint32_t*)buf)[0] == 0xfeedface) { - return MachO32; - } - if (sz > 4 && ((uint32_t*)buf)[0] == 0xfeedfacf) { - return MachO64; - } - // BigObj COFF files ... 
- if (sz > 8 && ((uint64_t*)buf)[0] == 0x86640002ffff0000) { - return COFFAmd64; - } - if (sz > 8 && ((uint64_t*)buf)[0] == 0x014c0002ffff0000) { - return COFFI386; - } - if (sz > 8 && ((uint64_t*)buf)[0] == 0xaa640002ffff0000) { - return COFFAArch64; - } - return NotObject; -} - -static enum ObjectFileFormat identifyObjectFile(FILE *f) -{ - char buf[32]; - ssize_t sz = fread(buf, 1, 32, f); - CHECK(fseek(f, -sz, SEEK_CUR) == 0); - return identifyObjectFile_(buf, sz); -} +// enum ObjectFileFormat { +// NotObject, +// COFFAmd64, +// COFFI386, +// COFFAArch64, +// ELF, +// MachO32, +// MachO64, +// }; + +// static enum ObjectFileFormat identifyObjectFile_(char* buf, size_t sz) +// { +// if (sz > 2 && ((uint16_t*)buf)[0] == 0x8664) { +// return COFFAmd64; +// } +// if (sz > 2 && ((uint16_t*)buf)[0] == 0x014c) { +// return COFFI386; +// } +// if (sz > 2 && ((uint16_t*)buf)[0] == 0xaa64) { +// return COFFAArch64; +// } +// if (sz > 4 && memcmp(buf, "\x7f" "ELF", 4) == 0) { +// return ELF; +// } +// if (sz > 4 && ((uint32_t*)buf)[0] == 0xfeedface) { +// return MachO32; +// } +// if (sz > 4 && ((uint32_t*)buf)[0] == 0xfeedfacf) { +// return MachO64; +// } +// // BigObj COFF files ... +// if (sz > 8 && ((uint64_t*)buf)[0] == 0x86640002ffff0000) { +// return COFFAmd64; +// } +// return NotObject; +// } + +// static enum ObjectFileFormat identifyObjectFile(FILE *f) +// { +// char buf[32]; +// ssize_t sz = fread(buf, 1, 32, f); +// CHECK(fseek(f, -sz, SEEK_CUR) == 0); +// return identifyObjectFile_(buf, sz); +// } static bool readThinArchiveMember(int n, int memberSize, pathchar* path, char* fileName, char* image) @@ -553,9 +547,11 @@ HsInt loadArchive_ (pathchar *path) } DEBUG_LOG("Found member file `%s'\n", fileName); - bool is_symbol_table = strcmp("", fileName) == 0; - enum ObjectFileFormat object_fmt = is_symbol_table ? NotObject : identifyObjectFile(f); + +///////////////////////////////////////////////// +// We found the member file. Load it into memory. 
+///////////////////////////////////////////////// #if defined(OBJFORMAT_PEi386) /* @@ -575,17 +571,22 @@ HsInt loadArchive_ (pathchar *path) #endif // windows DEBUG_LOG("\tthisFileNameSize = %d\n", (int)thisFileNameSize); - DEBUG_LOG("\tisObject = %d\n", object_fmt); + // DEBUG_LOG("\tisObject = %d\n", object_fmt); - if ((!is_symbol_table && isThin) || object_fmt != NotObject) { - DEBUG_LOG("Member is an object file...loading...\n"); + //if ((!is_symbol_table && isThin) || object_fmt != NotObject) + if (!is_symbol_table && !isImportLib) + { + DEBUG_LOG("Member might be an object file...loading...\n"); #if defined(darwin_HOST_OS) || defined(ios_HOST_OS) if (RTS_LINKER_USE_MMAP) image = mmapAnonForLinker(memberSize); else { /* See loadObj() */ - misalignment = machoGetMisalignment(f); + if(!machoGetMisalignment(f, &misalignment)) + DEBUG_LOG("Failed to load member as mach-o file. Skipping.\n"); + continue; + } image = stgMallocBytes(memberSize + misalignment, "loadArchive(image)"); image += misalignment; @@ -616,19 +617,23 @@ HsInt loadArchive_ (pathchar *path) pathprintf(archiveMemberName, size+1, WSTR("%" PATH_FMT "(#%d:%.*s)"), path, memberIdx, (int)thisFileNameSize, fileName); +/////////////////////////////////////////////////////////////// +// Verfiy the object file is valid, and load it if appropriate. +/////////////////////////////////////////////////////////////// + + // Prepare headers, doesn't load any data yet. ObjectCode *oc = mkOc(STATIC_OBJECT, path, image, memberSize, false, archiveMemberName, misalignment); -#if defined(OBJFORMAT_MACHO) - ASSERT(object_fmt == MachO32 || object_fmt == MachO64); - ocInit_MachO( oc ); -#endif -#if defined(OBJFORMAT_ELF) - ASSERT(object_fmt == ELF); - ocInit_ELF( oc ); -#endif - stgFree(archiveMemberName); + if(!verifyAndInitOc( oc )) + { + freeObjectCode( oc ); + IF_DEBUG(linker, ocDebugBelch(oc, "Faild to verify ... 
skipping.")); + continue; + }; + + if (0 == loadOc(oc)) { stgFree(fileName); fclose(f); ===================================== rts/linker/MachO.c ===================================== @@ -1725,31 +1725,41 @@ ocRunFini_MachO ( ObjectCode *oc ) /* * Figure out by how much to shift the entire Mach-O file in memory * when loading so that its single segment ends up 16-byte-aligned + * + * Returns 1 and sets misalignment_out to the detected misalignment i + * f we successfully parsed the file. + * + * If we can't parse the file we set misalignment_out to 0 and return 0 */ int -machoGetMisalignment( FILE * f ) +machoGetMisalignment( FILE * f, int* misalignment_out ) { MachOHeader header; int misalignment; + *misalignment_out = 0; { size_t n = fread(&header, sizeof(header), 1, f); if (n != 1) { - barf("machoGetMisalignment: can't read the Mach-O header"); + debugBelch("machoGetMisalignment: can't read the Mach-O header"); + return 0; } } fseek(f, -sizeof(header), SEEK_CUR); if(header.magic != MH_MAGIC_64) { - barf("Bad magic. Expected: %08x, got: %08x.", + debugBelch("Bad magic. Expected: %08x, got: %08x.", MH_MAGIC_64, header.magic); + return 0; } misalignment = (header.sizeofcmds + sizeof(header)) & 0xF; IF_DEBUG(linker, debugBelch("mach-o misalignment %d\n", misalignment)); - return misalignment ? (16 - misalignment) : 0; + misalignment ? 
(16 - misalignment) : 0; + *misalignment_out = misalignment + return 1; } #endif /* darwin_HOST_OS || ios_HOST_OS */ ===================================== rts/linker/MachO.h ===================================== @@ -13,7 +13,7 @@ int ocGetNames_MachO ( ObjectCode* oc ); int ocResolve_MachO ( ObjectCode* oc ); int ocRunInit_MachO ( ObjectCode* oc ); int ocRunFini_MachO ( ObjectCode* oc ); -int machoGetMisalignment ( FILE * ); +int machoGetMisalignment ( FILE *, int* ); int ocAllocateExtras_MachO ( ObjectCode* oc ); SectionKind getSectionKind_MachO ( MachOSection *macho ); ===================================== rts/linker/PEi386.c ===================================== @@ -775,6 +775,10 @@ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName ) *************/ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) { + if((size_t) oc->fileSize < sizeof(IMAGE_FILE_HEADER)) { + errorBelch ("Supposed COFF file smaller than minimum header size.\n"); + return NULL; + } COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc)); COFF_HEADER_INFO* info @@ -808,6 +812,11 @@ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) stgFree (info); info = NULL; errorBelch ("Unknown COFF %d type in getHeaderInfo.", coff_type); + if(oc->archiveMemberName) { + errorBelch ("Archive %" PATH_FMT ".\n", oc->archiveMemberName); + } + errorBelch ("In %" PATH_FMT ".\n", oc->fileName); + } break; } View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ae4264e70a96b8503c9997f060c30446... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ae4264e70a96b8503c9997f060c30446... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Andreas Klebinger (@AndreasK)