[Git][ghc/ghc][wip/andreask/linker_fix] rts: LoadArchive/LoadObj - refactor object verification.

Andreas Klebinger pushed to branch wip/andreask/linker_fix at Glasgow Haskell Compiler / GHC Commits: 1a10f18c by Andreas Klebinger at 2025-08-11T22:34:04+02:00 rts: LoadArchive/LoadObj - refactor object verification. Fixes #26231. We now consistently call `verifyAndInitOc` to check for valid object code, allowing us to replace the somewhat ad hoc magic number checking in loadArchive with the platform-specific verification logic. On Windows this adds loadArchive support for AArch64/32bit COFF bigobj files. - - - - - 6 changed files: - rts/Linker.c - rts/LinkerInternals.h - rts/linker/LoadArchive.c - rts/linker/MachO.c - rts/linker/MachO.h - rts/linker/PEi386.c Changes: ===================================== rts/Linker.c ===================================== @@ -1415,7 +1415,9 @@ preloadObjectFile (pathchar *path) // We calculate the correct alignment from the header before // reading the file, and then we misalign image on purpose so // that the actual sections end up aligned again. - misalignment = machoGetMisalignment(f); + machoGetMisalignment(f, &misalignment); + //machoGetMisalignment might fail to parse the header, but in that + //case so will verifyAndInitOc so we leave cleanup to after verifyAndInitOc. image = stgMallocBytes(fileSize + misalignment, "loadObj(image)"); image += misalignment; @@ -1441,14 +1443,11 @@ preloadObjectFile (pathchar *path) /* FIXME (AP): =mapped= parameter unconditionally set to true */ oc = mkOc(STATIC_OBJECT, path, image, fileSize, true, NULL, misalignment); -#if defined(OBJFORMAT_MACHO) - if (ocVerifyImage_MachO( oc )) - ocInit_MachO( oc ); -#endif -#if defined(OBJFORMAT_ELF) - if(ocVerifyImage_ELF( oc )) - ocInit_ELF( oc ); -#endif + if (!verifyAndInitOc(oc)) { + freeObjectCode(oc); + debugBelch("loadObj: Failed to verify oc.\n"); + return NULL; + }; return oc; } @@ -1505,27 +1504,44 @@ HsInt loadObj (pathchar *path) return r; } +// Call the relevant ocVerifyImage_* and ocInit_* functions. +// Return 1 on success. 
+HsInt verifyAndInitOc (ObjectCode* oc) +{ + int r; + + IF_DEBUG(linker, ocDebugBelch(oc, "start\n")); + + /* verify the in-memory image */ +#if defined(OBJFORMAT_ELF) + r = ocVerifyImage_ELF ( oc ); + if(r) { + ocInit_ELF( oc ); + } +#elif defined(OBJFORMAT_PEi386) + r = ocVerifyImage_PEi386 ( oc ); +#elif defined(OBJFORMAT_MACHO) + r = ocVerifyImage_MachO ( oc ); + if(r) { + ocInit_MachO( oc ); + } +#else + barf("loadObj: no verify method"); +#endif + if (!r) { + IF_DEBUG(linker, ocDebugBelch(oc, "ocVerifyImage_* failed\n")); + return r; + } + return 1; +} + +// Precondition: oc already verified. HsInt loadOc (ObjectCode* oc) { int r; IF_DEBUG(linker, ocDebugBelch(oc, "start\n")); - /* verify the in-memory image */ -# if defined(OBJFORMAT_ELF) - r = ocVerifyImage_ELF ( oc ); -# elif defined(OBJFORMAT_PEi386) - r = ocVerifyImage_PEi386 ( oc ); -# elif defined(OBJFORMAT_MACHO) - r = ocVerifyImage_MachO ( oc ); -# else - barf("loadObj: no verify method"); -# endif - if (!r) { - IF_DEBUG(linker, ocDebugBelch(oc, "ocVerifyImage_* failed\n")); - return r; - } - /* Note [loadOc orderings] ~~~~~~~~~~~~~~~~~~~~~~~ The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO ===================================== rts/LinkerInternals.h ===================================== @@ -485,12 +485,18 @@ HsInt loadArchive_ (pathchar *path); HsInt isAlreadyLoaded( pathchar *path ); OStatus getObjectLoadStatus_ (pathchar *path); ObjectCode *lookupObjectByPath(pathchar *path); + +/* Verify that an object is in a format that can be loaded and initialize the oc struct if required. */ +HsInt verifyAndInitOc( ObjectCode *oc ); + +//Expects the oc to be verified already. 
HsInt loadOc( ObjectCode* oc ); ObjectCode* mkOc( ObjectType type, pathchar *path, char *image, int imageSize, bool mapped, pathchar *archiveMemberName, int misalignment ); + void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections); void freeSegments(ObjectCode *oc); ===================================== rts/linker/LoadArchive.c ===================================== @@ -110,51 +110,6 @@ static bool loadFatArchive(char input[static 20], FILE* f, pathchar* path) } #endif -enum ObjectFileFormat { - NotObject, - COFFAmd64, - COFFI386, - COFFAArch64, - ELF, - MachO32, - MachO64, -}; - -static enum ObjectFileFormat identifyObjectFile_(char* buf, size_t sz) -{ - if (sz > 2 && ((uint16_t*)buf)[0] == 0x8664) { - return COFFAmd64; - } - if (sz > 2 && ((uint16_t*)buf)[0] == 0x014c) { - return COFFI386; - } - if (sz > 2 && ((uint16_t*)buf)[0] == 0xaa64) { - return COFFAArch64; - } - if (sz > 4 && memcmp(buf, "\x7f" "ELF", 4) == 0) { - return ELF; - } - if (sz > 4 && ((uint32_t*)buf)[0] == 0xfeedface) { - return MachO32; - } - if (sz > 4 && ((uint32_t*)buf)[0] == 0xfeedfacf) { - return MachO64; - } - // BigObj COFF files ... - if (sz > 8 && ((uint64_t*)buf)[0] == 0x86640002ffff0000) { - return COFFAmd64; - } - return NotObject; -} - -static enum ObjectFileFormat identifyObjectFile(FILE *f) -{ - char buf[32]; - ssize_t sz = fread(buf, 1, 32, f); - CHECK(fseek(f, -sz, SEEK_CUR) == 0); - return identifyObjectFile_(buf, sz); -} - static bool readThinArchiveMember(int n, int memberSize, pathchar* path, char* fileName, char* image) { @@ -547,9 +502,11 @@ HsInt loadArchive_ (pathchar *path) } DEBUG_LOG("Found member file `%s'\n", fileName); - bool is_symbol_table = strcmp("", fileName) == 0; - enum ObjectFileFormat object_fmt = is_symbol_table ? NotObject : identifyObjectFile(f); + +///////////////////////////////////////////////// +// We found the member file. Load it into memory. 
+///////////////////////////////////////////////// #if defined(OBJFORMAT_PEi386) /* @@ -569,17 +526,20 @@ HsInt loadArchive_ (pathchar *path) #endif // windows DEBUG_LOG("\tthisFileNameSize = %d\n", (int)thisFileNameSize); - DEBUG_LOG("\tisObject = %d\n", object_fmt); - if ((!is_symbol_table && isThin) || object_fmt != NotObject) { - DEBUG_LOG("Member is an object file...loading...\n"); + if (!is_symbol_table && !isImportLib) + { + DEBUG_LOG("Member might be an object file...loading...\n"); #if defined(darwin_HOST_OS) || defined(ios_HOST_OS) if (RTS_LINKER_USE_MMAP) image = mmapAnonForLinker(memberSize); else { /* See loadObj() */ - misalignment = machoGetMisalignment(f); + if(!machoGetMisalignment(f, &misalignment)) + DEBUG_LOG("Failed to load member as mach-o file. Skipping.\n"); + continue; + } image = stgMallocBytes(memberSize + misalignment, "loadArchive(image)"); image += misalignment; @@ -610,19 +570,23 @@ HsInt loadArchive_ (pathchar *path) pathprintf(archiveMemberName, size+1, WSTR("%" PATH_FMT "(#%d:%.*s)"), path, memberIdx, (int)thisFileNameSize, fileName); +/////////////////////////////////////////////////////////////// +// Verify the object file is valid, and load it if appropriate. +/////////////////////////////////////////////////////////////// + + // Prepare headers, doesn't load any data yet. ObjectCode *oc = mkOc(STATIC_OBJECT, path, image, memberSize, false, archiveMemberName, misalignment); -#if defined(OBJFORMAT_MACHO) - ASSERT(object_fmt == MachO32 || object_fmt == MachO64); - ocInit_MachO( oc ); -#endif -#if defined(OBJFORMAT_ELF) - ASSERT(object_fmt == ELF); - ocInit_ELF( oc ); -#endif - stgFree(archiveMemberName); + if(!verifyAndInitOc( oc )) + { + freeObjectCode( oc ); + IF_DEBUG(linker, ocDebugBelch(oc, "Faild to verify ... 
skipping.")); + continue; + }; + + if (0 == loadOc(oc)) { stgFree(fileName); fclose(f); ===================================== rts/linker/MachO.c ===================================== @@ -1725,31 +1725,41 @@ ocRunFini_MachO ( ObjectCode *oc ) /* * Figure out by how much to shift the entire Mach-O file in memory * when loading so that its single segment ends up 16-byte-aligned + * + * Returns 1 and sets misalignment_out to the detected misalignment if + * we successfully parsed the file. + * + * If we can't parse the file we set misalignment_out to 0 and return 0 */ int -machoGetMisalignment( FILE * f ) +machoGetMisalignment( FILE * f, int* misalignment_out ) { MachOHeader header; int misalignment; + *misalignment_out = 0; { size_t n = fread(&header, sizeof(header), 1, f); if (n != 1) { - barf("machoGetMisalignment: can't read the Mach-O header"); + debugBelch("machoGetMisalignment: can't read the Mach-O header"); + return 0; } } fseek(f, -sizeof(header), SEEK_CUR); if(header.magic != MH_MAGIC_64) { - barf("Bad magic. Expected: %08x, got: %08x.", + debugBelch("Bad magic. Expected: %08x, got: %08x.", MH_MAGIC_64, header.magic); + return 0; } misalignment = (header.sizeofcmds + sizeof(header)) & 0xF; IF_DEBUG(linker, debugBelch("mach-o misalignment %d\n", misalignment)); - return misalignment ? (16 - misalignment) : 0; + misalignment = misalignment ? 
(16 - misalignment) : 0; + *misalignment_out = misalignment; + return 1; } #endif /* darwin_HOST_OS || ios_HOST_OS */ ===================================== rts/linker/MachO.h ===================================== @@ -13,7 +13,7 @@ int ocGetNames_MachO ( ObjectCode* oc ); int ocResolve_MachO ( ObjectCode* oc ); int ocRunInit_MachO ( ObjectCode* oc ); int ocRunFini_MachO ( ObjectCode* oc ); -int machoGetMisalignment ( FILE * ); +int machoGetMisalignment ( FILE *, int* ); int ocAllocateExtras_MachO ( ObjectCode* oc ); SectionKind getSectionKind_MachO ( MachOSection *macho ); ===================================== rts/linker/PEi386.c ===================================== @@ -775,6 +775,10 @@ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName ) *************/ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) { + if((size_t) oc->fileSize < sizeof(IMAGE_FILE_HEADER)) { + errorBelch ("Supposed COFF file smaller than minimum header size.\n"); + return NULL; + } COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc)); COFF_HEADER_INFO* info @@ -808,6 +812,11 @@ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) stgFree (info); info = NULL; errorBelch ("Unknown COFF %d type in getHeaderInfo.", coff_type); + if(oc->archiveMemberName) { + errorBelch ("Archive %" PATH_FMT ".\n", oc->archiveMemberName); + } + errorBelch ("In %" PATH_FMT ".\n", oc->fileName); + } break; } View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1a10f18c2a5ba760f3b102bc00ae9735... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1a10f18c2a5ba760f3b102bc00ae9735... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Andreas Klebinger (@AndreasK)