guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/6] split linker out of elf module


From: Andy Wingo
Subject: [PATCH 1/6] split linker out of elf module
Date: Sat, 18 May 2013 17:05:35 +0200

* module/Makefile.am:
* module/system/vm/linker.scm: New file, split out of (system vm elf).

* module/system/vm/elf.scm: Remove linking capabilities.

* module/language/objcode/elf.scm: Adapt caller to use (system vm
  linker).

* test-suite/tests/linker.test: New test.
---
 module/Makefile.am              |    1 +
 module/language/objcode/elf.scm |   29 +--
 module/system/vm/elf.scm        |  387 ++--------------------------------
 module/system/vm/linker.scm     |  442 +++++++++++++++++++++++++++++++++++++++
 test-suite/tests/linker.test    |   86 ++++++++
 5 files changed, 562 insertions(+), 383 deletions(-)
 create mode 100644 module/system/vm/linker.scm
 create mode 100644 test-suite/tests/linker.test

diff --git a/module/Makefile.am b/module/Makefile.am
index 4daf7cf..0601a05 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -348,6 +348,7 @@ SYSTEM_SOURCES =                            \
   system/vm/inspect.scm                                \
   system/vm/coverage.scm                       \
   system/vm/elf.scm                            \
+  system/vm/linker.scm                         \
   system/vm/frame.scm                          \
   system/vm/instruction.scm                    \
   system/vm/objcode.scm                                \
diff --git a/module/language/objcode/elf.scm b/module/language/objcode/elf.scm
index 9654c08..1edfdcf 100644
--- a/module/language/objcode/elf.scm
+++ b/module/language/objcode/elf.scm
@@ -1,6 +1,6 @@
 ;;; Embedding bytecode in ELF
 
-;; Copyright (C) 2012 Free Software Foundation, Inc.
+;; Copyright (C) 2012, 2013 Free Software Foundation, Inc.
 
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -30,24 +30,25 @@
   #:use-module (rnrs bytevectors)
   #:use-module (ice-9 binary-ports)
   #:use-module (system vm elf)
+  #:use-module (system vm linker)
   #:export (write-objcode))
 
 (define (bytecode->elf bv)
-  (let ((string-table (make-elf-string-table)))
+  (let ((string-table (make-string-table)))
     (define (intern-string! string)
       (call-with-values
-          (lambda () (elf-string-table-intern string-table string))
+          (lambda () (string-table-intern string-table string))
         (lambda (table idx)
           (set! string-table table)
           idx)))
     (define (make-object name bv relocs . kwargs)
       (let ((name-idx (intern-string! (symbol->string name))))
-        (make-elf-object (apply make-elf-section
-                                #:name name-idx
-                                #:size (bytevector-length bv)
-                                kwargs)
-                         bv relocs
-                         (list (make-elf-symbol name 0)))))
+        (make-linker-object (apply make-elf-section
+                                   #:name name-idx
+                                   #:size (bytevector-length bv)
+                                   kwargs)
+                            bv relocs
+                            (list (make-linker-symbol name 0)))))
     (define (make-dynamic-section word-size endianness)
       (define (make-dynamic-section/32)
         (let ((bv (make-bytevector 24 0)))
@@ -57,7 +58,7 @@
           (bytevector-u32-set! bv 12 0 endianness)
           (bytevector-u32-set! bv 16 DT_NULL endianness)
           (bytevector-u32-set! bv 20 0 endianness)
-          (values bv (make-elf-reloc 'abs32/1 12 0 '.rtl-text))))
+          (values bv (make-linker-reloc 'abs32/1 12 0 '.rtl-text))))
       (define (make-dynamic-section/64)
         (let ((bv (make-bytevector 48 0)))
           (bytevector-u64-set! bv 0 DT_GUILE_RTL_VERSION endianness)
@@ -66,7 +67,7 @@
           (bytevector-u64-set! bv 24 0 endianness)
           (bytevector-u64-set! bv 32 DT_NULL endianness)
           (bytevector-u64-set! bv 40 0 endianness)
-          (values bv (make-elf-reloc 'abs64/1 24 0 '.rtl-text))))
+          (values bv (make-linker-reloc 'abs64/1 24 0 '.rtl-text))))
       (call-with-values (lambda ()
                           (case word-size
                             ((4) (make-dynamic-section/32))
@@ -75,9 +76,9 @@
         (lambda (bv reloc)
           (make-object '.dynamic bv (list reloc)
                        #:type SHT_DYNAMIC #:flags SHF_ALLOC))))
-    (define (link-string-table)
+    (define (make-string-table)
       (intern-string! ".shstrtab")
-      (make-object '.shstrtab (link-elf-string-table string-table) '()
+      (make-object '.shstrtab (link-string-table string-table) '()
                    #:type SHT_STRTAB #:flags 0))
     (let* ((word-size (target-word-size))
            (endianness (target-endianness))
@@ -85,7 +86,7 @@
            (dt (make-dynamic-section word-size endianness))
            ;; This needs to be linked last, because linking other
            ;; sections adds entries to the string table.
-           (shstrtab (link-string-table)))
+           (shstrtab (make-string-table)))
       (link-elf (list text dt shstrtab)
                 #:endianness endianness #:word-size word-size))))
 
diff --git a/module/system/vm/elf.scm b/module/system/vm/elf.scm
index 040b274..e2b2454 100644
--- a/module/system/vm/elf.scm
+++ b/module/system/vm/elf.scm
@@ -1,6 +1,6 @@
 ;;; Guile ELF reader and writer
 
-;; Copyright (C)  2011, 2012 Free Software Foundation, Inc.
+;; Copyright (C)  2011, 2012, 2013 Free Software Foundation, Inc.
 
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -33,12 +33,22 @@
             elf-entry elf-phoff elf-shoff elf-flags elf-ehsize
             elf-phentsize elf-phnum elf-shentsize elf-shnum elf-shstrndx
 
+            elf-header-len write-elf-header
+
             (make-elf-segment* . make-elf-segment)
             elf-segment?
             elf-segment-type elf-segment-offset elf-segment-vaddr
             elf-segment-paddr elf-segment-filesz elf-segment-memsz
             elf-segment-flags elf-segment-align
 
+            elf-program-header-len write-elf-program-header
+
+            PT_NULL PT_LOAD PT_DYNAMIC PT_INTERP PT_NOTE PT_SHLIB
+            PT_PHDR PT_TLS PT_NUM PT_LOOS PT_GNU_EH_FRAME PT_GNU_STACK
+            PT_GNU_RELRO
+
+            PF_R PF_W PF_X
+
             (make-elf-section* . make-elf-section)
             elf-section?
             elf-section-name elf-section-type elf-section-flags
@@ -46,11 +56,15 @@
             elf-section-link elf-section-info elf-section-addralign
             elf-section-entsize
 
+            elf-section-header-len write-elf-section-header
+
             make-elf-symbol elf-symbol?
             elf-symbol-name elf-symbol-value elf-symbol-size
             elf-symbol-info elf-symbol-other elf-symbol-shndx
             elf-symbol-binding elf-symbol-type elf-symbol-visibility
 
+            SHN_UNDEF
+
             SHT_NULL SHT_PROGBITS SHT_SYMTAB SHT_STRTAB SHT_RELA
             SHT_HASH SHT_DYNAMIC SHT_NOTE SHT_NOBITS SHT_REL SHT_SHLIB
             SHT_DYNSYM SHT_INIT_ARRAY SHT_FINI_ARRAY SHT_PREINIT_ARRAY
@@ -72,6 +86,8 @@
             DT_GUILE_RTL_VERSION DT_HIGUILE DT_LOOS DT_HIOS DT_LOPROC
             DT_HIPROC
 
+            string-table-ref
+
             STB_LOCAL STB_GLOBAL STB_WEAK STB_NUM STB_LOOS STB_GNU
             STB_HIOS STB_LOPROC STB_HIPROC
 
@@ -89,23 +105,7 @@
             elf-symbol-table-ref
 
             parse-elf-note
-            elf-note-name elf-note-desc elf-note-type
-
-            (make-string-table . make-elf-string-table)
-            (string-table-intern . elf-string-table-intern)
-            (link-string-table . link-elf-string-table)
-
-            (make-reloc . make-elf-reloc)
-            (make-symbol . make-elf-symbol)
-
-            (make-object . make-elf-object)
-            (object? . elf-object?)
-            (object-section . elf-object-section)
-            (object-bv . elf-object-bv)
-            (object-relocs . elf-object-relocs)
-            (object-symbols . elf-object-symbols)
-
-            link-elf))
+            elf-note-name elf-note-desc elf-note-type))
 
 ;; #define EI_NIDENT 16
 
@@ -902,354 +902,3 @@
         (bytevector-copy! bv (+ offset 12) name 0 (1- namesz))
         (bytevector-copy! bv (+ offset 12 namesz) desc 0 descsz)
         (make-elf-note (utf8->string name) desc type)))))
-
-
-
-
-;;;
-;;; All of that was the parser.  Now, on to a linker.
-;;;
-
-;; A relocation records a reference to a symbol.  When the symbol is
-;; resolved to an address, the reloc location will be updated to point
-;; to the address.
-;;
-;; Two types.  Abs32/1 and Abs64/1 are absolute offsets in bytes.
-;; Rel32/4 is a relative signed offset in 32-bit units.  Either can have
-;; an arbitrary addend as well.
-;;
-(define-record-type <reloc>
-  (make-reloc type loc addend symbol)
-  reloc?
-  (type reloc-type) ;; rel32/4, abs32/1, abs64/1
-  (loc reloc-loc)
-  (addend reloc-addend)
-  (symbol reloc-symbol))
-
-;; A symbol is an association between a name and an address.  The
-;; address is always in regard to some particular address space.  When
-;; objects come into the linker, their symbols live in the object
-;; address space.  When the objects are allocated into ELF segments, the
-;; symbols will be relocated into memory address space, corresponding to
-;; the position the ELF will be loaded at.
-;;
-(define-record-type <symbol>
-  (make-symbol name address)
-  symbol?
-  (name symbol-name)
-  (address symbol-address))
-
-(define-record-type <object>
-  (make-object section bv relocs symbols)
-  object?
-  (section object-section)
-  (bv object-bv)
-  (relocs object-relocs)
-  (symbols object-symbols))
-
-(define (make-string-table)
-  '(("" 0 #vu8())))
-
-(define (string-table-length table)
-  (let ((last (car table)))
-    ;; The + 1 is for the trailing NUL byte.
-    (+ (cadr last) (bytevector-length (caddr last)) 1)))
-
-(define (string-table-intern table str)
-  (cond
-   ((assoc str table)
-    => (lambda (ent)
-         (values table (cadr ent))))
-   (else
-    (let* ((next (string-table-length table)))
-      (values (cons (list str next (string->utf8 str))
-                    table)
-              next)))))
-
-(define (link-string-table table)
-  (let ((out (make-bytevector (string-table-length table) 0)))
-    (for-each
-     (lambda (ent)
-       (let ((bytes (caddr ent)))
-         (bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
-     table)
-    out))
-
-(define (segment-kind section)
-  (let ((flags (elf-section-flags section)))
-    (cons (cond
-           ((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
-           ((zero? (logand SHF_ALLOC flags)) PT_NOTE)
-           (else PT_LOAD))
-          (logior (if (zero? (logand SHF_ALLOC flags))
-                      0
-                      PF_R)
-                  (if (zero? (logand SHF_EXECINSTR flags))
-                      0
-                      PF_X)
-                  (if (zero? (logand SHF_WRITE flags))
-                      0
-                      PF_W)))))
-
-(define (group-by-cars ls)
-  (let lp ((in ls) (k #f) (group #f) (out '()))
-    (cond
-     ((null? in)
-      (reverse!
-       (if group
-           (cons (cons k (reverse! group)) out)
-           out)))
-     ((and group (equal? k (caar in)))
-      (lp (cdr in) k (cons (cdar in) group) out))
-     (else
-      (lp (cdr in) (caar in) (list (cdar in))
-          (if group
-              (cons (cons k (reverse! group)) out)
-              out))))))
-
-(define (collate-objects-into-segments objects)
-  (group-by-cars
-   (stable-sort!
-    (map (lambda (o)
-           (cons (segment-kind (object-section o)) o))
-         objects)
-    (lambda (x y)
-      (let ((x-type (caar x)) (y-type (caar y))
-            (x-flags (cdar x)) (y-flags (cdar y))
-            (x-section (object-section (cdr x)))
-            (y-section (object-section (cdr y))))
-        (cond
-         ((not (equal? x-flags y-flags))
-          (< x-flags y-flags))
-         ((not (equal? x-type y-type))
-          (< x-type y-type))
-         ((not (equal? (elf-section-type x-section)
-                       (elf-section-type y-section)))
-          (cond
-           ((equal? (elf-section-type x-section) SHT_NOBITS) #t)
-           ((equal? (elf-section-type y-section) SHT_NOBITS) #f)
-           (else (< (elf-section-type x-section)
-                    (elf-section-type y-section)))))
-         (else
-          (< (elf-section-size x-section)
-             (elf-section-size y-section)))))))))
-
-(define (align address alignment)
-  (+ address
-     (modulo (- alignment (modulo address alignment)) alignment)))
-
-(define (fold1 proc ls s0)
-  (let lp ((ls ls) (s0 s0))
-    (if (null? ls)
-        s0
-        (lp (cdr ls) (proc (car ls) s0)))))
-
-(define (fold2 proc ls s0 s1)
-  (let lp ((ls ls) (s0 s0) (s1 s1))
-    (if (null? ls)
-        (values s0 s1)
-        (receive (s0 s1) (proc (car ls) s0 s1)
-          (lp (cdr ls) s0 s1)))))
-
-(define (fold4 proc ls s0 s1 s2 s3)
-  (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
-    (if (null? ls)
-        (values s0 s1 s2 s3)
-        (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
-          (lp (cdr ls) s0 s1 s2 s3)))))
-
-(define (fold5 proc ls s0 s1 s2 s3 s4)
-  (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
-    (if (null? ls)
-        (values s0 s1 s2 s3 s4)
-        (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
-          (lp (cdr ls) s0 s1 s2 s3 s4)))))
-
-(define (relocate-section-header sec fileaddr memaddr)
-  (make-elf-section (elf-section-name sec) (elf-section-type sec)
-                    (elf-section-flags sec) memaddr
-                    fileaddr (elf-section-size sec)
-                    (elf-section-link sec) (elf-section-info sec)
-                    (elf-section-addralign sec) (elf-section-entsize sec)))
-
-(define *page-size* 4096)
-
-;; Adds object symbols to global table, relocating them from object
-;; address space to memory address space.
-(define (add-symbols symbols offset symtab)
-  (fold1 (lambda (symbol symtab)
-           (let ((name (symbol-name symbol))
-                 (addr (symbol-address symbol)))
-             (vhash-consq name (make-symbol name (+ addr offset)) symtab)))
-         symbols
-         symtab))
-
-(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
-  (let* ((loadable? (not (zero? flags)))
-         (alignment (fold1 (lambda (o alignment)
-                             (lcm (elf-section-addralign (object-section o))
-                                  alignment))
-                           objects
-                           alignment))
-         (fileaddr (align fileaddr alignment))
-         (memaddr (align memaddr alignment)))
-    (receive (objects fileend memend symtab)
-        (fold4 (lambda (o out fileaddr memaddr symtab)
-                 (let* ((section (object-section o))
-                        (fileaddr
-                         (if (= (elf-section-type section) SHT_NOBITS)
-                             fileaddr
-                             (align fileaddr (elf-section-addralign section))))
-                        (memaddr
-                         (align memaddr (elf-section-addralign section))))
-                   (values
-                    (cons (make-object (relocate-section-header section 
fileaddr
-                                                                memaddr)
-                                       (object-bv o)
-                                       (object-relocs o)
-                                       (object-symbols o))
-                          out)
-                    (if (= (elf-section-type section) SHT_NOBITS)
-                        fileaddr
-                        (+ fileaddr (elf-section-size section)))
-                    (+ memaddr (elf-section-size section))
-                    (add-symbols (object-symbols o) memaddr symtab))))
-               objects '() fileaddr memaddr symtab)
-      (values
-       (make-elf-segment* #:type type #:offset fileaddr
-                          #:vaddr (if loadable? memaddr 0)
-                          #:filesz (- fileend fileaddr)
-                          #:memsz (if loadable? (- memend memaddr) 0)
-                          #:flags flags #:align alignment)
-       (reverse objects)
-       symtab))))
-
-(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
-  (let ((ent (vhash-assq (reloc-symbol reloc) symtab)))
-    (unless ent
-      (error "Undefined symbol" (reloc-symbol reloc)))
-    (let* ((file-loc (+ (reloc-loc reloc) file-offset))
-           (mem-loc (+ (reloc-loc reloc) mem-offset))
-           (addr (symbol-address (cdr ent))))
-      (case (reloc-type reloc)
-        ((rel32/4)
-         (let ((diff (- addr mem-loc)))
-           (unless (zero? (modulo diff 4))
-             (error "Bad offset" reloc symbol mem-offset))
-           (bytevector-s32-set! bv file-loc
-                                (+ (/ diff 4) (reloc-addend reloc))
-                                endianness)))
-        ((abs32/1)
-         (bytevector-u32-set! bv file-loc addr endianness))
-        ((abs64/1)
-         (bytevector-u64-set! bv file-loc addr endianness))
-        (else
-         (error "bad reloc type" reloc))))))
-
-(define (write-object bv o symtab endianness)
-  (let* ((section (object-section o))
-         (offset (elf-section-offset section))
-         (addr (elf-section-addr section))
-         (len (elf-section-size section))
-         (bytes (object-bv o))
-         (relocs (object-relocs o)))
-    (if (not (= (elf-section-type section) SHT_NOBITS))
-        (begin
-          (if (not (= (elf-section-size section) (bytevector-length bytes)))
-              (error "unexpected length" section bytes))
-          (bytevector-copy! bytes 0 bv offset len)
-          (for-each (lambda (reloc)
-                      (process-reloc reloc bv offset addr symtab endianness))
-                    relocs)))))
-
-(define (compute-sections-by-name seglists)
-  (let lp ((in (apply append (map cdr seglists)))
-           (n 1) (out '()) (shstrtab #f))
-    (if (null? in)
-        (fold1 (lambda (x tail)
-                 (cond
-                  ((false-if-exception
-                    (string-table-ref shstrtab (car x)))
-                   => (lambda (str) (acons str (cdr x) tail)))
-                  (else tail)))
-               out '())
-        (let* ((section (object-section (car in)))
-               (bv (object-bv (car in)))
-               (name (elf-section-name section)))
-          (lp (cdr in) (1+ n) (acons name n out)
-              (or shstrtab
-                  (and (= (elf-section-type section) SHT_STRTAB)
-                       (equal? (false-if-exception
-                                (string-table-ref bv name))
-                               ".shstrtab")
-                       bv)))))))
-
-;; Given a list of section-header/bytevector pairs, collate the sections
-;; into segments, allocate the segments, allocate the ELF bytevector,
-;; and write the segments into the bytevector, relocating as we go.
-;;
-(define* (link-elf objects #:key
-                   (page-aligned? #t)
-                   (endianness (target-endianness))
-                   (word-size (target-word-size)))
-  (let* ((seglists (collate-objects-into-segments objects))
-         (sections-by-name (compute-sections-by-name seglists))
-         (nsegments (length seglists))
-         (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
-         (program-headers-offset (elf-header-len word-size))
-         (fileaddr (+ program-headers-offset
-                      (* nsegments (elf-program-header-len word-size))))
-         (memaddr 0))
-   (receive (out fileend memend symtab _)
-       (fold5
-        (lambda (x out fileaddr memaddr symtab prev-flags)
-          (let ((type (caar x))
-                (flags (cdar x))
-                (objects (cdr x)))
-            (receive (segment objects symtab)
-                (alloc-segment type flags objects fileaddr memaddr symtab
-                               (if (and page-aligned?
-                                        (not (= flags prev-flags)))
-                                   *page-size*
-                                   8))
-              (values
-               (cons (cons segment objects) out)
-               (+ (elf-segment-offset segment) (elf-segment-filesz segment))
-               (if (zero? (elf-segment-memsz segment))
-                   memaddr
-                   (+ (elf-segment-vaddr segment)
-                      (elf-segment-memsz segment)))
-               symtab
-               flags))))
-        seglists '() fileaddr memaddr vlist-null 0)
-     (let* ((out (reverse! out))
-            (section-table-offset (+ (align fileend word-size)))
-            (fileend (+ section-table-offset
-                        (* nsections (elf-section-header-len word-size))))
-            (bv (make-bytevector fileend 0)))
-       (write-elf-header bv #:byte-order endianness #:word-size word-size
-                         #:phoff program-headers-offset #:phnum nsegments
-                         #:shoff section-table-offset #:shnum nsections
-                         #:shstrndx (or (assoc-ref sections-by-name 
".shstrtab")
-                                         SHN_UNDEF))
-       (write-elf-section-header bv section-table-offset
-                                 endianness word-size
-                                 (make-elf-section* #:type SHT_NULL #:flags 0
-                                                    #:addralign 0))
-       (fold2 (lambda (x phidx shidx)
-                (write-elf-program-header
-                 bv (+ program-headers-offset
-                       (* (elf-program-header-len word-size) phidx))
-                 endianness word-size (car x))
-                (values
-                 (1+ phidx)
-                 (fold1 (lambda (o shidx)
-                          (write-object bv o symtab endianness)
-                          (write-elf-section-header
-                           bv (+ section-table-offset
-                                 (* (elf-section-header-len word-size) shidx))
-                           endianness word-size (object-section o))
-                          (1+ shidx))
-                        (cdr x) shidx)))
-              out 0 1)
-       bv))))
diff --git a/module/system/vm/linker.scm b/module/system/vm/linker.scm
new file mode 100644
index 0000000..e9dca71
--- /dev/null
+++ b/module/system/vm/linker.scm
@@ -0,0 +1,442 @@
+;;; Guile ELF linker
+
+;; Copyright (C)  2011, 2012, 2013 Free Software Foundation, Inc.
+
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+
+;;; Commentary:
+;;;
+;;; A linker combines several linker objects into an executable or a
+;;; loadable library.
+;;;
+;;; There are several common formats for libraries out there.  Since
+;;; Guile includes its own linker and loader, we are free to choose any
+;;; format, or make up our own.
+;;;
+;;; There are essentially two requirements for a linker format:
+;;; libraries should be able to be loaded with the minimal amount of
+;;; work; and they should support introspection in some way, in order to
+;;; enable good debugging.
+;;;
+;;; These requirements are somewhat at odds, as loading should not have
+;;; to stumble over features related to introspection.  It so happens
+;;; that a lot of smart people have thought about this situation, and
+;;; the ELF format embodies the outcome of their thinking.  Guile uses
+;;; ELF as its format, regardless of the platform's native library
+;;; format.  It's not inconceivable that Guile could interoperate with
+;;; the native dynamic loader at some point, but it's not a near-term
+;;; goal.
+;;;
+;;; Guile's linker takes a list of objects, sorts them according to
+;;; similarity from the perspective of the loader, then writes them out
+;;; into one big bytevector in ELF format.
+;;;
+;;; It is often the case that different parts of a library need to refer
+;;; to each other.  For example, program text may need to refer to a
+;;; constant from writable memory.  When the linker places sections
+;;; (linker objects) into specific locations in the linked bytevector,
+;;; it needs to fix up those references.  This process is called
+;;; /relocation/.  References needing relocations are recorded in
+;;; "linker-reloc" objects, and collected in a list in each
+;;; "linker-object".  The actual definitions of the references are
+;;; stored in "linker-symbol" objects, also collected in a list in each
+;;; "linker-object".
+;;;
+;;; By default, the ELF files created by the linker include some padding
+;;; so that different parts of the file can be loaded in with different
+;;; permissions.  For example, some parts of the file are read-only and
+;;; thus can be shared between processes.  Some parts of the file don't
+;;; need to be loaded at all.  However this padding can be too much for
+;;; interactive compilation, when the code is never written out to disk;
+;;; in that case, pass #:page-aligned? #f to `link-elf'.
+;;;
+;;; Code:
+
+(define-module (system vm linker)
+  #:use-module (rnrs bytevectors)
+  #:use-module (system foreign)
+  #:use-module (system base target)
+  #:use-module (srfi srfi-9)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 vlist)
+  #:use-module (system vm elf)
+  #:export (make-string-table
+            string-table-intern
+            link-string-table
+
+            make-linker-reloc
+            make-linker-symbol
+
+            make-linker-object
+            linker-object?
+            linker-object-section
+            linker-object-bv
+            linker-object-relocs
+            linker-object-symbols
+
+            link-elf))
+
+;; A relocation records a reference to a symbol.  When the symbol is
+;; resolved to an address, the reloc location will be updated to point
+;; to the address.
+;;
+;; Two types.  Abs32/1 and Abs64/1 are absolute offsets in bytes.
+;; Rel32/4 is a relative signed offset in 32-bit units.  Either can have
+;; an arbitrary addend as well.
+;;
+(define-record-type <linker-reloc>
+  (make-linker-reloc type loc addend symbol)
+  linker-reloc?
+  (type linker-reloc-type) ;; rel32/4, abs32/1, abs64/1
+  (loc linker-reloc-loc)
+  (addend linker-reloc-addend)
+  (symbol linker-reloc-symbol))
+
+;; A symbol is an association between a name and an address.  The
+;; address is always in regard to some particular address space.  When
+;; objects come into the linker, their symbols live in the object
+;; address space.  When the objects are allocated into ELF segments, the
+;; symbols will be relocated into memory address space, corresponding to
+;; the position the ELF will be loaded at.
+;;
+(define-record-type <linker-symbol>
+  (make-linker-symbol name address)
+  linker-symbol?
+  (name linker-symbol-name)
+  (address linker-symbol-address))
+
+(define-record-type <linker-object>
+  (make-linker-object section bv relocs symbols)
+  linker-object?
+  (section linker-object-section)
+  (bv linker-object-bv)
+  (relocs linker-object-relocs)
+  (symbols linker-object-symbols))
+
+(define (make-string-table)
+  '(("" 0 #vu8())))
+
+(define (string-table-length table)
+  (let ((last (car table)))
+    ;; The + 1 is for the trailing NUL byte.
+    (+ (cadr last) (bytevector-length (caddr last)) 1)))
+
+(define (string-table-intern table str)
+  (cond
+   ((assoc str table)
+    => (lambda (ent)
+         (values table (cadr ent))))
+   (else
+    (let* ((next (string-table-length table)))
+      (values (cons (list str next (string->utf8 str))
+                    table)
+              next)))))
+
+(define (link-string-table table)
+  (let ((out (make-bytevector (string-table-length table) 0)))
+    (for-each
+     (lambda (ent)
+       (let ((bytes (caddr ent)))
+         (bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
+     table)
+    out))
+
+(define (segment-kind section)
+  (let ((flags (elf-section-flags section)))
+    (cons (cond
+           ((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
+           ((zero? (logand SHF_ALLOC flags)) PT_NOTE)
+           (else PT_LOAD))
+          (logior (if (zero? (logand SHF_ALLOC flags))
+                      0
+                      PF_R)
+                  (if (zero? (logand SHF_EXECINSTR flags))
+                      0
+                      PF_X)
+                  (if (zero? (logand SHF_WRITE flags))
+                      0
+                      PF_W)))))
+
+(define (group-by-cars ls)
+  (let lp ((in ls) (k #f) (group #f) (out '()))
+    (cond
+     ((null? in)
+      (reverse!
+       (if group
+           (cons (cons k (reverse! group)) out)
+           out)))
+     ((and group (equal? k (caar in)))
+      (lp (cdr in) k (cons (cdar in) group) out))
+     (else
+      (lp (cdr in) (caar in) (list (cdar in))
+          (if group
+              (cons (cons k (reverse! group)) out)
+              out))))))
+
+(define (collate-objects-into-segments objects)
+  (group-by-cars
+   (stable-sort!
+    (map (lambda (o)
+           (cons (segment-kind (linker-object-section o)) o))
+         objects)
+    (lambda (x y)
+      (let ((x-type (caar x)) (y-type (caar y))
+            (x-flags (cdar x)) (y-flags (cdar y))
+            (x-section (linker-object-section (cdr x)))
+            (y-section (linker-object-section (cdr y))))
+        (cond
+         ((not (equal? x-flags y-flags))
+          (< x-flags y-flags))
+         ((not (equal? x-type y-type))
+          (< x-type y-type))
+         ((not (equal? (elf-section-type x-section)
+                       (elf-section-type y-section)))
+          (cond
+           ((equal? (elf-section-type x-section) SHT_NOBITS) #t)
+           ((equal? (elf-section-type y-section) SHT_NOBITS) #f)
+           (else (< (elf-section-type x-section)
+                    (elf-section-type y-section)))))
+         (else
+          (< (elf-section-size x-section)
+             (elf-section-size y-section)))))))))
+
+(define (align address alignment)
+  (+ address
+     (modulo (- alignment (modulo address alignment)) alignment)))
+
+(define (fold1 proc ls s0)
+  (let lp ((ls ls) (s0 s0))
+    (if (null? ls)
+        s0
+        (lp (cdr ls) (proc (car ls) s0)))))
+
+(define (fold2 proc ls s0 s1)
+  (let lp ((ls ls) (s0 s0) (s1 s1))
+    (if (null? ls)
+        (values s0 s1)
+        (receive (s0 s1) (proc (car ls) s0 s1)
+          (lp (cdr ls) s0 s1)))))
+
+(define (fold4 proc ls s0 s1 s2 s3)
+  (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
+    (if (null? ls)
+        (values s0 s1 s2 s3)
+        (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
+          (lp (cdr ls) s0 s1 s2 s3)))))
+
+(define (fold5 proc ls s0 s1 s2 s3 s4)
+  (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
+    (if (null? ls)
+        (values s0 s1 s2 s3 s4)
+        (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
+          (lp (cdr ls) s0 s1 s2 s3 s4)))))
+
+(define (relocate-section-header sec fileaddr memaddr)
+  (make-elf-section #:name (elf-section-name sec)
+                    #:type (elf-section-type sec)
+                    #:flags (elf-section-flags sec)
+                    #:addr memaddr
+                    #:offset fileaddr
+                    #:size (elf-section-size sec)
+                    #:link (elf-section-link sec)
+                    #:info (elf-section-info sec)
+                    #:addralign (elf-section-addralign sec)
+                    #:entsize (elf-section-entsize sec)))
+
+(define *page-size* 4096)
+
+;; Adds object symbols to global table, relocating them from object
+;; address space to memory address space.
+(define (add-symbols symbols offset symtab)
+  (fold1 (lambda (symbol symtab)
+           (let ((name (linker-symbol-name symbol))
+                 (addr (linker-symbol-address symbol)))
+             (when (vhash-assq name symtab)
+               (error "duplicate symbol" name))
+             (vhash-consq name (make-linker-symbol name (+ addr offset)) 
symtab)))
+         symbols
+         symtab))
+
+(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
+  (let* ((loadable? (not (zero? flags)))
+         (alignment (fold1 (lambda (o alignment)
+                             (lcm (elf-section-addralign
+                                   (linker-object-section o))
+                                  alignment))
+                           objects
+                           alignment))
+         (fileaddr (align fileaddr alignment))
+         (memaddr (align memaddr alignment)))
+    (receive (objects fileend memend symtab)
+        (fold4 (lambda (o out fileaddr memaddr symtab)
+                 (let* ((section (linker-object-section o))
+                        (fileaddr
+                         (if (= (elf-section-type section) SHT_NOBITS)
+                             fileaddr
+                             (align fileaddr (elf-section-addralign section))))
+                        (memaddr
+                         (align memaddr (elf-section-addralign section))))
+                   (values
+                    (cons (make-linker-object
+                           (relocate-section-header section fileaddr
+                                                    memaddr)
+                           (linker-object-bv o)
+                           (linker-object-relocs o)
+                           (linker-object-symbols o))
+                          out)
+                    (if (= (elf-section-type section) SHT_NOBITS)
+                        fileaddr
+                        (+ fileaddr (elf-section-size section)))
+                    (+ memaddr (elf-section-size section))
+                    (add-symbols (linker-object-symbols o) memaddr symtab))))
+               objects '() fileaddr memaddr symtab)
+      (values
+       (make-elf-segment #:type type #:offset fileaddr
+                         #:vaddr (if loadable? memaddr 0)
+                         #:filesz (- fileend fileaddr)
+                         #:memsz (if loadable? (- memend memaddr) 0)
+                         #:flags flags #:align alignment)
+       (reverse objects)
+       symtab))))
+
+(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
+  (let ((ent (vhash-assq (linker-reloc-symbol reloc) symtab)))
+    (unless ent
+      (error "Undefined symbol" (linker-reloc-symbol reloc)))
+    (let* ((file-loc (+ (linker-reloc-loc reloc) file-offset))
+           (mem-loc (+ (linker-reloc-loc reloc) mem-offset))
+           (addr (linker-symbol-address (cdr ent))))
+      (case (linker-reloc-type reloc)
+        ((rel32/4)
+         (let ((diff (- addr mem-loc)))
+           (unless (zero? (modulo diff 4))
+             (error "Bad offset" reloc symbol mem-offset))
+           (bytevector-s32-set! bv file-loc
+                                (+ (/ diff 4) (linker-reloc-addend reloc))
+                                endianness)))
+        ((abs32/1)
+         (bytevector-u32-set! bv file-loc addr endianness))
+        ((abs64/1)
+         (bytevector-u64-set! bv file-loc addr endianness))
+        (else
+         (error "bad reloc type" reloc))))))
+
+(define (write-linker-object bv o symtab endianness)
+  (let* ((section (linker-object-section o))
+         (offset (elf-section-offset section))
+         (addr (elf-section-addr section))
+         (len (elf-section-size section))
+         (bytes (linker-object-bv o))
+         (relocs (linker-object-relocs o)))
+    (if (not (= (elf-section-type section) SHT_NOBITS))
+        (begin
+          (if (not (= (elf-section-size section) (bytevector-length bytes)))
+              (error "unexpected length" section bytes))
+          (bytevector-copy! bytes 0 bv offset len)
+          (for-each (lambda (reloc)
+                      (process-reloc reloc bv offset addr symtab endianness))
+                    relocs)))))
+
+(define (compute-sections-by-name seglists)
+  (let lp ((in (apply append (map cdr seglists)))
+           (n 1) (out '()) (shstrtab #f))
+    (if (null? in)
+        (fold1 (lambda (x tail)
+                 (cond
+                  ((false-if-exception
+                    (string-table-ref shstrtab (car x)))
+                   => (lambda (str) (acons str (cdr x) tail)))
+                  (else tail)))
+               out '())
+        (let* ((section (linker-object-section (car in)))
+               (bv (linker-object-bv (car in)))
+               (name (elf-section-name section)))
+          (lp (cdr in) (1+ n) (acons name n out)
+              (or shstrtab
+                  (and (= (elf-section-type section) SHT_STRTAB)
+                       (equal? (false-if-exception
+                                (string-table-ref bv name))
+                               ".shstrtab")
+                       bv)))))))
+
+;; Given a list of section-header/bytevector pairs, collate the sections
+;; into segments, allocate the segments, allocate the ELF bytevector,
+;; and write the segments into the bytevector, relocating as we go.
+;;
+(define* (link-elf objects #:key
+                   (page-aligned? #t)
+                   (endianness (target-endianness))
+                   (word-size (target-word-size)))
+  (let* ((seglists (collate-objects-into-segments objects))
+         (sections-by-name (compute-sections-by-name seglists))
+         (nsegments (length seglists))
+         (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
+         (program-headers-offset (elf-header-len word-size))
+         (fileaddr (+ program-headers-offset
+                      (* nsegments (elf-program-header-len word-size))))
+         (memaddr 0))
+    (receive (out fileend memend symtab _)
+        (fold5
+         (lambda (x out fileaddr memaddr symtab prev-flags)
+           (let ((type (caar x))
+                 (flags (cdar x))
+                 (objects (cdr x)))
+             (receive (segment objects symtab)
+                 (alloc-segment type flags objects fileaddr memaddr symtab
+                                (if (and page-aligned?
+                                         (not (= flags prev-flags)))
+                                    *page-size*
+                                    8))
+               (values
+                (cons (cons segment objects) out)
+                (+ (elf-segment-offset segment) (elf-segment-filesz segment))
+                (if (zero? (elf-segment-memsz segment))
+                    memaddr
+                    (+ (elf-segment-vaddr segment)
+                       (elf-segment-memsz segment)))
+                symtab
+                flags))))
+         seglists '() fileaddr memaddr vlist-null 0)
+      (let* ((out (reverse! out))
+             (section-table-offset (+ (align fileend word-size)))
+             (fileend (+ section-table-offset
+                         (* nsections (elf-section-header-len word-size))))
+             (bv (make-bytevector fileend 0)))
+        (write-elf-header bv #:byte-order endianness #:word-size word-size
+                          #:phoff program-headers-offset #:phnum nsegments
+                          #:shoff section-table-offset #:shnum nsections
+                          #:shstrndx (or (assoc-ref sections-by-name 
".shstrtab")
+                                         SHN_UNDEF))
+        (write-elf-section-header bv section-table-offset
+                                  endianness word-size
+                                  (make-elf-section #:type SHT_NULL #:flags 0
+                                                    #:addralign 0))
+        (fold2 (lambda (x phidx shidx)
+                 (write-elf-program-header
+                  bv (+ program-headers-offset
+                        (* (elf-program-header-len word-size) phidx))
+                  endianness word-size (car x))
+                 (values
+                  (1+ phidx)
+                  (fold1 (lambda (o shidx)
+                           (write-linker-object bv o symtab endianness)
+                           (write-elf-section-header
+                            bv (+ section-table-offset
+                                  (* (elf-section-header-len word-size) shidx))
+                            endianness word-size (linker-object-section o))
+                           (1+ shidx))
+                         (cdr x) shidx)))
+               out 0 1)
+        bv))))
diff --git a/test-suite/tests/linker.test b/test-suite/tests/linker.test
new file mode 100644
index 0000000..7ea2631
--- /dev/null
+++ b/test-suite/tests/linker.test
@@ -0,0 +1,86 @@
+;;;; linker.test                               -*- scheme -*-
+;;;;
+;;;; Copyright 2013 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+
+(define-module (test-suite test-linker)
+  #:use-module (test-suite lib)
+  #:use-module (rnrs bytevectors)
+  #:use-module (system base target)
+  #:use-module (system vm elf)
+  #:use-module (system vm linker))
+
+(define (link-elf-with-one-main-section name bytes)
+  (let ((string-table (make-string-table)))
+    (define (intern-string! string)
+      (call-with-values
+          (lambda () (string-table-intern string-table string))
+        (lambda (table idx)
+          (set! string-table table)
+          idx)))
+    (define (make-object name bv relocs . kwargs)
+      (let ((name-idx (intern-string! (symbol->string name))))
+        (make-linker-object (apply make-elf-section
+                                   #:name name-idx
+                                   #:size (bytevector-length bv)
+                                   kwargs)
+                            bv relocs
+                            (list (make-linker-symbol name 0)))))
+    (define (make-string-table)
+      (intern-string! ".shstrtab")
+      (make-object '.shstrtab (link-string-table string-table) '()
+                   #:type SHT_STRTAB #:flags 0))
+    (let* ((word-size (target-word-size))
+           (endianness (target-endianness))
+           (sec (make-object name bytes '()))
+           ;; This needs to be linked last, because linking other
+           ;; sections adds entries to the string table.
+           (shstrtab (make-string-table)))
+      (link-elf (list sec shstrtab)
+                #:endianness endianness #:word-size word-size))))
+
+(with-test-prefix "simple"
+  (define foo-bytes #vu8(0 1 2 3 4 5))
+  (define bytes #f)
+  (define elf #f)
+
+  (define (bytevectors-equal? bv-a bv-b start-a start-b size)
+    (or (zero? size)
+        (and (equal? (bytevector-u8-ref bv-a start-a)
+                     (bytevector-u8-ref bv-b start-b))
+             (bytevectors-equal? bv-a bv-b (1+ start-a) (1+ start-b)
+                                 (1- size)))))
+
+  (pass-if "linking succeeds"
+    (begin
+      (set! bytes (link-elf-with-one-main-section '.foo foo-bytes))
+      #t))
+
+  (pass-if "parsing succeeds"
+    (begin
+      (set! elf (parse-elf bytes))
+      (elf? elf)))
+
+  ;; 3 sections: the initial NULL section, .foo, and .shstrtab.
+  (pass-if-equal 3 (elf-shnum elf))
+
+  (pass-if ".foo section checks out"
+    (let ((sec (assoc-ref (elf-sections-by-name elf) ".foo")))
+      (and sec
+           (= (elf-section-size sec) (bytevector-length foo-bytes))
+           (bytevectors-equal? bytes foo-bytes
+                               (elf-section-offset sec) 0
+                               (bytevector-length foo-bytes))))))
-- 
1.7.10.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]