poke-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: DWARF5 line number program header


From: Jose E. Marchesi
Subject: Re: DWARF5 line number program header
Date: Sun, 05 Nov 2023 15:04:41 +0100
User-agent: Gnus/5.13 (Gnus v5.13)

Hello Martin.
Thanks for the patch.

See some comments below.

> diff --git a/dwarf-abbrev.pk b/dwarf-abbrev.pk
> index 2fcbf35..c44b26d 100644
> --- a/dwarf-abbrev.pk
> +++ b/dwarf-abbrev.pk
> @@ -28,7 +28,6 @@
>
>  load leb128;
>  load "dwarf-info.pk";
> -load "dwarf-config.pk";
>
>  type Dwarf_Abbrev_Table_Hdr =
>    struct
> diff --git a/dwarf-info.pk b/dwarf-info.pk
> index 73c3082..a07222d 100644
> --- a/dwarf-info.pk
> +++ b/dwarf-info.pk
> @@ -1924,6 +1924,43 @@ dw_cfg.add
>    :class "dw-form"
>    :tag DW_FORM_udata :name "DW_FORM_udata" :doc "Data";
>
> +/* Line number header entry format encodings */
> +
> +var DW_LNCT_path = 0x1,
> +    DW_LNCT_directory_index = 0x2,
> +    DW_LNCT_timestamp = 0x3,
> +    DW_LNCT_size = 0x4,
> +    DW_LNCT_MD5 = 0x5,
> +    DW_LNCT_lo_user = 0x2000,
> +    DW_LNCT_hi_user = 0x3f;
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_path :name "DW_LNCT_path";
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_directory_index :name "DW_LNCT_directory_index";
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_timestamp :name "DW_LNCT_timestamp";
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_size :name "DW_LNCT_size";
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_MD5 :name "DW_LNCT_MD5";
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_lo_user :name "DW_LNCT_lo_user";
> +
> +dw_cfg.add
> +  :class "dw-lnct"
> +  :tag DW_LNCT_hi_user :name "DW_LNCT_hi_user";

Does this ready belong to dwarf-abbrev.pk?  Why not having these defined
in dwarf-line.pk instead?

>  /* Contributions are stored in .debug_info sections.  */
>
> diff --git a/dwarf-line.pk b/dwarf-line.pk
> new file mode 100644
> index 0000000..56866c0
> --- /dev/null
> +++ b/dwarf-line.pk
> @@ -0,0 +1,159 @@
> +
> +load leb128;
> +load "dwarf-info.pk";
> +
> +type Dwarf_Line_Entry_Fmt =
> +  struct
> +  {
> +    ULEB128 content_type : dw_cfg.entry_p ("dw-lnct", content_type.value);
> +    ULEB128 form_code : dw_cfg.entry_p ("dw-form", form_code.value);
> +
> +    method _print = void:
> +      {
> +        printf("#<%s, %s>", dw_cfg.get_name ("dw-lnct", content_type.value),
> +                            dw_cfg.get_name ("dw-form", form_code.value));
> +      }
> +  };
> +
> +type Dwarf_Line_Hdr_V5 =
> +  struct
> +  {
> +    uint<32> unit_length;
> +    uint<16> version : version == 5;
> +    uint<8> address_size ;

Is this a size/offset, probably in bytes?  Then it would be better to
use an offset.

> +    uint<8> segment_selector_size;

Likewise.

> +    uint<32> header_length;

Likewise.

> +    uint<8> minimum_instruction_length;

Likewise.

> +    uint<8> maximum_operations_per_instruction;
> +    uint<8> default_is_stmt;
> +    int<8> line_base;
> +    uint<8> line_range;
> +    uint<8> opcode_base;

What is this?  Does it require a constraint?

> +    uint<8>[opcode_base - 1] standard_opcode_lengths;

Offset?

> +    uint<8> directory_entry_format_count;
> +    Dwarf_Line_Entry_Fmt[directory_entry_format_count] 
> directory_entry_format;
> +    ULEB128 directories_count;
> +    fun Get_Dirs_Payload_Size = uint<8>:

Please use lower case names for function names, like
get_dirs_payload_size.

> +      {
> +        var payload_bytes = 0;
> +        for (var i = 0; i < directories_count.value; i++)
> +          for (var j = 0; j < directory_entry_format_count; j++)
> +            {
> +              var ct = directory_entry_format[j].content_type.value;
> +              var fc = directory_entry_format[j].form_code.value;
> +              if ((ct == DW_LNCT_path) && (fc == DW_FORM_line_strp))
> +                payload_bytes += 4;
> +              else
> +                raise Exception { code = 256,
> +                                  msg = format("unknown file name entry 
> format (dw-lnct 0x%u8x, dw-form 0x%u8x)", ct, fc) };
> +            }
> +          return payload_bytes;
> +      }
> +    // Directory Payload
> +    uint<8>[Get_Dirs_Payload_Size];
> +    uint<8> file_name_entry_format_count;
> +    Dwarf_Line_Entry_Fmt[file_name_entry_format_count] 
> file_name_entry_format; // offset: 39, size: 4
> +    ULEB128 file_names_count;
> +    fun Get_Files_Payload_Size = uint<8>:

Likewise.

> +      {
> +        var payload_bytes = 0;
> +        for (var i = 0; i < file_names_count.value; i++)
> +          for (var j = 0; j < file_name_entry_format_count; j++)
> +            {
> +              var ct = file_name_entry_format[j].content_type.value;
> +              var fc = file_name_entry_format[j].form_code.value;
> +              if ((ct == DW_LNCT_path) && (fc == DW_FORM_line_strp))
> +                payload_bytes += 4;
> +              else if ((ct == DW_LNCT_directory_index) && (fc == 
> DW_FORM_udata))
> +                payload_bytes += 1;
> +              else if ((ct == DW_LNCT_MD5) && (fc == DW_FORM_data16))
> +                // This is how clang encodes the md5sum
> +                payload_bytes += 16;
> +              else
> +                raise Exception { code = 257,
> +                                  msg = format("unknown file name entry 
> format (dw-lnct 0x%u8x, dw-form 0x%u8x)", ct, fc) };
> +            }
> +            return payload_bytes;
> +      }
> +    // Files Payload
> +    uint<8>[Get_Files_Payload_Size];
> +
> +    /* Given an offset into the ELF file's .debug_line_str section, return
> +     * the source file directories as array of strings.  The array index
> +     * will express the DWARF directory index, and helps to match the source
> +     * file with its respective path. */
> +    method get_dirs = (Elf64_Off offset) string[]:
> +      {
> +        var retval = string[directories_count.value]();
> +        var _tmp_string = "";
> +        var off = directories_count'offset + directories_count'size;
> +        for (var i = 0; i < directories_count.value; i++)
> +          {
> +            _tmp_string = "";
> +            for (var j = 0; j < directory_entry_format_count; j++)
> +              {
> +                var ct = directory_entry_format[j].content_type.value;
> +                var fc = directory_entry_format[j].form_code.value;
> +                if ((ct == DW_LNCT_path) && (fc == DW_FORM_line_strp))
> +                  {
> +                    var strtab_off = offset<uint<32>,B> @ off;
> +                    _tmp_string = string @ offset + strtab_off;
> +                  }
> +                if (_tmp_string != "")
> +                    retval[i] = _tmp_string;
> +                else
> +                  raise Exception { code = 258, msg = format("Failed getting 
> source dir #%u8d\n", i) };
> +              }
> +          }
> +          return retval;
> +      }
> +
> +    /* Given an offset into the ELF file's .debug_line_str section, return
> +     * the source files as array of strings. */
> +    method get_files = (Elf64_Off offset) string[]:
> +      {
> +        // The upper limit for the source files count is 256.  This aligns to
> +        // elfutil's readelf.c where we have struct encpair enc[256];
> +        var retval = string[file_names_count.value]();
> +        var _tmp_off = file_names_count'offset + file_names_count'size;
> +        var _file_name = "";
> +        var _dir_index_unset = -1;
> +        var _dir_index = _dir_index_unset;
> +         for (var i = 0; i < file_names_count.value; i++)
> +           {
> +           _file_name = "";
> +           _dir_index = _dir_index_unset;
> +           for (var j = 0; j < file_name_entry_format_count; j++)
> +             {
> +               var ct = file_name_entry_format[j].content_type.value;
> +               var fc = file_name_entry_format[j].form_code.value;
> +               if ((ct == DW_LNCT_path) && (fc == DW_FORM_line_strp))
> +                 {
> +                   var _strtab_off = offset<uint<32>,B> @ _tmp_off;
> +                   _tmp_off += _strtab_off'size;
> +                   _file_name = string @ offset + _strtab_off;
> +                 }
> +               if ((ct == DW_LNCT_directory_index) && (fc == DW_FORM_udata))
> +                 {
> +                   var _tmp_dirindex = ULEB128 @ _tmp_off;
> +                   _tmp_off += _tmp_dirindex'size;
> +                   _dir_index = _tmp_dirindex.value;
> +                 }
> +               if ((ct == DW_LNCT_MD5) && (fc == DW_FORM_data16))
> +                 {
> +                   // This is how clang encodes the md5sum, skip it
> +                  _tmp_off += 16#B;
> +                 }
> +             }
> +             if ((_file_name != "") && (_dir_index != _dir_index_unset))
> +               {
> +                 var _tmp_dir = string[directories_count.value]();
> +                 _tmp_dir = get_dirs(offset);
> +                 retval[i] = _tmp_dir[_dir_index] + "/" + _file_name;
> +               }
> +             else
> +              raise Exception { code = 259, msg = format("Failed getting 
> source file #%u8d\n", i) };
> +           }
> +        return retval;
> +      }
> +    };

I would add a type Dwarf_Line_Hdr union type like this:

type Dwarf_Line_Hdr =
  union
  {
    Dwarf_Line_Hdr_V5 v5;
  };

> diff --git a/dwarf.pk b/dwarf.pk
> index 609c8f9..e0e14c1 100644
> --- a/dwarf.pk
> +++ b/dwarf.pk
> @@ -19,6 +19,7 @@
>  load "dwarf-common.pk";
>  load "dwarf-config.pk";
>  load "dwarf-abbrev.pk";
> +load "dwarf-line.pk";
>  load "dwarf-info.pk";
>  load "dwarf-expr.pk";
>  load "dwarf-frame.pk";



reply via email to

[Prev in Thread] Current Thread [Next in Thread]