1 /* DWARF index writing support for GDB.
3 Copyright (C) 1994-2022 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "dwarf2/index-write.h"
25 #include "cli/cli-decode.h"
26 #include "gdbsupport/byte-vector.h"
27 #include "gdbsupport/filestuff.h"
28 #include "gdbsupport/gdb_unlinker.h"
29 #include "gdbsupport/pathstuff.h"
30 #include "gdbsupport/scoped_fd.h"
31 #include "complaints.h"
32 #include "dwarf2/index-common.h"
34 #include "dwarf2/read.h"
35 #include "dwarf2/dwz.h"
36 #include "gdb/gdb-index.h"
43 #include <forward_list>
45 #include <unordered_map>
46 #include <unordered_set>
48 /* Ensure only legit values are used. */
49 #define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \
51 gdb_assert ((unsigned int) (value) <= 1); \
52 GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \
55 /* Ensure only legit values are used. */
56 #define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \
58 gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \
59 && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
60 GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \
63 /* Ensure we don't use more than the allotted number of bits for the CU. */
64 #define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \
66 gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \
67 GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \
70 /* The "save gdb-index" command. */
72 /* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
76 file_write (FILE *file, const void *data, size_t size)
78 if (fwrite (data, 1, size, file) != size)
79 error (_("couldn't data write to file"));
82 /* Write the contents of VEC to FILE, with error checking. */
84 template<typename Elem, typename Alloc>
86 file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
89 file_write (file, vec.data (), vec.size () * sizeof (vec[0]));
92 /* In-memory buffer to prepare data to be written later to a file. */
96 /* Copy ARRAY to the end of the buffer. */
97 void append_array (gdb::array_view<const gdb_byte> array)
99 std::copy (array.begin (), array.end (), grow (array.size ()));
102 /* Copy CSTR (a zero-terminated string) to the end of buffer. The
103 terminating zero is appended too. */
104 void append_cstr0 (const char *cstr)
106 const size_t size = strlen (cstr) + 1;
107 std::copy (cstr, cstr + size, grow (size));
110 /* Store INPUT as ULEB128 to the end of buffer. */
111 void append_unsigned_leb128 (ULONGEST input)
115 gdb_byte output = input & 0x7f;
119 m_vec.push_back (output);
125 /* Accept a host-format integer in VAL and append it to the buffer
126 as a target-format integer which is LEN bytes long. */
127 void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
129 ::store_unsigned_integer (grow (len), len, byte_order, val);
132 /* Copy VALUE to the end of the buffer, little-endian. */
133 void append_offset (offset_type value)
135 append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
138 /* Return the size of the buffer. */
141 return m_vec.size ();
144 /* Return true iff the buffer is empty. */
147 return m_vec.empty ();
150 /* Write the buffer to FILE. */
151 void file_write (FILE *file) const
153 ::file_write (file, m_vec);
157 /* Grow SIZE bytes at the end of the buffer. Returns a pointer to
158 the start of the new block. */
159 gdb_byte *grow (size_t size)
161 m_vec.resize (m_vec.size () + size);
162 return &*(m_vec.end () - size);
165 gdb::byte_vector m_vec;
168 /* An entry in the symbol table. */
169 struct symtab_index_entry
171 /* The name of the symbol. */
173 /* The offset of the name in the constant pool. */
174 offset_type index_offset;
175 /* A sorted vector of the indices of all the CUs that hold an object
177 std::vector<offset_type> cu_indices;
180 /* The symbol table. This is a power-of-2-sized hash table. */
188 offset_type n_elements = 0;
189 std::vector<symtab_index_entry> data;
191 /* Temporary storage for names. */
192 auto_obstack m_string_obstack;
195 /* Find a slot in SYMTAB for the symbol NAME. Returns a reference to
198 Function is used only during write_hash_table so no index format backward
199 compatibility is needed. */
201 static symtab_index_entry &
202 find_slot (struct mapped_symtab *symtab, const char *name)
204 offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
206 index = hash & (symtab->data.size () - 1);
207 step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
211 if (symtab->data[index].name == NULL
212 || strcmp (name, symtab->data[index].name) == 0)
213 return symtab->data[index];
214 index = (index + step) & (symtab->data.size () - 1);
218 /* Expand SYMTAB's hash table. */
221 hash_expand (struct mapped_symtab *symtab)
223 auto old_entries = std::move (symtab->data);
225 symtab->data.clear ();
226 symtab->data.resize (old_entries.size () * 2);
228 for (auto &it : old_entries)
231 auto &ref = find_slot (symtab, it.name);
232 ref = std::move (it);
236 /* Add an entry to SYMTAB. NAME is the name of the symbol.
237 CU_INDEX is the index of the CU in which the symbol appears.
238 IS_STATIC is one if the symbol is static, otherwise zero (global). */
241 add_index_entry (struct mapped_symtab *symtab, const char *name,
242 int is_static, gdb_index_symbol_kind kind,
243 offset_type cu_index)
245 offset_type cu_index_and_attrs;
247 ++symtab->n_elements;
248 if (4 * symtab->n_elements / 3 >= symtab->data.size ())
249 hash_expand (symtab);
251 symtab_index_entry &slot = find_slot (symtab, name);
252 if (slot.name == NULL)
255 /* index_offset is set later. */
258 cu_index_and_attrs = 0;
259 DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
260 DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
261 DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
263 /* We don't want to record an index value twice as we want to avoid the
265 We process all global symbols and then all static symbols
266 (which would allow us to avoid the duplication by only having to check
267 the last entry pushed), but a symbol could have multiple kinds in one CU.
268 To keep things simple we don't worry about the duplication here and
269 sort and uniquify the list after we've processed all symbols. */
270 slot.cu_indices.push_back (cu_index_and_attrs);
273 /* Sort and remove duplicates of all symbols' cu_indices lists. */
276 uniquify_cu_indices (struct mapped_symtab *symtab)
278 for (auto &entry : symtab->data)
280 if (entry.name != NULL && !entry.cu_indices.empty ())
282 auto &cu_indices = entry.cu_indices;
283 std::sort (cu_indices.begin (), cu_indices.end ());
284 auto from = std::unique (cu_indices.begin (), cu_indices.end ());
285 cu_indices.erase (from, cu_indices.end ());
290 /* A form of 'const char *' suitable for container keys. Only the
291 pointer is stored. The strings themselves are compared, not the
296 c_str_view (const char *cstr)
300 bool operator== (const c_str_view &other) const
302 return strcmp (m_cstr, other.m_cstr) == 0;
305 /* Return the underlying C string. Note, the returned string is
306 only a reference with lifetime of this object. */
307 const char *c_str () const
313 friend class c_str_view_hasher;
314 const char *const m_cstr;
317 /* A std::unordered_map::hasher for c_str_view that uses the right
318 hash function for strings in a mapped index. */
319 class c_str_view_hasher
322 size_t operator () (const c_str_view &x) const
324 return mapped_index_string_hash (INT_MAX, x.m_cstr);
328 /* A std::unordered_map::hasher for std::vector<>. */
333 size_t operator () (const std::vector<T> &key) const
335 return iterative_hash (key.data (),
336 sizeof (key.front ()) * key.size (), 0);
340 /* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
341 constant pool entries going into the data buffer CPOOL. */
344 write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
347 /* Elements are sorted vectors of the indices of all the CUs that
348 hold an object of this name. */
349 std::unordered_map<std::vector<offset_type>, offset_type,
350 vector_hasher<offset_type>>
353 /* We add all the index vectors to the constant pool first, to
354 ensure alignment is ok. */
355 for (symtab_index_entry &entry : symtab->data)
357 if (entry.name == NULL)
359 gdb_assert (entry.index_offset == 0);
361 /* Finding before inserting is faster than always trying to
362 insert, because inserting always allocates a node, does the
363 lookup, and then destroys the new node if another node
364 already had the same key. C++17 try_emplace will avoid
367 = symbol_hash_table.find (entry.cu_indices);
368 if (found != symbol_hash_table.end ())
370 entry.index_offset = found->second;
374 symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
375 entry.index_offset = cpool.size ();
376 cpool.append_offset (entry.cu_indices.size ());
377 for (const auto index : entry.cu_indices)
378 cpool.append_offset (index);
382 /* Now write out the hash table. */
383 std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
384 for (const auto &entry : symtab->data)
386 offset_type str_off, vec_off;
388 if (entry.name != NULL)
390 const auto insertpair = str_table.emplace (entry.name, cpool.size ());
391 if (insertpair.second)
392 cpool.append_cstr0 (entry.name);
393 str_off = insertpair.first->second;
394 vec_off = entry.index_offset;
398 /* While 0 is a valid constant pool index, it is not valid
399 to have 0 for both offsets. */
404 output.append_offset (str_off);
405 output.append_offset (vec_off);
409 typedef std::unordered_map<dwarf2_per_cu_data *, unsigned int> cu_index_map;
411 /* Helper struct for building the address table. */
412 struct addrmap_index_data
414 addrmap_index_data (data_buf &addr_vec_, cu_index_map &cu_index_htab_)
415 : addr_vec (addr_vec_),
416 cu_index_htab (cu_index_htab_)
420 cu_index_map &cu_index_htab;
422 int operator() (CORE_ADDR start_addr, void *obj);
424 /* True if the previous_* fields are valid.
425 We can't write an entry until we see the next entry (since it is only then
426 that we know the end of the entry). */
427 bool previous_valid = false;
428 /* Index of the CU in the table of all CUs in the index file. */
429 unsigned int previous_cu_index = 0;
430 /* Start address of the CU. */
431 CORE_ADDR previous_cu_start = 0;
434 /* Write an address entry to ADDR_VEC. */
437 add_address_entry (data_buf &addr_vec,
438 CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
440 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
441 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
442 addr_vec.append_offset (cu_index);
445 /* Worker function for traversing an addrmap to build the address table. */
448 addrmap_index_data::operator() (CORE_ADDR start_addr, void *obj)
450 dwarf2_per_cu_data *per_cu = (dwarf2_per_cu_data *) obj;
453 add_address_entry (addr_vec,
454 previous_cu_start, start_addr,
457 previous_cu_start = start_addr;
460 const auto it = cu_index_htab.find (per_cu);
461 gdb_assert (it != cu_index_htab.cend ());
462 previous_cu_index = it->second;
463 previous_valid = true;
466 previous_valid = false;
471 /* Write ADDRMAP to ADDR_VEC.
472 CU_INDEX_HTAB is used to map addrmap entries to their CU indices
473 in the index file. */
476 write_address_map (struct addrmap *addrmap, data_buf &addr_vec,
477 cu_index_map &cu_index_htab)
479 struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
481 addrmap_foreach (addrmap, addrmap_index_data);
483 /* It's highly unlikely the last entry (end address = 0xff...ff)
484 is valid, but we should still handle it.
485 The end address is recorded as the start of the next region, but that
486 doesn't work here. To cope we pass 0xff...ff, this is a rare situation
488 if (addrmap_index_data.previous_valid)
489 add_address_entry (addr_vec,
490 addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
491 addrmap_index_data.previous_cu_index);
494 /* DWARF-5 .debug_names builder. */
498 debug_names (dwarf2_per_objfile *per_objfile, bool is_dwarf64,
499 bfd_endian dwarf5_byte_order)
500 : m_dwarf5_byte_order (dwarf5_byte_order),
501 m_dwarf32 (dwarf5_byte_order),
502 m_dwarf64 (dwarf5_byte_order),
504 ? static_cast<dwarf &> (m_dwarf64)
505 : static_cast<dwarf &> (m_dwarf32)),
506 m_name_table_string_offs (m_dwarf.name_table_string_offs),
507 m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
508 m_debugstrlookup (per_objfile)
511 int dwarf5_offset_size () const
513 const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
514 return dwarf5_is_dwarf64 ? 8 : 4;
517 /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit? */
518 enum class unit_kind { cu, tu };
520 /* Insert one symbol. */
521 void insert (int dwarf_tag, const char *name, int cu_index, bool is_static,
522 unit_kind kind, enum language lang)
524 if (lang == language_ada)
526 /* We want to ensure that the Ada main function's name appears
527 verbatim in the index. However, this name will be of the
528 form "_ada_mumble", and will be rewritten by ada_decode.
529 So, recognize it specially here and add it to the index by
531 if (strcmp (main_name (), name) == 0)
533 const auto insertpair
534 = m_name_to_value_set.emplace (c_str_view (name),
535 std::set<symbol_value> ());
536 std::set<symbol_value> &value_set = insertpair.first->second;
537 value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static,
541 /* In order for the index to work when read back into gdb, it
542 has to supply a funny form of the name: it should be the
543 encoded name, with any suffixes stripped. Using the
544 ordinary encoded name will not work properly with the
545 searching logic in find_name_components_bounds; nor will
546 using the decoded name. Furthermore, an Ada "verbatim"
547 name (of the form "<MumBle>") must be entered without the
548 angle brackets. Note that the current index is unusual,
549 see PR symtab/24820 for details. */
550 std::string decoded = ada_decode (name);
551 if (decoded[0] == '<')
552 name = (char *) obstack_copy0 (&m_string_obstack,
553 decoded.c_str () + 1,
554 decoded.length () - 2);
556 name = obstack_strdup (&m_string_obstack,
557 ada_encode (decoded.c_str ()));
560 const auto insertpair
561 = m_name_to_value_set.emplace (c_str_view (name),
562 std::set<symbol_value> ());
563 std::set<symbol_value> &value_set = insertpair.first->second;
564 value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind));
567 void insert (const cooked_index_entry *entry)
569 const auto it = m_cu_index_htab.find (entry->per_cu);
570 gdb_assert (it != m_cu_index_htab.cend ());
571 const char *name = entry->full_name (&m_string_obstack);
572 insert (entry->tag, name, it->second, (entry->flags & IS_STATIC) != 0,
573 entry->per_cu->is_debug_types ? unit_kind::tu : unit_kind::cu,
574 entry->per_cu->lang);
577 /* Build all the tables. All symbols must be already inserted.
578 This function does not call file_write, caller has to do it
582 /* Verify the build method has not been called twice. */
583 gdb_assert (m_abbrev_table.empty ());
584 const size_t name_count = m_name_to_value_set.size ();
585 m_bucket_table.resize
586 (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
587 m_hash_table.reserve (name_count);
588 m_name_table_string_offs.reserve (name_count);
589 m_name_table_entry_offs.reserve (name_count);
591 /* Map each hash of symbol to its name and value. */
595 decltype (m_name_to_value_set)::const_iterator it;
597 std::vector<std::forward_list<hash_it_pair>> bucket_hash;
598 bucket_hash.resize (m_bucket_table.size ());
599 for (decltype (m_name_to_value_set)::const_iterator it
600 = m_name_to_value_set.cbegin ();
601 it != m_name_to_value_set.cend ();
604 const char *const name = it->first.c_str ();
605 const uint32_t hash = dwarf5_djb_hash (name);
606 hash_it_pair hashitpair;
607 hashitpair.hash = hash;
609 auto &slot = bucket_hash[hash % bucket_hash.size()];
610 slot.push_front (std::move (hashitpair));
612 for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
614 const std::forward_list<hash_it_pair> &hashitlist
615 = bucket_hash[bucket_ix];
616 if (hashitlist.empty ())
618 uint32_t &bucket_slot = m_bucket_table[bucket_ix];
619 /* The hashes array is indexed starting at 1. */
620 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
621 sizeof (bucket_slot), m_dwarf5_byte_order,
622 m_hash_table.size () + 1);
623 for (const hash_it_pair &hashitpair : hashitlist)
625 m_hash_table.push_back (0);
626 store_unsigned_integer (reinterpret_cast<gdb_byte *>
627 (&m_hash_table.back ()),
628 sizeof (m_hash_table.back ()),
629 m_dwarf5_byte_order, hashitpair.hash);
630 const c_str_view &name = hashitpair.it->first;
631 const std::set<symbol_value> &value_set = hashitpair.it->second;
632 m_name_table_string_offs.push_back_reorder
633 (m_debugstrlookup.lookup (name.c_str ()));
634 m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
635 gdb_assert (!value_set.empty ());
636 for (const symbol_value &value : value_set)
638 int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
644 m_abbrev_table.append_unsigned_leb128 (idx);
645 m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
646 m_abbrev_table.append_unsigned_leb128
647 (value.kind == unit_kind::cu ? DW_IDX_compile_unit
649 m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
650 m_abbrev_table.append_unsigned_leb128 (value.is_static
651 ? DW_IDX_GNU_internal
652 : DW_IDX_GNU_external);
653 m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
655 /* Terminate attributes list. */
656 m_abbrev_table.append_unsigned_leb128 (0);
657 m_abbrev_table.append_unsigned_leb128 (0);
660 m_entry_pool.append_unsigned_leb128 (idx);
661 m_entry_pool.append_unsigned_leb128 (value.cu_index);
664 /* Terminate the list of CUs. */
665 m_entry_pool.append_unsigned_leb128 (0);
668 gdb_assert (m_hash_table.size () == name_count);
670 /* Terminate tags list. */
671 m_abbrev_table.append_unsigned_leb128 (0);
674 /* Return .debug_names bucket count. This must be called only after
675 calling the build method. */
676 uint32_t bucket_count () const
678 /* Verify the build method has been already called. */
679 gdb_assert (!m_abbrev_table.empty ());
680 const uint32_t retval = m_bucket_table.size ();
682 /* Check for overflow. */
683 gdb_assert (retval == m_bucket_table.size ());
687 /* Return .debug_names names count. This must be called only after
688 calling the build method. */
689 uint32_t name_count () const
691 /* Verify the build method has been already called. */
692 gdb_assert (!m_abbrev_table.empty ());
693 const uint32_t retval = m_hash_table.size ();
695 /* Check for overflow. */
696 gdb_assert (retval == m_hash_table.size ());
700 /* Return number of bytes of .debug_names abbreviation table. This
701 must be called only after calling the build method. */
702 uint32_t abbrev_table_bytes () const
704 gdb_assert (!m_abbrev_table.empty ());
705 return m_abbrev_table.size ();
708 /* Return number of bytes the .debug_names section will have. This
709 must be called only after calling the build method. */
710 size_t bytes () const
712 /* Verify the build method has been already called. */
713 gdb_assert (!m_abbrev_table.empty ());
714 size_t expected_bytes = 0;
715 expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
716 expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
717 expected_bytes += m_name_table_string_offs.bytes ();
718 expected_bytes += m_name_table_entry_offs.bytes ();
719 expected_bytes += m_abbrev_table.size ();
720 expected_bytes += m_entry_pool.size ();
721 return expected_bytes;
724 /* Write .debug_names to FILE_NAMES and .debug_str addition to
725 FILE_STR. This must be called only after calling the build
727 void file_write (FILE *file_names, FILE *file_str) const
729 /* Verify the build method has been already called. */
730 gdb_assert (!m_abbrev_table.empty ());
731 ::file_write (file_names, m_bucket_table);
732 ::file_write (file_names, m_hash_table);
733 m_name_table_string_offs.file_write (file_names);
734 m_name_table_entry_offs.file_write (file_names);
735 m_abbrev_table.file_write (file_names);
736 m_entry_pool.file_write (file_names);
737 m_debugstrlookup.file_write (file_str);
740 void add_cu (dwarf2_per_cu_data *per_cu, offset_type index)
742 m_cu_index_htab.emplace (per_cu, index);
747 /* Storage for symbol names mapping them to their .debug_str section
749 class debug_str_lookup
753 /* Object constructor to be called for current DWARF2_PER_OBJFILE.
754 All .debug_str section strings are automatically stored. */
755 debug_str_lookup (dwarf2_per_objfile *per_objfile)
756 : m_abfd (per_objfile->objfile->obfd),
757 m_per_objfile (per_objfile)
759 per_objfile->per_bfd->str.read (per_objfile->objfile);
760 if (per_objfile->per_bfd->str.buffer == NULL)
762 for (const gdb_byte *data = per_objfile->per_bfd->str.buffer;
763 data < (per_objfile->per_bfd->str.buffer
764 + per_objfile->per_bfd->str.size);)
766 const char *const s = reinterpret_cast<const char *> (data);
767 const auto insertpair
768 = m_str_table.emplace (c_str_view (s),
769 data - per_objfile->per_bfd->str.buffer);
770 if (!insertpair.second)
771 complaint (_("Duplicate string \"%s\" in "
772 ".debug_str section [in module %s]"),
773 s, bfd_get_filename (m_abfd));
774 data += strlen (s) + 1;
778 /* Return offset of symbol name S in the .debug_str section. Add
779 such symbol to the section's end if it does not exist there
781 size_t lookup (const char *s)
783 const auto it = m_str_table.find (c_str_view (s));
784 if (it != m_str_table.end ())
786 const size_t offset = (m_per_objfile->per_bfd->str.size
787 + m_str_add_buf.size ());
788 m_str_table.emplace (c_str_view (s), offset);
789 m_str_add_buf.append_cstr0 (s);
793 /* Append the end of the .debug_str section to FILE. */
794 void file_write (FILE *file) const
796 m_str_add_buf.file_write (file);
800 std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
802 dwarf2_per_objfile *m_per_objfile;
804 /* Data to add at the end of .debug_str for new needed symbol names. */
805 data_buf m_str_add_buf;
808 /* Container to map used DWARF tags to their .debug_names abbreviation
813 index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
814 : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
819 operator== (const index_key &other) const
821 return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
822 && kind == other.kind);
826 const bool is_static;
827 const unit_kind kind;
830 /* Provide std::unordered_map::hasher for index_key. */
831 class index_key_hasher
835 operator () (const index_key &key) const
837 return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
841 /* Parameters of one symbol entry. */
845 const int dwarf_tag, cu_index;
846 const bool is_static;
847 const unit_kind kind;
849 symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
851 : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
856 operator< (const symbol_value &other) const
876 /* Abstract base class to unify DWARF-32 and DWARF-64 name table
881 const bfd_endian dwarf5_byte_order;
883 explicit offset_vec (bfd_endian dwarf5_byte_order_)
884 : dwarf5_byte_order (dwarf5_byte_order_)
887 /* Call std::vector::reserve for NELEM elements. */
888 virtual void reserve (size_t nelem) = 0;
890 /* Call std::vector::push_back with store_unsigned_integer byte
891 reordering for ELEM. */
892 virtual void push_back_reorder (size_t elem) = 0;
894 /* Return expected output size in bytes. */
895 virtual size_t bytes () const = 0;
897 /* Write name table to FILE. */
898 virtual void file_write (FILE *file) const = 0;
901 /* Template to unify DWARF-32 and DWARF-64 output. */
902 template<typename OffsetSize>
903 class offset_vec_tmpl : public offset_vec
906 explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
907 : offset_vec (dwarf5_byte_order_)
910 /* Implement offset_vec::reserve. */
911 void reserve (size_t nelem) override
913 m_vec.reserve (nelem);
916 /* Implement offset_vec::push_back_reorder. */
917 void push_back_reorder (size_t elem) override
919 m_vec.push_back (elem);
920 /* Check for overflow. */
921 gdb_assert (m_vec.back () == elem);
922 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
923 sizeof (m_vec.back ()), dwarf5_byte_order, elem);
926 /* Implement offset_vec::bytes. */
927 size_t bytes () const override
929 return m_vec.size () * sizeof (m_vec[0]);
932 /* Implement offset_vec::file_write. */
933 void file_write (FILE *file) const override
935 ::file_write (file, m_vec);
939 std::vector<OffsetSize> m_vec;
942 /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
943 respecting name table width. */
947 offset_vec &name_table_string_offs, &name_table_entry_offs;
949 dwarf (offset_vec &name_table_string_offs_,
950 offset_vec &name_table_entry_offs_)
951 : name_table_string_offs (name_table_string_offs_),
952 name_table_entry_offs (name_table_entry_offs_)
957 /* Template to unify DWARF-32 and DWARF-64 .debug_names output
958 respecting name table width. */
959 template<typename OffsetSize>
960 class dwarf_tmpl : public dwarf
963 explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
964 : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
965 m_name_table_string_offs (dwarf5_byte_order_),
966 m_name_table_entry_offs (dwarf5_byte_order_)
970 offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
971 offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
974 /* Store value of each symbol. */
975 std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
978 /* Tables of DWARF-5 .debug_names. They are in object file byte
980 std::vector<uint32_t> m_bucket_table;
981 std::vector<uint32_t> m_hash_table;
983 const bfd_endian m_dwarf5_byte_order;
984 dwarf_tmpl<uint32_t> m_dwarf32;
985 dwarf_tmpl<uint64_t> m_dwarf64;
987 offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
988 debug_str_lookup m_debugstrlookup;
990 /* Map each used .debug_names abbreviation tag parameter to its
992 std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
994 /* Next unused .debug_names abbreviation tag for
995 m_indexkey_to_idx. */
998 /* .debug_names abbreviation table. */
999 data_buf m_abbrev_table;
1001 /* .debug_names entry pool. */
1002 data_buf m_entry_pool;
1004 /* Temporary storage for Ada names. */
1005 auto_obstack m_string_obstack;
1007 cu_index_map m_cu_index_htab;
1010 /* Return true iff any of the needed offsets does not fit into a 32-bit
1011 .debug_names section. */
1014 check_dwarf64_offsets (dwarf2_per_objfile *per_objfile)
1016 for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
1018 if (to_underlying (per_cu->sect_off)
1019 >= (static_cast<uint64_t> (1) << 32))
1025 /* Assert that FILE's size is EXPECTED_SIZE. Assumes file's seek
1026 position is at the end of the file. */
1029 assert_file_size (FILE *file, size_t expected_size)
1031 const auto file_size = ftell (file);
1032 if (file_size == -1)
1033 perror_with_name (("ftell"));
1034 gdb_assert (file_size == expected_size);
1037 /* Write a gdb index file to OUT_FILE from all the sections passed as
1041 write_gdbindex_1 (FILE *out_file,
1042 const data_buf &cu_list,
1043 const data_buf &types_cu_list,
1044 const data_buf &addr_vec,
1045 const data_buf &symtab_vec,
1046 const data_buf &constant_pool)
1049 const offset_type size_of_header = 6 * sizeof (offset_type);
1050 offset_type total_len = size_of_header;
1052 /* The version number. */
1053 contents.append_offset (8);
1055 /* The offset of the CU list from the start of the file. */
1056 contents.append_offset (total_len);
1057 total_len += cu_list.size ();
1059 /* The offset of the types CU list from the start of the file. */
1060 contents.append_offset (total_len);
1061 total_len += types_cu_list.size ();
1063 /* The offset of the address table from the start of the file. */
1064 contents.append_offset (total_len);
1065 total_len += addr_vec.size ();
1067 /* The offset of the symbol table from the start of the file. */
1068 contents.append_offset (total_len);
1069 total_len += symtab_vec.size ();
1071 /* The offset of the constant pool from the start of the file. */
1072 contents.append_offset (total_len);
1073 total_len += constant_pool.size ();
1075 gdb_assert (contents.size () == size_of_header);
1077 contents.file_write (out_file);
1078 cu_list.file_write (out_file);
1079 types_cu_list.file_write (out_file);
1080 addr_vec.file_write (out_file);
1081 symtab_vec.file_write (out_file);
1082 constant_pool.file_write (out_file);
1084 assert_file_size (out_file, total_len);
1087 /* Write the contents of the internal "cooked" index. */
1090 write_cooked_index (cooked_index_vector *table,
1091 const cu_index_map &cu_index_htab,
1092 struct mapped_symtab *symtab)
1094 for (const cooked_index_entry *entry : table->all_entries ())
1096 const auto it = cu_index_htab.find (entry->per_cu);
1097 gdb_assert (it != cu_index_htab.cend ());
1099 const char *name = entry->full_name (&symtab->m_string_obstack);
1101 gdb_index_symbol_kind kind;
1102 if (entry->tag == DW_TAG_subprogram)
1103 kind = GDB_INDEX_SYMBOL_KIND_FUNCTION;
1104 else if (entry->tag == DW_TAG_variable
1105 || entry->tag == DW_TAG_constant
1106 || entry->tag == DW_TAG_enumerator)
1107 kind = GDB_INDEX_SYMBOL_KIND_VARIABLE;
1108 else if (entry->tag == DW_TAG_module
1109 || entry->tag == DW_TAG_common_block)
1110 kind = GDB_INDEX_SYMBOL_KIND_OTHER;
1112 kind = GDB_INDEX_SYMBOL_KIND_TYPE;
1114 add_index_entry (symtab, name, (entry->flags & IS_STATIC) != 0,
1119 /* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1120 If OBJFILE has an associated dwz file, write contents of a .gdb_index
1121 section for that dwz file into DWZ_OUT_FILE. If OBJFILE does not have an
1122 associated dwz file, DWZ_OUT_FILE must be NULL. */
/* Outline of the visible body:
   - one pass over per_bfd->all_comp_units assigns each unit an index,
     records it in cu_index_htab and appends its entry to one of the CU
     list buffers;
   - write_cooked_index and write_address_map convert the cooked index
     table and its address maps, using cu_index_htab;
   - uniquify_cu_indices and write_hash_table fill the symbol table and
     constant pool buffers;
   - write_gdbindex_1 writes the buffers to OUT_FILE and, when
     DWZ_OUT_FILE is non-NULL, writes a second index that carries only
     the dwz CU list.  The closing gdb_assert requires that list to be
     empty (presumably in the else arm; the else line is not visible).

   NOTE(review): some short lines of this function are absent from this
   view.  In particular counter and addr_vec are used below but their
   declarations are not visible, and two comments have lost their final
   line.  The notes added here describe visible statements only.  */
1125 write_gdbindex (dwarf2_per_objfile *per_objfile, FILE *out_file,
1128 mapped_symtab symtab;
/* CU list entries destined for the main index and for the dwz index.  */
1129 data_buf objfile_cu_list;
1130 data_buf dwz_cu_list;
1132 /* While we're scanning CU's create a table that maps a dwarf2_per_cu_data
1133 (which is what addrmap records) to its index (which is what is recorded
1134 in the index file). This will later be needed to write the address
1136 cu_index_map cu_index_htab;
1137 cu_index_htab.reserve (per_objfile->per_bfd->all_comp_units.size ());
1139 /* Store out the .debug_type CUs, if any. */
1140 data_buf types_cu_list;
1142 /* The CU list is already sorted, so we don't need to do additional
1143 work here. Also, the debug_types entries do not appear in
1144 all_comp_units, but only in their own hash table. */
/* NOTE(review): the loop below tests per_cu->is_debug_types on elements
   of all_comp_units, so the last sentence of the comment above appears
   to be out of date -- confirm.

   Type units and all other units are numbered independently;
   this_counter selects the running index that applies to each unit.  */
1147 int types_counter = 0;
1148 for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
1150 dwarf2_per_cu_data *per_cu
1151 = per_objfile->per_bfd->all_comp_units[i].get ();
1153 int &this_counter = per_cu->is_debug_types ? types_counter : counter;
/* Remember the index given to this unit.  The assert checks that the
   unit was not already in the map (assuming cu_index_map follows the
   std::unordered_map emplace contract -- TODO confirm).  */
1155 const auto insertpair = cu_index_htab.emplace (per_cu, this_counter);
1156 gdb_assert (insertpair.second);
1158 /* The all_comp_units list contains CUs read from the objfile as well as
1159 from the eventual dwz file. We need to place the entry in the
1160 corresponding index. */
/* NOTE(review): the middle operand of the conditional below is not
   visible; presumably it names types_cu_list.  */
1161 data_buf &cu_list = (per_cu->is_debug_types
1163 : per_cu->is_dwz ? dwz_cu_list : objfile_cu_list);
/* Every entry starts with the section offset of the unit, appended as
   an 8-byte little-endian field (assuming the first argument of
   append_uint is the field width in bytes).  */
1164 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1165 to_underlying (per_cu->sect_off));
1166 if (per_cu->is_debug_types)
/* A type unit entry continues with the type offset inside the unit and
   the type signature.  The C-style cast relies on is_debug_types being
   set only for signatured_type objects -- TODO confirm.  */
1168 signatured_type *sig_type = (signatured_type *) per_cu;
1169 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1170 to_underlying (sig_type->type_offset_in_tu));
1171 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1172 sig_type->signature);
/* Presumably the else arm (the else line is not visible): any other
   unit records its length as the second field.  */
1175 cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length);
/* The index table is taken to be a cooked_index_vector; static_cast
   performs no runtime check of that.  */
1180 cooked_index_vector *table
1181 = (static_cast<cooked_index_vector *>
1182 (per_objfile->per_bfd->index_table.get ()));
1183 write_cooked_index (table, cu_index_htab, &symtab);
1185 /* Dump the address map. */
1187 for (auto map : table->get_addrmaps ())
1188 write_address_map (map, addr_vec, cu_index_htab);
1190 /* Now that we've processed all symbols we can shrink their cu_indices
1192 uniquify_cu_indices (&symtab);
1194 data_buf symtab_vec, constant_pool;
1195 if (symtab.n_elements == 0)
1196 symtab.data.resize (0);
1198 write_hash_table (&symtab, symtab_vec, constant_pool);
1200 write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1201 symtab_vec, constant_pool);
1203 if (dwz_out_file != NULL)
1204 write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1206 gdb_assert (dwz_cu_list.empty ());
1209 /* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension. */
/* The array holds four bytes including the terminating zero.
   write_debug_names checks with a static_assert that its size is a
   multiple of 4, then writes that size and the array itself into the
   .debug_names header.  */
1210 static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1212 /* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1213 needed addition to .debug_str section to OUT_FILE_STR. Return how
1214 many bytes were expected to be written into OUT_FILE. */
/* Outline of the visible body: build the CU list and the local type
   unit list while registering every unit with a debug_names table,
   insert all cooked index entries into that table, compose the section
   header, then write header, CU list, type unit list and name table to
   OUT_FILE (the name table also receives OUT_FILE_STR).  Finally
   assert_file_size is called with OUT_FILE and the expected byte count.

   NOTE(review): no return statement is visible in the body, so the
   sentence above about a return value looks stale; the return type line
   is not visible here -- confirm.  Some short lines of this function
   are absent from this view: cu_list, counter and header are used
   below but their declarations are not visible.  */
1217 write_debug_names (dwarf2_per_objfile *per_objfile,
1218 FILE *out_file, FILE *out_file_str)
/* dwarf5_is_dwarf64 selects the 64-bit header layout further down; the
   byte order for every header field comes from the objfile
   architecture.  */
1220 const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (per_objfile);
1221 struct objfile *objfile = per_objfile->objfile;
1222 const enum bfd_endian dwarf5_byte_order
1223 = gdbarch_byte_order (objfile->arch ());
1225 /* The CU list is already sorted, so we don't need to do additional
1226 work here. Also, the debug_types entries do not appear in
1227 all_comp_units, but only in their own hash table. */
/* NOTE(review): the loop below tests per_cu->is_debug_types on elements
   of all_comp_units, so the last sentence of the comment above appears
   to be out of date -- confirm.  */
1229 data_buf types_cu_list;
1230 debug_names nametable (per_objfile, dwarf5_is_dwarf64, dwarf5_byte_order);
1232 int types_counter = 0;
1233 for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
1235 dwarf2_per_cu_data *per_cu
1236 = per_objfile->per_bfd->all_comp_units[i].get ();
/* Type units and all other units have separate running indices and
   separate list buffers.  */
1238 int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1239 data_buf &this_list = per_cu->is_debug_types ? types_cu_list : cu_list;
/* Register the unit under its index and append its section offset,
   using the offset size reported by the name table (the byte order
   argument line is not visible here).  */
1241 nametable.add_cu (per_cu, this_counter);
1242 this_list.append_uint (nametable.dwarf5_offset_size (),
1244 to_underlying (per_cu->sect_off));
1248 /* Verify that all units are represented. */
/* That is: counter must equal the number of units that are not counted
   in tu_stats.nr_tus, and types_counter must equal nr_tus.  */
1249 gdb_assert (counter == (per_objfile->per_bfd->all_comp_units.size ()
1250 - per_objfile->per_bfd->tu_stats.nr_tus));
1251 gdb_assert (types_counter == per_objfile->per_bfd->tu_stats.nr_tus);
/* The index table is taken to be a cooked_index_vector; static_cast
   performs no runtime check of that.  Every entry is handed to the
   name table.  */
1253 cooked_index_vector *table
1254 = (static_cast<cooked_index_vector *>
1255 (per_objfile->per_bfd->index_table.get ()));
1256 for (const cooked_index_entry *entry : table->all_entries ())
1257 nametable.insert (entry);
1261 /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC. */
/* Header size computed by hand: the unit_length field takes 12 bytes in
   the 64-bit format and 4 otherwise, matching the two branches that
   emit it below, plus the augmentation string.  NOTE(review): one
   operand line of this sum is not visible; it presumably accounts for
   the fixed-width fields appended below (two 2-byte and seven 4-byte
   fields by count of the visible append_uint calls) -- confirm.  */
1263 const offset_type bytes_of_header
1264 = ((dwarf5_is_dwarf64 ? 12 : 4)
1266 + sizeof (dwarf5_gdb_augmentation));
1267 size_t expected_bytes = 0;
1268 expected_bytes += bytes_of_header;
1269 expected_bytes += cu_list.size ();
1270 expected_bytes += types_cu_list.size ();
1271 expected_bytes += nametable.bytes ();
/* unit_length does not count its own bytes.  32-bit form: a 4-byte
   length, asserted to stay below 0xfffffff0 (values from there up are
   reserved by DWARF for initial-length escapes).  */
1274 if (!dwarf5_is_dwarf64)
1276 const uint64_t size64 = expected_bytes - 4;
1277 gdb_assert (size64 < 0xfffffff0);
1278 header.append_uint (4, dwarf5_byte_order, size64);
/* Presumably the else arm (the else line is not visible): 64-bit form,
   the 0xffffffff escape followed by an 8-byte length.  */
1282 header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1283 header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1286 /* The version number. */
1287 header.append_uint (2, dwarf5_byte_order, 5);
/* Two bytes written as zero; in the DWARF 5 name index header the
   2-byte field after the version is padding.  */
1290 header.append_uint (2, dwarf5_byte_order, 0);
1292 /* comp_unit_count - The number of CUs in the CU list. */
1293 header.append_uint (4, dwarf5_byte_order, counter);
1295 /* local_type_unit_count - The number of TUs in the local TU
1297 header.append_uint (4, dwarf5_byte_order, types_counter);
1299 /* foreign_type_unit_count - The number of TUs in the foreign TU
1301 header.append_uint (4, dwarf5_byte_order, 0);
1303 /* bucket_count - The number of hash buckets in the hash lookup
1305 header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1307 /* name_count - The number of unique names in the index. */
1308 header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1310 /* abbrev_table_size - The size in bytes of the abbreviations
1312 header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1314 /* augmentation_string_size - The size in bytes of the augmentation
1315 string. This value is rounded up to a multiple of 4. */
1316 static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1317 header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
1318 header.append_array (dwarf5_gdb_augmentation);
/* Cross-check the hand-computed header size against what was actually
   appended.  */
1320 gdb_assert (header.size () == bytes_of_header);
/* Write the pieces in the same order in which their sizes were summed
   into expected_bytes.  */
1322 header.file_write (out_file);
1323 cu_list.file_write (out_file);
1324 types_cu_list.file_write (out_file);
1325 nametable.file_write (out_file, out_file_str);
/* Presumably verifies that OUT_FILE now holds exactly expected_bytes
   bytes (assert_file_size is defined outside this view).  */
1327 assert_file_size (out_file, expected_bytes);
1330 /* This represents an index file being written (work-in-progress).
1332 The data is initially written to a temporary file. When the finalize method
1333 is called, the file is closed and moved to its final location.
1335 On failure (if this object is being destroyed without having called finalize),
1336 the temporary file is closed and deleted. */
1338 struct index_wip_file
/* Constructor.  FILENAME is built from DIR, SLASH_STRING and BASENAME
   (the last parameter and the rest of the concatenation are not visible
   here; presumably a suffix is appended).  A temporary name derived
   from FILENAME is created through gdb_mkostemp_cloexec, the descriptor
   is turned into a stream opened with mode wb, and only then is the
   unlinker armed for the temporary name.  Failures are reported through
   perror_with_name and error.  */
1340 index_wip_file (const char *dir, const char *basename,
1343 filename = (std::string (dir) + SLASH_STRING + basename
1346 filename_temp = make_temp_filename (filename);
1348 scoped_fd out_file_fd = gdb_mkostemp_cloexec (filename_temp.data (),
1350 if (out_file_fd.get () == -1)
1351 perror_with_name (("mkstemp"));
1353 out_file = out_file_fd.to_file ("wb");
1355 if (out_file == nullptr)
1356 error (_("Can't open `%s' for writing"), filename_temp.data ());
1358 unlink_file.emplace (filename_temp.data ());
/* NOTE(review): the statements from here down to the rename presumably
   form the finalize method named in the struct comment; its signature
   line is not visible in this view.  */
1363 /* We want to keep the file. */
1364 unlink_file->keep ();
1366 /* Close and move the str file in place. */
/* NOTE(review): the visible statements only drop the unlinker before
   renaming; no call that closes out_file is visible here, and this
   struct is used for the index files as well as for the str file, so
   the comment above may be imprecise -- confirm.  */
1367 unlink_file.reset ();
1368 if (rename (filename_temp.data (), filename.c_str ()) != 0)
1369 perror_with_name (("rename"));
/* Final path of the file; the rename target.  */
1372 std::string filename;
/* Temporary path the data is written to; passed as a mutable buffer to
   gdb_mkostemp_cloexec.  */
1373 gdb::char_vector filename_temp;
1375 /* Order matters here; we want FILE to be closed before
1376 FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1377 delete a file that is still open. So, we wrap the unlinker in an
1378 optional and emplace it once we know the file name. */
1379 gdb::optional<gdb::unlinker> unlink_file;
/* Stream for the temporary file; writers obtain the FILE pointer with
   out_file.get ().  */
1381 gdb_file_up out_file;
1384 /* See dwarf2/index-write.h. */
/* Outline of the visible body: reject the request when there is no
   index table, when the file has more than one .debug_types section or
   when the objfile path cannot be examined with stat; create the
   work-in-progress file for the index (and one for the dwz index when
   DWZ_BASENAME is non-NULL); write either a .debug_names index plus
   its string file or a .gdb_index; then finalize the files.  Errors
   are reported through error and perror_with_name.  */
1387 write_dwarf_index (dwarf2_per_objfile *per_objfile, const char *dir,
1388 const char *basename, const char *dwz_basename,
1389 dw_index_kind index_kind)
1391 struct objfile *objfile = per_objfile->objfile;
/* NOTE(review): table is produced by static_cast from
   index_table.get (), so it is null exactly when index_table is null.
   As written, the nested test below can then never be true, and the
   cast does not verify that the table really is a cooked_index_vector
   -- confirm and consider a checked conversion.  */
1393 cooked_index_vector *table
1394 = (static_cast<cooked_index_vector *>
1395 (per_objfile->per_bfd->index_table.get ()));
1396 if (table == nullptr)
1398 if (per_objfile->per_bfd->index_table != nullptr)
1399 error (_("Cannot use an index to create the index"));
1400 error (_("No debugging symbols"));
1403 if (per_objfile->per_bfd->types.size () > 1)
1404 error (_("Cannot make an index when the file has multiple .debug_types sections"));
/* Fail early when the objfile path cannot be examined (the declaration
   of st is not visible in this view).  */
1407 if (stat (objfile_name (objfile), &st) < 0)
1408 perror_with_name (objfile_name (objfile));
/* The file name suffix depends on the kind of index requested.  */
1410 const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1411 ? INDEX5_SUFFIX : INDEX4_SUFFIX);
1413 index_wip_file objfile_index_wip (dir, basename, index_suffix);
1414 gdb::optional<index_wip_file> dwz_index_wip;
1416 if (dwz_basename != NULL)
1417 dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
1419 if (index_kind == dw_index_kind::DEBUG_NAMES)
/* The string data goes to its own work-in-progress file, which is
   finalized as soon as the names have been written.  */
1421 index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1423 write_debug_names (per_objfile, objfile_index_wip.out_file.get (),
1424 str_wip_file.out_file.get ());
1426 str_wip_file.finalize ();
/* Presumably the else arm (the else line is not visible):
   write_gdbindex receives the dwz stream, or NULL when there is no dwz
   work-in-progress file.  */
1429 write_gdbindex (per_objfile, objfile_index_wip.out_file.get (),
1430 (dwz_index_wip.has_value ()
1431 ? dwz_index_wip->out_file.get () : NULL));
/* Move the finished files to their final names (see index_wip_file).  */
1433 objfile_index_wip.finalize ();
1435 if (dwz_index_wip.has_value ())
1436 dwz_index_wip->finalize ();
/* Command handler summary, from the visible statements: ARG may start
   with the option -dwarf-5, which selects dw_index_kind::DEBUG_NAMES
   instead of the default dw_index_kind::GDB_INDEX; the remaining text
   is passed to write_dwarf_index as the output directory.  The handler
   visits every objfile of the current program space, ignores those
   whose name cannot be examined with stat and those without DWARF
   per-objfile data, and calls write_dwarf_index with the base names of
   the objfile and of its dwz file (NULL when there is none).  A
   gdb_exception_error raised while writing is caught and reported on
   gdb_stderr with the objfile name; it is not rethrown in the visible
   lines.

   NOTE(review): the option is matched against a string that ends with
   a space, so -dwarf-5 given with nothing after it is not recognised
   as the option and is treated as the directory text -- confirm this
   is intended.  Several short lines are absent from this view: the
   handling of a null ARG before skip_spaces, the test guarding the
   usage error, the declaration of st, the test guarding the
   dwz->filename () call and the final argument of write_dwarf_index.
   The header comment that follows has also lost its last line.  */
1439 /* Implementation of the `save gdb-index' command.
1441 Note that the .gdb_index file format used by this command is
1442 documented in the GDB manual. Any changes here must be documented
1446 save_gdb_index_command (const char *arg, int from_tty)
1448 const char dwarf5space[] = "-dwarf-5 ";
1449 dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1454 arg = skip_spaces (arg);
1455 if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1457 index_kind = dw_index_kind::DEBUG_NAMES;
1458 arg += strlen (dwarf5space);
1459 arg = skip_spaces (arg);
1463 error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1465 for (objfile *objfile : current_program_space->objfiles ())
1469 /* If the objfile does not correspond to an actual file, skip it. */
1470 if (stat (objfile_name (objfile), &st) < 0)
/* Objfiles for which no DWARF per-objfile data exists are ignored.  */
1473 dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile);
1475 if (per_objfile != NULL)
/* Only base names are passed on; write_dwarf_index combines them with
   the directory given in ARG.  */
1479 const char *basename = lbasename (objfile_name (objfile));
1480 const dwz_file *dwz = dwarf2_get_dwz_file (per_objfile->per_bfd);
1481 const char *dwz_basename = NULL;
1484 dwz_basename = lbasename (dwz->filename ());
1486 write_dwarf_index (per_objfile, arg, basename, dwz_basename,
/* Report the failure for this objfile and carry on; presumably the
   loop then continues with the next objfile.  */
1489 catch (const gdb_exception_error &except)
1491 exception_fprintf (gdb_stderr, except,
1492 _("Error while writing index for `%s': "),
1493 objfile_name (objfile));
/* Module initializer: registers the gdb-index command in class
   class_files with save_gdb_index_command as its handler and the help
   text below.  NOTE(review): the last argument of add_cmd (the command
   list that receives the new command) is not visible in this view;
   given the usage text it is presumably the list of save
   subcommands -- confirm.  */
1500 void _initialize_dwarf_index_write ();
1502 _initialize_dwarf_index_write ()
1504 cmd_list_element *c = add_cmd ("gdb-index", class_files,
1505 save_gdb_index_command, _("\
1506 Save a gdb-index file.\n\
1507 Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1509 No options create one file with .gdb-index extension for pre-DWARF-5\n\
1510 compatible .gdb_index section. With -dwarf-5 creates two files with\n\
1511 extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
/* Complete the argument of the command as a file name.  */
1513 set_cmd_completer (c, filename_completer);