1 /* DWARF index writing support for GDB.
3 Copyright (C) 1994-2022 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "dwarf2/index-write.h"
25 #include "cli/cli-decode.h"
26 #include "gdbsupport/byte-vector.h"
27 #include "gdbsupport/filestuff.h"
28 #include "gdbsupport/gdb_unlinker.h"
29 #include "gdbsupport/pathstuff.h"
30 #include "gdbsupport/scoped_fd.h"
31 #include "complaints.h"
32 #include "dwarf2/index-common.h"
34 #include "dwarf2/read.h"
35 #include "dwarf2/dwz.h"
36 #include "gdb/gdb-index.h"
40 #include "dwarf2/tag.h"
44 #include <forward_list>
46 #include <unordered_map>
47 #include <unordered_set>
49 /* Ensure only legit values are used. */
50 #define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \
52 gdb_assert ((unsigned int) (value) <= 1); \
53 GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \
56 /* Ensure only legit values are used. */
57 #define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \
59 gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \
60 && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
61 GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \
64 /* Ensure we don't use more than the allotted number of bits for the CU. */
65 #define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \
67 gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \
68 GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \
71 /* The "save gdb-index" command. */
73 /* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
77 file_write (FILE *file, const void *data, size_t size)
79 if (fwrite (data, 1, size, file) != size)
80 error (_("couldn't data write to file"));
83 /* Write the contents of VEC to FILE, with error checking. */
85 template<typename Elem, typename Alloc>
87 file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
90 file_write (file, vec.data (), vec.size () * sizeof (vec[0]));
93 /* In-memory buffer to prepare data to be written later to a file. */
97 /* Copy ARRAY to the end of the buffer. */
98 void append_array (gdb::array_view<const gdb_byte> array)
100 std::copy (array.begin (), array.end (), grow (array.size ()));
103 /* Copy CSTR (a zero-terminated string) to the end of buffer. The
104 terminating zero is appended too. */
105 void append_cstr0 (const char *cstr)
107 const size_t size = strlen (cstr) + 1;
108 std::copy (cstr, cstr + size, grow (size));
111 /* Store INPUT as ULEB128 to the end of buffer. */
112 void append_unsigned_leb128 (ULONGEST input)
116 gdb_byte output = input & 0x7f;
120 m_vec.push_back (output);
126 /* Accept a host-format integer in VAL and append it to the buffer
127 as a target-format integer which is LEN bytes long. */
128 void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
130 ::store_unsigned_integer (grow (len), len, byte_order, val);
133 /* Copy VALUE to the end of the buffer, little-endian. */
134 void append_offset (offset_type value)
136 append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
139 /* Return the size of the buffer. */
142 return m_vec.size ();
145 /* Return true iff the buffer is empty. */
148 return m_vec.empty ();
151 /* Write the buffer to FILE. */
152 void file_write (FILE *file) const
154 ::file_write (file, m_vec);
158 /* Grow SIZE bytes at the end of the buffer. Returns a pointer to
159 the start of the new block. */
160 gdb_byte *grow (size_t size)
162 m_vec.resize (m_vec.size () + size);
163 return &*(m_vec.end () - size);
166 gdb::byte_vector m_vec;
169 /* An entry in the symbol table. */
170 struct symtab_index_entry
172 /* The name of the symbol. */
174 /* The offset of the name in the constant pool. */
175 offset_type index_offset;
176 /* A sorted vector of the indices of all the CUs that hold an object
178 std::vector<offset_type> cu_indices;
181 /* The symbol table. This is a power-of-2-sized hash table. */
189 offset_type n_elements = 0;
190 std::vector<symtab_index_entry> data;
192 /* Temporary storage for names. */
193 auto_obstack m_string_obstack;
196 /* Find a slot in SYMTAB for the symbol NAME. Returns a reference to
199 Function is used only during write_hash_table so no index format backward
200 compatibility is needed. */
202 static symtab_index_entry &
203 find_slot (struct mapped_symtab *symtab, const char *name)
205 offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
207 index = hash & (symtab->data.size () - 1);
208 step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
212 if (symtab->data[index].name == NULL
213 || strcmp (name, symtab->data[index].name) == 0)
214 return symtab->data[index];
215 index = (index + step) & (symtab->data.size () - 1);
219 /* Expand SYMTAB's hash table. */
222 hash_expand (struct mapped_symtab *symtab)
224 auto old_entries = std::move (symtab->data);
226 symtab->data.clear ();
227 symtab->data.resize (old_entries.size () * 2);
229 for (auto &it : old_entries)
232 auto &ref = find_slot (symtab, it.name);
233 ref = std::move (it);
237 /* Add an entry to SYMTAB. NAME is the name of the symbol.
238 CU_INDEX is the index of the CU in which the symbol appears.
239 IS_STATIC is one if the symbol is static, otherwise zero (global). */
242 add_index_entry (struct mapped_symtab *symtab, const char *name,
243 int is_static, gdb_index_symbol_kind kind,
244 offset_type cu_index)
246 offset_type cu_index_and_attrs;
248 ++symtab->n_elements;
249 if (4 * symtab->n_elements / 3 >= symtab->data.size ())
250 hash_expand (symtab);
252 symtab_index_entry &slot = find_slot (symtab, name);
253 if (slot.name == NULL)
256 /* index_offset is set later. */
259 cu_index_and_attrs = 0;
260 DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
261 DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
262 DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
264 /* We don't want to record an index value twice as we want to avoid the
266 We process all global symbols and then all static symbols
267 (which would allow us to avoid the duplication by only having to check
268 the last entry pushed), but a symbol could have multiple kinds in one CU.
269 To keep things simple we don't worry about the duplication here and
270 sort and uniquify the list after we've processed all symbols. */
271 slot.cu_indices.push_back (cu_index_and_attrs);
274 /* Sort and remove duplicates of all symbols' cu_indices lists. */
277 uniquify_cu_indices (struct mapped_symtab *symtab)
279 for (auto &entry : symtab->data)
281 if (entry.name != NULL && !entry.cu_indices.empty ())
283 auto &cu_indices = entry.cu_indices;
284 std::sort (cu_indices.begin (), cu_indices.end ());
285 auto from = std::unique (cu_indices.begin (), cu_indices.end ());
286 cu_indices.erase (from, cu_indices.end ());
291 /* A form of 'const char *' suitable for container keys. Only the
292 pointer is stored. The strings themselves are compared, not the
297 c_str_view (const char *cstr)
301 bool operator== (const c_str_view &other) const
303 return strcmp (m_cstr, other.m_cstr) == 0;
306 /* Return the underlying C string. Note, the returned string is
307 only a reference with lifetime of this object. */
308 const char *c_str () const
314 friend class c_str_view_hasher;
315 const char *const m_cstr;
318 /* A std::unordered_map::hasher for c_str_view that uses the right
319 hash function for strings in a mapped index. */
320 class c_str_view_hasher
323 size_t operator () (const c_str_view &x) const
325 return mapped_index_string_hash (INT_MAX, x.m_cstr);
329 /* A std::unordered_map::hasher for std::vector<>. */
334 size_t operator () (const std::vector<T> &key) const
336 return iterative_hash (key.data (),
337 sizeof (key.front ()) * key.size (), 0);
341 /* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
342 constant pool entries going into the data buffer CPOOL. */
345 write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
348 /* Elements are sorted vectors of the indices of all the CUs that
349 hold an object of this name. */
350 std::unordered_map<std::vector<offset_type>, offset_type,
351 vector_hasher<offset_type>>
354 /* We add all the index vectors to the constant pool first, to
355 ensure alignment is ok. */
356 for (symtab_index_entry &entry : symtab->data)
358 if (entry.name == NULL)
360 gdb_assert (entry.index_offset == 0);
362 /* Finding before inserting is faster than always trying to
363 insert, because inserting always allocates a node, does the
364 lookup, and then destroys the new node if another node
365 already had the same key. C++17 try_emplace will avoid
368 = symbol_hash_table.find (entry.cu_indices);
369 if (found != symbol_hash_table.end ())
371 entry.index_offset = found->second;
375 symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
376 entry.index_offset = cpool.size ();
377 cpool.append_offset (entry.cu_indices.size ());
378 for (const auto index : entry.cu_indices)
379 cpool.append_offset (index);
383 /* Now write out the hash table. */
384 std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
385 for (const auto &entry : symtab->data)
387 offset_type str_off, vec_off;
389 if (entry.name != NULL)
391 const auto insertpair = str_table.emplace (entry.name, cpool.size ());
392 if (insertpair.second)
393 cpool.append_cstr0 (entry.name);
394 str_off = insertpair.first->second;
395 vec_off = entry.index_offset;
399 /* While 0 is a valid constant pool index, it is not valid
400 to have 0 for both offsets. */
405 output.append_offset (str_off);
406 output.append_offset (vec_off);
410 typedef std::unordered_map<dwarf2_per_cu_data *, unsigned int> cu_index_map;
412 /* Helper struct for building the address table. */
413 struct addrmap_index_data
415 addrmap_index_data (data_buf &addr_vec_, cu_index_map &cu_index_htab_)
416 : addr_vec (addr_vec_),
417 cu_index_htab (cu_index_htab_)
421 cu_index_map &cu_index_htab;
423 int operator() (CORE_ADDR start_addr, void *obj);
425 /* True if the previous_* fields are valid.
426 We can't write an entry until we see the next entry (since it is only then
427 that we know the end of the entry). */
428 bool previous_valid = false;
429 /* Index of the CU in the table of all CUs in the index file. */
430 unsigned int previous_cu_index = 0;
431 /* Start address of the CU. */
432 CORE_ADDR previous_cu_start = 0;
435 /* Write an address entry to ADDR_VEC. */
438 add_address_entry (data_buf &addr_vec,
439 CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
441 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
442 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
443 addr_vec.append_offset (cu_index);
446 /* Worker function for traversing an addrmap to build the address table. */
449 addrmap_index_data::operator() (CORE_ADDR start_addr, void *obj)
451 dwarf2_per_cu_data *per_cu = (dwarf2_per_cu_data *) obj;
454 add_address_entry (addr_vec,
455 previous_cu_start, start_addr,
458 previous_cu_start = start_addr;
461 const auto it = cu_index_htab.find (per_cu);
462 gdb_assert (it != cu_index_htab.cend ());
463 previous_cu_index = it->second;
464 previous_valid = true;
467 previous_valid = false;
472 /* Write PER_BFD's address map to ADDR_VEC.
473 CU_INDEX_HTAB is used to map addrmap entries to their CU indices
474 in the index file. */
477 write_address_map (struct addrmap *addrmap, data_buf &addr_vec,
478 cu_index_map &cu_index_htab)
480 struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
482 addrmap->foreach (addrmap_index_data);
484 /* It's highly unlikely the last entry (end address = 0xff...ff)
485 is valid, but we should still handle it.
486 The end address is recorded as the start of the next region, but that
487 doesn't work here. To cope we pass 0xff...ff, this is a rare situation
489 if (addrmap_index_data.previous_valid)
490 add_address_entry (addr_vec,
491 addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
492 addrmap_index_data.previous_cu_index);
495 /* DWARF-5 .debug_names builder. */
499 debug_names (dwarf2_per_objfile *per_objfile, bool is_dwarf64,
500 bfd_endian dwarf5_byte_order)
501 : m_dwarf5_byte_order (dwarf5_byte_order),
502 m_dwarf32 (dwarf5_byte_order),
503 m_dwarf64 (dwarf5_byte_order),
505 ? static_cast<dwarf &> (m_dwarf64)
506 : static_cast<dwarf &> (m_dwarf32)),
507 m_name_table_string_offs (m_dwarf.name_table_string_offs),
508 m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
509 m_debugstrlookup (per_objfile)
512 int dwarf5_offset_size () const
514 const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
515 return dwarf5_is_dwarf64 ? 8 : 4;
518 /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit? */
519 enum class unit_kind { cu, tu };
521 /* Insert one symbol. */
522 void insert (const cooked_index_entry *entry)
524 const auto it = m_cu_index_htab.find (entry->per_cu);
525 gdb_assert (it != m_cu_index_htab.cend ());
526 const char *name = entry->full_name (&m_string_obstack);
528 /* This is incorrect but it mirrors gdb's historical behavior; and
529 because the current .debug_names generation is also incorrect,
530 it seems better to follow what was done before, rather than
531 introduce a mismatch between the newer and older gdb. */
532 dwarf_tag tag = entry->tag;
533 if (tag != DW_TAG_typedef && tag_is_type (tag))
534 tag = DW_TAG_structure_type;
535 else if (tag == DW_TAG_enumerator || tag == DW_TAG_constant)
536 tag = DW_TAG_variable;
538 int cu_index = it->second;
539 bool is_static = (entry->flags & IS_STATIC) != 0;
540 unit_kind kind = (entry->per_cu->is_debug_types
544 if (entry->per_cu->lang () == language_ada)
546 /* We want to ensure that the Ada main function's name appears
547 verbatim in the index. However, this name will be of the
548 form "_ada_mumble", and will be rewritten by ada_decode.
549 So, recognize it specially here and add it to the index by
551 if (strcmp (main_name (), name) == 0)
553 const auto insertpair
554 = m_name_to_value_set.emplace (c_str_view (name),
555 std::set<symbol_value> ());
556 std::set<symbol_value> &value_set = insertpair.first->second;
557 value_set.emplace (symbol_value (tag, cu_index, is_static, kind));
560 /* In order for the index to work when read back into gdb, it
561 has to supply a funny form of the name: it should be the
562 encoded name, with any suffixes stripped. Using the
563 ordinary encoded name will not work properly with the
564 searching logic in find_name_components_bounds; nor will
565 using the decoded name. Furthermore, an Ada "verbatim"
566 name (of the form "<MumBle>") must be entered without the
567 angle brackets. Note that the current index is unusual,
568 see PR symtab/24820 for details. */
569 std::string decoded = ada_decode (name);
570 if (decoded[0] == '<')
571 name = (char *) obstack_copy0 (&m_string_obstack,
572 decoded.c_str () + 1,
573 decoded.length () - 2);
575 name = obstack_strdup (&m_string_obstack,
576 ada_encode (decoded.c_str ()));
579 const auto insertpair
580 = m_name_to_value_set.emplace (c_str_view (name),
581 std::set<symbol_value> ());
582 std::set<symbol_value> &value_set = insertpair.first->second;
583 value_set.emplace (symbol_value (tag, cu_index, is_static, kind));
586 /* Build all the tables. All symbols must be already inserted.
587 This function does not call file_write, caller has to do it
591 /* Verify the build method has not been called twice. */
592 gdb_assert (m_abbrev_table.empty ());
593 const size_t name_count = m_name_to_value_set.size ();
594 m_bucket_table.resize
595 (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
596 m_hash_table.reserve (name_count);
597 m_name_table_string_offs.reserve (name_count);
598 m_name_table_entry_offs.reserve (name_count);
600 /* Map each hash of symbol to its name and value. */
604 decltype (m_name_to_value_set)::const_iterator it;
606 std::vector<std::forward_list<hash_it_pair>> bucket_hash;
607 bucket_hash.resize (m_bucket_table.size ());
608 for (decltype (m_name_to_value_set)::const_iterator it
609 = m_name_to_value_set.cbegin ();
610 it != m_name_to_value_set.cend ();
613 const char *const name = it->first.c_str ();
614 const uint32_t hash = dwarf5_djb_hash (name);
615 hash_it_pair hashitpair;
616 hashitpair.hash = hash;
618 auto &slot = bucket_hash[hash % bucket_hash.size()];
619 slot.push_front (std::move (hashitpair));
621 for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
623 const std::forward_list<hash_it_pair> &hashitlist
624 = bucket_hash[bucket_ix];
625 if (hashitlist.empty ())
627 uint32_t &bucket_slot = m_bucket_table[bucket_ix];
628 /* The hashes array is indexed starting at 1. */
629 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
630 sizeof (bucket_slot), m_dwarf5_byte_order,
631 m_hash_table.size () + 1);
632 for (const hash_it_pair &hashitpair : hashitlist)
634 m_hash_table.push_back (0);
635 store_unsigned_integer (reinterpret_cast<gdb_byte *>
636 (&m_hash_table.back ()),
637 sizeof (m_hash_table.back ()),
638 m_dwarf5_byte_order, hashitpair.hash);
639 const c_str_view &name = hashitpair.it->first;
640 const std::set<symbol_value> &value_set = hashitpair.it->second;
641 m_name_table_string_offs.push_back_reorder
642 (m_debugstrlookup.lookup (name.c_str ()));
643 m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
644 gdb_assert (!value_set.empty ());
645 for (const symbol_value &value : value_set)
647 int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
653 m_abbrev_table.append_unsigned_leb128 (idx);
654 m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
655 m_abbrev_table.append_unsigned_leb128
656 (value.kind == unit_kind::cu ? DW_IDX_compile_unit
658 m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
659 m_abbrev_table.append_unsigned_leb128 (value.is_static
660 ? DW_IDX_GNU_internal
661 : DW_IDX_GNU_external);
662 m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
664 /* Terminate attributes list. */
665 m_abbrev_table.append_unsigned_leb128 (0);
666 m_abbrev_table.append_unsigned_leb128 (0);
669 m_entry_pool.append_unsigned_leb128 (idx);
670 m_entry_pool.append_unsigned_leb128 (value.cu_index);
673 /* Terminate the list of CUs. */
674 m_entry_pool.append_unsigned_leb128 (0);
677 gdb_assert (m_hash_table.size () == name_count);
679 /* Terminate tags list. */
680 m_abbrev_table.append_unsigned_leb128 (0);
683 /* Return .debug_names bucket count. This must be called only after
684 calling the build method. */
685 uint32_t bucket_count () const
687 /* Verify the build method has been already called. */
688 gdb_assert (!m_abbrev_table.empty ());
689 const uint32_t retval = m_bucket_table.size ();
691 /* Check for overflow. */
692 gdb_assert (retval == m_bucket_table.size ());
696 /* Return .debug_names names count. This must be called only after
697 calling the build method. */
698 uint32_t name_count () const
700 /* Verify the build method has been already called. */
701 gdb_assert (!m_abbrev_table.empty ());
702 const uint32_t retval = m_hash_table.size ();
704 /* Check for overflow. */
705 gdb_assert (retval == m_hash_table.size ());
709 /* Return number of bytes of .debug_names abbreviation table. This
710 must be called only after calling the build method. */
711 uint32_t abbrev_table_bytes () const
713 gdb_assert (!m_abbrev_table.empty ());
714 return m_abbrev_table.size ();
717 /* Return number of bytes the .debug_names section will have. This
718 must be called only after calling the build method. */
719 size_t bytes () const
721 /* Verify the build method has been already called. */
722 gdb_assert (!m_abbrev_table.empty ());
723 size_t expected_bytes = 0;
724 expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
725 expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
726 expected_bytes += m_name_table_string_offs.bytes ();
727 expected_bytes += m_name_table_entry_offs.bytes ();
728 expected_bytes += m_abbrev_table.size ();
729 expected_bytes += m_entry_pool.size ();
730 return expected_bytes;
733 /* Write .debug_names to FILE_NAMES and .debug_str addition to
734 FILE_STR. This must be called only after calling the build
736 void file_write (FILE *file_names, FILE *file_str) const
738 /* Verify the build method has been already called. */
739 gdb_assert (!m_abbrev_table.empty ());
740 ::file_write (file_names, m_bucket_table);
741 ::file_write (file_names, m_hash_table);
742 m_name_table_string_offs.file_write (file_names);
743 m_name_table_entry_offs.file_write (file_names);
744 m_abbrev_table.file_write (file_names);
745 m_entry_pool.file_write (file_names);
746 m_debugstrlookup.file_write (file_str);
749 void add_cu (dwarf2_per_cu_data *per_cu, offset_type index)
751 m_cu_index_htab.emplace (per_cu, index);
756 /* Storage for symbol names mapping them to their .debug_str section
758 class debug_str_lookup
762 /* Object constructor to be called for current DWARF2_PER_OBJFILE.
763 All .debug_str section strings are automatically stored. */
764 debug_str_lookup (dwarf2_per_objfile *per_objfile)
765 : m_abfd (per_objfile->objfile->obfd.get ()),
766 m_per_objfile (per_objfile)
768 per_objfile->per_bfd->str.read (per_objfile->objfile);
769 if (per_objfile->per_bfd->str.buffer == NULL)
771 for (const gdb_byte *data = per_objfile->per_bfd->str.buffer;
772 data < (per_objfile->per_bfd->str.buffer
773 + per_objfile->per_bfd->str.size);)
775 const char *const s = reinterpret_cast<const char *> (data);
776 const auto insertpair
777 = m_str_table.emplace (c_str_view (s),
778 data - per_objfile->per_bfd->str.buffer);
779 if (!insertpair.second)
780 complaint (_("Duplicate string \"%s\" in "
781 ".debug_str section [in module %s]"),
782 s, bfd_get_filename (m_abfd));
783 data += strlen (s) + 1;
787 /* Return offset of symbol name S in the .debug_str section. Add
788 such symbol to the section's end if it does not exist there
790 size_t lookup (const char *s)
792 const auto it = m_str_table.find (c_str_view (s));
793 if (it != m_str_table.end ())
795 const size_t offset = (m_per_objfile->per_bfd->str.size
796 + m_str_add_buf.size ());
797 m_str_table.emplace (c_str_view (s), offset);
798 m_str_add_buf.append_cstr0 (s);
802 /* Append the end of the .debug_str section to FILE. */
803 void file_write (FILE *file) const
805 m_str_add_buf.file_write (file);
809 std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
811 dwarf2_per_objfile *m_per_objfile;
813 /* Data to add at the end of .debug_str for new needed symbol names. */
814 data_buf m_str_add_buf;
817 /* Container to map used DWARF tags to their .debug_names abbreviation
822 index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
823 : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
828 operator== (const index_key &other) const
830 return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
831 && kind == other.kind);
835 const bool is_static;
836 const unit_kind kind;
839 /* Provide std::unordered_map::hasher for index_key. */
840 class index_key_hasher
844 operator () (const index_key &key) const
846 return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
850 /* Parameters of one symbol entry. */
854 const int dwarf_tag, cu_index;
855 const bool is_static;
856 const unit_kind kind;
858 symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
860 : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
865 operator< (const symbol_value &other) const
885 /* Abstract base class to unify DWARF-32 and DWARF-64 name table
890 const bfd_endian dwarf5_byte_order;
892 explicit offset_vec (bfd_endian dwarf5_byte_order_)
893 : dwarf5_byte_order (dwarf5_byte_order_)
896 /* Call std::vector::reserve for NELEM elements. */
897 virtual void reserve (size_t nelem) = 0;
899 /* Call std::vector::push_back with store_unsigned_integer byte
900 reordering for ELEM. */
901 virtual void push_back_reorder (size_t elem) = 0;
903 /* Return expected output size in bytes. */
904 virtual size_t bytes () const = 0;
906 /* Write name table to FILE. */
907 virtual void file_write (FILE *file) const = 0;
910 /* Template to unify DWARF-32 and DWARF-64 output. */
911 template<typename OffsetSize>
912 class offset_vec_tmpl : public offset_vec
915 explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
916 : offset_vec (dwarf5_byte_order_)
919 /* Implement offset_vec::reserve. */
920 void reserve (size_t nelem) override
922 m_vec.reserve (nelem);
925 /* Implement offset_vec::push_back_reorder. */
926 void push_back_reorder (size_t elem) override
928 m_vec.push_back (elem);
929 /* Check for overflow. */
930 gdb_assert (m_vec.back () == elem);
931 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
932 sizeof (m_vec.back ()), dwarf5_byte_order, elem);
935 /* Implement offset_vec::bytes. */
936 size_t bytes () const override
938 return m_vec.size () * sizeof (m_vec[0]);
941 /* Implement offset_vec::file_write. */
942 void file_write (FILE *file) const override
944 ::file_write (file, m_vec);
948 std::vector<OffsetSize> m_vec;
951 /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
952 respecting name table width. */
956 offset_vec &name_table_string_offs, &name_table_entry_offs;
958 dwarf (offset_vec &name_table_string_offs_,
959 offset_vec &name_table_entry_offs_)
960 : name_table_string_offs (name_table_string_offs_),
961 name_table_entry_offs (name_table_entry_offs_)
966 /* Template to unify DWARF-32 and DWARF-64 .debug_names output
967 respecting name table width. */
968 template<typename OffsetSize>
969 class dwarf_tmpl : public dwarf
972 explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
973 : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
974 m_name_table_string_offs (dwarf5_byte_order_),
975 m_name_table_entry_offs (dwarf5_byte_order_)
979 offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
980 offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
983 /* Store value of each symbol. */
984 std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
987 /* Tables of DWARF-5 .debug_names. They are in object file byte
989 std::vector<uint32_t> m_bucket_table;
990 std::vector<uint32_t> m_hash_table;
992 const bfd_endian m_dwarf5_byte_order;
993 dwarf_tmpl<uint32_t> m_dwarf32;
994 dwarf_tmpl<uint64_t> m_dwarf64;
996 offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
997 debug_str_lookup m_debugstrlookup;
999 /* Map each used .debug_names abbreviation tag parameter to its
1001 std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
1003 /* Next unused .debug_names abbreviation tag for
1004 m_indexkey_to_idx. */
1007 /* .debug_names abbreviation table. */
1008 data_buf m_abbrev_table;
1010 /* .debug_names entry pool. */
1011 data_buf m_entry_pool;
1013 /* Temporary storage for Ada names. */
1014 auto_obstack m_string_obstack;
1016 cu_index_map m_cu_index_htab;
1019 /* Return true iff any of the needed offsets does not fit into 32-bit
1020 .debug_names section. */
1023 check_dwarf64_offsets (dwarf2_per_objfile *per_objfile)
1025 for (const auto &per_cu : per_objfile->per_bfd->all_units)
1027 if (to_underlying (per_cu->sect_off)
1028 >= (static_cast<uint64_t> (1) << 32))
1034 /* Assert that FILE's size is EXPECTED_SIZE. Assumes file's seek
1035 position is at the end of the file. */
1038 assert_file_size (FILE *file, size_t expected_size)
1040 const auto file_size = ftell (file);
1041 if (file_size == -1)
1042 perror_with_name (("ftell"));
1043 gdb_assert (file_size == expected_size);
1046 /* Write a gdb index file to OUT_FILE from all the sections passed as
1050 write_gdbindex_1 (FILE *out_file,
1051 const data_buf &cu_list,
1052 const data_buf &types_cu_list,
1053 const data_buf &addr_vec,
1054 const data_buf &symtab_vec,
1055 const data_buf &constant_pool)
1058 const offset_type size_of_header = 6 * sizeof (offset_type);
1059 offset_type total_len = size_of_header;
1061 /* The version number. */
1062 contents.append_offset (8);
1064 /* The offset of the CU list from the start of the file. */
1065 contents.append_offset (total_len);
1066 total_len += cu_list.size ();
1068 /* The offset of the types CU list from the start of the file. */
1069 contents.append_offset (total_len);
1070 total_len += types_cu_list.size ();
1072 /* The offset of the address table from the start of the file. */
1073 contents.append_offset (total_len);
1074 total_len += addr_vec.size ();
1076 /* The offset of the symbol table from the start of the file. */
1077 contents.append_offset (total_len);
1078 total_len += symtab_vec.size ();
1080 /* The offset of the constant pool from the start of the file. */
1081 contents.append_offset (total_len);
1082 total_len += constant_pool.size ();
1084 gdb_assert (contents.size () == size_of_header);
1086 contents.file_write (out_file);
1087 cu_list.file_write (out_file);
1088 types_cu_list.file_write (out_file);
1089 addr_vec.file_write (out_file);
1090 symtab_vec.file_write (out_file);
1091 constant_pool.file_write (out_file);
1093 assert_file_size (out_file, total_len);
1096 /* Write the contents of the internal "cooked" index. */
1099 write_cooked_index (cooked_index_vector *table,
1100 const cu_index_map &cu_index_htab,
1101 struct mapped_symtab *symtab)
1103 /* We track type names and only enter a given type once. */
1104 htab_up type_names (htab_create_alloc (10, htab_hash_string, htab_eq_string,
1105 nullptr, xcalloc, xfree));
1106 /* Same with variable names. However, if a type and variable share
1107 a name, we want both, which is why there are two hash tables
1109 htab_up var_names (htab_create_alloc (10, htab_hash_string, htab_eq_string,
1110 nullptr, xcalloc, xfree));
1112 const char *main_for_ada = main_name ();
1114 for (const cooked_index_entry *entry : table->all_entries ())
1116 const auto it = cu_index_htab.find (entry->per_cu);
1117 gdb_assert (it != cu_index_htab.cend ());
1119 const char *name = entry->full_name (&symtab->m_string_obstack);
1121 if (entry->per_cu->lang () == language_ada)
1123 /* We want to ensure that the Ada main function's name
1124 appears verbatim in the index. However, this name will
1125 be of the form "_ada_mumble", and will be rewritten by
1126 ada_decode. So, recognize it specially here and add it
1127 to the index by hand. */
1128 if (entry->tag == DW_TAG_subprogram
1129 && strcmp (main_for_ada, name) == 0)
1131 /* Leave it alone. */
1135 /* In order for the index to work when read back into
1136 gdb, it has to use the encoded name, with any
1137 suffixes stripped. */
1138 std::string encoded = ada_encode (name, false);
1139 name = obstack_strdup (&symtab->m_string_obstack,
1143 else if (entry->per_cu->lang () == language_cplus
1144 && (entry->flags & IS_LINKAGE) != 0)
1146 /* GDB never put C++ linkage names into .gdb_index. The
1147 theory here is that a linkage name will normally be in
1148 the minimal symbols anyway, so including it in the index
1149 is usually redundant -- and the cases where it would not
1150 be redundant are rare and not worth supporting. */
1154 gdb_index_symbol_kind kind;
1155 if (entry->tag == DW_TAG_subprogram)
1156 kind = GDB_INDEX_SYMBOL_KIND_FUNCTION;
1157 else if (entry->tag == DW_TAG_variable
1158 || entry->tag == DW_TAG_constant
1159 || entry->tag == DW_TAG_enumerator)
1161 kind = GDB_INDEX_SYMBOL_KIND_VARIABLE;
1162 void **slot = htab_find_slot (var_names.get (), name, INSERT);
1163 if (*slot != nullptr)
1165 *slot = (void *) name;
1167 else if (entry->tag == DW_TAG_module
1168 || entry->tag == DW_TAG_common_block)
1169 kind = GDB_INDEX_SYMBOL_KIND_OTHER;
1172 kind = GDB_INDEX_SYMBOL_KIND_TYPE;
1173 void **slot = htab_find_slot (type_names.get (), name, INSERT);
1174 if (*slot != nullptr)
1176 *slot = (void *) name;
1179 add_index_entry (symtab, name, (entry->flags & IS_STATIC) != 0,
1184 /* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1185 If OBJFILE has an associated dwz file, write contents of a .gdb_index
1186 section for that dwz file into DWZ_OUT_FILE. If OBJFILE does not have an
1187 associated dwz file, DWZ_OUT_FILE must be NULL. */
1190 write_gdbindex (dwarf2_per_objfile *per_objfile,
1191 cooked_index_vector *table,
1192 FILE *out_file, FILE *dwz_out_file)
1194 mapped_symtab symtab;
1195 data_buf objfile_cu_list;
1196 data_buf dwz_cu_list;
1198 /* While we're scanning CU's create a table that maps a dwarf2_per_cu_data
1199 (which is what addrmap records) to its index (which is what is recorded
1200 in the index file). This will later be needed to write the address
1202 cu_index_map cu_index_htab;
1203 cu_index_htab.reserve (per_objfile->per_bfd->all_units.size ());
1205 /* Store out the .debug_type CUs, if any. */
1206 data_buf types_cu_list;
1208 /* The CU list is already sorted, so we don't need to do additional
1209 work here. Also, the debug_types entries do not appear in
1210 all_units, but only in their own hash table. */
1213 int types_counter = 0;
1214 for (int i = 0; i < per_objfile->per_bfd->all_units.size (); ++i)
1216 dwarf2_per_cu_data *per_cu
1217 = per_objfile->per_bfd->all_units[i].get ();
1219 int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1221 const auto insertpair = cu_index_htab.emplace (per_cu, this_counter);
1222 gdb_assert (insertpair.second);
1224 /* The all_units list contains CUs read from the objfile as well as
1225 from the eventual dwz file. We need to place the entry in the
1226 corresponding index. */
1227 data_buf &cu_list = (per_cu->is_debug_types
1229 : per_cu->is_dwz ? dwz_cu_list : objfile_cu_list);
1230 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1231 to_underlying (per_cu->sect_off));
1232 if (per_cu->is_debug_types)
1234 signatured_type *sig_type = (signatured_type *) per_cu;
1235 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1236 to_underlying (sig_type->type_offset_in_tu));
1237 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1238 sig_type->signature);
1241 cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length ());
1246 write_cooked_index (table, cu_index_htab, &symtab);
1248 /* Dump the address map. */
1250 for (auto map : table->get_addrmaps ())
1251 write_address_map (map, addr_vec, cu_index_htab);
1253 /* Now that we've processed all symbols we can shrink their cu_indices
1255 uniquify_cu_indices (&symtab);
1257 data_buf symtab_vec, constant_pool;
1258 if (symtab.n_elements == 0)
1259 symtab.data.resize (0);
1261 write_hash_table (&symtab, symtab_vec, constant_pool);
1263 write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1264 symtab_vec, constant_pool);
1266 if (dwz_out_file != NULL)
1267 write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1269 gdb_assert (dwz_cu_list.empty ());
1272 /* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension. */
1273 static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1275 /* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1276 needed addition to .debug_str section to OUT_FILE_STR. Return how
1277 many bytes were expected to be written into OUT_FILE. */
1280 write_debug_names (dwarf2_per_objfile *per_objfile,
1281 cooked_index_vector *table,
1282 FILE *out_file, FILE *out_file_str)
1284 const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (per_objfile);
1285 struct objfile *objfile = per_objfile->objfile;
1286 const enum bfd_endian dwarf5_byte_order
1287 = gdbarch_byte_order (objfile->arch ());
1289 /* The CU list is already sorted, so we don't need to do additional
1290 work here. Also, the debug_types entries do not appear in
1291 all_units, but only in their own hash table. */
1293 data_buf types_cu_list;
1294 debug_names nametable (per_objfile, dwarf5_is_dwarf64, dwarf5_byte_order);
1296 int types_counter = 0;
1297 for (int i = 0; i < per_objfile->per_bfd->all_units.size (); ++i)
1299 dwarf2_per_cu_data *per_cu
1300 = per_objfile->per_bfd->all_units[i].get ();
1302 int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1303 data_buf &this_list = per_cu->is_debug_types ? types_cu_list : cu_list;
1305 nametable.add_cu (per_cu, this_counter);
1306 this_list.append_uint (nametable.dwarf5_offset_size (),
1308 to_underlying (per_cu->sect_off));
1312 /* Verify that all units are represented. */
1313 gdb_assert (counter == per_objfile->per_bfd->all_comp_units.size ());
1314 gdb_assert (types_counter == per_objfile->per_bfd->all_type_units.size ());
1316 for (const cooked_index_entry *entry : table->all_entries ())
1317 nametable.insert (entry);
1321 /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC. */
1323 const offset_type bytes_of_header
1324 = ((dwarf5_is_dwarf64 ? 12 : 4)
1326 + sizeof (dwarf5_gdb_augmentation));
1327 size_t expected_bytes = 0;
1328 expected_bytes += bytes_of_header;
1329 expected_bytes += cu_list.size ();
1330 expected_bytes += types_cu_list.size ();
1331 expected_bytes += nametable.bytes ();
1334 if (!dwarf5_is_dwarf64)
1336 const uint64_t size64 = expected_bytes - 4;
1337 gdb_assert (size64 < 0xfffffff0);
1338 header.append_uint (4, dwarf5_byte_order, size64);
1342 header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1343 header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1346 /* The version number. */
1347 header.append_uint (2, dwarf5_byte_order, 5);
1350 header.append_uint (2, dwarf5_byte_order, 0);
1352 /* comp_unit_count - The number of CUs in the CU list. */
1353 header.append_uint (4, dwarf5_byte_order, counter);
1355 /* local_type_unit_count - The number of TUs in the local TU
1357 header.append_uint (4, dwarf5_byte_order, types_counter);
1359 /* foreign_type_unit_count - The number of TUs in the foreign TU
1361 header.append_uint (4, dwarf5_byte_order, 0);
1363 /* bucket_count - The number of hash buckets in the hash lookup
1365 header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1367 /* name_count - The number of unique names in the index. */
1368 header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1370 /* abbrev_table_size - The size in bytes of the abbreviations
1372 header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1374 /* augmentation_string_size - The size in bytes of the augmentation
1375 string. This value is rounded up to a multiple of 4. */
1376 static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1377 header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
1378 header.append_array (dwarf5_gdb_augmentation);
1380 gdb_assert (header.size () == bytes_of_header);
1382 header.file_write (out_file);
1383 cu_list.file_write (out_file);
1384 types_cu_list.file_write (out_file);
1385 nametable.file_write (out_file, out_file_str);
1387 assert_file_size (out_file, expected_bytes);
1390 /* This represents an index file being written (work-in-progress).
1392 The data is initially written to a temporary file. When the finalize method
1393 is called, the file is closed and moved to its final location.
1395 On failure (if this object is being destroyed with having called finalize),
1396 the temporary file is closed and deleted. */
1398 struct index_wip_file
1400 index_wip_file (const char *dir, const char *basename,
1403 filename = (std::string (dir) + SLASH_STRING + basename
1406 filename_temp = make_temp_filename (filename);
1408 scoped_fd out_file_fd = gdb_mkostemp_cloexec (filename_temp.data (),
1410 if (out_file_fd.get () == -1)
1411 perror_with_name (("mkstemp"));
1413 out_file = out_file_fd.to_file ("wb");
1415 if (out_file == nullptr)
1416 error (_("Can't open `%s' for writing"), filename_temp.data ());
1418 unlink_file.emplace (filename_temp.data ());
1423 /* We want to keep the file. */
1424 unlink_file->keep ();
1426 /* Close and move the str file in place. */
1427 unlink_file.reset ();
1428 if (rename (filename_temp.data (), filename.c_str ()) != 0)
1429 perror_with_name (("rename"));
1432 std::string filename;
1433 gdb::char_vector filename_temp;
1435 /* Order matters here; we want FILE to be closed before
1436 FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1437 delete a file that is still open. So, we wrap the unlinker in an
1438 optional and emplace it once we know the file name. */
1439 gdb::optional<gdb::unlinker> unlink_file;
1441 gdb_file_up out_file;
1444 /* See dwarf-index-write.h. */
1447 write_dwarf_index (dwarf2_per_objfile *per_objfile, const char *dir,
1448 const char *basename, const char *dwz_basename,
1449 dw_index_kind index_kind)
1451 struct objfile *objfile = per_objfile->objfile;
1453 if (per_objfile->per_bfd->index_table == nullptr)
1454 error (_("No debugging symbols"));
1455 cooked_index_vector *table
1456 = per_objfile->per_bfd->index_table->index_for_writing ();
1458 if (per_objfile->per_bfd->types.size () > 1)
1459 error (_("Cannot make an index when the file has multiple .debug_types sections"));
1462 gdb_assert ((objfile->flags & OBJF_NOT_FILENAME) == 0);
1464 const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1465 ? INDEX5_SUFFIX : INDEX4_SUFFIX);
1467 index_wip_file objfile_index_wip (dir, basename, index_suffix);
1468 gdb::optional<index_wip_file> dwz_index_wip;
1470 if (dwz_basename != NULL)
1471 dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
1473 if (index_kind == dw_index_kind::DEBUG_NAMES)
1475 index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1477 write_debug_names (per_objfile, table, objfile_index_wip.out_file.get (),
1478 str_wip_file.out_file.get ());
1480 str_wip_file.finalize ();
1483 write_gdbindex (per_objfile, table, objfile_index_wip.out_file.get (),
1484 (dwz_index_wip.has_value ()
1485 ? dwz_index_wip->out_file.get () : NULL));
1487 objfile_index_wip.finalize ();
1489 if (dwz_index_wip.has_value ())
1490 dwz_index_wip->finalize ();
1493 /* Implementation of the `save gdb-index' command.
1495 Note that the .gdb_index file format used by this command is
1496 documented in the GDB manual. Any changes here must be documented
1500 save_gdb_index_command (const char *arg, int from_tty)
1502 const char dwarf5space[] = "-dwarf-5 ";
1503 dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1508 arg = skip_spaces (arg);
1509 if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1511 index_kind = dw_index_kind::DEBUG_NAMES;
1512 arg += strlen (dwarf5space);
1513 arg = skip_spaces (arg);
1517 error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1519 for (objfile *objfile : current_program_space->objfiles ())
1521 /* If the objfile does not correspond to an actual file, skip it. */
1522 if ((objfile->flags & OBJF_NOT_FILENAME) != 0)
1525 dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile);
1527 if (per_objfile != NULL)
1531 const char *basename = lbasename (objfile_name (objfile));
1532 const dwz_file *dwz = dwarf2_get_dwz_file (per_objfile->per_bfd);
1533 const char *dwz_basename = NULL;
1536 dwz_basename = lbasename (dwz->filename ());
1538 write_dwarf_index (per_objfile, arg, basename, dwz_basename,
1541 catch (const gdb_exception_error &except)
1543 exception_fprintf (gdb_stderr, except,
1544 _("Error while writing index for `%s': "),
1545 objfile_name (objfile));
/* Register the `gdb-index' command.  The command is implemented by
   save_gdb_index_command, belongs to command class class_files, and
   uses filename_completer to complete its argument.
   NOTE(review): by the `_initialize_*' naming this is presumably run
   once at GDB startup, and the usage text suggests the command is
   added to the `save' prefix list; the list argument of add_cmd is not
   visible here -- confirm.  */
1552 void _initialize_dwarf_index_write ();
1554 _initialize_dwarf_index_write ()
1556 cmd_list_element *c = add_cmd ("gdb-index", class_files,
1557 save_gdb_index_command, _("\
1558 Save a gdb-index file.\n\
1559 Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1561 No options create one file with .gdb-index extension for pre-DWARF-5\n\
1562 compatible .gdb_index section. With -dwarf-5 creates two files with\n\
1563 extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
1565 set_cmd_completer (c, filename_completer);