gdb/dwarf2/index-write.c

   1 /* DWARF index writing support for GDB.
   2
   3    Copyright (C) 1994-2022 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "defs.h"
  21
  22 #include "dwarf2/index-write.h"
  23
  24 #include "addrmap.h"
  25 #include "cli/cli-decode.h"
  26 #include "gdbsupport/byte-vector.h"
  27 #include "gdbsupport/filestuff.h"
  28 #include "gdbsupport/gdb_unlinker.h"
  29 #include "gdbsupport/pathstuff.h"
  30 #include "gdbsupport/scoped_fd.h"
  31 #include "complaints.h"
  32 #include "dwarf2/index-common.h"
  33 #include "dwarf2.h"
  34 #include "dwarf2/read.h"
  35 #include "dwarf2/dwz.h"
  36 #include "gdb/gdb-index.h"
  37 #include "gdbcmd.h"
  38 #include "objfiles.h"
  39 #include "psympriv.h"
  40 #include "ada-lang.h"
  41
  42 #include <algorithm>
  43 #include <cmath>
  44 #include <forward_list>
  45 #include <set>
  46 #include <unordered_map>
  47 #include <unordered_set>
  48
  49 /* Ensure only legit values are used.  */
  50 #define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \
  51   do { \
  52     gdb_assert ((unsigned int) (value) <= 1); \
  53     GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \
  54   } while (0)
  55
  56 /* Ensure only legit values are used.  */
  57 #define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \
  58   do { \
  59     gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \
  60                 && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
  61     GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \
  62   } while (0)
  63
  64 /* Ensure we don't use more than the allotted number of bits for the CU.  */
  65 #define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \
  66   do { \
  67     gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \
  68     GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \
  69   } while (0)
  70
  71 /* The "save gdb-index" command.  */
  72
  73 /* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
  74    error checking.  */
  75
  76 static void
  77 file_write (FILE *file, const void *data, size_t size)
  78 {
  79   if (fwrite (data, 1, size, file) != size)
  80     error (_("couldn't data write to file"));
  81 }
  82
  83 /* Write the contents of VEC to FILE, with error checking.  */
  84
  85 template<typename Elem, typename Alloc>
  86 static void
  87 file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
  88 {
  89   if (!vec.empty ())
  90     file_write (file, vec.data (), vec.size () * sizeof (vec[0]));
  91 }
  92
  93 /* In-memory buffer to prepare data to be written later to a file.  */
  94 class data_buf
  95 {
  96 public:
  97   /* Copy ARRAY to the end of the buffer.  */
  98   void append_array (gdb::array_view<const gdb_byte> array)
  99   {
 100     std::copy (array.begin (), array.end (), grow (array.size ()));
 101   }
 102
 103   /* Copy CSTR (a zero-terminated string) to the end of buffer.  The
 104      terminating zero is appended too.  */
 105   void append_cstr0 (const char *cstr)
 106   {
 107     const size_t size = strlen (cstr) + 1;
 108     std::copy (cstr, cstr + size, grow (size));
 109   }
 110
 111   /* Store INPUT as ULEB128 to the end of buffer.  */
 112   void append_unsigned_leb128 (ULONGEST input)
 113   {
 114     for (;;)
 115       {
 116         gdb_byte output = input & 0x7f;
 117         input >>= 7;
 118         if (input)
 119           output |= 0x80;
 120         m_vec.push_back (output);
 121         if (input == 0)
 122           break;
 123       }
 124   }
 125
 126   /* Accept a host-format integer in VAL and append it to the buffer
 127      as a target-format integer which is LEN bytes long.  */
 128   void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
 129   {
 130     ::store_unsigned_integer (grow (len), len, byte_order, val);
 131   }
 132
 133   /* Copy VALUE to the end of the buffer, little-endian.  */
 134   void append_offset (offset_type value)
 135   {
 136     append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
 137   }
 138
 139   /* Return the size of the buffer.  */
 140   size_t size () const
 141   {
 142     return m_vec.size ();
 143   }
 144
 145   /* Return true iff the buffer is empty.  */
 146   bool empty () const
 147   {
 148     return m_vec.empty ();
 149   }
 150
 151   /* Write the buffer to FILE.  */
 152   void file_write (FILE *file) const
 153   {
 154     ::file_write (file, m_vec);
 155   }
 156
 157 private:
 158   /* Grow SIZE bytes at the end of the buffer.  Returns a pointer to
 159      the start of the new block.  */
 160   gdb_byte *grow (size_t size)
 161   {
 162     m_vec.resize (m_vec.size () + size);
 163     return &*(m_vec.end () - size);
 164   }
 165
 166   gdb::byte_vector m_vec;
 167 };
 168
 169 /* An entry in the symbol table.  */
 170 struct symtab_index_entry
 171 {
 172   /* The name of the symbol.  */
 173   const char *name;
 174   /* The offset of the name in the constant pool.  */
 175   offset_type index_offset;
 176   /* A sorted vector of the indices of all the CUs that hold an object
 177      of this name.  */
 178   std::vector<offset_type> cu_indices;
 179 };
 180
 181 /* The symbol table.  This is a power-of-2-sized hash table.  */
 182 struct mapped_symtab
 183 {
 184   mapped_symtab ()
 185   {
 186     data.resize (1024);
 187   }
 188
 189   offset_type n_elements = 0;
 190   std::vector<symtab_index_entry> data;
 191
 192   /* Temporary storage for Ada names.  */
 193   auto_obstack m_string_obstack;
 194 };
 195
 196 /* Find a slot in SYMTAB for the symbol NAME.  Returns a reference to
 197    the slot.
 198
 199    Function is used only during write_hash_table so no index format backward
 200    compatibility is needed.  */
 201
 202 static symtab_index_entry &
 203 find_slot (struct mapped_symtab *symtab, const char *name)
 204 {
 205   offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
 206
 207   index = hash & (symtab->data.size () - 1);
 208   step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
 209
 210   for (;;)
 211     {
 212       if (symtab->data[index].name == NULL
 213           || strcmp (name, symtab->data[index].name) == 0)
 214         return symtab->data[index];
 215       index = (index + step) & (symtab->data.size () - 1);
 216     }
 217 }
 218
 219 /* Expand SYMTAB's hash table.  */
 220
 221 static void
 222 hash_expand (struct mapped_symtab *symtab)
 223 {
 224   auto old_entries = std::move (symtab->data);
 225
 226   symtab->data.clear ();
 227   symtab->data.resize (old_entries.size () * 2);
 228
 229   for (auto &it : old_entries)
 230     if (it.name != NULL)
 231       {
 232         auto &ref = find_slot (symtab, it.name);
 233         ref = std::move (it);
 234       }
 235 }
 236
 237 /* Add an entry to SYMTAB.  NAME is the name of the symbol.
 238    CU_INDEX is the index of the CU in which the symbol appears.
 239    IS_STATIC is one if the symbol is static, otherwise zero (global).  */
 240
 241 static void
 242 add_index_entry (struct mapped_symtab *symtab, const char *name,
 243                  int is_static, gdb_index_symbol_kind kind,
 244                  offset_type cu_index)
 245 {
 246   offset_type cu_index_and_attrs;
 247
 248   ++symtab->n_elements;
 249   if (4 * symtab->n_elements / 3 >= symtab->data.size ())
 250     hash_expand (symtab);
 251
 252   symtab_index_entry &slot = find_slot (symtab, name);
 253   if (slot.name == NULL)
 254     {
 255       slot.name = name;
 256       /* index_offset is set later.  */
 257     }
 258
 259   cu_index_and_attrs = 0;
 260   DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
 261   DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
 262   DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
 263
 264   /* We don't want to record an index value twice as we want to avoid the
 265      duplication.
 266      We process all global symbols and then all static symbols
 267      (which would allow us to avoid the duplication by only having to check
 268      the last entry pushed), but a symbol could have multiple kinds in one CU.
 269      To keep things simple we don't worry about the duplication here and
 270      sort and uniquify the list after we've processed all symbols.  */
 271   slot.cu_indices.push_back (cu_index_and_attrs);
 272 }
 273
 274 /* Sort and remove duplicates of all symbols' cu_indices lists.  */
 275
 276 static void
 277 uniquify_cu_indices (struct mapped_symtab *symtab)
 278 {
 279   for (auto &entry : symtab->data)
 280     {
 281       if (entry.name != NULL && !entry.cu_indices.empty ())
 282         {
 283           auto &cu_indices = entry.cu_indices;
 284           std::sort (cu_indices.begin (), cu_indices.end ());
 285           auto from = std::unique (cu_indices.begin (), cu_indices.end ());
 286           cu_indices.erase (from, cu_indices.end ());
 287         }
 288     }
 289 }
 290
 291 /* A form of 'const char *' suitable for container keys.  Only the
 292    pointer is stored.  The strings themselves are compared, not the
 293    pointers.  */
 294 class c_str_view
 295 {
 296 public:
 297   c_str_view (const char *cstr)
 298     : m_cstr (cstr)
 299   {}
 300
 301   bool operator== (const c_str_view &other) const
 302   {
 303     return strcmp (m_cstr, other.m_cstr) == 0;
 304   }
 305
 306   /* Return the underlying C string.  Note, the returned string is
 307      only a reference with lifetime of this object.  */
 308   const char *c_str () const
 309   {
 310     return m_cstr;
 311   }
 312
 313 private:
 314   friend class c_str_view_hasher;
 315   const char *const m_cstr;
 316 };
 317
 318 /* A std::unordered_map::hasher for c_str_view that uses the right
 319    hash function for strings in a mapped index.  */
 320 class c_str_view_hasher
 321 {
 322 public:
 323   size_t operator () (const c_str_view &x) const
 324   {
 325     return mapped_index_string_hash (INT_MAX, x.m_cstr);
 326   }
 327 };
 328
 329 /* A std::unordered_map::hasher for std::vector<>.  */
 330 template<typename T>
 331 class vector_hasher
 332 {
 333 public:
 334   size_t operator () (const std::vector<T> &key) const
 335   {
 336     return iterative_hash (key.data (),
 337                            sizeof (key.front ()) * key.size (), 0);
 338   }
 339 };
 340
 341 /* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
 342    constant pool entries going into the data buffer CPOOL.  */
 343
 344 static void
 345 write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
 346 {
 347   {
 348     /* Elements are sorted vectors of the indices of all the CUs that
 349        hold an object of this name.  */
 350     std::unordered_map<std::vector<offset_type>, offset_type,
 351                        vector_hasher<offset_type>>
 352       symbol_hash_table;
 353
 354     /* We add all the index vectors to the constant pool first, to
 355        ensure alignment is ok.  */
 356     for (symtab_index_entry &entry : symtab->data)
 357       {
 358         if (entry.name == NULL)
 359           continue;
 360         gdb_assert (entry.index_offset == 0);
 361
 362         /* Finding before inserting is faster than always trying to
 363            insert, because inserting always allocates a node, does the
 364            lookup, and then destroys the new node if another node
 365            already had the same key.  C++17 try_emplace will avoid
 366            this.  */
 367         const auto found
 368           = symbol_hash_table.find (entry.cu_indices);
 369         if (found != symbol_hash_table.end ())
 370           {
 371             entry.index_offset = found->second;
 372             continue;
 373           }
 374
 375         symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
 376         entry.index_offset = cpool.size ();
 377         cpool.append_offset (entry.cu_indices.size ());
 378         for (const auto index : entry.cu_indices)
 379           cpool.append_offset (index);
 380       }
 381   }
 382
 383   /* Now write out the hash table.  */
 384   std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
 385   for (const auto &entry : symtab->data)
 386     {
 387       offset_type str_off, vec_off;
 388
 389       if (entry.name != NULL)
 390         {
 391           const auto insertpair = str_table.emplace (entry.name, cpool.size ());
 392           if (insertpair.second)
 393             cpool.append_cstr0 (entry.name);
 394           str_off = insertpair.first->second;
 395           vec_off = entry.index_offset;
 396         }
 397       else
 398         {
 399           /* While 0 is a valid constant pool index, it is not valid
 400              to have 0 for both offsets.  */
 401           str_off = 0;
 402           vec_off = 0;
 403         }
 404
 405       output.append_offset (str_off);
 406       output.append_offset (vec_off);
 407     }
 408 }
 409
 410 typedef std::unordered_map<partial_symtab *, unsigned int> psym_index_map;
 411
 412 /* Helper struct for building the address table.  */
 413 struct addrmap_index_data
 414 {
 415   addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_)
 416     : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_)
 417   {}
 418
 419   data_buf &addr_vec;
 420   psym_index_map &cu_index_htab;
 421
 422   int operator() (CORE_ADDR start_addr, void *obj);
 423
 424   /* True if the previous_* fields are valid.
 425      We can't write an entry until we see the next entry (since it is only then
 426      that we know the end of the entry).  */
 427   bool previous_valid = false;
 428   /* Index of the CU in the table of all CUs in the index file.  */
 429   unsigned int previous_cu_index = 0;
 430   /* Start address of the CU.  */
 431   CORE_ADDR previous_cu_start = 0;
 432 };
 433
 434 /* Write an address entry to ADDR_VEC.  */
 435
 436 static void
 437 add_address_entry (data_buf &addr_vec,
 438                    CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
 439 {
 440   addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
 441   addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
 442   addr_vec.append_offset (cu_index);
 443 }
 444
 445 /* Worker function for traversing an addrmap to build the address table.  */
 446
 447 int
 448 addrmap_index_data::operator() (CORE_ADDR start_addr, void *obj)
 449 {
 450   partial_symtab *pst = (partial_symtab *) obj;
 451
 452   if (previous_valid)
 453     add_address_entry (addr_vec,
 454                        previous_cu_start, start_addr,
 455                        previous_cu_index);
 456
 457   previous_cu_start = start_addr;
 458   if (pst != NULL)
 459     {
 460       const auto it = cu_index_htab.find (pst);
 461       gdb_assert (it != cu_index_htab.cend ());
 462       previous_cu_index = it->second;
 463       previous_valid = true;
 464     }
 465   else
 466     previous_valid = false;
 467
 468   return 0;
 469 }
 470
 471 /* Write PER_BFD's address map to ADDR_VEC.
 472    CU_INDEX_HTAB is used to map addrmap entries to their CU indices
 473    in the index file.  */
 474
 475 static void
 476 write_address_map (dwarf2_per_bfd *per_bfd, data_buf &addr_vec,
 477                    psym_index_map &cu_index_htab)
 478 {
 479   struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
 480
 481   addrmap_foreach (per_bfd->partial_symtabs->psymtabs_addrmap,
 482                    addrmap_index_data);
 483
 484   /* It's highly unlikely the last entry (end address = 0xff...ff)
 485      is valid, but we should still handle it.
 486      The end address is recorded as the start of the next region, but that
 487      doesn't work here.  To cope we pass 0xff...ff, this is a rare situation
 488      anyway.  */
 489   if (addrmap_index_data.previous_valid)
 490     add_address_entry (addr_vec,
 491                        addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
 492                        addrmap_index_data.previous_cu_index);
 493 }
 494
 495 /* Return the symbol kind of PSYM.  */
 496
 497 static gdb_index_symbol_kind
 498 symbol_kind (struct partial_symbol *psym)
 499 {
 500   domain_enum domain = psym->domain;
 501   enum address_class aclass = psym->aclass;
 502
 503   switch (domain)
 504     {
 505     case VAR_DOMAIN:
 506       switch (aclass)
 507         {
 508         case LOC_BLOCK:
 509           return GDB_INDEX_SYMBOL_KIND_FUNCTION;
 510         case LOC_TYPEDEF:
 511           return GDB_INDEX_SYMBOL_KIND_TYPE;
 512         case LOC_COMPUTED:
 513         case LOC_CONST_BYTES:
 514         case LOC_OPTIMIZED_OUT:
 515         case LOC_STATIC:
 516           return GDB_INDEX_SYMBOL_KIND_VARIABLE;
 517         case LOC_CONST:
 518           /* Note: It's currently impossible to recognize psyms as enum values
 519              short of reading the type info.  For now punt.  */
 520           return GDB_INDEX_SYMBOL_KIND_VARIABLE;
 521         default:
 522           /* There are other LOC_FOO values that one might want to classify
 523              as variables, but dwarf2read.c doesn't currently use them.  */
 524           return GDB_INDEX_SYMBOL_KIND_OTHER;
 525         }
 526     case STRUCT_DOMAIN:
 527       return GDB_INDEX_SYMBOL_KIND_TYPE;
 528     default:
 529       return GDB_INDEX_SYMBOL_KIND_OTHER;
 530     }
 531 }
 532
 533 /* Add a list of partial symbols to SYMTAB.  */
 534
 535 static void
 536 write_psymbols (struct mapped_symtab *symtab,
 537                 std::unordered_set<partial_symbol *> &psyms_seen,
 538                 const std::vector<partial_symbol *> &symbols,
 539                 offset_type cu_index,
 540                 int is_static)
 541 {
 542   for (partial_symbol *psym : symbols)
 543     {
 544       const char *name = psym->ginfo.search_name ();
 545
 546       if (psym->ginfo.language () == language_ada)
 547         {
 548           /* We want to ensure that the Ada main function's name appears
 549              verbatim in the index.  However, this name will be of the
 550              form "_ada_mumble", and will be rewritten by ada_decode.
 551              So, recognize it specially here and add it to the index by
 552              hand.  */
 553           if (strcmp (main_name (), name) == 0)
 554             {
 555               gdb_index_symbol_kind kind = symbol_kind (psym);
 556
 557               add_index_entry (symtab, name, is_static, kind, cu_index);
 558             }
 559
 560           /* In order for the index to work when read back into gdb, it
 561              has to supply a funny form of the name: it should be the
 562              encoded name, with any suffixes stripped.  Using the
 563              ordinary encoded name will not work properly with the
 564              searching logic in find_name_components_bounds; nor will
 565              using the decoded name.  Furthermore, an Ada "verbatim"
 566              name (of the form "<MumBle>") must be entered without the
 567              angle brackets.  Note that the current index is unusual,
 568              see PR symtab/24820 for details.  */
 569           std::string decoded = ada_decode (name);
 570           if (decoded[0] == '<')
 571             name = (char *) obstack_copy0 (&symtab->m_string_obstack,
 572                                            decoded.c_str () + 1,
 573                                            decoded.length () - 2);
 574           else
 575             name = obstack_strdup (&symtab->m_string_obstack,
 576                                    ada_encode (decoded.c_str ()));
 577         }
 578
 579       /* Only add a given psymbol once.  */
 580       if (psyms_seen.insert (psym).second)
 581         {
 582           gdb_index_symbol_kind kind = symbol_kind (psym);
 583
 584           add_index_entry (symtab, name, is_static, kind, cu_index);
 585         }
 586     }
 587 }
 588
 589 /* Recurse into all "included" dependencies and count their symbols as
 590    if they appeared in this psymtab.  */
 591
 592 static void
 593 recursively_count_psymbols (partial_symtab *psymtab,
 594                             size_t &psyms_seen)
 595 {
 596   for (int i = 0; i < psymtab->number_of_dependencies; ++i)
 597     if (psymtab->dependencies[i]->user != NULL)
 598       recursively_count_psymbols (psymtab->dependencies[i],
 599                                   psyms_seen);
 600
 601   psyms_seen += psymtab->global_psymbols.size ();
 602   psyms_seen += psymtab->static_psymbols.size ();
 603 }
 604
 605 /* Recurse into all "included" dependencies and write their symbols as
 606    if they appeared in this psymtab.  */
 607
 608 static void
 609 recursively_write_psymbols (struct objfile *objfile,
 610                             partial_symtab *psymtab,
 611                             struct mapped_symtab *symtab,
 612                             std::unordered_set<partial_symbol *> &psyms_seen,
 613                             offset_type cu_index)
 614 {
 615   int i;
 616
 617   for (i = 0; i < psymtab->number_of_dependencies; ++i)
 618     if (psymtab->dependencies[i]->user != NULL)
 619       recursively_write_psymbols (objfile,
 620                                   psymtab->dependencies[i],
 621                                   symtab, psyms_seen, cu_index);
 622
 623   write_psymbols (symtab, psyms_seen,
 624                   psymtab->global_psymbols, cu_index,
 625                   0);
 626   write_psymbols (symtab, psyms_seen,
 627                   psymtab->static_psymbols, cu_index,
 628                   1);
 629 }
 630
 631 /* DWARF-5 .debug_names builder.  */
 632 class debug_names
 633 {
 634 public:
 635   debug_names (dwarf2_per_objfile *per_objfile, bool is_dwarf64,
 636                bfd_endian dwarf5_byte_order)
 637     : m_dwarf5_byte_order (dwarf5_byte_order),
 638       m_dwarf32 (dwarf5_byte_order),
 639       m_dwarf64 (dwarf5_byte_order),
 640       m_dwarf (is_dwarf64
 641                ? static_cast<dwarf &> (m_dwarf64)
 642                : static_cast<dwarf &> (m_dwarf32)),
 643       m_name_table_string_offs (m_dwarf.name_table_string_offs),
 644       m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
 645       m_debugstrlookup (per_objfile)
 646   {}
 647
 648   int dwarf5_offset_size () const
 649   {
 650     const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
 651     return dwarf5_is_dwarf64 ? 8 : 4;
 652   }
 653
 654   /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit?  */
 655   enum class unit_kind { cu, tu };
 656
 657   /* Insert one symbol.  */
 658   void insert (const partial_symbol *psym, int cu_index, bool is_static,
 659                unit_kind kind)
 660   {
 661     const int dwarf_tag = psymbol_tag (psym);
 662     if (dwarf_tag == 0)
 663       return;
 664     const char *name = psym->ginfo.search_name ();
 665
 666     if (psym->ginfo.language () == language_ada)
 667       {
 668         /* We want to ensure that the Ada main function's name appears
 669            verbatim in the index.  However, this name will be of the
 670            form "_ada_mumble", and will be rewritten by ada_decode.
 671            So, recognize it specially here and add it to the index by
 672            hand.  */
 673         if (strcmp (main_name (), name) == 0)
 674           {
 675             const auto insertpair
 676               = m_name_to_value_set.emplace (c_str_view (name),
 677                                              std::set<symbol_value> ());
 678             std::set<symbol_value> &value_set = insertpair.first->second;
 679             value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static,
 680                                              kind));
 681           }
 682
 683         /* In order for the index to work when read back into gdb, it
 684            has to supply a funny form of the name: it should be the
 685            encoded name, with any suffixes stripped.  Using the
 686            ordinary encoded name will not work properly with the
 687            searching logic in find_name_components_bounds; nor will
 688            using the decoded name.  Furthermore, an Ada "verbatim"
 689            name (of the form "<MumBle>") must be entered without the
 690            angle brackets.  Note that the current index is unusual,
 691            see PR symtab/24820 for details.  */
 692         std::string decoded = ada_decode (name);
 693         if (decoded[0] == '<')
 694           name = (char *) obstack_copy0 (&m_string_obstack,
 695                                          decoded.c_str () + 1,
 696                                          decoded.length () - 2);
 697         else
 698           name = obstack_strdup (&m_string_obstack,
 699                                  ada_encode (decoded.c_str ()));
 700       }
 701
 702     const auto insertpair
 703       = m_name_to_value_set.emplace (c_str_view (name),
 704                                      std::set<symbol_value> ());
 705     std::set<symbol_value> &value_set = insertpair.first->second;
 706     value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind));
 707   }
 708
 709   /* Build all the tables.  All symbols must be already inserted.
 710      This function does not call file_write, caller has to do it
 711      afterwards.  */
 712   void build ()
 713   {
 714     /* Verify the build method has not be called twice.  */
 715     gdb_assert (m_abbrev_table.empty ());
 716     const size_t name_count = m_name_to_value_set.size ();
 717     m_bucket_table.resize
 718       (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
 719     m_hash_table.reserve (name_count);
 720     m_name_table_string_offs.reserve (name_count);
 721     m_name_table_entry_offs.reserve (name_count);
 722
 723     /* Map each hash of symbol to its name and value.  */
 724     struct hash_it_pair
 725     {
 726       uint32_t hash;
 727       decltype (m_name_to_value_set)::const_iterator it;
 728     };
 729     std::vector<std::forward_list<hash_it_pair>> bucket_hash;
 730     bucket_hash.resize (m_bucket_table.size ());
 731     for (decltype (m_name_to_value_set)::const_iterator it
 732            = m_name_to_value_set.cbegin ();
 733          it != m_name_to_value_set.cend ();
 734          ++it)
 735       {
 736         const char *const name = it->first.c_str ();
 737         const uint32_t hash = dwarf5_djb_hash (name);
 738         hash_it_pair hashitpair;
 739         hashitpair.hash = hash;
 740         hashitpair.it = it;
 741         auto &slot = bucket_hash[hash % bucket_hash.size()];
 742         slot.push_front (std::move (hashitpair));
 743       }
 744     for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
 745       {
 746         const std::forward_list<hash_it_pair> &hashitlist
 747           = bucket_hash[bucket_ix];
 748         if (hashitlist.empty ())
 749           continue;
 750         uint32_t &bucket_slot = m_bucket_table[bucket_ix];
 751         /* The hashes array is indexed starting at 1.  */
 752         store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
 753                                 sizeof (bucket_slot), m_dwarf5_byte_order,
 754                                 m_hash_table.size () + 1);
 755         for (const hash_it_pair &hashitpair : hashitlist)
 756           {
 757             m_hash_table.push_back (0);
 758             store_unsigned_integer (reinterpret_cast<gdb_byte *>
 759                                                         (&m_hash_table.back ()),
 760                                     sizeof (m_hash_table.back ()),
 761                                     m_dwarf5_byte_order, hashitpair.hash);
 762             const c_str_view &name = hashitpair.it->first;
 763             const std::set<symbol_value> &value_set = hashitpair.it->second;
 764             m_name_table_string_offs.push_back_reorder
 765               (m_debugstrlookup.lookup (name.c_str ()));
 766             m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
 767             gdb_assert (!value_set.empty ());
 768             for (const symbol_value &value : value_set)
 769               {
 770                 int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
 771                                                         value.is_static,
 772                                                         value.kind)];
 773                 if (idx == 0)
 774                   {
 775                     idx = m_idx_next++;
 776                     m_abbrev_table.append_unsigned_leb128 (idx);
 777                     m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
 778                     m_abbrev_table.append_unsigned_leb128
 779                               (value.kind == unit_kind::cu ? DW_IDX_compile_unit
 780                                                            : DW_IDX_type_unit);
 781                     m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
 782                     m_abbrev_table.append_unsigned_leb128 (value.is_static
 783                                                            ? DW_IDX_GNU_internal
 784                                                            : DW_IDX_GNU_external);
 785                     m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
 786
 787                     /* Terminate attributes list.  */
 788                     m_abbrev_table.append_unsigned_leb128 (0);
 789                     m_abbrev_table.append_unsigned_leb128 (0);
 790                   }
 791
 792                 m_entry_pool.append_unsigned_leb128 (idx);
 793                 m_entry_pool.append_unsigned_leb128 (value.cu_index);
 794               }
 795
 796             /* Terminate the list of CUs.  */
 797             m_entry_pool.append_unsigned_leb128 (0);
 798           }
 799       }
 800     gdb_assert (m_hash_table.size () == name_count);
 801
 802     /* Terminate tags list.  */
 803     m_abbrev_table.append_unsigned_leb128 (0);
 804   }
 805
 806   /* Return .debug_names bucket count.  This must be called only after
 807      calling the build method.  */
 808   uint32_t bucket_count () const
 809   {
 810     /* Verify the build method has been already called.  */
 811     gdb_assert (!m_abbrev_table.empty ());
 812     const uint32_t retval = m_bucket_table.size ();
 813
 814     /* Check for overflow.  */
 815     gdb_assert (retval == m_bucket_table.size ());
 816     return retval;
 817   }
 818
 819   /* Return .debug_names names count.  This must be called only after
 820      calling the build method.  */
 821   uint32_t name_count () const
 822   {
 823     /* Verify the build method has been already called.  */
 824     gdb_assert (!m_abbrev_table.empty ());
 825     const uint32_t retval = m_hash_table.size ();
 826
 827     /* Check for overflow.  */
 828     gdb_assert (retval == m_hash_table.size ());
 829     return retval;
 830   }
 831
 832   /* Return number of bytes of .debug_names abbreviation table.  This
 833      must be called only after calling the build method.  */
 834   uint32_t abbrev_table_bytes () const
 835   {
 836     gdb_assert (!m_abbrev_table.empty ());
 837     return m_abbrev_table.size ();
 838   }
 839
 840   /* Recurse into all "included" dependencies and store their symbols
 841      as if they appeared in this psymtab.  */
 842   void recursively_write_psymbols
 843     (struct objfile *objfile,
 844      partial_symtab *psymtab,
 845      std::unordered_set<partial_symbol *> &psyms_seen,
 846      int cu_index)
 847   {
 848     for (int i = 0; i < psymtab->number_of_dependencies; ++i)
 849       if (psymtab->dependencies[i]->user != NULL)
 850         recursively_write_psymbols
 851           (objfile, psymtab->dependencies[i], psyms_seen, cu_index);
 852
 853     write_psymbols (psyms_seen, psymtab->global_psymbols,
 854                     cu_index, false, unit_kind::cu);
 855     write_psymbols (psyms_seen, psymtab->static_psymbols,
 856                     cu_index, true, unit_kind::cu);
 857   }
 858
 859   /* Return number of bytes the .debug_names section will have.  This
 860      must be called only after calling the build method.  */
 861   size_t bytes () const
 862   {
 863     /* Verify the build method has been already called.  */
 864     gdb_assert (!m_abbrev_table.empty ());
 865     size_t expected_bytes = 0;
 866     expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
 867     expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
 868     expected_bytes += m_name_table_string_offs.bytes ();
 869     expected_bytes += m_name_table_entry_offs.bytes ();
 870     expected_bytes += m_abbrev_table.size ();
 871     expected_bytes += m_entry_pool.size ();
 872     return expected_bytes;
 873   }
 874
 875   /* Write .debug_names to FILE_NAMES and .debug_str addition to
 876      FILE_STR.  This must be called only after calling the build
 877      method.  */
 878   void file_write (FILE *file_names, FILE *file_str) const
 879   {
 880     /* Verify the build method has been already called.  */
 881     gdb_assert (!m_abbrev_table.empty ());
 882     ::file_write (file_names, m_bucket_table);
 883     ::file_write (file_names, m_hash_table);
 884     m_name_table_string_offs.file_write (file_names);
 885     m_name_table_entry_offs.file_write (file_names);
 886     m_abbrev_table.file_write (file_names);
 887     m_entry_pool.file_write (file_names);
 888     m_debugstrlookup.file_write (file_str);
 889   }
 890
 891 private:
 892
 893   /* Storage for symbol names mapping them to their .debug_str section
 894      offsets.  */
 895   class debug_str_lookup
 896   {
 897   public:
 898
 899     /* Object constructor to be called for current DWARF2_PER_OBJFILE.
 900        All .debug_str section strings are automatically stored.  */
 901     debug_str_lookup (dwarf2_per_objfile *per_objfile)
 902       : m_abfd (per_objfile->objfile->obfd),
 903         m_per_objfile (per_objfile)
 904     {
 905       per_objfile->per_bfd->str.read (per_objfile->objfile);
 906       if (per_objfile->per_bfd->str.buffer == NULL)
 907         return;
 908       for (const gdb_byte *data = per_objfile->per_bfd->str.buffer;
 909            data < (per_objfile->per_bfd->str.buffer
 910                    + per_objfile->per_bfd->str.size);)
 911         {
 912           const char *const s = reinterpret_cast<const char *> (data);
 913           const auto insertpair
 914             = m_str_table.emplace (c_str_view (s),
 915                                    data - per_objfile->per_bfd->str.buffer);
 916           if (!insertpair.second)
 917             complaint (_("Duplicate string \"%s\" in "
 918                          ".debug_str section [in module %s]"),
 919                        s, bfd_get_filename (m_abfd));
 920           data += strlen (s) + 1;
 921         }
 922     }
 923
 924     /* Return offset of symbol name S in the .debug_str section.  Add
 925        such symbol to the section's end if it does not exist there
 926        yet.  */
 927     size_t lookup (const char *s)
 928     {
 929       const auto it = m_str_table.find (c_str_view (s));
 930       if (it != m_str_table.end ())
 931         return it->second;
 932       const size_t offset = (m_per_objfile->per_bfd->str.size
 933                              + m_str_add_buf.size ());
 934       m_str_table.emplace (c_str_view (s), offset);
 935       m_str_add_buf.append_cstr0 (s);
 936       return offset;
 937     }
 938
 939     /* Append the end of the .debug_str section to FILE.  */
 940     void file_write (FILE *file) const
 941     {
 942       m_str_add_buf.file_write (file);
 943     }
 944
 945   private:
 946     std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
 947     bfd *const m_abfd;
 948     dwarf2_per_objfile *m_per_objfile;
 949
 950     /* Data to add at the end of .debug_str for new needed symbol names.  */
 951     data_buf m_str_add_buf;
 952   };
 953
 954   /* Container to map used DWARF tags to their .debug_names abbreviation
 955      tags.  */
 956   class index_key
 957   {
 958   public:
 959     index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
 960       : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
 961     {
 962     }
 963
 964     bool
 965     operator== (const index_key &other) const
 966     {
 967       return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
 968               && kind == other.kind);
 969     }
 970
 971     const int dwarf_tag;
 972     const bool is_static;
 973     const unit_kind kind;
 974   };
 975
 976   /* Provide std::unordered_map::hasher for index_key.  */
 977   class index_key_hasher
 978   {
 979   public:
 980     size_t
 981     operator () (const index_key &key) const
 982     {
 983       return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
 984     }
 985   };
 986
 987   /* Parameters of one symbol entry.  */
 988   class symbol_value
 989   {
 990   public:
 991     const int dwarf_tag, cu_index;
 992     const bool is_static;
 993     const unit_kind kind;
 994
 995     symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
 996                   unit_kind kind_)
 997       : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
 998         kind (kind_)
 999     {}
1000
1001     bool
1002     operator< (const symbol_value &other) const
1003     {
1004 #define X(n) \
1005   do \
1006     { \
1007       if (n < other.n) \
1008         return true; \
1009       if (n > other.n) \
1010         return false; \
1011     } \
1012   while (0)
1013       X (dwarf_tag);
1014       X (is_static);
1015       X (kind);
1016       X (cu_index);
1017 #undef X
1018       return false;
1019     }
1020   };
1021
1022   /* Abstract base class to unify DWARF-32 and DWARF-64 name table
1023      output.  */
1024   class offset_vec
1025   {
1026   protected:
1027     const bfd_endian dwarf5_byte_order;
1028   public:
1029     explicit offset_vec (bfd_endian dwarf5_byte_order_)
1030       : dwarf5_byte_order (dwarf5_byte_order_)
1031     {}
1032
1033     /* Call std::vector::reserve for NELEM elements.  */
1034     virtual void reserve (size_t nelem) = 0;
1035
1036     /* Call std::vector::push_back with store_unsigned_integer byte
1037        reordering for ELEM.  */
1038     virtual void push_back_reorder (size_t elem) = 0;
1039
1040     /* Return expected output size in bytes.  */
1041     virtual size_t bytes () const = 0;
1042
1043     /* Write name table to FILE.  */
1044     virtual void file_write (FILE *file) const = 0;
1045   };
1046
1047   /* Template to unify DWARF-32 and DWARF-64 output.  */
1048   template<typename OffsetSize>
1049   class offset_vec_tmpl : public offset_vec
1050   {
1051   public:
1052     explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
1053       : offset_vec (dwarf5_byte_order_)
1054     {}
1055
1056     /* Implement offset_vec::reserve.  */
1057     void reserve (size_t nelem) override
1058     {
1059       m_vec.reserve (nelem);
1060     }
1061
1062     /* Implement offset_vec::push_back_reorder.  */
1063     void push_back_reorder (size_t elem) override
1064     {
1065       m_vec.push_back (elem);
1066       /* Check for overflow.  */
1067       gdb_assert (m_vec.back () == elem);
1068       store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
1069                               sizeof (m_vec.back ()), dwarf5_byte_order, elem);
1070     }
1071
1072     /* Implement offset_vec::bytes.  */
1073     size_t bytes () const override
1074     {
1075       return m_vec.size () * sizeof (m_vec[0]);
1076     }
1077
1078     /* Implement offset_vec::file_write.  */
1079     void file_write (FILE *file) const override
1080     {
1081       ::file_write (file, m_vec);
1082     }
1083
1084   private:
1085     std::vector<OffsetSize> m_vec;
1086   };
1087
1088   /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
1089      respecting name table width.  */
1090   class dwarf
1091   {
1092   public:
1093     offset_vec &name_table_string_offs, &name_table_entry_offs;
1094
1095     dwarf (offset_vec &name_table_string_offs_,
1096            offset_vec &name_table_entry_offs_)
1097       : name_table_string_offs (name_table_string_offs_),
1098         name_table_entry_offs (name_table_entry_offs_)
1099     {
1100     }
1101   };
1102
1103   /* Template to unify DWARF-32 and DWARF-64 .debug_names output
1104      respecting name table width.  */
1105   template<typename OffsetSize>
1106   class dwarf_tmpl : public dwarf
1107   {
1108   public:
1109     explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
1110       : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
1111         m_name_table_string_offs (dwarf5_byte_order_),
1112         m_name_table_entry_offs (dwarf5_byte_order_)
1113     {}
1114
1115   private:
1116     offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
1117     offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
1118   };
1119
1120   /* Try to reconstruct original DWARF tag for given partial_symbol.
1121      This function is not DWARF-5 compliant but it is sufficient for
1122      GDB as a DWARF-5 index consumer.  */
1123   static int psymbol_tag (const struct partial_symbol *psym)
1124   {
1125     domain_enum domain = psym->domain;
1126     enum address_class aclass = psym->aclass;
1127
1128     switch (domain)
1129       {
1130       case VAR_DOMAIN:
1131         switch (aclass)
1132           {
1133           case LOC_BLOCK:
1134             return DW_TAG_subprogram;
1135           case LOC_TYPEDEF:
1136             return DW_TAG_typedef;
1137           case LOC_COMPUTED:
1138           case LOC_CONST_BYTES:
1139           case LOC_OPTIMIZED_OUT:
1140           case LOC_STATIC:
1141             return DW_TAG_variable;
1142           case LOC_CONST:
1143             /* Note: It's currently impossible to recognize psyms as enum values
1144                short of reading the type info.  For now punt.  */
1145             return DW_TAG_variable;
1146           default:
1147             /* There are other LOC_FOO values that one might want to classify
1148                as variables, but dwarf2read.c doesn't currently use them.  */
1149             return DW_TAG_variable;
1150           }
1151       case STRUCT_DOMAIN:
1152         return DW_TAG_structure_type;
1153       case MODULE_DOMAIN:
1154         return DW_TAG_module;
1155       default:
1156         return 0;
1157       }
1158   }
1159
1160   /* Call insert for all partial symbols and mark them in PSYMS_SEEN.  */
1161   void write_psymbols (std::unordered_set<partial_symbol *> &psyms_seen,
1162                        const std::vector<partial_symbol *> &symbols,
1163                        int cu_index, bool is_static, unit_kind kind)
1164   {
1165     for (partial_symbol *psym : symbols)
1166       {
1167         /* Only add a given psymbol once.  */
1168         if (psyms_seen.insert (psym).second)
1169           insert (psym, cu_index, is_static, kind);
1170       }
1171   }
1172
1173   /* Store value of each symbol.  */
1174   std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
1175     m_name_to_value_set;
1176
1177   /* Tables of DWARF-5 .debug_names.  They are in object file byte
1178      order.  */
1179   std::vector<uint32_t> m_bucket_table;
1180   std::vector<uint32_t> m_hash_table;
1181
1182   const bfd_endian m_dwarf5_byte_order;
1183   dwarf_tmpl<uint32_t> m_dwarf32;
1184   dwarf_tmpl<uint64_t> m_dwarf64;
1185   dwarf &m_dwarf;
1186   offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
1187   debug_str_lookup m_debugstrlookup;
1188
1189   /* Map each used .debug_names abbreviation tag parameter to its
1190      index value.  */
1191   std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
1192
1193   /* Next unused .debug_names abbreviation tag for
1194      m_indexkey_to_idx.  */
1195   int m_idx_next = 1;
1196
1197   /* .debug_names abbreviation table.  */
1198   data_buf m_abbrev_table;
1199
1200   /* .debug_names entry pool.  */
1201   data_buf m_entry_pool;
1202
1203   /* Temporary storage for Ada names.  */
1204   auto_obstack m_string_obstack;
1205 };
1206
1207 /* Return iff any of the needed offsets does not fit into 32-bit
1208    .debug_names section.  */
1209
1210 static bool
1211 check_dwarf64_offsets (dwarf2_per_objfile *per_objfile)
1212 {
1213   for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
1214     {
1215       if (to_underlying (per_cu->sect_off)
1216           >= (static_cast<uint64_t> (1) << 32))
1217         return true;
1218     }
1219   return false;
1220 }
1221
1222 /* The psyms_seen set is potentially going to be largish (~40k
1223    elements when indexing a -g3 build of GDB itself).  Estimate the
1224    number of elements in order to avoid too many rehashes, which
1225    require rebuilding buckets and thus many trips to
1226    malloc/free.  */
1227
1228 static size_t
1229 psyms_seen_size (dwarf2_per_objfile *per_objfile)
1230 {
1231   size_t psyms_count = 0;
1232   for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
1233     {
1234       partial_symtab *psymtab = per_cu->v.psymtab;
1235
1236       if (psymtab != NULL && psymtab->user == NULL)
1237         recursively_count_psymbols (psymtab, psyms_count);
1238     }
1239   /* Generating an index for gdb itself shows a ratio of
1240      TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5.  4 seems like a good bet.  */
1241   return psyms_count / 4;
1242 }
1243
1244 /* Assert that FILE's size is EXPECTED_SIZE.  Assumes file's seek
1245    position is at the end of the file.  */
1246
1247 static void
1248 assert_file_size (FILE *file, size_t expected_size)
1249 {
1250   const auto file_size = ftell (file);
1251   if (file_size == -1)
1252     perror_with_name (("ftell"));
1253   gdb_assert (file_size == expected_size);
1254 }
1255
1256 /* Write a gdb index file to OUT_FILE from all the sections passed as
1257    arguments.  */
1258
1259 static void
1260 write_gdbindex_1 (FILE *out_file,
1261                   const data_buf &cu_list,
1262                   const data_buf &types_cu_list,
1263                   const data_buf &addr_vec,
1264                   const data_buf &symtab_vec,
1265                   const data_buf &constant_pool)
1266 {
1267   data_buf contents;
1268   const offset_type size_of_header = 6 * sizeof (offset_type);
1269   offset_type total_len = size_of_header;
1270
1271   /* The version number.  */
1272   contents.append_offset (8);
1273
1274   /* The offset of the CU list from the start of the file.  */
1275   contents.append_offset (total_len);
1276   total_len += cu_list.size ();
1277
1278   /* The offset of the types CU list from the start of the file.  */
1279   contents.append_offset (total_len);
1280   total_len += types_cu_list.size ();
1281
1282   /* The offset of the address table from the start of the file.  */
1283   contents.append_offset (total_len);
1284   total_len += addr_vec.size ();
1285
1286   /* The offset of the symbol table from the start of the file.  */
1287   contents.append_offset (total_len);
1288   total_len += symtab_vec.size ();
1289
1290   /* The offset of the constant pool from the start of the file.  */
1291   contents.append_offset (total_len);
1292   total_len += constant_pool.size ();
1293
1294   gdb_assert (contents.size () == size_of_header);
1295
1296   contents.file_write (out_file);
1297   cu_list.file_write (out_file);
1298   types_cu_list.file_write (out_file);
1299   addr_vec.file_write (out_file);
1300   symtab_vec.file_write (out_file);
1301   constant_pool.file_write (out_file);
1302
1303   assert_file_size (out_file, total_len);
1304 }
1305
1306 /* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1307    If OBJFILE has an associated dwz file, write contents of a .gdb_index
1308    section for that dwz file into DWZ_OUT_FILE.  If OBJFILE does not have an
1309    associated dwz file, DWZ_OUT_FILE must be NULL.  */
1310
1311 static void
1312 write_gdbindex (dwarf2_per_objfile *per_objfile, FILE *out_file,
1313                 FILE *dwz_out_file)
1314 {
1315   struct objfile *objfile = per_objfile->objfile;
1316   mapped_symtab symtab;
1317   data_buf objfile_cu_list;
1318   data_buf dwz_cu_list;
1319
1320   /* While we're scanning CU's create a table that maps a psymtab pointer
1321      (which is what addrmap records) to its index (which is what is recorded
1322      in the index file).  This will later be needed to write the address
1323      table.  */
1324   psym_index_map cu_index_htab;
1325   cu_index_htab.reserve (per_objfile->per_bfd->all_comp_units.size ());
1326
1327   /* Store out the .debug_type CUs, if any.  */
1328   data_buf types_cu_list;
1329
1330   /* The CU list is already sorted, so we don't need to do additional
1331      work here.  Also, the debug_types entries do not appear in
1332      all_comp_units, but only in their own hash table.  */
1333
1334   std::unordered_set<partial_symbol *> psyms_seen
1335     (psyms_seen_size (per_objfile));
1336   int counter = 0;
1337   int types_counter = 0;
1338   for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
1339     {
1340       dwarf2_per_cu_data *per_cu
1341         = per_objfile->per_bfd->all_comp_units[i].get ();
1342       partial_symtab *psymtab = per_cu->v.psymtab;
1343
1344       int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1345
1346       if (psymtab != NULL)
1347         {
1348           if (psymtab->user == NULL)
1349             recursively_write_psymbols (objfile, psymtab, &symtab,
1350                                         psyms_seen, this_counter);
1351
1352           const auto insertpair = cu_index_htab.emplace (psymtab,
1353                                                          this_counter);
1354           gdb_assert (insertpair.second);
1355         }
1356
1357       /* The all_comp_units list contains CUs read from the objfile as well as
1358          from the eventual dwz file.  We need to place the entry in the
1359          corresponding index.  */
1360       data_buf &cu_list = (per_cu->is_debug_types
1361                            ? types_cu_list
1362                            : per_cu->is_dwz ? dwz_cu_list : objfile_cu_list);
1363       cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1364                            to_underlying (per_cu->sect_off));
1365       if (per_cu->is_debug_types)
1366         {
1367           signatured_type *sig_type = (signatured_type *) per_cu;
1368           cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1369                                to_underlying (sig_type->type_offset_in_tu));
1370           cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1371                                sig_type->signature);
1372         }
1373       else
1374         cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length);
1375
1376       ++this_counter;
1377     }
1378
1379   /* Dump the address map.  */
1380   data_buf addr_vec;
1381   write_address_map (per_objfile->per_bfd, addr_vec, cu_index_htab);
1382
1383   /* Now that we've processed all symbols we can shrink their cu_indices
1384      lists.  */
1385   uniquify_cu_indices (&symtab);
1386
1387   data_buf symtab_vec, constant_pool;
1388   if (symtab.n_elements == 0)
1389     symtab.data.resize (0);
1390
1391   write_hash_table (&symtab, symtab_vec, constant_pool);
1392
1393   write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1394                    symtab_vec, constant_pool);
1395
1396   if (dwz_out_file != NULL)
1397     write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1398   else
1399     gdb_assert (dwz_cu_list.empty ());
1400 }
1401
1402 /* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension.  */
1403 static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1404
1405 /* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1406    needed addition to .debug_str section to OUT_FILE_STR.  Return how
1407    many bytes were expected to be written into OUT_FILE.  */
1408
1409 static void
1410 write_debug_names (dwarf2_per_objfile *per_objfile,
1411                    FILE *out_file, FILE *out_file_str)
1412 {
1413   const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (per_objfile);
1414   struct objfile *objfile = per_objfile->objfile;
1415   const enum bfd_endian dwarf5_byte_order
1416     = gdbarch_byte_order (objfile->arch ());
1417
1418   /* The CU list is already sorted, so we don't need to do additional
1419      work here.  Also, the debug_types entries do not appear in
1420      all_comp_units, but only in their own hash table.  */
1421   data_buf cu_list;
1422   data_buf types_cu_list;
1423   debug_names nametable (per_objfile, dwarf5_is_dwarf64, dwarf5_byte_order);
1424   std::unordered_set<partial_symbol *>
1425     psyms_seen (psyms_seen_size (per_objfile));
1426   int counter = 0;
1427   int types_counter = 0;
1428   for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
1429     {
1430       const dwarf2_per_cu_data *per_cu
1431         = per_objfile->per_bfd->all_comp_units[i].get ();
1432       partial_symtab *psymtab = per_cu->v.psymtab;
1433
1434       int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1435       data_buf &this_list = per_cu->is_debug_types ? types_cu_list : cu_list;
1436
1437       if (psymtab != nullptr && psymtab->user == nullptr)
1438         nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen,
1439                                               this_counter);
1440
1441       this_list.append_uint (nametable.dwarf5_offset_size (),
1442                              dwarf5_byte_order,
1443                              to_underlying (per_cu->sect_off));
1444       ++this_counter;
1445     }
1446
1447    /* Verify that all units are represented.  */
1448   gdb_assert (counter == (per_objfile->per_bfd->all_comp_units.size ()
1449                           - per_objfile->per_bfd->tu_stats.nr_tus));
1450   gdb_assert (types_counter == per_objfile->per_bfd->tu_stats.nr_tus);
1451
1452   nametable.build ();
1453
1454   /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC.  */
1455
1456   const offset_type bytes_of_header
1457     = ((dwarf5_is_dwarf64 ? 12 : 4)
1458        + 2 + 2 + 7 * 4
1459        + sizeof (dwarf5_gdb_augmentation));
1460   size_t expected_bytes = 0;
1461   expected_bytes += bytes_of_header;
1462   expected_bytes += cu_list.size ();
1463   expected_bytes += types_cu_list.size ();
1464   expected_bytes += nametable.bytes ();
1465   data_buf header;
1466
1467   if (!dwarf5_is_dwarf64)
1468     {
1469       const uint64_t size64 = expected_bytes - 4;
1470       gdb_assert (size64 < 0xfffffff0);
1471       header.append_uint (4, dwarf5_byte_order, size64);
1472     }
1473   else
1474     {
1475       header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1476       header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1477     }
1478
1479   /* The version number.  */
1480   header.append_uint (2, dwarf5_byte_order, 5);
1481
1482   /* Padding.  */
1483   header.append_uint (2, dwarf5_byte_order, 0);
1484
1485   /* comp_unit_count - The number of CUs in the CU list.  */
1486   header.append_uint (4, dwarf5_byte_order, counter);
1487
1488   /* local_type_unit_count - The number of TUs in the local TU
1489      list.  */
1490   header.append_uint (4, dwarf5_byte_order, types_counter);
1491
1492   /* foreign_type_unit_count - The number of TUs in the foreign TU
1493      list.  */
1494   header.append_uint (4, dwarf5_byte_order, 0);
1495
1496   /* bucket_count - The number of hash buckets in the hash lookup
1497      table.  */
1498   header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1499
1500   /* name_count - The number of unique names in the index.  */
1501   header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1502
1503   /* abbrev_table_size - The size in bytes of the abbreviations
1504      table.  */
1505   header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1506
1507   /* augmentation_string_size - The size in bytes of the augmentation
1508      string.  This value is rounded up to a multiple of 4.  */
1509   static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1510   header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
1511   header.append_array (dwarf5_gdb_augmentation);
1512
1513   gdb_assert (header.size () == bytes_of_header);
1514
1515   header.file_write (out_file);
1516   cu_list.file_write (out_file);
1517   types_cu_list.file_write (out_file);
1518   nametable.file_write (out_file, out_file_str);
1519
1520   assert_file_size (out_file, expected_bytes);
1521 }
1522
1523 /* This represents an index file being written (work-in-progress).
1524
1525    The data is initially written to a temporary file.  When the finalize method
1526    is called, the file is closed and moved to its final location.
1527
1528    On failure (if this object is being destroyed with having called finalize),
1529    the temporary file is closed and deleted.  */
1530
1531 struct index_wip_file
1532 {
1533   index_wip_file (const char *dir, const char *basename,
1534                   const char *suffix)
1535   {
1536     filename = (std::string (dir) + SLASH_STRING + basename
1537                 + suffix);
1538
1539     filename_temp = make_temp_filename (filename);
1540
1541     scoped_fd out_file_fd = gdb_mkostemp_cloexec (filename_temp.data (),
1542                                                   O_BINARY);
1543     if (out_file_fd.get () == -1)
1544       perror_with_name (("mkstemp"));
1545
1546     out_file = out_file_fd.to_file ("wb");
1547
1548     if (out_file == nullptr)
1549       error (_("Can't open `%s' for writing"), filename_temp.data ());
1550
1551     unlink_file.emplace (filename_temp.data ());
1552   }
1553
1554   void finalize ()
1555   {
1556     /* We want to keep the file.  */
1557     unlink_file->keep ();
1558
1559     /* Close and move the str file in place.  */
1560     unlink_file.reset ();
1561     if (rename (filename_temp.data (), filename.c_str ()) != 0)
1562       perror_with_name (("rename"));
1563   }
1564
1565   std::string filename;
1566   gdb::char_vector filename_temp;
1567
1568   /* Order matters here; we want FILE to be closed before
1569      FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1570      delete a file that is still open.  So, we wrap the unlinker in an
1571      optional and emplace it once we know the file name.  */
1572   gdb::optional<gdb::unlinker> unlink_file;
1573
1574   gdb_file_up out_file;
1575 };
1576
1577 /* See dwarf-index-write.h.  */
1578
1579 void
1580 write_psymtabs_to_index (dwarf2_per_objfile *per_objfile, const char *dir,
1581                          const char *basename, const char *dwz_basename,
1582                          dw_index_kind index_kind)
1583 {
1584   dwarf2_per_bfd *per_bfd = per_objfile->per_bfd;
1585   struct objfile *objfile = per_objfile->objfile;
1586
1587   if (per_objfile->per_bfd->using_index)
1588     error (_("Cannot use an index to create the index"));
1589
1590   if (per_objfile->per_bfd->types.size () > 1)
1591     error (_("Cannot make an index when the file has multiple .debug_types sections"));
1592
1593   if (per_bfd->partial_symtabs == nullptr
1594       || !per_bfd->partial_symtabs->psymtabs
1595       || !per_bfd->partial_symtabs->psymtabs_addrmap)
1596     return;
1597
1598   struct stat st;
1599   if (stat (objfile_name (objfile), &st) < 0)
1600     perror_with_name (objfile_name (objfile));
1601
1602   const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1603                               ? INDEX5_SUFFIX : INDEX4_SUFFIX);
1604
1605   index_wip_file objfile_index_wip (dir, basename, index_suffix);
1606   gdb::optional<index_wip_file> dwz_index_wip;
1607
1608   if (dwz_basename != NULL)
1609       dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
1610
1611   if (index_kind == dw_index_kind::DEBUG_NAMES)
1612     {
1613       index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1614
1615       write_debug_names (per_objfile, objfile_index_wip.out_file.get (),
1616                          str_wip_file.out_file.get ());
1617
1618       str_wip_file.finalize ();
1619     }
1620   else
1621     write_gdbindex (per_objfile, objfile_index_wip.out_file.get (),
1622                     (dwz_index_wip.has_value ()
1623                      ? dwz_index_wip->out_file.get () : NULL));
1624
1625   objfile_index_wip.finalize ();
1626
1627   if (dwz_index_wip.has_value ())
1628     dwz_index_wip->finalize ();
1629 }
1630
1631 /* Implementation of the `save gdb-index' command.
1632
1633    Note that the .gdb_index file format used by this command is
1634    documented in the GDB manual.  Any changes here must be documented
1635    there.  */
1636
1637 static void
1638 save_gdb_index_command (const char *arg, int from_tty)
1639 {
1640   const char dwarf5space[] = "-dwarf-5 ";
1641   dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1642
1643   if (!arg)
1644     arg = "";
1645
1646   arg = skip_spaces (arg);
1647   if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1648     {
1649       index_kind = dw_index_kind::DEBUG_NAMES;
1650       arg += strlen (dwarf5space);
1651       arg = skip_spaces (arg);
1652     }
1653
1654   if (!*arg)
1655     error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1656
1657   for (objfile *objfile : current_program_space->objfiles ())
1658     {
1659       struct stat st;
1660
1661       /* If the objfile does not correspond to an actual file, skip it.  */
1662       if (stat (objfile_name (objfile), &st) < 0)
1663         continue;
1664
1665       dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile);
1666
1667       if (per_objfile != NULL)
1668         {
1669           try
1670             {
1671               const char *basename = lbasename (objfile_name (objfile));
1672               const dwz_file *dwz = dwarf2_get_dwz_file (per_objfile->per_bfd);
1673               const char *dwz_basename = NULL;
1674
1675               if (dwz != NULL)
1676                 dwz_basename = lbasename (dwz->filename ());
1677
1678               write_psymtabs_to_index (per_objfile, arg, basename, dwz_basename,
1679                                        index_kind);
1680             }
1681           catch (const gdb_exception_error &except)
1682             {
1683               exception_fprintf (gdb_stderr, except,
1684                                  _("Error while writing index for `%s': "),
1685                                  objfile_name (objfile));
1686             }
1687             }
1688
1689     }
1690 }
1691
1692 void _initialize_dwarf_index_write ();
1693 void
1694 _initialize_dwarf_index_write ()
1695 {
1696   cmd_list_element *c = add_cmd ("gdb-index", class_files,
1697                                  save_gdb_index_command, _("\
1698 Save a gdb-index file.\n\
1699 Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1700 \n\
1701 No options create one file with .gdb-index extension for pre-DWARF-5\n\
1702 compatible .gdb_index section.  With -dwarf-5 creates two files with\n\
1703 extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
1704                &save_cmdlist);
1705   set_cmd_completer (c, filename_completer);
1706 }