diff --git a/_gdb.spec.Patch.include b/_gdb.spec.Patch.include index c5b0468..fe6b4d1 100644 --- a/_gdb.spec.Patch.include +++ b/_gdb.spec.Patch.include @@ -207,3 +207,23 @@ Patch047: gdb-rhbz2233965-memory-leak.patch # generating a gdb-index file (RH BZ 1773651). Patch048: gdb-rhbz1773651-gdb-index-internal-error.patch +# Back-port upstream commit 1f0fab7ff86 as part of a fix for +# non-deterministic gdb-index generation (RH BZ 2232086). +Patch049: gdb-rhbz2232086-refactor-selftest-support.patch + +# Back-port upstream commit aa19bc1d259 as part of a fix for +# non-deterministic gdb-index generation (RH BZ 2232086). +Patch050: gdb-rhbz-2232086-reduce-size-of-gdb-index.patch + +# Back-port upstream commit acc117b57f7 as part of a fix for +# non-deterministic gdb-index generation (RH BZ 2232086). +Patch051: gdb-rhbz-2232086-cpp-ify-mapped-symtab.patch + +# Back-port upstream commit aff250145af as part of a fix for +# non-deterministic gdb-index generation (RH BZ 2232086). +Patch052: gdb-rhbz-2232086-generate-gdb-index-consistently.patch + +# Back-port upstream commit 3644f41dc80 as part of a fix for +# non-deterministic gdb-index generation (RH BZ 2232086). +Patch053: gdb-rhbz-2232086-generate-dwarf-5-index-consistently.patch + diff --git a/_gdb.spec.patch.include b/_gdb.spec.patch.include index ef55dd0..8ec13e9 100644 --- a/_gdb.spec.patch.include +++ b/_gdb.spec.patch.include @@ -46,3 +46,8 @@ %patch -p1 -P046 %patch -p1 -P047 %patch -p1 -P048 +%patch -p1 -P049 +%patch -p1 -P050 +%patch -p1 -P051 +%patch -p1 -P052 +%patch -p1 -P053 diff --git a/_patch_order b/_patch_order index e5f5b36..713f850 100644 --- a/_patch_order +++ b/_patch_order @@ -46,3 +46,8 @@ gdb-bz2237392-dwarf-obstack-allocation.patch gdb-rhbz2233961-CVE-2022-4806.patch gdb-rhbz2233965-memory-leak.patch gdb-rhbz1773651-gdb-index-internal-error.patch +gdb-rhbz2232086-refactor-selftest-support.patch +gdb-rhbz-2232086-reduce-size-of-gdb-index.patch +gdb-rhbz-2232086-cpp-ify-mapped-symtab.patch +gdb-rhbz-2232086-generate-gdb-index-consistently.patch +gdb-rhbz-2232086-generate-dwarf-5-index-consistently.patch diff --git a/gdb-rhbz-2232086-cpp-ify-mapped-symtab.patch b/gdb-rhbz-2232086-cpp-ify-mapped-symtab.patch new file mode 100644 index 0000000..4accca6 --- /dev/null +++ b/gdb-rhbz-2232086-cpp-ify-mapped-symtab.patch @@ -0,0 +1,264 @@ +From FEDORA_PATCHES Mon Sep 17 00:00:00 2001 +From: Andrew Burgess +Date: Sat, 25 Nov 2023 10:35:37 +0000 +Subject: gdb-rhbz-2232086-cpp-ify-mapped-symtab.patch + +;; Back-port upstream commit acc117b57f7 as part of a fix for +;; non-deterministic gdb-index generation (RH BZ 2232086). + +gdb: C++-ify mapped_symtab from dwarf2/index-write.c + +Make static the functions add_index_entry, find_slot, and hash_expand, +member functions of the mapped_symtab class. + +Fold an additional snippet of code from write_gdbindex into +mapped_symtab::minimize, this code relates to minimisation, so this +seems like a good home for it. + +Make the n_elements, data, and m_string_obstack member variables of +mapped_symtab private. Provide a new obstack() member function to +provide access to the obstack when needed, and also add member +functions begin(), end(), cbegin(), and cend() so that the +mapped_symtab class can be treated like a contained and iterated +over. + +I've also taken this opportunity to split out the logic for whether +the hash table (m_data) needs expanding, this is the new function +hash_needs_expanding. This will be useful in a later commit. + +There should be no user visible changes after this commit. + +Approved-By: Tom Tromey + +diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c +--- a/gdb/dwarf2/index-write.c ++++ b/gdb/dwarf2/index-write.c +@@ -187,86 +187,135 @@ struct mapped_symtab + { + mapped_symtab () + { +- data.resize (1024); ++ m_data.resize (1024); + } + +- /* Minimize each entry in the symbol table, removing duplicates. */ ++ /* If there are no elements in the symbol table, then reduce the table ++ size to zero. Otherwise call symtab_index_entry::minimize each entry ++ in the symbol table. */ ++ + void minimize () + { +- for (symtab_index_entry &item : data) ++ if (m_element_count == 0) ++ m_data.resize (0); ++ ++ for (symtab_index_entry &item : m_data) + item.minimize (); + } + +- offset_type n_elements = 0; +- std::vector data; ++ /* Add an entry to SYMTAB. NAME is the name of the symbol. CU_INDEX is ++ the index of the CU in which the symbol appears. IS_STATIC is one if ++ the symbol is static, otherwise zero (global). */ ++ ++ void add_index_entry (const char *name, int is_static, ++ gdb_index_symbol_kind kind, offset_type cu_index); ++ ++ /* Access the obstack. */ ++ struct obstack *obstack () ++ { return &m_string_obstack; } ++ ++private: ++ ++ /* Find a slot in SYMTAB for the symbol NAME. Returns a reference to ++ the slot. ++ ++ Function is used only during write_hash_table so no index format ++ backward compatibility is needed. */ ++ ++ symtab_index_entry &find_slot (const char *name); ++ ++ /* Expand SYMTAB's hash table. */ ++ ++ void hash_expand (); ++ ++ /* Return true if the hash table in data needs to grow. */ ++ ++ bool hash_needs_expanding () const ++ { return 4 * m_element_count / 3 >= m_data.size (); } ++ ++ /* A vector that is used as a hash table. */ ++ std::vector m_data; ++ ++ /* The number of elements stored in the m_data hash. */ ++ offset_type m_element_count = 0; + + /* Temporary storage for names. */ + auto_obstack m_string_obstack; +-}; + +-/* Find a slot in SYMTAB for the symbol NAME. Returns a reference to +- the slot. ++public: ++ using iterator = decltype (m_data)::iterator; ++ using const_iterator = decltype (m_data)::const_iterator; + +- Function is used only during write_hash_table so no index format backward +- compatibility is needed. */ ++ iterator begin () ++ { return m_data.begin (); } + +-static symtab_index_entry & +-find_slot (struct mapped_symtab *symtab, const char *name) ++ iterator end () ++ { return m_data.end (); } ++ ++ const_iterator cbegin () ++ { return m_data.cbegin (); } ++ ++ const_iterator cend () ++ { return m_data.cend (); } ++}; ++ ++/* See class definition. */ ++ ++symtab_index_entry & ++mapped_symtab::find_slot (const char *name) + { + offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name); + +- index = hash & (symtab->data.size () - 1); +- step = ((hash * 17) & (symtab->data.size () - 1)) | 1; ++ index = hash & (m_data.size () - 1); ++ step = ((hash * 17) & (m_data.size () - 1)) | 1; + + for (;;) + { +- if (symtab->data[index].name == NULL +- || strcmp (name, symtab->data[index].name) == 0) +- return symtab->data[index]; +- index = (index + step) & (symtab->data.size () - 1); ++ if (m_data[index].name == NULL ++ || strcmp (name, m_data[index].name) == 0) ++ return m_data[index]; ++ index = (index + step) & (m_data.size () - 1); + } + } + +-/* Expand SYMTAB's hash table. */ ++/* See class definition. */ + +-static void +-hash_expand (struct mapped_symtab *symtab) ++void ++mapped_symtab::hash_expand () + { +- auto old_entries = std::move (symtab->data); ++ auto old_entries = std::move (m_data); + +- symtab->data.clear (); +- symtab->data.resize (old_entries.size () * 2); ++ gdb_assert (m_data.size () == 0); ++ m_data.resize (old_entries.size () * 2); + + for (auto &it : old_entries) + if (it.name != NULL) + { +- auto &ref = find_slot (symtab, it.name); ++ auto &ref = this->find_slot (it.name); + ref = std::move (it); + } + } + +-/* Add an entry to SYMTAB. NAME is the name of the symbol. +- CU_INDEX is the index of the CU in which the symbol appears. +- IS_STATIC is one if the symbol is static, otherwise zero (global). */ ++/* See class definition. */ + +-static void +-add_index_entry (struct mapped_symtab *symtab, const char *name, +- int is_static, gdb_index_symbol_kind kind, +- offset_type cu_index) ++void ++mapped_symtab::add_index_entry (const char *name, int is_static, ++ gdb_index_symbol_kind kind, ++ offset_type cu_index) + { +- symtab_index_entry *slot = &find_slot (symtab, name); ++ symtab_index_entry *slot = &this->find_slot (name); + if (slot->name == NULL) + { + /* This is a new element in the hash table. */ +- ++symtab->n_elements; ++ ++this->m_element_count; + + /* We might need to grow the hash table. */ +- if (4 * symtab->n_elements / 3 >= symtab->data.size ()) ++ if (this->hash_needs_expanding ()) + { +- hash_expand (symtab); ++ this->hash_expand (); + + /* This element will have a different slot in the new table. */ +- slot = &find_slot (symtab, name); ++ slot = &this->find_slot (name); + + /* But it should still be a new element in the hash table. */ + gdb_assert (slot->name == nullptr); +@@ -388,7 +437,7 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool) + + /* We add all the index vectors to the constant pool first, to + ensure alignment is ok. */ +- for (symtab_index_entry &entry : symtab->data) ++ for (symtab_index_entry &entry : *symtab) + { + if (entry.name == NULL) + continue; +@@ -417,7 +466,7 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool) + + /* Now write out the hash table. */ + std::unordered_map str_table; +- for (const auto &entry : symtab->data) ++ for (const auto &entry : *symtab) + { + offset_type str_off, vec_off; + +@@ -1149,7 +1198,7 @@ write_cooked_index (cooked_index_vector *table, + const auto it = cu_index_htab.find (entry->per_cu); + gdb_assert (it != cu_index_htab.cend ()); + +- const char *name = entry->full_name (&symtab->m_string_obstack); ++ const char *name = entry->full_name (symtab->obstack ()); + + if (entry->per_cu->lang () == language_ada) + { +@@ -1169,7 +1218,7 @@ write_cooked_index (cooked_index_vector *table, + gdb, it has to use the encoded name, with any + suffixes stripped. */ + std::string encoded = ada_encode (name, false); +- name = obstack_strdup (&symtab->m_string_obstack, ++ name = obstack_strdup (symtab->obstack (), + encoded.c_str ()); + } + } +@@ -1202,8 +1251,8 @@ write_cooked_index (cooked_index_vector *table, + else + kind = GDB_INDEX_SYMBOL_KIND_TYPE; + +- add_index_entry (symtab, name, (entry->flags & IS_STATIC) != 0, +- kind, it->second); ++ symtab->add_index_entry (name, (entry->flags & IS_STATIC) != 0, ++ kind, it->second); + } + } + +@@ -1281,8 +1330,6 @@ write_gdbindex (dwarf2_per_objfile *per_objfile, + symtab.minimize (); + + data_buf symtab_vec, constant_pool; +- if (symtab.n_elements == 0) +- symtab.data.resize (0); + + write_hash_table (&symtab, symtab_vec, constant_pool); + diff --git a/gdb-rhbz-2232086-generate-dwarf-5-index-consistently.patch b/gdb-rhbz-2232086-generate-dwarf-5-index-consistently.patch new file mode 100644 index 0000000..6b21c14 --- /dev/null +++ b/gdb-rhbz-2232086-generate-dwarf-5-index-consistently.patch @@ -0,0 +1,101 @@ +From FEDORA_PATCHES Mon Sep 17 00:00:00 2001 +From: Andrew Burgess +Date: Mon, 27 Nov 2023 13:19:39 +0000 +Subject: gdb-rhbz-2232086-generate-dwarf-5-index-consistently.patch + +;; Back-port upstream commit 3644f41dc80 as part of a fix for +;; non-deterministic gdb-index generation (RH BZ 2232086). + +gdb: generate dwarf-5 index identically as worker-thread count changes + +Similar to the previous commit, this commit ensures that the dwarf-5 +index files are generated identically as the number of worker-threads +changes. + +Building the dwarf-5 index makes use of a closed hash table, the +bucket_hash local within debug_names::build(). Entries are added to +bucket_hash from m_name_to_value_set, which, in turn, is populated +by calls to debug_names::insert() in write_debug_names. The insert +calls are ordered based on the entries within the cooked_index, and +the ordering within cooked_index depends on the number of worker +threads that GDB is using. + +My proposal is to sort each chain within the bucket_hash closed hash +table prior to using this to build the dwarf-5 index. + +The buckets within bucket_hash will always have the same ordering (for +a given GDB build with a given executable), and by sorting the chains +within each bucket, we can be sure that GDB will see each entry in a +deterministic order. + +I've extended the index creation test to cover this case. + +Approved-By: Tom Tromey + +diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c +--- a/gdb/dwarf2/index-write.c ++++ b/gdb/dwarf2/index-write.c +@@ -453,6 +453,11 @@ class c_str_view + return strcmp (m_cstr, other.m_cstr) == 0; + } + ++ bool operator< (const c_str_view &other) const ++ { ++ return strcmp (m_cstr, other.m_cstr) < 0; ++ } ++ + /* Return the underlying C string. Note, the returned string is + only a reference with lifetime of this object. */ + const char *c_str () const +@@ -770,10 +775,18 @@ class debug_names + } + for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix) + { +- const std::forward_list &hashitlist +- = bucket_hash[bucket_ix]; ++ std::forward_list &hashitlist = bucket_hash[bucket_ix]; + if (hashitlist.empty ()) + continue; ++ ++ /* Sort the items within each bucket. This ensures that the ++ generated index files will be the same no matter the order in ++ which symbols were added into the index. */ ++ hashitlist.sort ([] (const hash_it_pair &a, const hash_it_pair &b) ++ { ++ return a.it->first < b.it->first; ++ }); ++ + uint32_t &bucket_slot = m_bucket_table[bucket_ix]; + /* The hashes array is indexed starting at 1. */ + store_unsigned_integer (reinterpret_cast (&bucket_slot), +diff --git a/gdb/testsuite/gdb.gdb/index-file.exp b/gdb/testsuite/gdb.gdb/index-file.exp +--- a/gdb/testsuite/gdb.gdb/index-file.exp ++++ b/gdb/testsuite/gdb.gdb/index-file.exp +@@ -47,6 +47,9 @@ remote_exec host "mkdir -p ${dir1}" + with_timeout_factor $timeout_factor { + gdb_test_no_output "save gdb-index $dir1" \ + "create gdb-index file" ++ ++ gdb_test_no_output "save gdb-index -dwarf-5 $dir1" \ ++ "create dwarf-index files" + } + + # Close GDB. +@@ -143,13 +146,16 @@ if { $worker_threads > 1 } { + with_timeout_factor $timeout_factor { + gdb_test_no_output "save gdb-index $dir2" \ + "create second gdb-index file" ++ ++ gdb_test_no_output "save gdb-index -dwarf-5 $dir2" \ ++ "create second dwarf-index files" + } + + # Close GDB. + gdb_exit + + # Now check that the index files are identical. +- foreach suffix { gdb-index } { ++ foreach suffix { gdb-index debug_names debug_str } { + set result \ + [remote_exec host \ + "cmp -s \"$dir1/${index_filename_base}.${suffix}\" \"$dir2/${index_filename_base}.${suffix}\""] diff --git a/gdb-rhbz-2232086-generate-gdb-index-consistently.patch b/gdb-rhbz-2232086-generate-gdb-index-consistently.patch new file mode 100644 index 0000000..d0e5c95 --- /dev/null +++ b/gdb-rhbz-2232086-generate-gdb-index-consistently.patch @@ -0,0 +1,230 @@ +From FEDORA_PATCHES Mon Sep 17 00:00:00 2001 +From: Andrew Burgess +Date: Fri, 24 Nov 2023 12:04:36 +0000 +Subject: gdb-rhbz-2232086-generate-gdb-index-consistently.patch + +;; Back-port upstream commit aff250145af as part of a fix for +;; non-deterministic gdb-index generation (RH BZ 2232086). + +gdb: generate gdb-index identically regardless of work thread count + +It was observed that changing the number of worker threads that GDB +uses (maintenance set worker-threads NUM) would have an impact on the +layout of the generated gdb-index. + +The cause seems to be how the CU are distributed between threads, and +then symbols that appear in multiple CU can be encountered earlier or +later depending on whether a particular CU moves between threads. + +I certainly found this behaviour was reproducible when generating an +index for GDB itself, like: + + gdb -q -nx -nh -batch \ + -eiex 'maint set worker-threads NUM' \ + -ex 'save gdb-index /tmp/' + +And then setting different values for NUM will change the generated +index. + +Now, the question is: does this matter? + +I would like to suggest that yes, this does matter. At Red Hat we +generate a gdb-index as part of the build process, and we would +ideally like to have reproducible builds: for the same source, +compiled with the same tool-chain, we should get the exact same output +binary. And we do .... except for the index. + +Now we could simply force GDB to only use a single worker thread when +we build the index, but, I don't think the idea of reproducible builds +is that strange, so I think we should ensure that our generated +indexes are always reproducible. + +To achieve this, I propose that we add an extra step when building the +gdb-index file. After constructing the initial symbol hash table +contents, we will pull all the symbols out of the hash, sort them, +then re-insert them in sorted order. This will ensure that the +structure of the generated hash will remain consistent (given the same +set of symbols). + +I've extended the existing index-file test to check that the generated +index doesn't change if we adjust the number of worker threads used. +Given that this test is already rather slow, I've only made one change +to the worker-thread count. Maybe this test should be changed to use +a smaller binary, which is quicker to load, and for which we could +then try many different worker thread counts. + +Approved-By: Tom Tromey + +diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c +--- a/gdb/dwarf2/index-write.c ++++ b/gdb/dwarf2/index-write.c +@@ -210,6 +210,13 @@ struct mapped_symtab + void add_index_entry (const char *name, int is_static, + gdb_index_symbol_kind kind, offset_type cu_index); + ++ /* When entries are originally added into the data hash the order will ++ vary based on the number of worker threads GDB is configured to use. ++ This function will rebuild the hash such that the final layout will be ++ deterministic regardless of the number of worker threads used. */ ++ ++ void sort (); ++ + /* Access the obstack. */ + struct obstack *obstack () + { return &m_string_obstack; } +@@ -296,6 +303,65 @@ mapped_symtab::hash_expand () + } + } + ++/* See mapped_symtab class declaration. */ ++ ++void mapped_symtab::sort () ++{ ++ /* Move contents out of this->data vector. */ ++ std::vector original_data = std::move (m_data); ++ ++ /* Restore the size of m_data, this will avoid having to expand the hash ++ table (and rehash all elements) when we reinsert after sorting. ++ However, we do reset the element count, this allows for some sanity ++ checking asserts during the reinsert phase. */ ++ gdb_assert (m_data.size () == 0); ++ m_data.resize (original_data.size ()); ++ m_element_count = 0; ++ ++ /* Remove empty entries from ORIGINAL_DATA, this makes sorting quicker. */ ++ auto it = std::remove_if (original_data.begin (), original_data.end (), ++ [] (const symtab_index_entry &entry) -> bool ++ { ++ return entry.name == nullptr; ++ }); ++ original_data.erase (it, original_data.end ()); ++ ++ /* Sort the existing contents. */ ++ std::sort (original_data.begin (), original_data.end (), ++ [] (const symtab_index_entry &a, ++ const symtab_index_entry &b) -> bool ++ { ++ /* Return true if A is before B. */ ++ gdb_assert (a.name != nullptr); ++ gdb_assert (b.name != nullptr); ++ ++ return strcmp (a.name, b.name) < 0; ++ }); ++ ++ /* Re-insert each item from the sorted list. */ ++ for (auto &entry : original_data) ++ { ++ /* We know that ORIGINAL_DATA contains no duplicates, this data was ++ taken from a hash table that de-duplicated entries for us, so ++ count this as a new item. ++ ++ As we retained the original size of m_data (see above) then we ++ should never need to grow m_data_ during this re-insertion phase, ++ assert that now. */ ++ ++m_element_count; ++ gdb_assert (!this->hash_needs_expanding ()); ++ ++ /* Lookup a slot. */ ++ symtab_index_entry &slot = this->find_slot (entry.name); ++ ++ /* As discussed above, we should not find duplicates. */ ++ gdb_assert (slot.name == nullptr); ++ ++ /* Move this item into the slot we found. */ ++ slot = std::move (entry); ++ } ++} ++ + /* See class definition. */ + + void +@@ -1325,6 +1391,9 @@ write_gdbindex (dwarf2_per_objfile *per_objfile, + for (auto map : table->get_addrmaps ()) + write_address_map (map, addr_vec, cu_index_htab); + ++ /* Ensure symbol hash is built domestically. */ ++ symtab.sort (); ++ + /* Now that we've processed all symbols we can shrink their cu_indices + lists. */ + symtab.minimize (); +diff --git a/gdb/testsuite/gdb.gdb/index-file.exp b/gdb/testsuite/gdb.gdb/index-file.exp +--- a/gdb/testsuite/gdb.gdb/index-file.exp ++++ b/gdb/testsuite/gdb.gdb/index-file.exp +@@ -38,6 +38,9 @@ with_timeout_factor $timeout_factor { + clean_restart $filename + } + ++# Record how many worker threads GDB is using. ++set worker_threads [gdb_get_worker_threads] ++ + # Generate an index file. + set dir1 [standard_output_file "index_1"] + remote_exec host "mkdir -p ${dir1}" +@@ -116,3 +119,41 @@ proc check_symbol_table_usage { filename } { + + set index_filename_base [file tail $filename] + check_symbol_table_usage "$dir1/${index_filename_base}.gdb-index" ++ ++# If GDB is using more than 1 worker thread then reduce the number of ++# worker threads, regenerate the index, and check that we get the same ++# index file back. At one point the layout of the index would vary ++# based on the number of worker threads used. ++if { $worker_threads > 1 } { ++ # Start GDB, but don't load a file yet. ++ clean_restart ++ ++ # Adjust the number of threads to use. ++ set reduced_threads [expr $worker_threads / 2] ++ gdb_test_no_output "maint set worker-threads $reduced_threads" ++ ++ with_timeout_factor $timeout_factor { ++ # Now load the test binary. ++ gdb_file_cmd $filename ++ } ++ ++ # Generate an index file. ++ set dir2 [standard_output_file "index_2"] ++ remote_exec host "mkdir -p ${dir2}" ++ with_timeout_factor $timeout_factor { ++ gdb_test_no_output "save gdb-index $dir2" \ ++ "create second gdb-index file" ++ } ++ ++ # Close GDB. ++ gdb_exit ++ ++ # Now check that the index files are identical. ++ foreach suffix { gdb-index } { ++ set result \ ++ [remote_exec host \ ++ "cmp -s \"$dir1/${index_filename_base}.${suffix}\" \"$dir2/${index_filename_base}.${suffix}\""] ++ gdb_assert { [lindex $result 0] == 0 } \ ++ "$suffix files are identical" ++ } ++} +diff --git a/gdb/testsuite/lib/gdb.exp b/gdb/testsuite/lib/gdb.exp +--- a/gdb/testsuite/lib/gdb.exp ++++ b/gdb/testsuite/lib/gdb.exp +@@ -9274,6 +9274,21 @@ proc gdb_step_until { regexp {test_name ""} {max_steps 10} } { + } + } + ++# Return the number of worker threads that GDB is currently using. ++ ++proc gdb_get_worker_threads { {testname ""} } { ++ set worker_threads "UNKNOWN" ++ gdb_test_multiple "maintenance show worker-threads" $testname { ++ -wrap -re "The number of worker threads GDB can use is unlimited \\(currently ($::decimal)\\)\\." { ++ set worker_threads $expect_out(1,string) ++ } ++ -wrap -re "The number of worker threads GDB can use is ($::decimal)\\." { ++ set worker_threads $expect_out(1,string) ++ } ++ } ++ return $worker_threads ++} ++ + # Check if the compiler emits epilogue information associated + # with the closing brace or with the last statement line. + # diff --git a/gdb-rhbz-2232086-reduce-size-of-gdb-index.patch b/gdb-rhbz-2232086-reduce-size-of-gdb-index.patch new file mode 100644 index 0000000..9eaf615 --- /dev/null +++ b/gdb-rhbz-2232086-reduce-size-of-gdb-index.patch @@ -0,0 +1,222 @@ +From FEDORA_PATCHES Mon Sep 17 00:00:00 2001 +From: Andrew Burgess +Date: Fri, 24 Nov 2023 11:50:35 +0000 +Subject: gdb-rhbz-2232086-reduce-size-of-gdb-index.patch + +;; Back-port upstream commit aa19bc1d259 as part of a fix for +;; non-deterministic gdb-index generation (RH BZ 2232086). + +gdb: reduce size of generated gdb-index file + +I noticed in passing that out algorithm for generating the gdb-index +file is incorrect. When building the hash table in add_index_entry we +count every incoming entry rehash when the number of entries gets too +large. However, some of the incoming entries will be duplicates, +which don't actually result in new items being added to the hash +table. + +As a result, we grow the gdb-index hash table far too often. + +With an unmodified GDB, generating a gdb-index for GDB, I see a file +size of 90M, with a hash usage (in the generated index file) of just +2.6%. + +With a patched GDB, generating a gdb-index for the _same_ GDB binary, +I now see a gdb-index file size of 30M, with a hash usage of 41.9%. + +This is a 67% reduction in gdb-index file size. + +Obviously, not every gdb-index file is going to see such big savings, +however, the larger a program, and the more symbols that are +duplicated between compilation units, the more GDB would over count, +and so, over-grow the index. + +The gdb-index hash table we create has a minimum size of 1024, and +then we grow the hash when it is 75% full, doubling the hash table at +that time. Given this, then we expect that either: + + a. The hash table is size 1024, and less than 75% full, or + b. The hash table is between 37.5% and 75% full. + +I've include a test that checks some of these constraints -- I've not +bothered to check the upper limit, and over full hash table isn't +really a problem here, but if the fill percentage is less than 37.5% +then this indicates that we've done something wrong (obviously, I also +check for the 1024 minimum size). + +Approved-By: Tom Tromey + +diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c +--- a/gdb/dwarf2/index-write.c ++++ b/gdb/dwarf2/index-write.c +@@ -254,20 +254,29 @@ add_index_entry (struct mapped_symtab *symtab, const char *name, + int is_static, gdb_index_symbol_kind kind, + offset_type cu_index) + { +- offset_type cu_index_and_attrs; ++ symtab_index_entry *slot = &find_slot (symtab, name); ++ if (slot->name == NULL) ++ { ++ /* This is a new element in the hash table. */ ++ ++symtab->n_elements; + +- ++symtab->n_elements; +- if (4 * symtab->n_elements / 3 >= symtab->data.size ()) +- hash_expand (symtab); ++ /* We might need to grow the hash table. */ ++ if (4 * symtab->n_elements / 3 >= symtab->data.size ()) ++ { ++ hash_expand (symtab); + +- symtab_index_entry &slot = find_slot (symtab, name); +- if (slot.name == NULL) +- { +- slot.name = name; ++ /* This element will have a different slot in the new table. */ ++ slot = &find_slot (symtab, name); ++ ++ /* But it should still be a new element in the hash table. */ ++ gdb_assert (slot->name == nullptr); ++ } ++ ++ slot->name = name; + /* index_offset is set later. */ + } + +- cu_index_and_attrs = 0; ++ offset_type cu_index_and_attrs = 0; + DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index); + DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static); + DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind); +@@ -279,7 +288,7 @@ add_index_entry (struct mapped_symtab *symtab, const char *name, + the last entry pushed), but a symbol could have multiple kinds in one CU. + To keep things simple we don't worry about the duplication here and + sort and uniquify the list after we've processed all symbols. */ +- slot.cu_indices.push_back (cu_index_and_attrs); ++ slot->cu_indices.push_back (cu_index_and_attrs); + } + + /* See symtab_index_entry. */ +diff --git a/gdb/testsuite/gdb.gdb/index-file.exp b/gdb/testsuite/gdb.gdb/index-file.exp +new file mode 100644 +--- /dev/null ++++ b/gdb/testsuite/gdb.gdb/index-file.exp +@@ -0,0 +1,118 @@ ++# Copyright 2023 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++# Load the GDB executable, and then 'save gdb-index', and make some ++# checks of the generated index file. ++ ++load_lib selftest-support.exp ++ ++# Can't save an index with readnow. ++if {[readnow]} { ++ untested "cannot create an index when readnow is in use" ++ return -1 ++} ++ ++# A multiplier used to ensure slow tasks are less likely to timeout. ++set timeout_factor 20 ++ ++set filename [selftest_prepare] ++if { $filename eq "" } { ++ unsupported "${gdb_test_file_name}.exp" ++ return -1 ++} ++ ++with_timeout_factor $timeout_factor { ++ # Start GDB, load FILENAME. ++ clean_restart $filename ++} ++ ++# Generate an index file. ++set dir1 [standard_output_file "index_1"] ++remote_exec host "mkdir -p ${dir1}" ++with_timeout_factor $timeout_factor { ++ gdb_test_no_output "save gdb-index $dir1" \ ++ "create gdb-index file" ++} ++ ++# Close GDB. ++gdb_exit ++ ++# Validate that the index-file FILENAME has made efficient use of its ++# symbol hash table. Calculate the number of symbols in the hash ++# table and the total hash table size. The hash table starts with ++# 1024 entries, and then doubles each time it is filled to 75%. At ++# 75% filled, doubling the size takes it to 37.5% filled. ++# ++# Thus, the hash table is correctly filled if: ++# 1. Its size is 1024 (i.e. it has not yet had its first doubling), or ++# 2. Its filled percentage is over 37% ++# ++# We could check that it is not over filled, but I don't as that's not ++# really an issue. But we did once have a bug where the table was ++# doubled incorrectly, in which case we'd see a filled percentage of ++# around 2% in some cases, which is a huge waste of disk space. ++proc check_symbol_table_usage { filename } { ++ # Open the file in binary mode and read-only mode. ++ set fp [open $filename rb] ++ ++ # Configure the channel to use binary translation. ++ fconfigure $fp -translation binary ++ ++ # Read the first 8 bytes of the file, which contain the header of ++ # the index section. ++ set header [read $fp [expr 7 * 4]] ++ ++ # Scan the header to get the version, the CU list offset, and the ++ # types CU list offset. ++ binary scan $header iiiiii version \ ++ _ _ _ symbol_table_offset shortcut_offset ++ ++ # The length of the symbol hash table (in entries). ++ set len [expr ($shortcut_offset - $symbol_table_offset) / 8] ++ ++ # Now walk the hash table and count how many entries are in use. ++ set offset $symbol_table_offset ++ set count 0 ++ while { $offset < $shortcut_offset } { ++ seek $fp $offset ++ set entry [read $fp 8] ++ binary scan $entry ii name_ptr flags ++ if { $name_ptr != 0 } { ++ incr count ++ } ++ ++ incr offset 8 ++ } ++ ++ # Close the file. ++ close $fp ++ ++ # Calculate how full the cache is. ++ set pct [expr (100 * double($count)) / $len] ++ ++ # Write our results out to the gdb.log. ++ verbose -log "Hash table size: $len" ++ verbose -log "Hash table entries: $count" ++ verbose -log "Percentage usage: $pct%" ++ ++ # The minimum fill percentage is actually 37.5%, but we give TCL a ++ # little flexibility in case the FP maths give a result a little ++ # off. ++ gdb_assert { $len == 1024 || $pct > 37 } \ ++ "symbol hash table usage" ++} ++ ++set index_filename_base [file tail $filename] ++check_symbol_table_usage "$dir1/${index_filename_base}.gdb-index" diff --git a/gdb-rhbz2232086-refactor-selftest-support.patch b/gdb-rhbz2232086-refactor-selftest-support.patch new file mode 100644 index 0000000..e9febf7 --- /dev/null +++ b/gdb-rhbz2232086-refactor-selftest-support.patch @@ -0,0 +1,77 @@ +From FEDORA_PATCHES Mon Sep 17 00:00:00 2001 +From: Andrew Burgess +Date: Fri, 24 Nov 2023 11:10:08 +0000 +Subject: gdb-rhbz2232086-refactor-selftest-support.patch + +;; Back-port upstream commit 1f0fab7ff86 as part of a fix for +;; non-deterministic gdb-index generation (RH BZ 2232086). + +gdb/testsuite: small refactor in selftest-support.exp + +Split out the code that makes a copy of the GDB executable ready for +self testing into a new proc. A later commit in this series wants to +load the GDB executable into GDB (for creating an on-disk debug +index), but doesn't need to make use of the full do_self_tests proc. + +There should be no changes in what is tested after this commit. + +Approved-By: Tom Tromey + +diff --git a/gdb/testsuite/lib/selftest-support.exp b/gdb/testsuite/lib/selftest-support.exp +--- a/gdb/testsuite/lib/selftest-support.exp ++++ b/gdb/testsuite/lib/selftest-support.exp +@@ -92,11 +92,13 @@ proc selftest_setup { executable function } { + return 0 + } + +-# A simple way to run some self-tests. +- +-proc do_self_tests {function body} { +- global GDB tool +- ++# Prepare for running a self-test by moving the GDB executable to a ++# location where we can use it as the inferior. Return the filename ++# of the new location. ++# ++# If the current testing setup is not suitable for running a ++# self-test, then return an empty string. ++proc selftest_prepare {} { + # Are we testing with a remote board? In that case, the target + # won't have access to the GDB's auxilliary data files + # (data-directory, etc.). It's simpler to just skip. +@@ -120,19 +122,31 @@ proc do_self_tests {function body} { + # Run the test with self. Copy the file executable file in case + # this OS doesn't like to edit its own text space. + +- set GDB_FULLPATH [find_gdb $GDB] ++ set gdb_fullpath [find_gdb $::GDB] + + if {[is_remote host]} { +- set xgdb x$tool ++ set xgdb x$::tool + } else { +- set xgdb [standard_output_file x$tool] ++ set xgdb [standard_output_file x$::tool] + } + + # Remove any old copy lying around. + remote_file host delete $xgdb + ++ set filename [remote_download host $gdb_fullpath $xgdb] ++ ++ return $filename ++} ++ ++# A simple way to run some self-tests. ++ ++proc do_self_tests {function body} { ++ set file [selftest_prepare] ++ if { $file eq "" } { ++ return ++ } ++ + gdb_start +- set file [remote_download host $GDB_FULLPATH $xgdb] + + # When debugging GDB with GDB, some operations can take a relatively long + # time, especially if the build is non-optimized. Bump the timeout for the diff --git a/gdb.spec b/gdb.spec index fb99bdd..7e8141f 100644 --- a/gdb.spec +++ b/gdb.spec @@ -57,7 +57,7 @@ Version: 13.2 # The release always contains a leading reserved number, start it at 1. # `upstream' is not a part of `name' to stay fully rpm dependencies compatible for the testing. -Release: 11%{?dist} +Release: 12%{?dist} License: GPL-3.0-or-later AND BSD-3-clause AND FSFAP AND LGPL-2.1-or-later AND GPL-2.0-or-later AND LGPL-2.0-or-later AND LicenseRef-Fedora-Public-Domain AND GFDL-1.3-or-later AND LGPL-2.0-or-later WITH GCC-exception-2.0 AND GPL-3.0-or-later WITH GCC-exception-3.1 AND GPL-2.0-or-later WITH GNU-compiler-exception # Do not provide URL for snapshots as the file lasts there only for 2 days. @@ -1252,6 +1252,13 @@ fi %endif %changelog +* Tue Nov 28 2023 Andrew Burgess +- Backport upstream commits 1f0fab7ff86, aa19bc1d259, acc117b57f7, + aff250145af, and 3644f41dc80. These commits reduce the size of the + generated gdb-index file, and also ensure that the gdb-index and + dwarf-5 index are generated consistently even as the number of + worker threads that GDB uses changes (RHBZ 2232086). + * Thu Oct 19 2023 Alexandra Hájková - Remove gdb-6.5-ia64-libunwind-leak-test.patch. The patch doesn't include any actual fixes, the architecture