[binutils.git] / gdb / bcache.c

/* Implement a cached obstack.
   Written by Fred Fish <[email protected]>
   Rewritten by Jim Blandy <[email protected]>
   Copyright 1999 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

#include <stddef.h>
#include <stdlib.h>

#include "defs.h"
#include "obstack.h"
#include "bcache.h"
#include "gdb_string.h"		/* For memcpy declaration */


\f
/* The hash function.  */

unsigned long
hash (void *addr, int length)
{
  /* If it's a short string, hash on every character.  Otherwise, sample
     characters from throughout the string.  */
  if (length <= 64)
    {
      char *byte = addr;
      unsigned long h = 0;
      int i;

      for (i = 0; i < length; i++)
	h = h * 65793 ^ (h >> (sizeof (h) * 8 - 6)) ^ byte[i];

      return h;
    }
  else
    {
      char *byte = addr;
      int n, i;
      unsigned long h = 0;

      for (n = i = 0; n < 64; n++)
	{
	  h = h * 65793 + (h >> (sizeof (h) * 8 - 6)) + byte[i];
	  i = h % length;
	}

      return h;
    }
}

\f
/* Growing the bcache's hash table.  */

/* If the average chain length grows beyond this, then we want to
   resize our hash table.  */
#define CHAIN_LENGTH_THRESHOLD (5)

static void
expand_hash_table (struct bcache *bcache)
{
  /* A table of good hash table sizes.  Whenever we grow, we pick the
     next larger size from this table.  sizes[i] is close to 1 << (i+10),
     so we roughly double the table size each time.  After we fall off 
     the end of this table, we just double.  Don't laugh --- there have
     been executables sighted with a gigabyte of debug info.  */
  static unsigned long sizes[] = { 
    1021, 2053, 4099, 8191, 16381, 32771,
    65537, 131071, 262144, 524287, 1048573, 2097143,
    4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
    268435459, 536870923, 1073741827, 2147483659UL
  };
  unsigned int new_num_buckets;
  struct bstring **new_buckets;
  unsigned int i;

  /* Find the next size.  */
  new_num_buckets = bcache->num_buckets * 2;
  for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
    if (sizes[i] > bcache->num_buckets)
      {
	new_num_buckets = sizes[i];
	break;
      }

  /* Allocate the new table.  */
  {
    size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
    new_buckets = (struct bstring **) xmalloc (new_size);
    memset (new_buckets, 0, new_size);

    bcache->structure_size -= (bcache->num_buckets
			       * sizeof (bcache->bucket[0]));
    bcache->structure_size += new_size;
  }

  /* Rehash all existing strings.  */
  for (i = 0; i < bcache->num_buckets; i++)
    {
      struct bstring *s, *next;

      for (s = bcache->bucket[i]; s; s = next)
	{
	  struct bstring **new_bucket;
	  next = s->next;

	  new_bucket = &new_buckets[(hash (&s->d.data, s->length)
				     % new_num_buckets)];
	  s->next = *new_bucket;
	  *new_bucket = s;
	}
    }

  /* Plug in the new table.  */
  if (bcache->bucket)
    free (bcache->bucket);
  bcache->bucket = new_buckets;
  bcache->num_buckets = new_num_buckets;
}

\f
/* Looking up things in the bcache.  */

/* The number of bytes needed to allocate a struct bstring whose data
   is N bytes long.  */
#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))

/* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
   never seen those bytes before, add a copy of them to BCACHE.  In
   either case, return a pointer to BCACHE's copy of that string.  */
void *
bcache (void *addr, int length, struct bcache *bcache)
{
  int hash_index;
  struct bstring *s;

  /* If our average chain length is too high, expand the hash table.  */
  if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
    expand_hash_table (bcache);

  bcache->total_count++;
  bcache->total_size += length;

  hash_index = hash (addr, length) % bcache->num_buckets;

  /* Search the hash bucket for a string identical to the caller's.  */
  for (s = bcache->bucket[hash_index]; s; s = s->next)
    if (s->length == length
	&& ! memcmp (&s->d.data, addr, length))
      return &s->d.data;

  /* The user's string isn't in the list.  Insert it after *ps.  */
  {
    struct bstring *new
      = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
    memcpy (&new->d.data, addr, length);
    new->length = length;
    new->next = bcache->bucket[hash_index];
    bcache->bucket[hash_index] = new;

    bcache->unique_count++;
    bcache->unique_size += length;
    bcache->structure_size += BSTRING_SIZE (length);

    return &new->d.data;
  }
}

\f
/* Freeing bcaches.  */

/* Free all the storage associated with BCACHE.  */
void
free_bcache (struct bcache *bcache)
{
  obstack_free (&bcache->cache, 0);
  free (bcache->bucket);

  /* This isn't necessary, but at least the bcache is always in a
     consistent state.  */
  memset (bcache, 0, sizeof (*bcache));
}


\f
/* Printing statistics.  */

static int
compare_ints (const void *ap, const void *bp)
{
  /* Because we know we're comparing two ints which are positive,
     there's no danger of overflow here.  */
  return * (int *) ap - * (int *) bp;
}


static void
print_percentage (int portion, int total)
{
  if (total == 0)
    printf_filtered ("(not applicable)\n");
  else
    printf_filtered ("%3d%%\n", portion * 100 / total);
}


/* Print statistics on BCACHE's memory usage and efficacity at
   eliminating duplication.  NAME should describe the kind of data
   BCACHE holds.  Statistics are printed using `printf_filtered' and
   its ilk.  */
void
print_bcache_statistics (struct bcache *c, char *type)
{
  int occupied_buckets;
  int max_chain_length;
  int median_chain_length;

  /* Count the number of occupied buckets, and measure chain lengths.  */
  {
    unsigned int b;
    int *chain_length
      = (int *) alloca (c->num_buckets * sizeof (*chain_length));

    occupied_buckets = 0;

    for (b = 0; b < c->num_buckets; b++)
      {
	struct bstring *s = c->bucket[b];

	chain_length[b] = 0;

	if (s)
	  {
	    occupied_buckets++;
	    
	    while (s)
	      {
		chain_length[b]++;
		s = s->next;
	      }
	  }
      }

    /* To compute the median, we need the set of chain lengths sorted.  */
    qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
	   compare_ints);

    if (c->num_buckets > 0)
      {
	max_chain_length = chain_length[c->num_buckets - 1];
	median_chain_length = chain_length[c->num_buckets / 2];
      }
    else
      {
	max_chain_length = 0;
	median_chain_length = 0;
      }
  }

  printf_filtered ("  Cached '%s' statistics:\n", type);
  printf_filtered ("    Total object count:  %ld\n", c->total_count);
  printf_filtered ("    Unique object count: %lu\n", c->unique_count);
  printf_filtered ("    Percentage of duplicates, by count: ");
  print_percentage (c->total_count - c->unique_count, c->total_count);
  printf_filtered ("\n");

  printf_filtered ("    Total object size:   %ld\n", c->total_size);
  printf_filtered ("    Unique object size:  %ld\n", c->unique_size);
  printf_filtered ("    Percentage of duplicates, by size:  ");
  print_percentage (c->total_size - c->unique_size, c->total_size);
  printf_filtered ("\n");

  printf_filtered ("    Total memory used by bcache, including overhead: %ld\n",
		   c->structure_size);
  printf_filtered ("    Percentage memory overhead: ");
  print_percentage (c->structure_size - c->unique_size, c->unique_size);
  printf_filtered ("    Net memory savings:         ");
  print_percentage (c->total_size - c->structure_size, c->total_size);
  printf_filtered ("\n");

  printf_filtered ("    Hash table size:           %3d\n", c->num_buckets);
  printf_filtered ("    Hash table population:     ");
  print_percentage (occupied_buckets, c->num_buckets);
  printf_filtered ("    Median hash chain length:  %3d\n",
		   median_chain_length);
  printf_filtered ("    Average hash chain length: ");
  if (c->num_buckets > 0)
    printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
  else
    printf_filtered ("(not applicable)\n");
  printf_filtered ("    Maximum hash chain length: %3d\n", max_chain_length);
  printf_filtered ("\n");
}
Commit	Line	Data
c906108c	1	/* Implement a cached obstack.
c2d11a7d JM	2	Written by Fred Fish <[email protected]>
	3	Rewritten by Jim Blandy <[email protected]>
	4	Copyright 1999 Free Software Foundation, Inc.
c906108c	5
c5aa993b	6	This file is part of GDB.
c906108c	7
c5aa993b JM	8	This program is free software; you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation; either version 2 of the License, or
	11	(at your option) any later version.
c906108c	12
c5aa993b JM	13	This program is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
c906108c	17
c5aa993b JM	18	You should have received a copy of the GNU General Public License
	19	along with this program; if not, write to the Free Software
	20	Foundation, Inc., 59 Temple Place - Suite 330,
	21	Boston, MA 02111-1307, USA. */
c906108c	22
c2d11a7d JM	23	#include <stddef.h>
	24	#include <stdlib.h>
	25
c906108c SS	26	#include "defs.h"
	27	#include "obstack.h"
	28	#include "bcache.h"
	29	#include "gdb_string.h" /* For memcpy declaration */
	30
c906108c	31
c2d11a7d JM	32	\f
c2d11a7d JM	33	/* The hash function. */
c906108c	34
c2d11a7d JM	35	unsigned long
	36	hash (void *addr, int length)
	37	{
	38	/* If it's a short string, hash on every character. Otherwise, sample
	39	characters from throughout the string. */
	40	if (length <= 64)
	41	{
	42	char *byte = addr;
	43	unsigned long h = 0;
	44	int i;
c906108c	45
c2d11a7d JM	46	for (i = 0; i < length; i++)
c2d11a7d JM	47	h = h * 65793 ^ (h >> (sizeof (h) * 8 - 6)) ^ byte[i];
c906108c	48
c2d11a7d JM	49	return h;
	50	}
	51	else
c906108c	52	{
c2d11a7d JM	53	char *byte = addr;
	54	int n, i;
	55	unsigned long h = 0;
	56
	57	for (n = i = 0; n < 64; n++)
	58	{
	59	h = h * 65793 + (h >> (sizeof (h) * 8 - 6)) + byte[i];
	60	i = h % length;
	61	}
	62
	63	return h;
c906108c	64	}
c906108c SS	65	}
c906108c SS	66
c2d11a7d JM	67	\f
	68	/* Growing the bcache's hash table. */
	69
	70	/* If the average chain length grows beyond this, then we want to
	71	resize our hash table. */
	72	#define CHAIN_LENGTH_THRESHOLD (5)
	73
	74	static void
	75	expand_hash_table (struct bcache *bcache)
c906108c	76	{
c2d11a7d JM	77	/* A table of good hash table sizes. Whenever we grow, we pick the
	78	next larger size from this table. sizes[i] is close to 1 << (i+10),
	79	so we roughly double the table size each time. After we fall off
	80	the end of this table, we just double. Don't laugh --- there have
	81	been executables sighted with a gigabyte of debug info. */
	82	static unsigned long sizes[] = {
	83	1021, 2053, 4099, 8191, 16381, 32771,
	84	65537, 131071, 262144, 524287, 1048573, 2097143,
	85	4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
	86	268435459, 536870923, 1073741827, 2147483659UL
	87	};
745b8ca0	88	unsigned int new_num_buckets;
c2d11a7d	89	struct bstring **new_buckets;
745b8ca0	90	unsigned int i;
c2d11a7d JM	91
c2d11a7d JM	92	/* Find the next size. */
745b8ca0	93	new_num_buckets = bcache->num_buckets * 2;
c2d11a7d JM	94	for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
	95	if (sizes[i] > bcache->num_buckets)
	96	{
	97	new_num_buckets = sizes[i];
	98	break;
	99	}
c2d11a7d JM	100
	101	/* Allocate the new table. */
	102	{
	103	size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
	104	new_buckets = (struct bstring **) xmalloc (new_size);
	105	memset (new_buckets, 0, new_size);
	106
	107	bcache->structure_size -= (bcache->num_buckets
	108	* sizeof (bcache->bucket[0]));
	109	bcache->structure_size += new_size;
	110	}
c906108c	111
c2d11a7d JM	112	/* Rehash all existing strings. */
c2d11a7d JM	113	for (i = 0; i < bcache->num_buckets; i++)
c906108c	114	{
c2d11a7d JM	115	struct bstring s, next;
	116
	117	for (s = bcache->bucket[i]; s; s = next)
c906108c	118	{
c2d11a7d JM	119	struct bstring **new_bucket;
	120	next = s->next;
	121
	122	new_bucket = &new_buckets[(hash (&s->d.data, s->length)
	123	% new_num_buckets)];
	124	s->next = *new_bucket;
	125	*new_bucket = s;
c906108c SS	126	}
c906108c SS	127	}
c2d11a7d JM	128
	129	/* Plug in the new table. */
	130	if (bcache->bucket)
	131	free (bcache->bucket);
	132	bcache->bucket = new_buckets;
	133	bcache->num_buckets = new_num_buckets;
c906108c SS	134	}
c906108c SS	135
c2d11a7d JM	136	\f
	137	/* Looking up things in the bcache. */
	138
	139	/* The number of bytes needed to allocate a struct bstring whose data
	140	is N bytes long. */
	141	#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
	142
	143	/* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has
	144	never seen those bytes before, add a copy of them to BCACHE. In
	145	either case, return a pointer to BCACHE's copy of that string. */
c906108c	146	void *
c2d11a7d	147	bcache (void addr, int length, struct bcache bcache)
c906108c	148	{
c2d11a7d JM	149	int hash_index;
c2d11a7d JM	150	struct bstring *s;
c906108c	151
c2d11a7d JM	152	/* If our average chain length is too high, expand the hash table. */
	153	if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
	154	expand_hash_table (bcache);
	155
	156	bcache->total_count++;
	157	bcache->total_size += length;
	158
	159	hash_index = hash (addr, length) % bcache->num_buckets;
	160
	161	/* Search the hash bucket for a string identical to the caller's. */
	162	for (s = bcache->bucket[hash_index]; s; s = s->next)
	163	if (s->length == length
	164	&& ! memcmp (&s->d.data, addr, length))
	165	return &s->d.data;
	166
	167	/* The user's string isn't in the list. Insert it after ps. /
	168	{
	169	struct bstring *new
	170	= obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
	171	memcpy (&new->d.data, addr, length);
	172	new->length = length;
	173	new->next = bcache->bucket[hash_index];
	174	bcache->bucket[hash_index] = new;
	175
	176	bcache->unique_count++;
	177	bcache->unique_size += length;
	178	bcache->structure_size += BSTRING_SIZE (length);
	179
	180	return &new->d.data;
	181	}
c906108c SS	182	}
c906108c SS	183
c2d11a7d JM	184	\f
	185	/* Freeing bcaches. */
	186
	187	/* Free all the storage associated with BCACHE. */
c906108c	188	void
c2d11a7d	189	free_bcache (struct bcache *bcache)
c906108c	190	{
c2d11a7d JM	191	obstack_free (&bcache->cache, 0);
c2d11a7d JM	192	free (bcache->bucket);
c906108c	193
c2d11a7d JM	194	/* This isn't necessary, but at least the bcache is always in a
	195	consistent state. */
	196	memset (bcache, 0, sizeof (*bcache));
	197	}
	198
	199
	200	\f
	201	/* Printing statistics. */
	202
	203	static int
	204	compare_ints (const void ap, const void bp)
	205	{
	206	/* Because we know we're comparing two ints which are positive,
	207	there's no danger of overflow here. */
	208	return * (int ) ap - (int *) bp;
	209	}
	210
	211
	212	static void
	213	print_percentage (int portion, int total)
	214	{
	215	if (total == 0)
	216	printf_filtered ("(not applicable)\n");
c906108c	217	else
c2d11a7d JM	218	printf_filtered ("%3d%%\n", portion * 100 / total);
	219	}
	220
	221
	222	/* Print statistics on BCACHE's memory usage and efficacity at
	223	eliminating duplication. NAME should describe the kind of data
	224	BCACHE holds. Statistics are printed using `printf_filtered' and
	225	its ilk. */
	226	void
	227	print_bcache_statistics (struct bcache c, char type)
	228	{
	229	int occupied_buckets;
	230	int max_chain_length;
	231	int median_chain_length;
	232
	233	/* Count the number of occupied buckets, and measure chain lengths. */
	234	{
745b8ca0	235	unsigned int b;
c2d11a7d JM	236	int *chain_length
	237	= (int ) alloca (c->num_buckets sizeof (*chain_length));
	238
	239	occupied_buckets = 0;
	240
	241	for (b = 0; b < c->num_buckets; b++)
	242	{
	243	struct bstring *s = c->bucket[b];
	244
	245	chain_length[b] = 0;
	246
	247	if (s)
	248	{
	249	occupied_buckets++;
	250
	251	while (s)
	252	{
	253	chain_length[b]++;
	254	s = s->next;
	255	}
	256	}
	257	}
	258
	259	/* To compute the median, we need the set of chain lengths sorted. */
	260	qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
	261	compare_ints);
	262
	263	if (c->num_buckets > 0)
	264	{
	265	max_chain_length = chain_length[c->num_buckets - 1];
	266	median_chain_length = chain_length[c->num_buckets / 2];
	267	}
	268	else
	269	{
	270	max_chain_length = 0;
	271	median_chain_length = 0;
	272	}
	273	}
	274
	275	printf_filtered (" Cached '%s' statistics:\n", type);
	276	printf_filtered (" Total object count: %ld\n", c->total_count);
745b8ca0	277	printf_filtered (" Unique object count: %lu\n", c->unique_count);
c2d11a7d JM	278	printf_filtered (" Percentage of duplicates, by count: ");
	279	print_percentage (c->total_count - c->unique_count, c->total_count);
	280	printf_filtered ("\n");
	281
	282	printf_filtered (" Total object size: %ld\n", c->total_size);
	283	printf_filtered (" Unique object size: %ld\n", c->unique_size);
	284	printf_filtered (" Percentage of duplicates, by size: ");
	285	print_percentage (c->total_size - c->unique_size, c->total_size);
	286	printf_filtered ("\n");
	287
	288	printf_filtered (" Total memory used by bcache, including overhead: %ld\n",
	289	c->structure_size);
	290	printf_filtered (" Percentage memory overhead: ");
	291	print_percentage (c->structure_size - c->unique_size, c->unique_size);
	292	printf_filtered (" Net memory savings: ");
	293	print_percentage (c->total_size - c->structure_size, c->total_size);
	294	printf_filtered ("\n");
	295
	296	printf_filtered (" Hash table size: %3d\n", c->num_buckets);
	297	printf_filtered (" Hash table population: ");
	298	print_percentage (occupied_buckets, c->num_buckets);
	299	printf_filtered (" Median hash chain length: %3d\n",
	300	median_chain_length);
	301	printf_filtered (" Average hash chain length: ");
	302	if (c->num_buckets > 0)
745b8ca0	303	printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
c906108c	304	else
c2d11a7d JM	305	printf_filtered ("(not applicable)\n");
	306	printf_filtered (" Maximum hash chain length: %3d\n", max_chain_length);
	307	printf_filtered ("\n");
c906108c	308	}