无锁hash

数据结构

LF_HASH

typedef struct st_lf_hash {
  LF_DYNARRAY array;                    /* hash itself */ 这个内部的element size 是sizeof(LF_SLIST *)
  LF_ALLOCATOR alloc;                   /* allocator for elements */
  my_hash_get_key get_key;              /* see HASH */  --》query_cache_table_get_key
  CHARSET_INFO *charset;                /* see HASH */
  lf_hash_func *hash_function;          /* see HASH */
  uint key_offset, key_length;          /* see HASH */
  uint element_size;                    /* size of memcpy'ed area on insert */
  uint flags;                           /* LF_HASH_UNIQUE, etc */
  int32 volatile size;                  /* size of array */
  int32 volatile count;                 /* number of elements in the hash */
  /**
    "Initialize" hook - called to finish initialization of object provided by
     LF_ALLOCATOR (which is pointed by "dst" parameter) and set element key
     from object passed as parameter to lf_hash_insert (pointed by "src"
     parameter). Allows to use LF_HASH with objects which are not "trivially
     copyable".
     NULL value means that element initialization is carried out by copying
     first element_size bytes from object which provided as parameter to
     lf_hash_insert.
  */
  lf_hash_init_func *initialize;
} LF_HASH;

LF_DYNARRAY 是真正存放element 的地方
存放element后，防止在使用的过程中， element 被释放了，因此需要一个allocator, 里面关键是存放pinbox，当使用一个元素时，申请一个lf_pins, 将这个指针放到lf_pins->pin中，所有已回收的LF_PINS还是在数组中，不过使用stack来管理，pinstack_top_ver就是这个栈顶。前面说每个LF_PINS都有一个purgatory链表，链表中每个元素的next指针实际上是在addr的指定偏移位置，这个位置即由free_ptr_offset指定，free_func和free_func_arg则是当某个LF_PINS的purgatory等于10时，真正释放他们的回调函数及参数

LF_DYNARRAY

## LF_DYNARRAY & LF_ALLOCATOR & LF_PINBOX

LF_DYNARRAY

#define LF_DYNARRAY_LEVEL_LENGTH 256
#define LF_DYNARRAY_LEVELS       4

typedef struct {
  void * volatile level[LF_DYNARRAY_LEVELS];
  uint size_of_element;
} LF_DYNARRAY;

level[0]：一个包含256个元素的数组，其中每个元素大小为size_of_element
level[1]：一个包含256个指针的数组，其中每个指针指向1一个同level[0]定义的数组
level[2]：一个包含256个指针的数组，其中每个指针指向1一个同level[1]定义的数组
level[3]：一个包含256个指针的数组，其中每个指针指向1一个同level[2]定义的数组

LF_ALLOCATOR

LF_ALLOCATOR是一个简易的内存分配器，它里面包含一个LF_PINBOX，所有线程首先先从pinbox中申请一个LF_PINS，然后需要申请内存时就向LF_ALLOCATOR来申请（每次申请的大小固定），将申请到的地址pin在自己的LF_PINS当中。LF_ALLOCATOR将线程free掉的内存同样是用free stack来缓存起来，先看定义：

typedef struct st_lf_allocator {
  LF_PINBOX pinbox; --》 这个这样初始化
           //lf_pinbox_init(&allocator->pinbox, 
           //      free_ptr_offset （实际上是offsetof(LF_SLIST, key)）,
           //      (lf_pinbox_free_func *)alloc_free, allocator);
                 
  uchar * volatile top;
  uint element_size;  --》 sizeof(LF_SLIST)+lf_hash->element_size
  uint32 volatile mallocs;
  lf_allocator_func *constructor; /* called, when an object is malloc()'ed */
  lf_allocator_func *destructor;  /* called, when an object is free()'d    */
} LF_ALLOCATOR;

LF_PINBOX

pinstack_top_ver 是要解决aba 的问题的

typedef struct {
  LF_DYNARRAY pinarray;  -->> lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
  lf_pinbox_free_func *free_func;   --》 (lf_pinbox_free_func *)alloc_free
  void *free_func_arg;    --》  //LF_HASH->alloc
  uint free_ptr_offset;    --> offsetof(LF_SLIST, key)
  uint32 volatile pinstack_top_ver;         /* this is a versioned pointer */ 
  uint32 volatile pins_in_array;            /* number of elements in array */
} LF_PINBOX;

LF_PINS


typedef struct st_lf_pins {
  void * volatile pin[LF_PINBOX_PINS];
  LF_PINBOX *pinbox;
  void  *purgatory;
  uint32 purgatory_count;
  uint32 volatile link;
/* we want sizeof(LF_PINS) to be 64 to avoid false sharing */
#if SIZEOF_INT*2+SIZEOF_CHARP*(LF_PINBOX_PINS+2) != 64
  char pad[64-sizeof(uint32)*2-sizeof(void*)*(LF_PINBOX_PINS+2)];
#endif
} LF_PINS;

LF_SLIST

typedef struct {
  intptr volatile link; /* a pointer to the next element in a listand a flag */
  uint32 hashnr;        /* reversed hash number, for sorting                 */
  const uchar *key;
  size_t keylen;
  /*
    data is stored here, directly after the keylen.
    thus the pointer to data is (void*)(slist_element_ptr+1)
  */
} LF_SLIST;

# # 初始化来看一下如何使用它，
query cache 中有一个LF_HASH,
初始化时是这样：

1 2	lf_hash_init(&queries, sizeof(Query_cache_block *), LF_HASH_UNIQUE, 0, 0, query_cache_query_get_key, &my_charset_bin);

真正使用上是这样

#define lf_hash_init(A, B, C, D, E, F, G) \
          lf_hash_init2(A, B, C, D, E, F, G, NULL, NULL, NULL, NULL)
void lf_hash_init2(LF_HASH *hash, uint element_size, uint flags,
                   uint key_offset, uint key_length, my_hash_get_key get_key,
                   CHARSET_INFO *charset, lf_hash_func *hash_function,
                   lf_allocator_func *ctor, lf_allocator_func *dtor,
                   lf_hash_init_func *init);

看看它的数据结构，基本就和初始化部分比较吻合

初始化里面，比较简单，就几个注意点

1 2	hash->charset= charset ? charset : &my_charset_bin; hash->hash_function= hash_function ? hash_function : cset_hash_sort_adapter;

介绍2个子数据结构的初始化 hash->array，在lf_hash_init2函数中

lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));

void lf_dynarray_init(LF_DYNARRAY *array, uint element_size)
{
  memset(array, 0, sizeof(*array));
  array->size_of_element= element_size;
}

数据结构如下

第二个子数据结构 hash->alloc，在函数 lf_hash_init2 中
初始化调用

1 2	lf_alloc_init2(&hash->alloc, sizeof(LF_SLIST)+lf_hash->element_size, offsetof(LF_SLIST, key), ctor, dtor);

再来看看数据结构

我们再来看pinbox 数据结构


//初始化过程
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
                    lf_pinbox_free_func *free_func, void *free_func_arg)
{
  DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
  compile_time_assert(sizeof(LF_PINS) == 64);
  lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
  pinbox->pinstack_top_ver= 0;
  pinbox->pins_in_array= 0;
  pinbox->free_ptr_offset= free_ptr_offset;
  pinbox->free_func= free_func; 
  pinbox->free_func_arg= free_func_arg; 
}

使用

获取pin

lf_hash_get_pins(&queries)

先看一下pin的数据结构

LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox) {
  uint32 pins, next, top_ver;
  LF_PINS *el;
  /*
    We have an array of max. 64k elements.
    The highest index currently allocated is pinbox->pins_in_array.
    Freed elements are in a lifo stack, pinstack_top_ver.
    pinstack_top_ver is 32 bits; 16 low bits are the index in the
    array, to the first element of the list. 16 high bits are a version
    (every time the 16 low bits are updated, the 16 high bits are
    incremented). Versioning prevents the ABA problem.
  */
  // 先检查free stack中是否有之前别的线程还回来的
  top_ver = pinbox->pinstack_top_ver;
  do {
    // 因为pinbox->pinarray最多64K个元素，而pinbox->pinstack_top_ver是32bit，
    // 所以他只有低16位是真正的stack top idx，高16位是version
    // 所以这里给top_ver取模LF_PINBOX_MAX_PINS便可以拿到stack top idx
    if (!(pins = top_ver % LF_PINBOX_MAX_PINS)) {
      /* the stack of free elements is empty */
      // 如果stack top index为0代表当前free stack里没有，则扩展pinbox->pinarray
      // 的元素个数，加1，取一个新的位置
      pins = pinbox->pins_in_array.fetch_add(1) + 1;
      if (unlikely(pins >= LF_PINBOX_MAX_PINS)) {
        return 0;
      }
      /*
        note that the first allocated element has index 1 (pins==1).
        index 0 is reserved to mean "NULL pointer"
      */
      拿到新位置对应在pinbox->pins_in_array中的元素地址
      el = (LF_PINS *)lf_dynarray_lvalue(&pinbox->pinarray, pins);
      if (unlikely(!el)) {
        return 0;
      }
      break;
    }
    // 如果free stack里有元素，则弹出top位置的元素
    el = (LF_PINS *)lf_dynarray_value(&pinbox->pinarray, pins);
    next = el->link;
    // CAS更新pinbox->pinstack_top_ver，后面+LF_PINBOX_MAX_PINS是为了
    // 更新前16位的version,这样，每次修改top位置之后，其version都会加1，然后
    // 在CAS中同时比较top idx和version来避免ABA问题
  } while (!atomic_compare_exchange_strong(
      &pinbox->pinstack_top_ver, &top_ver,
      top_ver - pins + next + LF_PINBOX_MAX_PINS));
  /*
    set el->link to the index of el in the dynarray (el->link has two usages:
    - if element is allocated, it's its own index
    - if element is free, it's its next element in the free stack
  */
  // 当LF_PINS分配出去的时候，其link指向的是自己在pinbox->pins_in_array
  // 中的index
  // 当LF_PINS被还回来push到free stack中时，其link指向的是next element
  el->link = pins;
  el->purgatory_count = 0;
  el->pinbox = pinbox;
  return el;
}

Longda's Interesting World

MySQL 源码解读 -- 无锁hash

无锁hash

数据结构

LF_HASH

LF_DYNARRAY

LF_DYNARRAY

LF_DYNARRAY

LF_ALLOCATOR

LF_PINBOX

LF_PINS

LF_SLIST

使用

获取pin