SlideShare a Scribd company logo
1 of 37
Download to read offline
PHP7 HASHTABLE
wangtong@panda.tv
2017-07
 PHP7 Array Feature
 PHP7 HashTable Struct
 HashTable Collisions
 Translation Table
 PHP7 HashTable Operation
 Example, Run php-src By GDB
 Packed HashTables
 Empty HashTable
 Application Scenarios
 PHP5 Hashtable
 Redis Hashtable
Agenda
We already know about php array
• Array 为 HashTable实现
• map,可字符数字做索引键
• Foreach 快于 for
• Foreach 顺序为插入顺序
• Count($arr) 快
• In_array 慢
• 理想情况Find 时间复杂度为 O(1)
• PHP有current,prev,next,each,end,reset等指针函数
PHP7 HashTable Struct
https://github.com/php/php-src/blob/PHP-7.0.11/Zend/zend_types.h#L176
/* ht struct */
/*
 * One element slot of a HashTable's arData array.
 *
 * Declared as a typedef so that the bare name `Bucket` used elsewhere
 * (e.g. `Bucket *arData`) refers to the type; without `typedef` the trailing
 * `Bucket` would declare a variable instead.
 */
typedef struct Bucket {
	zval val;         /* stored value; val.u2.next links the hash collision chain */
	zend_ulong h;     /* hash of the string key, or the integer key itself */
	zend_string *key; /* string key; tested for non-NULL before string comparison */
} Bucket;
/*
 * PHP 7 hash table (array) header, simplified for the slide.
 * The buckets live in arData; the translation table of uint32_t slots is
 * addressed through the same pointer at negative indices (see HT_HASH usage).
 */
struct HashTable {
zend_refcounted_h gc; // refcount / type info header (set via GC_REFCOUNT / GC_TYPE_INFO)
union {
struct { ZEND_ENDIAN_LOHI_4( /*... ellipsis ...*/) } v;
uint32_t flags; // HASH_FLAG_* bits, e.g. HASH_FLAG_PERSISTENT
} u;
uint32_t nTableMask; // negated table size stored unsigned, e.g. -16: (uint32_t)-16 == 4294967280; OR-ed with the hash to get the translation-table index
Bucket *arData; // array of buckets (the items), in insertion order
uint32_t nNumUsed; // next slot available in arData (counts deleted slots too)
uint32_t nNumOfElements; // number of live elements in arData
uint32_t nTableSize; // table size, always a power of two, min: 8
uint32_t nInternalPointer; // position used for iteration
zend_long nNextFreeElement; // next integer key used for automatic append
dtor_func_t pDestructor; // destructor applied to stored data
};
/*
 * PHP 7 value container: a value, its type information (u1) and one spare
 * 32-bit word (u2) whose meaning depends on where the zval is used.
 */
struct zval {
zend_value value; /* value */
union {
struct {
ZEND_ENDIAN_LOHI_4(
zend_uchar type, /* active type */
zend_uchar type_flags,
zend_uchar const_flags,
zend_uchar reserved) /* call info for EX(This) */
} v;
uint32_t type_info; /* the four bytes above viewed as one word */
} u1;
union {
uint32_t var_flags;
uint32_t next; /* hash collision chain (used by HashTable buckets) */
uint32_t cache_slot; /* literal cache slot */
uint32_t lineno; /* line number (for ast nodes) */
uint32_t num_args; /* arguments number for EX(This) */
uint32_t fe_pos; /* foreach position */
uint32_t fe_iter_idx; /* foreach iterator index */
} u2;
};
/*
 * Payload of a zval: exactly one member is meaningful at a time.
 * NOTE(review): which member is active is presumably selected by the zval's
 * u1.v.type field - confirm against zend_types.h.
 */
union zend_value {
zend_long lval; /* integer value */
double dval; /* floating point value */
zend_refcounted *counted;
zend_string *str;
zend_array *arr;
zend_object *obj;
zend_resource *res;
zend_reference *ref;
zend_ast_ref *ast;
zval *zv;
void *ptr;
zend_class_entry *ce;
zend_function *func;
struct {
uint32_t w1;
uint32_t w2;
} ww; /* the value viewed as two 32-bit words */
};
Hashtable collisions
链接法:桶是一个可容纳多个数据的数据结构(例如链表或红黑树)
开放寻址法:所有元素都存放在槽中(装载因子<0.5可考虑)
// Hash-table collision attack demo: inserts 2^16 integer keys that are all
// multiples of 2^16 and prints how long the inserts took.
$startedAt = microtime(true);
$size = pow(2, 16);
for ($i = 0, $k = 0; $i < $size; $i++, $k += $size) {
    $arr[$k] = 0;
}
$elapsed = microtime(true) - $startedAt;
echo $elapsed;
echo PHP_EOL;
// Timings reported on the slide: PHP 5.6 ~28s, PHP 7 ~7s
Translation Table
Hash layout
/*
 * Size helpers for the single allocation that backs a hash table:
 * a translation table of uint32_t slots followed by the Bucket array.
 * Each macro body is kept on an explicit continuation line so the
 * definitions survive line wrapping.
 */

/* Bytes used by the translation table: (-nTableMask) slots of uint32_t. */
#define HT_HASH_SIZE(nTableMask) \
	(((size_t)(uint32_t)-(int32_t)(nTableMask)) * sizeof(uint32_t))

/* Bytes used by the bucket array: nTableSize Buckets. */
#define HT_DATA_SIZE(nTableSize) \
	((size_t)(nTableSize) * sizeof(Bucket))

/* Total bytes for a table with the given size and mask. */
#define HT_SIZE_EX(nTableSize, nTableMask) \
	(HT_DATA_SIZE((nTableSize)) + HT_HASH_SIZE((nTableMask)))

/* Total bytes for an existing table `ht`. */
#define HT_SIZE(ht) \
	HT_SIZE_EX((ht)->nTableSize, (ht)->nTableMask)
/*
 * Illustration: one allocation of HT_SIZE(ht) bytes holds both the
 * translation table and the buckets.
 * NOTE(review): _zend_hash_init goes through HT_SET_DATA_ADDR, which offsets
 * the pointer past the hash area; this simplified line assigns the raw
 * allocation - confirm against zend_hash.c before copying.
 */
Bucket *arData;
arData = emalloc(HT_SIZE(ht)); /* now alloc this */
panda.tv tone shop … Bucket
…
val value …
… … … u1
-1 -1 1 u2.next
92236014990
29192316
0 65536 … h h …
0x… 0x… 0x… … *key *key nTableSize-1
0 1 2 3 4i idx=5 idx=6 7
gc …
u …
nTableMask -8
*arData 0x…
nNumUsed 3
nNumOfElements 3
nTableSize 8
nInternalPointer 0
nNextFreeElement 65537
pDestructor 0x…
…
922360…
7
company
gc
h
len
val[1]
2 0
-8 -7 -6 -5 -4 -3 -2 -1
// Sample array for the layout diagram: one string key and two integer keys.
// (The value for key 0 previously opened with a typographic quote, which
// PHP does not treat as a string delimiter.)
$arr = [
    'company' => 'panda.tv',
    0 => 'tone',
    65536 => 'shop'
];
0x0
Translation table bucket
PHP7 HashTable :
nIndex idx
PHP7 HashTable operation
https://github.com/php/php-src/blob/PHP-7.0.11/Zend/zend_hash.c#L552
HashTable Init
/*
 * Initialise `ht` as an empty hash table.
 *
 * ht          - table to initialise
 * nSize       - requested size, normalised by zend_hash_check_size()
 * pDestructor - stored as the table's data destructor
 * persistent  - non-zero sets HASH_FLAG_PERSISTENT
 *
 * No storage is requested here: the data address is pointed at the shared
 * `uninitialized_bucket` object.
 */
ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC)
{
	GC_REFCOUNT(ht) = 1;
	GC_TYPE_INFO(ht) = IS_ARRAY;
	ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS;
	ht->nTableSize = zend_hash_check_size(nSize);
	ht->nTableMask = HT_MIN_MASK; /* ((uint32_t) -2) */
	/*
	 * HT_SET_DATA_ADDR(ht, ptr) expands to:
	 *   do { (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); } while (0)
	 * i.e. arData is placed just past the hash slots that start at `ptr`.
	 */
	HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
	ht->nNumUsed = 0;
	ht->nNumOfElements = 0;
	ht->nInternalPointer = HT_INVALID_IDX;
	ht->nNextFreeElement = 0;
	ht->pDestructor = pDestructor;
}
Translation Table Demo
/* @auth xuruiliang@panda.tv, 在此感谢许老板帮写的demo*/
#include <iostream>
#include <cstdlib>
#include <cassert>
using namespace std;
// Stand-in for a Bucket: the payload type stored after the index slots.
struct P {
    int x, y;
};

// Number of index slots and of P elements in the single allocation.
const int P_SIZE = 10;

// Demonstrates the "translation table + data in one allocation" layout:
// P_SIZE int slots come first, followed by P_SIZE elements of P, and the
// working pointer `p` is moved to the start of the P array.
int main()
{
    // Plain aggregate initialisation; designated compound literals are a
    // C99 feature, not standard C++.
    const P p1 = {100, 101};

    // Keep the pointer returned by malloc so the block can be released;
    // the shifted pointer `p` must never be passed to free().
    void *base = malloc(P_SIZE * (sizeof(int) + sizeof(P)));
    assert(base != NULL);

    int *slots = (int *)base;
    slots[0] = 10;                 // first index slot

    P *p = (P *)(slots + P_SIZE);  // skip the index slots
    p[3] = p1;                     // fourth data element

    free(base);
    return 0;
}
/*
 * Excerpt of the string-key insert path (declarations and the lookup/update
 * branch are elided with "...").
 * Appends a new bucket at arData[nNumUsed] and makes it the head of the
 * collision chain of its translation-table slot.
 */
static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC)
{
	/* Expands to: if ((ht)->nNumUsed >= (ht)->nTableSize) { zend_hash_do_resize(ht); } */
	ZEND_HASH_IF_FULL_DO_RESIZE(ht);
	idx = ht->nNumUsed++;                 /* take the next available slot number */
	ht->nNumOfElements++;                 /* increment number of elements */
	/* ... */
	p = ht->arData + idx;                 /* get the bucket in that slot from arData */
	p->key = key;                         /* store the key we are inserting at */
	/* ... */
	p->h = h = ZSTR_H(key);               /* save the hash of the current key into the bucket */
	ZVAL_COPY_VALUE(&p->val, pData);      /* copy the value into the bucket's value: add */
	nIndex = h | ht->nTableMask;          /* get the translation table index */
	/* Z_NEXT(p->val) is p->val.u2.next: the old head of the slot becomes our successor */
	Z_NEXT(p->val) = HT_HASH(ht, nIndex);
	/*
	 * Equivalent to:
	 *   ((uint32_t*)((ht)->arData))[(int32_t)(nIndex)] = ((idx) * sizeof(Bucket))
	 * i.e. put us into the translation slot.
	 */
	HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx);
	/* ... remainder of the function is not shown on the slide ... */
}
HashTable Add
Hashtable添加更新元素
• nIndex为hash后的索引,idx为顺序索引
• nNumUsed+1, 用于idx,>=nTableSize将触发扩容
• nNumOfElements+1, 记录实际元素个数
• nTableSize, 用于申请内存空间的大小
• nNextFreeElement+1, 用于下一个自增数字索引值
• p->val.u2.next 实现了hash冲突的解决
• p = ht->arData + idx, Bucket内存地址
Hashtable del
/*
 * Delete the element stored under string key `key` (excerpt; local
 * declarations, including the initial value of `prev`, are elided).
 *
 * Walks the collision chain of the key's translation-table slot and deletes
 * the first bucket whose key matches by pointer, or by hash + length + bytes.
 * Returns SUCCESS if a bucket was deleted, FAILURE if the chain ends first.
 */
ZEND_API int ZEND_FASTCALL zend_hash_del(HashTable *ht, zend_string *key)
{
/* ... */
h = zend_string_hash_val(key); /* get the hash from the key (assuming string key here) */
nIndex = h | ht->nTableMask; /* get the translation table index */
idx = HT_HASH(ht, nIndex); /* get the first slot of the chain for that translation index */
while (idx != HT_INVALID_IDX) { /* while the chain has another slot */
p = HT_HASH_TO_BUCKET(ht, idx); /* get the bucket from that slot */
if ((p->key == key) || /* is it the right bucket? same key pointer? */
(p->h == h && /* ... or same hash */
p->key && /* and a key (string key based) */
ZSTR_LEN(p->key) == ZSTR_LEN(key) && /* and same key length */
memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) == 0)) { /* and same key content? */
_zend_hash_del_el_ex(ht, idx, p, prev); /* match: delete it (prev = preceding bucket in the chain) */
return SUCCESS;
}
prev = p; /* remember the bucket we just skipped */
idx = Z_NEXT(p->val); /* follow the collision chain to the next slot */
}
return FAILURE;
}
Hash fragmentation, resizing and compacting
HashTable Resize
/*
 * Make room in a full hash table.
 *
 * - If enough slots are dead (nNumUsed exceeds nNumOfElements by more than
 *   nNumOfElements/32), rehash in place instead of growing.
 * - Otherwise, if the table is below HT_MAX_SIZE, double it: allocate a new
 *   block, copy the used buckets, free the old block and rehash.
 * - Otherwise raise a fatal error.
 */
static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht)
{
	IS_CONSISTENT(ht);
	HT_ASSERT(GC_REFCOUNT(ht) == 1);

	if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { /* only rehash once this threshold is reached */
		HANDLE_BLOCK_INTERRUPTIONS();
		zend_hash_rehash(ht); /* rebuild the index (translation) array */
		HANDLE_UNBLOCK_INTERRUPTIONS();
	} else if (ht->nTableSize < HT_MAX_SIZE) { /* grow to twice the size */
		void *new_data, *old_data = HT_GET_DATA_ADDR(ht);
		uint32_t nSize = ht->nTableSize + ht->nTableSize;
		Bucket *old_buckets = ht->arData;

		HANDLE_BLOCK_INTERRUPTIONS();
		/* allocate the new arData block; its size is (sizeof(Bucket) + sizeof(uint32_t)) * nSize */
		new_data = pemalloc(HT_SIZE_EX(nSize, -nSize), ht->u.flags & HASH_FLAG_PERSISTENT);
		ht->nTableSize = nSize;
		ht->nTableMask = -ht->nTableSize; /* negated nTableSize */
		HT_SET_DATA_ADDR(ht, new_data); /* offset arData to the start of the Bucket array */
		memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); /* copy the old buckets into the new block */
		pefree(old_data, ht->u.flags & HASH_FLAG_PERSISTENT); /* release the old block */
		zend_hash_rehash(ht); /* rebuild the index (translation) array */
		HANDLE_UNBLOCK_INTERRUPTIONS();
	} else {
		zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%zu * %zu + %zu)", ht->nTableSize * 2, sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
	}
}
Example, Run php-src By GDB
git clone -b PHP-7.0.11 git@github.com:php/php-src.git
cd php-src
~/php-src> ./buildconf
~/php-src> ./configure --disable-all --enable-debug --prefix=$HOME/php-debug
~/php-src> make
~/php-src> make install
gdb --args bin/php -f hashtable-debug.php
break /home/1/php-src/Zend/zend_hash.c:839 if h==589
break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key14")==0
break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key2")==0
break /home/1/php-src/Zend/zend_hash.c:839 if h==4153
break /home/1/php-src/Zend/zend_hash.c:561 if strncmp((char *)&key->val,"class_exists",key->len)==0
GDB调试方案
<?PHP
// Driver script for the gdb session: the keys are chosen so that the
// conditional breakpoints (h==589, "key14", "key2", h==4153) fire at the
// marked statements. Markers @1..@4 match the rows of the result table.
// Do not reorder statements: the observed counters depend on the order.
$tmp_user = array(
'name'=>'wangtong',
'worker_id'=>'P589',
'589'=>'see-nNextFreeElement', // @1: third item, numeric key 589 (watch nNextFreeElement)
'company'=>'panda.tv',
'email'=>'wangtong@panda.tv',
'location'=>'bj-soho-18',
'department01' => 'g-biz',
'department02' => 'g-tech',
1006440989 => 'see-nTableSize',
'key10' => 'pandatv.com',
'key11' => 'shop.gate.panda.tv',
'key12' => 'mall.gate.panda.tv',
'key13' => 'bag.gate.panda.tv',
'key14' => 'see-nTableSize', // @2: fourteenth item (watch nTableSize)
);
// Copy element by element so every insert goes through the add/update path.
foreach($tmp_user as $k=>$v){
$user_info[$k]=$v; // breakpoints @1 and @2 are hit here
}
// Delete eight entries: the count of live elements drops while nNumUsed does not.
unset($user_info['worker_id']);
unset($user_info['589']);
unset($user_info['company']);
unset($user_info['email']);
unset($user_info['location']);
unset($user_info['department01']);
unset($user_info['department02']);
unset($user_info['1006440989']);
$user_info['key2']='see_nNumUsed'; // @3: insert after the unsets (watch nNumUsed vs nNumOfElements)
unset($user_info['key10']);
unset($user_info['key11']);
unset($user_info['key12']);
unset($user_info['key13']);
unset($user_info['key14']);
$user_info['key3']='val3';
$user_info['key4']='val4';
$user_info['4153'] = 'see-nTableSize'; // @4: numeric key 4153, five live items remain
运行示例
示例运行结果
| Step | nTableSize | nNumUsed | nNumOfEle | nNextFreeEle | Func | Mark |
|---|---|---|---|---|---|---|
| 3个item@1 | 16 | 3 | 3 | 590 | _zend_hash_index_add_or_update_i | Init, nTableSize=16; nNextFreeEle=589+1 |
| 14个item@2 | 16 | 14 | 14 | 1006440990 | _zend_hash_add_or_update_i | nNumUsed = 14; nNumOfElement = 14; |
| 3个item@1 | 8 | 3 | 3 | 590 | _zend_hash_index_add_or_update_i | Init, nTableSize=8; |
| 14个item@2 | 16 | 14 | 14 | 1006440990 | _zend_hash_add_or_update_i | nTableSize *= 2; |
| Unset后@3 | 16 | 15 | 7 | 1006440990 | _zend_hash_add_or_update_i | nNumUsed!=nNumOfEle; Hash fragmentation |
| 5个item@4 | 16 | 5 | 5 | 1006440990 | _zend_hash_index_add_or_update_i | nNumUsed = 5; resizing and compacting; |
Packed HashTables
packed hashtables
• 理解为传统意义上的‘数组’,而不是map
• 在packed hashtables中,arHash数组为NULL,查找只会直接在
arData中进行。
• packed hashtable只会作用于键递增的数组,这些数组的key之间
可以有间隔,但必须总是递增的。
• bucket->h是冗余的; bucket->key的值永远都是NULL
• 最简单的理解:用idx做索引,没有转换表,没有key.
Empty hashtable
空hash表
• arData/arHash 数组只会在插入第一个元素时分配内存
• nTableSize(8)& ht->nTableMask (0) == 0
• arHash 数组只有一个带有 INVALID_IDX 值、下标为 0 的元素
(uninitialized_bucket,并且被静态分配了内存)
• 查找时,我们会一直找到 INVALID_IDX 值,意味着 key(实际上你
只想静态分配创建一个空表)没有被找到
Application Scenarios
应用场景
• 自动扩容会导致多次分配内存及复制操作
• 数字索引比字母索引效率更高
• 不会自动缩容,nNumUsed 达到 nTableSize会压缩
• In_array效率会低
• Hash冲突还是要注意的,Dos攻击。
• Foreach的顺序为插入顺序
• 尽量使用 Packed hashtable
• Time33 hash算法适合英文词汇的hash;Time65适合大小写混写hash
• 理想情况下O(1)的时间复杂度,平均查找复杂度为O(L)
一旦 nNumUsed 达到 nTableSize,PHP会通过丢弃任何 UNDEF 的记录,自动压缩 arData 数组
PHP5 Hashtable
https://github.com/php/php-src/blob/PHP-5.6.9/Zend/zend_hash.h#L67
/*
 * PHP 5 hash table header (shown for comparison with the PHP 7 layout).
 * NOTE(review): field roles below are read off the names and the slide's
 * prose about doubly linked lists - confirm against PHP-5.6 zend_hash.h.
 */
typedef struct _hashtable {
uint nTableSize;
uint nTableMask;
uint nNumOfElements;
ulong nNextFreeElement;
Bucket *pInternalPointer; /* Used for
element traversal */
Bucket *pListHead; /* presumably first bucket of the list linking all elements */
Bucket *pListTail; /* presumably last bucket of that list */
Bucket **arBuckets; /* array of pointers to buckets */
dtor_func_t pDestructor;
zend_bool persistent;
unsigned char nApplyCount;
zend_bool bApplyProtection;
#if ZEND_DEBUG
int inconsistent; /* only present in debug builds */
#endif
} HashTable;
/*
 * PHP 5 bucket. It carries four bucket-to-bucket pointers (pListNext,
 * pListLast, pNext, pLast), which the slide counts as 32 bytes of linkage.
 * NOTE(review): presumably pList* link all elements and pNext/pLast link
 * the collision chain - confirm against PHP-5.6 zend_hash.h.
 */
typedef struct bucket {
ulong h;
uint nKeyLength;
void *pData;
void *pDataPtr;
struct bucket *pListNext;
struct bucket *pListLast;
struct bucket *pNext;
struct bucket *pLast;
const char *arKey;
} Bucket;
PHP5 vs PHP7
• PHP 5.x 每个元素需要 144 bytes。在 PHP 7 中,降低到了 36 bytes,
或者打包情况下 32 bytes
• Buckets 需要单独分配16bytes内存,冗余且降低缓存效率
• Zvals 需要分开分配会产生额外头开销冗余, 16bytes
• 双向链表中的每个bucket需要4个指针用于链表的连接,32字节
• php7更少的内存占用,更好的CPU缓存利用率,更好的性能
• Php7 在线性的内存地址上进行遍历,而不是在一段内存地址随机
的链表上遍历
<?PHP
// Prints how many bytes an array of 100000 consecutive integer keys uses.
// The output string is restored to " bytes\n": the slide extraction had
// split the literal across two lines and dropped the backslash.
$startMemory = memory_get_usage();
//$array = range(1, 100000);
for($i=0;$i<100000; $i++){
$array[$i] = $i;
}
echo memory_get_usage() - $startMemory, " bytes\n";
// Adds one string key ('k100000') after the measurement.
// NOTE(review): presumably meant to show the packed -> hash conversion; confirm intent.
$array['k'.$i] = $i;
PHP5 vs PHP7 memory
Redis HashTable
https://github.com/antirez/redis/blob/2.8/src/dict.h#L69
Redis hashtable
/*
 * One key/value entry of a Redis dict. The value is a union, so exactly one
 * of val/u64/s64/d is meaningful for a given entry.
 */
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next; /* next entry; presumably the same-slot collision chain */
} dictEntry;
/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
dictEntry **table; /* array of entry pointers */
unsigned long size;
unsigned long sizemask; /* NOTE(review): presumably size - 1, used to mask the hash - confirm */
unsigned long used;
} dictht;
/*
 * A Redis dictionary: two dictht tables (see ht[2]) plus rehash and
 * iterator bookkeeping.
 */
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2]; /* the two tables used during incremental rehashing */
long rehashidx; /* rehashing not in
progress if rehashidx == -1 */
int iterators; /* number of iterators
currently running */
} dict;
Redis vs PHP7
• Redis业务场景在存储,所以需要实现扩容的异步化
• Redis hgetall无序,少有顺序遍历业务场景,无需保证顺序
• Redis 使用的是 MurmurHash2,更适用于规律性强的key
感谢
• 感谢极客好人许老板教我C语言
• 感谢cap与大家给我进步的机会,同我一起学习
• 感谢以下开源贡献者
• http://jpauli.github.io/2016/04/08/hashtables.html
• http://www.laruence.com/2009/08/23/1065.html
• http://www.laruence.com/2009/07/23/994.html
• https://juejin.im/entry/58f87f1c44d9040069ca999c
• https://crispgm.com/page/php7-new-hashtable-implementation.html

More Related Content

What's hot

Yy
YyYy
Yy
yygh
 
Nouveau document texte
Nouveau document texteNouveau document texte
Nouveau document texte
Sai Ef
 
Debugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB TricksDebugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB Tricks
dutor
 
How to write rust instead of c and get away with it
How to write rust instead of c and get away with itHow to write rust instead of c and get away with it
How to write rust instead of c and get away with it
Flavien Raynaud
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - Compilations
HSA Foundation
 
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
CODE BLUE
 

What's hot (19)

Vcs28
Vcs28Vcs28
Vcs28
 
Php&redis presentation
Php&redis presentationPhp&redis presentation
Php&redis presentation
 
C99.php
C99.phpC99.php
C99.php
 
Basic of Exploitation
Basic of ExploitationBasic of Exploitation
Basic of Exploitation
 
Powered by Python - PyCon Germany 2016
Powered by Python - PyCon Germany 2016Powered by Python - PyCon Germany 2016
Powered by Python - PyCon Germany 2016
 
Yy
YyYy
Yy
 
Nouveau document texte
Nouveau document texteNouveau document texte
Nouveau document texte
 
Cluj.py Meetup: Extending Python in C
Cluj.py Meetup: Extending Python in CCluj.py Meetup: Extending Python in C
Cluj.py Meetup: Extending Python in C
 
Debugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB TricksDebugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB Tricks
 
How to write rust instead of c and get away with it
How to write rust instead of c and get away with itHow to write rust instead of c and get away with it
How to write rust instead of c and get away with it
 
Sysprog17
Sysprog17Sysprog17
Sysprog17
 
Codes
CodesCodes
Codes
 
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data EcosystemWprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
 
Gps c
Gps cGps c
Gps c
 
Compact ordered dict__k_lab_meeting_
Compact ordered dict__k_lab_meeting_Compact ordered dict__k_lab_meeting_
Compact ordered dict__k_lab_meeting_
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - Compilations
 
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
 
Npc14
Npc14Npc14
Npc14
 
Virtual Machine Constructions for Dummies
Virtual Machine Constructions for DummiesVirtual Machine Constructions for Dummies
Virtual Machine Constructions for Dummies
 

Similar to Php7 hashtable

Program 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdfProgram 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdf
ezzi552
 
Write a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdfWrite a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdf
SANDEEPARIHANT
 
Do the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdfDo the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdf
ahntagencies
 
Describe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdfDescribe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdf
arihantstoneart
 
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
odiliagilby
 
You are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdfYou are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdf
fortmdu
 
In this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdfIn this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdf
charanjit1717
 
This is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdfThis is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdf
kostikjaylonshaewe47
 
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdfC++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
aassecuritysystem
 
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docxGIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
LeonardN9WWelchw
 
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdfincludestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
galagirishp
 
In c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdfIn c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdf
rajkumarm401
 
Unit 4
Unit 4Unit 4
Unit 4
siddr
 
PHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhaustedPHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhausted
Piotr Pasich
 

Similar to Php7 hashtable (20)

Aodv routing protocol code in ns2
Aodv routing protocol code in ns2Aodv routing protocol code in ns2
Aodv routing protocol code in ns2
 
Jamming attack in wireless network
Jamming attack in wireless networkJamming attack in wireless network
Jamming attack in wireless network
 
Program 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdfProgram 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdf
 
Write a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdfWrite a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdf
 
Do the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdfDo the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdf
 
Потоки в перле изнутри
Потоки в перле изнутриПотоки в перле изнутри
Потоки в перле изнутри
 
Describe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdfDescribe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdf
 
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
 
You are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdfYou are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdf
 
In this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdfIn this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdf
 
This is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdfThis is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdf
 
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdfC++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
 
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docxGIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
 
Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2
 
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdfincludestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
 
In c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdfIn c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdf
 
Unit 4
Unit 4Unit 4
Unit 4
 
PHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhaustedPHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhausted
 
Hacking hhvm
Hacking hhvmHacking hhvm
Hacking hhvm
 
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
 

Recently uploaded

CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Service
giselly40
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slide
vu2urc
 
Artificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsArtificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and Myths
Joaquim Jorge
 

Recently uploaded (20)

How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
 
Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Service
 
Scaling API-first – The story of a global engineering organization
Scaling API-first – The story of a global engineering organizationScaling API-first – The story of a global engineering organization
Scaling API-first – The story of a global engineering organization
 
Tech Trends Report 2024 Future Today Institute.pdf
Tech Trends Report 2024 Future Today Institute.pdfTech Trends Report 2024 Future Today Institute.pdf
Tech Trends Report 2024 Future Today Institute.pdf
 
08448380779 Call Girls In Greater Kailash - I Women Seeking Men
08448380779 Call Girls In Greater Kailash - I Women Seeking Men08448380779 Call Girls In Greater Kailash - I Women Seeking Men
08448380779 Call Girls In Greater Kailash - I Women Seeking Men
 
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time AutomationFrom Event to Action: Accelerate Your Decision Making with Real-Time Automation
From Event to Action: Accelerate Your Decision Making with Real-Time Automation
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdf
 
Automating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps ScriptAutomating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps Script
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slide
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)
 
Driving Behavioral Change for Information Management through Data-Driven Gree...
Driving Behavioral Change for Information Management through Data-Driven Gree...Driving Behavioral Change for Information Management through Data-Driven Gree...
Driving Behavioral Change for Information Management through Data-Driven Gree...
 
A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Artificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsArtificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and Myths
 

Php7 hashtable

  • 2.  PHP7 Array Featrue  PHP7 HashTable Struct  HashTable Collisions  Translation Table  PHP7 HashTable Operation  Example, Run php-src By GDB  Packed HashTables  Empty HashTable  Application Scenarios  PHP5 Hashtable  Redis Hashtable Agenda
  • 3. We already know about php array • Array 为 HashTable实现 • map,可字符数字做索引建 • Foreach 快于 for • Foreach 顺序为插入顺序 • Count($arr) 快 • In_array 慢 • 理想情况Find 时间复杂度为 O(1) • PHP有current,prev,next,each,end,reset等指针函数
  • 5. /* ht struct */ struct Bucket { zval val; zend_ulong h; zend_string *key; } Bucket; struct HashTable { zend_refcounted_h gc; union { struct { ZEND_ENDIAN_LOHI_4( /*... ellipsis ...*/) } v; uint32_t flags; } u; uint32_t nTableMask; // -nTableSize, -16; (uint)-16==4294967280 Bucket *arData; // array items, uint32_t nNumUsed; // next slot available in arData uint32_t nNumOfElements; // total num of busy elements in arData uint32_t nTableSize; // table size, always a power of two, min:8 uint32_t nInternalPointer; // Used for iteration zend_long nNextFreeElement; // next integer-based key available dtor_func_t pDestructor; // data destructor };
  • 6. struct zval { zend_value value; /* value */ union { struct { ZEND_ENDIAN_LOHI_4( zend_uchar type, /* active type */ zend_uchar type_flags, zend_uchar const_flags, zend_uchar reserved) /*call info for EX(This*/ } v; uint32_t type_info; } u1; union { uint32_t var_flags; uint32_t next; /* hash collision chain */ uint32_t cache_slot; /* literal cache slot */ uint32_t lineno; /* line number (for ast nodes) */ uint32_t num_args; /* arguments number for EX(This) */ uint32_t fe_pos; /* foreach position */ uint32_t fe_iter_idx; /* foreach iterator index */ } u2; }; union zend_value { zend_long lval; double dval; zend_refcounted *counted; zend_string *str; zend_array *arr; zend_object *obj; zend_resource *res; zend_reference *ref; zend_ast_ref *ast; zval *zv; void *ptr; zend_class_entry *ce; zend_function *func; struct { uint32_t w1; uint32_t w2; } ww; };
  • 7. Hashtable collisions 链接法:桶是一个可容纳多个数据的数据结构(例如链表或红黑树) 开放寻址法:所有元素都存放在槽中(装载因子<0.5可考虑) //Hashtable collisions attack $beginTime = microtime(true); $size = pow(2,16); $k = 0; for($i=0;$i<$size;$i++){ $arr[$k] = 0; $k += $size; } echo microtime(true) - $beginTime; echo PHP_EOL; //php5.6 28s, php7 7s
  • 9. Hash layout #define HT_HASH_SIZE(nTableMask) (((size_t)(uint32_t)-(int32_t)(nTableMask)) * sizeof(uint32_t)) #define HT_DATA_SIZE(nTableSize) ((size_t)(nTableSize) * sizeof(Bucket)) #define HT_SIZE_EX(nTableSize, nTableMask) (HT_DATA_SIZE((nTableSize)) + HT_HASH_SIZE((nTableMask))) #define HT_SIZE(ht) HT_SIZE_EX((ht)->nTableSize, (ht)->nTableMask) Bucket *arData; arData = emalloc(HT_SIZE(ht)); /* now alloc this */
  • 10. panda.tv tone shop … Bucket … val value … … … … u1 -1 -1 1 u2.next 92236014990 29192316 0 65536 … h h … 0x… 0x… 0x… … *key *key nTableSize-1 0 1 2 3 4i idx=5 idx=6 7 gc … u … nTableMask -8 *arData 0x… nNumUsed 3 nNumOfElements 3 nTableSize 8 nInternalPointer 0 nNextFreeElement 65537 pDestructor 0x… … 922360… 7 company gc h len val[1] 2 0 -8 -7 -6 -5 -4 -3 -2 -1 $arr = [ 'company'=>'panda.tv', 0=>’tone', 65536=>'shop' ]; 0x0 Translation table bucket PHP7 HashTable : nIndex idx
  • 12. HashTable Init ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC) { GC_REFCOUNT(ht) = 1; GC_TYPE_INFO(ht) = IS_ARRAY; ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS; ht->nTableSize = zend_hash_check_size(nSize); ht->nTableMask = HT_MIN_MASK; // ((uint32_t) -2) HT_SET_DATA_ADDR(ht, &uninitialized_bucket); //HT_SET_DATA_ADDR(ht, ptr) do { (ht)- >arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); } while (0) ht->nNumUsed = 0; ht->nNumOfElements = 0; ht->nInternalPointer = HT_INVALID_IDX; ht->nNextFreeElement = 0; ht->pDestructor = pDestructor; }
  • 13. Translation Table Demo /* @auth xuruiliang@panda.tv, 在此感谢许老板帮写的demo*/ #include <iostream> #include <cstdlib> #include <cassert> using namespace std; struct P { int x, y; }; const int P_SIZE = 10; int main() { struct P p1 = (struct P){.x = 100, .y = 101}; P *p = (P *)malloc(P_SIZE* (sizeof(int) + sizeof(P))); assert(p != NULL); ((int *)p)[0] = 10; p = (P *)((int *)p + P_SIZE); p[3] = p1; }
  • 14. static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC) { ZEND_HASH_IF_FULL_DO_RESIZE(ht); //if ((ht)->nNumUsed >= (ht)->nTableSize) { zend_hash_do_resize(ht); } idx = ht->nNumUsed++; /* take the next available slot number */ ht->nNumOfElements++; /* increment number of elements */ /* ... */ p = ht->arData + idx; /* Get the bucket in that slot from arData */ p->key = key; /* Assign it the key we want to insert */ /* ... */ p->h = h = ZSTR_H(key); /* save the hash of the current key into the bucket */ ZVAL_COPY_VALUE(&p->val, pData); /* Copy the value into the bucket's value : add */ nIndex = h | ht->nTableMask; /* Get the translation table index */ // p->val.u2.next = Z_NEXT(p->val) = HT_HASH(ht, nIndex); /* Put the actual element as next of us */ // ((uint32_t*)((ht)->arData))[(int32_t)(nIndex)]=((idx) * sizeof(Bucket)) HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx); /* Put us into the actual translation slot */ HashTable Add
  • 15. Hashtable添加更新元素 • nIndex为hash后的索引,idx为顺序索引 • nNumUsed+1, 用于idx,>=nTableSize将触发扩容 • nNumOfElements+1, 记录实际元素个数 • nTableSize, 用于申请内存空间的大小 • nNextFreeElement+1, 用于下一个自增数字索引值 • p->val.u2.next 实现了hash冲突的解决 • p = ht->arData + idx, Bucket内存地址
  • 16. Hashtable del ZEND_API int ZEND_FASTCALL zend_hash_del(HashTable *ht, zend_string *key) { /* ... */ h = zend_string_hash_val(key); /* get the hash from the key (assuming string key here) */ nIndex = h | ht->nTableMask; /* get the translation table index */ idx = HT_HASH(ht, nIndex); /* Get the slot corresponding to that translation index */ while (idx != HT_INVALID_IDX) { /* If there is a corresponding slot */ p = HT_HASH_TO_BUCKET(ht, idx); /* Get the bucket from that slot */ if ((p->key == key) || /* Is it the right bucket ? same key pointer ? */ (p->h == h && /* ... or same hash */ p->key && /* and a key (string key based) */ ZSTR_LEN(p->key) == ZSTR_LEN(key) && /* and same key length */ memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) == 0)) { /* and same key content ? */ _zend_hash_del_el_ex(ht, idx, p, prev); /* that's us ! delete us */ return SUCCESS; } prev = p; idx = Z_NEXT(p->val); /* get the next corresponding slot from current one */ } return FAILURE; }
  • 18. HashTable Resize static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht) { IS_CONSISTENT(ht); HT_ASSERT(GC_REFCOUNT(ht) == 1); if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { //只有到一定阈值才进行rehash操作 HANDLE_BLOCK_INTERRUPTIONS(); zend_hash_rehash(ht); //重建索引数组 HANDLE_UNBLOCK_INTERRUPTIONS(); } else if (ht->nTableSize < HT_MAX_SIZE) { //扩大为两倍 void *new_data, *old_data = HT_GET_DATA_ADDR(ht); uint32_t nSize = ht->nTableSize + ht->nTableSize; Bucket *old_buckets = ht->arData; HANDLE_BLOCK_INTERRUPTIONS(); new_data = pemalloc(HT_SIZE_EX(nSize, -nSize), ht->u.flags & HASH_FLAG_PERSISTENT); //新分配arData空间,大小 为:(sizeof(Bucket) + sizeof(uint32_t)) * nSize ht->nTableSize = nSize; ht->nTableMask = -ht->nTableSize; //nTableSize负值 HT_SET_DATA_ADDR(ht, new_data); //将arData指针偏移到Bucket数组起始位置 memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); //将旧的Bucket数组拷到新空间 pefree(old_data, ht->u.flags & HASH_FLAG_PERSISTENT); //释放旧空间 zend_hash_rehash(ht); //重建索引数组 HANDLE_UNBLOCK_INTERRUPTIONS(); } else { zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%zu * %zu + %zu)", ht- >nTableSize * 2, sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket)); } }
  • 20. git clone -b PHP-7.0.11 git@github.com:php/php-src.git cd php-src ~/php-src> ./buildconf ~/php-src> ./configure --disable-all --enable-debug --prefix=$HOME/php-debug ~/php-src> make ~/php-src> make install gdb --args bin/php -f hashtable-debug.php break /home/1/php-src/Zend/zend_hash.c:839 if h==589 break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key14")==0 break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key2")==0 break /home/1/php-src/Zend/zend_hash.c:839 if h==4153 break /home/1/php-src/Zend/zend_hash.c:561 if strncmp((char *)&key->val,"class_exists",key->len)==0 GDB调试方案
  • 21. <?PHP $tmp_user = array( 'name'=>'wangtong', 'worker_id'=>'P589', '589'=>'see-nNextFreeElement', //here @1 'company'=>'panda.tv', 'email'=>'wangtong@panda.tv', 'location'=>'bj-soho-18', 'department01' => 'g-biz', 'department02' => 'g-tech', 1006440989 => 'see-nTableSize', 'key10' => 'pandatv.com', 'key11' => 'shop.gate.panda.tv', 'key12' => 'mall.gate.panda.tv', 'key13' => 'bag.gate.panda.tv', 'key14' => 'see-nTableSize', // here, @2 ); foreach($tmp_user as $k=>$v){ $user_info[$k]=$v; //here @1 @2 } unset($user_info['worker_id']); unset($user_info['589']); unset($user_info['company']); unset($user_info['email']); unset($user_info['location']); unset($user_info['department01']); unset($user_info['department02']); unset($user_info['1006440989']); $user_info['key2']='see_nNumUsed'; // here @3 unset($user_info['key10']); unset($user_info['key11']); unset($user_info['key12']); unset($user_info['key13']); unset($user_info['key14']); $user_info['key3']='val3'; $user_info['key4']='val4'; $user_info['4153'] = 'see-nTableSize';//here @ 运行示例
  • 22. 示例运行结果 nTableSize nNumUsed nNumOfEle nNextFreeEle Func Mark 3个item@1 16 3 3 590 _zend_hash_index_add_or_update_i Init,nTableSize=16; nNextFreeEle=589+1 14个item@2 16 14 14 1006440990 _zend_hash_add_or_update_i nNumUsed = 14; nNumOfElement = 14; 3个item@1 8 3 3 590 _zend_hash_index_add_or_update_i Init, nTableSize=8; 14个item@2 16 14 14 1006440990 _zend_hash_add_or_update_i nTableSize *= 2; Unset后@3 16 15 7 1006440990 _zend_hash_add_or_update_i nNumUsed!=nNumOfEle; Hash fragmentation 5个item@4 16 5 5 1006440990 _zend_hash_index_add_or_update_i nNumUsed = 5; resizing and compacting;
  • 24. packed hashtables • 理解为传统意义上的‘数组’,而不是map • 在packed hashtables中,arHash数组为NULL,查找只会直接在 arData中进行。 • packed hashtable只会作用于键递增的数组,这些数组的key之间 可以有间隔,但必须总是递增的。 • bucket->h是冗余的; bucket->key的值永远都是NULL • 最简单的理解:用idx做索引,没有转换表,没有key.
  • 26. 空hash表 • arData/arHash 数组只会在插入第一个元素时分配内存 • nTableSize(8)& ht->nTableMask (0) == 0 • arHash 数组只有一个带有 INVALID_IDX 值、下标为 0 的元素 (uninitialized_bucket,并且被静态分配了内存) • 查找时,我们会一直找到 INVALID_IDX 值,意味着 key(实际上你 只想静态分配创建一个空表)没有被找到
  • 28. 应用场景 • 自动扩容会导致多次分配内存及复制操作 • 数字索引比字母索引效率更高 • 不会自动缩容,nNumUsed 达到 nTableSize会压缩 • In_array效率会低 • Hash冲突还是要注意的,Dos攻击。 • Foreach的顺序为插入顺序 • 尽量使用 Packed hashtable • Time33 hash算法适合英文词汇的hash;Time65适合大小写混写hash • 理想情况下O(1)的时间复杂度,平均查找复杂度为O(L) 一旦 nNumUsed 达到 nTableSize,PHP会通过丢弃任何 UNDEF 的记录,自动压缩 arData 数组
  • 30. typedef struct _hashtable { uint nTableSize; uint nTableMask; uint nNumOfElements; ulong nNextFreeElement; Bucket *pInternalPointer; /* Used for element traversal */ Bucket *pListHead; Bucket *pListTail; Bucket **arBuckets; dtor_func_t pDestructor; zend_bool persistent; unsigned char nApplyCount; zend_bool bApplyProtection; #if ZEND_DEBUG int inconsistent; #endif } HashTable; typedef struct bucket { ulong h; uint nKeyLength; void *pData; void *pDataPtr; struct bucket *pListNext; struct bucket *pListLast; struct bucket *pNext; struct bucket *pLast; const char *arKey; } Bucket;
  • 31.
  • 32. PHP5 vs PHP7 • PHP 5.x 每个元素需要 144 bytes。在 PHP 7 中,降低到了 36 bytes, 或者打包情况下 32 bytes • Buckets 需要单独分配16bytes内存,冗余且降低缓存效率 • Zvals 需要分开分配会产生额外头开销冗余, 16bytes • 双向链表中的每个bucket需要4个指针用于链表的连接,32字节 • PHP7更少的内存占用,更好的CPU缓存利用率,更好的性能 • PHP7 在线性的内存地址上进行遍历,而不是在一段内存地址随机的链表上遍历
  • 33. <?PHP $startMemory = memory_get_usage(); //$array = range(1, 100000); for($i=0;$i<100000; $i++){ $array[$i] = $i; } echo memory_get_usage() - $startMemory, " bytes\n"; $array['k'.$i] = $i; PHP5 vs PHP7 memory
  • 35. Redis hashtable typedef struct dictEntry { void *key; union { void *val; uint64_t u64; int64_t s64; double d; } v; struct dictEntry *next; } dictEntry; /* This is our hash table structure. Every dictionary has two of this as we * implement incremental rehashing, for the old to the new table. */ typedef struct dictht { dictEntry **table; unsigned long size; unsigned long sizemask; unsigned long used; } dictht; typedef struct dict { dictType *type; void *privdata; dictht ht[2]; long rehashidx; /* rehashing not in progress if rehashidx == -1 */ int iterators; /* number of iterators currently running */ } dict;
  • 36. Redis vs PHP7 • Redis业务场景在存储,所以需要实现扩容的异步化 • Redis hgetall无序,少有顺序遍历业务场景,无需保证顺序 • Redis 使用的是 MurmurHash2,更适用于规律性强的key
  • 37. 感谢 • 感谢极客好人许老板教我C语言 • 感谢cap与大家给我进步的机会,同我一起学习 • 感谢以下开源贡献者 • http://jpauli.github.io/2016/04/08/hashtables.html • http://www.laruence.com/2009/08/23/1065.html • http://www.laruence.com/2009/07/23/994.html • https://juejin.im/entry/58f87f1c44d9040069ca999c • https://crispgm.com/page/php7-new-hashtable-implementation.html