// Run-time configuration. main fills these in from the command line before
// any table is built; the hash table methods read them directly.
var (
	// HashTableSize is the number of buckets in each of the two tables.
	HashTableSize int
	// PlaceNum is the number of slots in one bucket.
	PlaceNum int
	// Debug makes main look every key up again right after inserting it.
	Debug bool
)

// Limits that bound the retry loops.
const (
	MAXRANDOM  = 30   // exclusive upper bound of the random shift amounts in Hash
	MAX_KICK   = 1000 // evictions one insert performs before reporting failure
	MAX_REHASH = 1    // hash-parameter redraws tried per rebuild round
	MAX_INSERT = 2    // rebuild rounds spent on one key before giving up

	// emptySlot marks a free slot. Because of that, 0 cannot be stored as a key.
	emptySlot = 0
)

// main parses the flags, fills a cuckoo hash table with random keys until an
// insert fails or the requested load factor is reached, verifies the stored
// keys by lookup, and prints timings and the final load factor.
func main() {
	fmt.Println("this is a simple trial of the cuckoo hasing.")

	// Phase switches; the delete phase is turned off.
	runInsert, runSearch, runDelete := true, true, false

	sizeFlag := flag.Int("HashTableSize", 1000, "the size of the hash table")
	slotsFlag := flag.Int("PlaceNum", 1, "the place num of one place")
	debugFlag := flag.Bool("Debug", false, "if debug")
	lf := flag.Float64("lf", 100.0, "the lf you want to stop")
	flag.Parse()

	// Assign to the package-level variables (no ":=", which would shadow them).
	HashTableSize = *sizeFlag
	PlaceNum = *slotsFlag
	Debug = *debugFlag

	fmt.Println("Params setting:")
	fmt.Println("HashTableSize:", HashTableSize)
	fmt.Println("PlaceNum:", PlaceNum)
	fmt.Println()
	fmt.Println()

	// The hash functions take a modulo by HashTableSize and evictions index
	// slot 0 of a bucket, so both dimensions have to be at least 1.
	if HashTableSize <= 0 || PlaceNum <= 0 {
		fmt.Println("HashTableSize and PlaceNum must both be positive")
		return
	}

	// Build the empty table and draw the initial hash parameters.
	fmt.Println("preparing the hash table...")
	var HT CuckooHashTable
	var hash Hash
	HT.init()
	hash.init()
	fmt.Println("finsih preparing the hash table")

	// One random key per slot of the whole structure. 0 is skipped because it
	// is the empty-slot marker.
	fmt.Println("Generate sample...")
	bufferInt := make([]int, 2*HashTableSize*PlaceNum)
	for i := range bufferInt {
		for bufferInt[i] == emptySlot {
			bufferInt[i] = rand.Int()
		}
	}

	// totalBytes counts the leading keys of bufferInt accepted by the table.
	totalBytes := 0
	// lostKey is a previously accepted key that was evicted by the final,
	// abandoned insert; it is only meaningful while hasLostKey is true.
	lostKey, hasLostKey := 0, false

	if runInsert {
		fmt.Println("Insert test...")
		startTime := time.Now()
		var rehashTime time.Duration

		for _, key := range bufferInt {
			ok, spent, lost, hasLost := insertWithRehash(&HT, &hash, bufferInt, totalBytes, key)
			rehashTime += spent
			if !ok {
				lostKey, hasLostKey = lost, hasLost
				fmt.Println("Insert failed, the hash table is full")
				break
			}

			if Debug {
				if !HT.search(&hash, key) {
					panic("error")
				}
				fmt.Printf("Successfully insert %d\n", key)
			}

			totalBytes++
			if float64(HT.cacu_lf()) >= *lf {
				break
			}
		}
		fmt.Printf("Insert test finish, use %s\n", time.Since(startTime))
		fmt.Printf("Rehash time %s", rehashTime)
		fmt.Println()
	}

	if runSearch {
		fmt.Println("Search test start...")
		startTime := time.Now()
		for _, key := range bufferInt[:totalBytes] {
			// The one key displaced by an abandoned insert is allowed to be missing.
			if !HT.search(&hash, key) && !(hasLostKey && key == lostKey) {
				panic("search failed")
			}
		}
		fmt.Printf("Search test finish, use %s", time.Since(startTime))
		fmt.Println()
	}

	if runDelete {
		fmt.Println("Delete test start...")
		for _, key := range bufferInt[:totalBytes] {
			if !HT.delete(&hash, key) && !(hasLostKey && key == lostKey) {
				panic("delete failed")
			}
		}
		fmt.Println("Delete test finish")
	}

	loadFactor := HT.cacu_lf()
	fmt.Printf("the load factor is %.2f%%\n", loadFactor)
	fmt.Printf("total_insert:%d\n", HT.OccupiedNum1+HT.OccupiedNum2)
}

// insertWithRehash stores key in ht. When the plain insert runs out of
// evictions it rebuilds the table under freshly drawn hash parameters and
// tries the key again, for at most MAX_INSERT rebuild rounds.
//
// keys[:stored] must be the keys the table has accepted so far; a rebuild
// replays exactly those.
//
// Results:
//   - ok: key is stored in the table.
//   - spent: time used for snapshots and rebuilds.
//   - lost, hasLost: when no rebuild succeeded, ht and hash are rolled back to
//     the state right after the failed insert; in that state the key reported
//     as homeless by that insert is no longer stored. hasLost is true and lost
//     is that key. Otherwise hasLost is false.
//
// It panics if the table snapshot cannot be taken or restored.
func insertWithRehash(ht *CuckooHashTable, hash *Hash, keys []int, stored, key int) (ok bool, spent time.Duration, lost int, hasLost bool) {
	var homeless int
	ok, homeless = ht.insert(hash, key)
	for round := 0; !ok; {
		began := time.Now()

		// Snapshot table and hash parameters: a failed rebuild leaves the
		// table partially filled under parameters that do not work.
		savedHash := *hash
		var savedHT CuckooHashTable
		if err := ht.copy(&savedHT); err != nil {
			panic("copy failed!")
		}

		rebuilt := false
		for try := 0; try < MAX_REHASH && !rebuilt; try++ {
			rebuilt = ht.rehash(hash, keys, stored)
		}
		spent += time.Since(began)

		if !rebuilt {
			fmt.Println("rehash failed, can't find another hash function")
			if err := savedHT.copy(ht); err != nil {
				panic("copy failed!")
			}
			*hash = savedHash
			return false, spent, homeless, true
		}

		round++
		if round == MAX_INSERT {
			// The rebuild succeeded, so every earlier key is stored; only the
			// pending key is left out.
			return false, spent, 0, false
		}

		// The rebuild replayed only the earlier keys, so the pending key has
		// to be inserted again under the new parameters.
		ok, homeless = ht.insert(hash, key)
	}
	return true, spent, 0, false
}

// CuckooHashTable is a two-table cuckoo hash set of non-zero ints. Each table
// has HashTableSize buckets of PlaceNum slots; a slot holding 0 is free.
type CuckooHashTable struct {
	Val1         [][]int // first table, indexed by Hash.hash1
	Val2         [][]int // second table, indexed by Hash.hash2
	OccupiedNum1 int     // number of filled slots in Val1
	OccupiedNum2 int     // number of filled slots in Val2
}

// init allocates both tables with the current HashTableSize and PlaceNum and
// resets the occupancy counters, discarding any previous content.
func (ht *CuckooHashTable) init() {
	ht.OccupiedNum1, ht.OccupiedNum2 = 0, 0
	ht.Val1 = newBuckets(HashTableSize, PlaceNum)
	ht.Val2 = newBuckets(HashTableSize, PlaceNum)
}

// newBuckets returns a table of the given number of buckets, each with the
// given number of free slots.
func newBuckets(buckets, slots int) [][]int {
	table := make([][]int, buckets)
	for i := range table {
		table[i] = make([]int, slots)
	}
	return table
}

// insert adds a to the table.
//
// It returns whether the insert succeeded together with an int: on success
// the inserted value a; on failure the value that currently has no place,
// which is not necessarily a, because earlier evictions may already have put
// a into the table in exchange for another key. A failed insert therefore
// leaves the table modified and the occupancy counters unchanged.
//
// A value that is already present is reported as success without being stored
// twice. The key 0 is rejected because 0 marks a free slot.
func (ht *CuckooHashTable) insert(hash *Hash, a int) (bool, int) {
	if a == emptySlot {
		return false, a
	}
	if ht.search(hash, a) {
		return true, a
	}

	cur := a
	// Evictions alternate between the tables, starting with the first one.
	evictFromSecond := false
	for kick := 0; kick < MAX_KICK; kick++ {
		index1 := hash.hash1(cur)
		index2 := hash.hash2(cur)

		if placeInBucket(ht.Val1[index1], cur) {
			ht.OccupiedNum1++
			return true, a
		}
		if placeInBucket(ht.Val2[index2], cur) {
			ht.OccupiedNum2++
			return true, a
		}

		// Both candidate buckets are full: swap cur with the occupant of
		// slot 0 and carry on with the evicted key.
		victim := &ht.Val1[index1][0]
		if evictFromSecond {
			victim = &ht.Val2[index2][0]
		}
		cur, *victim = *victim, cur
		evictFromSecond = !evictFromSecond
	}
	return false, cur
}

// placeInBucket writes key into the first free slot of bucket and reports
// whether there was one.
func placeInBucket(bucket []int, key int) bool {
	for i, v := range bucket {
		if v == emptySlot {
			bucket[i] = key
			return true
		}
	}
	return false
}

// slotOf returns the index of key within bucket, or -1 if it is not there.
func slotOf(bucket []int, key int) int {
	for i, v := range bucket {
		if v == key {
			return i
		}
	}
	return -1
}

// search reports whether a is stored in either of its two candidate buckets.
// The key 0 is never reported as present, since 0 marks a free slot.
func (ht *CuckooHashTable) search(hash *Hash, a int) bool {
	if a == emptySlot {
		return false
	}
	return slotOf(ht.Val1[hash.hash1(a)], a) >= 0 ||
		slotOf(ht.Val2[hash.hash2(a)], a) >= 0
}

// delete removes a from the table and reports whether it was present. The
// first table is checked before the second. The key 0 is never present.
func (ht *CuckooHashTable) delete(hash *Hash, a int) bool {
	if a == emptySlot {
		return false
	}
	first := ht.Val1[hash.hash1(a)]
	if i := slotOf(first, a); i >= 0 {
		first[i] = emptySlot
		ht.OccupiedNum1--
		return true
	}
	second := ht.Val2[hash.hash2(a)]
	if i := slotOf(second, a); i >= 0 {
		second[i] = emptySlot
		ht.OccupiedNum2--
		return true
	}
	return false
}

// cacu_lf returns the load factor in percent: filled slots divided by the
// 2*HashTableSize*PlaceNum slots of both tables, times 100.
func (ht *CuckooHashTable) cacu_lf() float32 {
	return ((float32(ht.OccupiedNum1) + float32(ht.OccupiedNum2)) / float32(2*HashTableSize*PlaceNum)) * 100
}

// rehash draws new hash parameters, empties the table and re-inserts the
// first total_bytes entries of bufferbytes. It reports whether every one of
// them could be inserted; on false the table is left partially filled and
// hash keeps the new parameters.
func (ht *CuckooHashTable) rehash(hash *Hash, bufferbytes []int, total_bytes int) bool {
	hash.rehash()
	ht.init()
	for i := 0; i < total_bytes; i++ {
		if ok, _ := ht.insert(hash, bufferbytes[i]); !ok {
			return false
		}
	}
	return true
}

// copy makes dest an independent deep copy of ht: both tables are duplicated
// and both counters are overwritten, including when they are zero. It returns
// an error only when dest is nil.
func (ht *CuckooHashTable) copy(dest *CuckooHashTable) error {
	if dest == nil {
		return fmt.Errorf("copy: nil destination")
	}
	dest.Val1 = cloneBuckets(ht.Val1)
	dest.Val2 = cloneBuckets(ht.Val2)
	dest.OccupiedNum1 = ht.OccupiedNum1
	dest.OccupiedNum2 = ht.OccupiedNum2
	return nil
}

// cloneBuckets returns a copy of src that shares no memory with it.
func cloneBuckets(src [][]int) [][]int {
	if src == nil {
		return nil
	}
	dst := make([][]int, len(src))
	for i, row := range src {
		dst[i] = make([]int, len(row))
		copy(dst[i], row)
	}
	return dst
}

// Hash holds the four shift amounts that parameterise hash1 and hash2.
type Hash struct {
	h1 int
	h2 int
	h3 int
	h4 int
}

// init seeds math/rand's global source from the current time and draws the
// four shift amounts.
func (hash *Hash) init() {
	rand.Seed(time.Now().UnixNano())
	hash.rehash()
}

// hash1 mixes h with left shifts by h1 and h2 and right shifts by h3 and h4,
// masks the result to 31 bits and reduces it modulo HashTableSize.
func (hash *Hash) hash1(h int) int {
	h ^= (h << hash.h1) ^ (h << hash.h2)
	return ((h ^ (h >> hash.h3) ^ (h >> hash.h4)) & 0x7fffffff) % HashTableSize
}

// hash2 is hash1 with the roles of the shift amounts swapped: left shifts by
// h4 and h3, right shifts by h2 and h1.
func (hash *Hash) hash2(h int) int {
	h ^= (h << hash.h4) ^ (h << hash.h3)
	return ((h ^ (h >> hash.h2) ^ (h >> hash.h1)) & 0x7fffffff) % HashTableSize
}

// rehash draws four new shift amounts, each in [0, MAXRANDOM), without
// reseeding the random source.
func (hash *Hash) rehash() {
	hash.h1 = rand.Intn(MAXRANDOM)
	hash.h2 = rand.Intn(MAXRANDOM)
	hash.h3 = rand.Intn(MAXRANDOM)
	hash.h4 = rand.Intn(MAXRANDOM)
}
For example, it is written for the zsh shell rather than bash, and zsh syntax differs slightly from bash.** + diff --git "a/U201912633/course_report \345\274\240\347\235\277 U201912633.docx" "b/U201912633/course_report \345\274\240\347\235\277 U201912633.docx" new file mode 100644 index 0000000000000000000000000000000000000000..ed391bcad08bfe6ed89ed914c174804b89882c9d Binary files /dev/null and "b/U201912633/course_report \345\274\240\347\235\277 U201912633.docx" differ diff --git "a/U201912633/course_report \345\274\240\347\235\277 U201912633.pdf" "b/U201912633/course_report \345\274\240\347\235\277 U201912633.pdf" new file mode 100644 index 0000000000000000000000000000000000000000..6bab0bb558ff5882e631db619113d52a980546ea Binary files /dev/null and "b/U201912633/course_report \345\274\240\347\235\277 U201912633.pdf" differ diff --git "a/U201912633/lab_report \345\274\240\347\235\277 U201912633.doc" "b/U201912633/lab_report \345\274\240\347\235\277 U201912633.doc" new file mode 100644 index 0000000000000000000000000000000000000000..9c9634b2d636520a4ea057b045c2c82e8c601eb7 Binary files /dev/null and "b/U201912633/lab_report \345\274\240\347\235\277 U201912633.doc" differ diff --git "a/U201912633/lab_report \345\274\240\347\235\277 U201912633.pdf" "b/U201912633/lab_report \345\274\240\347\235\277 U201912633.pdf" new file mode 100644 index 0000000000000000000000000000000000000000..3fd7890cbf3e525ace7567ba2b91c97d59b8d40e Binary files /dev/null and "b/U201912633/lab_report \345\274\240\347\235\277 U201912633.pdf" differ