定义

map 表示一组无序的键值对，map 中每个 Key 都是唯一的

Key 和 Value 的类型不要求相同，如果 Key 类型和 Value 类型都相同，map 类型才是等价的

m1 := map[string]string{}
m2 := make(map[int]string)

fmt.Printf("%T\n", m1) // map[string]string
fmt.Printf("%T\n", m2) // map[int]string

对 Value 类型没有限制，但对 Key 类型有严格限制
因为 map 类型要保证 Key 类型的唯一性，因此，Key 类型必须支持 == 和 != 两种比较操作

在 Go 中， slice、map、func都只支持与 nil 比较，并不支持同类型变量比较，因此不能作为 map 类型的 Key

s1 := make([]int, 1)
s2 := make([]int, 1)
fmt.Println(s1 == s2) // invalid operation: s1 == s2 (slice can only be compared to nil)

m1 := make(map[string]int)
m2 := make(map[string]int)
fmt.Println(m1 == m2) // invalid operation: m1 == m2 (map can only be compared to nil)

f1 := func() {}
f2 := func() {}
fmt.Println(f1 == f2) // invalid operation: f1 == f2 (func can only be compared to nil)

1
2
3

m1 := make(map[[]int]int)          // invalid map key type []int
m2 := make(map[map[string]int]int) // invalid map key type map[string]int
m3 := make(map[func()]string)      // invalid map key type func()

声明

如果没有显式赋值，则 map 和 slice 的默认值都为 nil

var arr [4]int       // not nil
var sl []int         // nil
var m map[string]int // nil

fmt.Printf("arr: %#v, type: %T\n", arr, arr)              // arr: [4]int{0, 0, 0, 0}, type: [4]int
fmt.Printf("slice is nil: %v, type: %T\n", sl == nil, sl) // slice is nil: true, type: []int
fmt.Printf("map is nil: %v, type: %T\n", m == nil, m)     // map is nil: true, type: map[string]int

slice 支持零值可用，但 map 由于其内部实现的复杂性，不支持零值可用

var sl []int         // nil
var m map[string]int // nil

sl = append(sl, 1)
fmt.Printf("%#v\n", sl) // []int{1}

m["one"] = 1 // panic: assignment to entry in nil map

由于 map 不支持零值可用，因此必须对 map 类型变量进行显式初始化后才能使用

初始化

复合字面量

m := map[int]string{}
fmt.Printf("%#v\n", m)        // map[int]string{}
fmt.Printf("%#v\n", m == nil) // false
m[1] = "A"                    // ok

语法糖：允许省略字面值中元素类型
因为 map 类型声明时已经指定了 Key 类型和 Value 类型，Go 编译器有足够的信息进行类型推导

m := map[int][]string{
  1: []string{"a", "b", "c"},
  2: []string{"d", "e", "f"},
  3: []string{"g", "h", "i"},
}

// 语法糖
m := map[int][]string{
  1: {"a", "b", "c"},
  2: {"d", "e", "f"},
  3: {"g", "h", "i"},
}

type Position struct {
    x float64
    y float64
}

var m = map[Position]string{
    Position{1, 2}: "hello",
    Position{3, 4}: "world",
    Position{5, 6}: "!",
}

// 语法糖
var m = map[Position]string{
    {1, 2}: "hello",
    {3, 4}: "world",
    {5, 6}: "!",
}

make

为 map 类型变量指定键值对的初始容量，但无法进行具体的键值对赋值

当 map 中的键值对数量超过初始容量后，Go Runtime 会对 map 进行自动扩容

//m1 := make(map[string]int)
m2 := make(map[string]int, 1<<0) // 初始容量为1

m2["a"] = 1
fmt.Printf("%v\n", len(m2)) // 1

m2["b"] = 2
fmt.Printf("%v\n", len(m2)) // 2

m2["c"] = 3
fmt.Printf("%v\n", len(m2)) // 3

m2["d"] = 4
fmt.Printf("%v\n", len(m2)) // 4

基本操作

插入

对于一个非 nil 的 map 类型变量，可以插入符合定义的任意新键值对
Go 会保证插入总是成功的，Go Runtime会负责 map 变量内部的内存管理

1
2
3

m := make(map[int]string)
m[1] = "hello"
m[2] = "world"

如果 key 已经存在，则覆盖

m := map[int]string{
  1: "a",
  2: "b",
}
m[1] = "aa"

fmt.Printf("%#v\n", m) // map[int]string{1:"aa", 2:"b"}

数量

通过 len 函数，获取当前变量已经存储的键值对数量

m := map[int]string{
  1: "a",
  2: "b",
}
fmt.Println(len(m)) // 2

m[3] = "c"
fmt.Println(len(m)) // 3

map 不支持 cap 函数

索引

查找 map 中不存在的 key，会返回对应的类型零值

1
2
3

m := make(map[string]int)
v := m["key"]
fmt.Println(v) // 0

comma ok

m := make(map[string]int)
if v, ok := m["key"]; ok {
  fmt.Println(v)
} else {
  fmt.Printf("%T\n", ok)       // bool
  fmt.Println("key not found") // key not found
}

m["key"] = 1
if v, ok := m["key"]; ok {
  fmt.Println(v) // 1
}

删除

delete 函数是从 map 中删除数据的唯一方法（即便 map 不存在，也不会 panic）

m := map[string]int{
  "one": 1,
  "two": 2,
}

fmt.Printf("%#v\n", m) // map[string]int{"one":1, "two":2}
delete(m, "one")
fmt.Printf("%#v\n", m) // map[string]int{"two":2}

m = nil
delete(m, "one")
fmt.Printf("%#v\n", m) // map[string]int(nil)

遍历

只有一种遍历方式：for range

func main() {
    m := map[int]int{
        1: 1,
        2: 4,
        3: 9,
    }

    fmt.Printf("{ ")
    for k, v := range m {
        fmt.Printf("[%d, %d] ", k, v)
    }
    fmt.Printf("}\n")
}

// Output:
//     { [1, 1] [2, 4] [3, 9] }

遍历 map，key 是无序的

func iteration(m map[int]int) {
    fmt.Printf("{ ")
    for k, v := range m {
        fmt.Printf("[%d, %d] ", k, v)
    }
    fmt.Printf("}\n")
}

func main() {
    m := map[int]int{
        1: 1,
        2: 4,
        3: 9,
    }

    for i := 0; i < 1<<2; i++ {
        iteration(m)
    }
}

// Output:
//    { [1, 1] [2, 4] [3, 9] }
//    { [1, 1] [2, 4] [3, 9] }
//    { [1, 1] [2, 4] [3, 9] }
//    { [3, 9] [1, 1] [2, 4] }

传递开销

与 slice 一样，map 也是引用类型，实际传递的并非整个 map 的数据拷贝，因此传递开销是固定，并且是很小的

当 map 被传递到函数或者方法内部后，后续对 map 的修改，在函数或者方法外部是可见的

func f(m map[string]int) {
    m["a"] = 11
    m["c"] = 3
}

func main() {
    m := map[string]int{
        "a": 1,
        "b": 2,
    }

    fmt.Printf("%#v\n", m) // map[string]int{"a":1, "b":2}
    f(m)
    fmt.Printf("%#v\n", m) // map[string]int{"a":11, "b":2, "c":3}
}

内部实现

Go Runtime 使用一张哈希表来实现抽象的 map 类型，实现了 map 类型操作的所有功能

对应关系

在编译阶段，Go 编译器会将语法层面的 map 操作，重写成 Runtime 对应的函数调用

// go
m := make(map[keyType]valType, capacityhint)

// runtime
m := runtime.makemap(maptype, capacityhint, m)

// go
m["key"] = ["value"]

// runtime
v := runtime.mapassign(maptype, m, "key") // v 用于存储后续的 value

// go
v := m["key"]
v, ok := m["key"]

// runtime
v := runtime.mapaccess1(maptype, m, "key")
v := runtime.mapaccess2(maptype, m, "key")

// go
delete(m, "key")

// runtime
runtime.mapdelete(maptype, m, "key")

Runtime

实现原理

与 slice 类似，map 由编译器和运行时联合实现

编译器在编译阶段将语法层面的 map 操作，重写为运行时对应的函数调用

运行时则采用了高效的算法实现了 map 类型的各类操作

其实这是简化了用户的使用门槛，用户本可以像 Java 那边直接感知到各种 map 的实现

初始

hmap

与语法层面 map 类型变量对应的 runtime.hmap 的实例

hmap 类型是 map 类型的头部结构，即变量传递时的描述符（实际的传递开销）

Field	Desc
count	当前 map 中的`元素个数`，使用 `len` 函数，返回 `count` 值
flags	当前 map 所处的`状态标志` `iterator`、`oldlterator`、`hashWriting`、`sameSizeGrow`
B	`2 ^ B` = bucket 数量
noverflow	`overflow bucket` 的`大约`数量
hash0	哈希函数的`种子`
buckets	指向 bucket 数组的`指针`
oldbuckets	在 map `扩容`阶段指向`前一个` bucket 数组的`指针`
nevacuate	在 map 扩容阶段的`扩容进度计数器` 所有下标`小于` nevacuate 的 bucket 都已经完成了数据`排空`和`迁移`操作
extra	如果存在 `overflow bucket`，且 Key 和 Value 都因`不包含指针`而被`内联`的情况下，该字段将存储所有指向 overflow bucket 的指针，保证 overflow bucket 不会被 `GC` 掉

// A header for a Go map.
type hmap struct {
    // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go.
    // Make sure this stays in sync with the compiler's definition.
    count     int // # live cells == size of map.  Must be first (used by len() builtin)
    flags     uint8
    B         uint8  // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
    noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
    hash0     uint32 // hash seed

    buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
    oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
    nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)

    extra *mapextra // optional fields
}

// mapextra holds fields that are not present on all maps.
type mapextra struct {
    // If both key and elem do not contain pointers and are inline, then we mark bucket
    // type as containing no pointers. This avoids scanning such maps.
    // However, bmap.overflow is a pointer. In order to keep overflow buckets
    // alive, we store pointers to all overflow buckets in hmap.extra.overflow and hmap.extra.oldoverflow.
    // overflow and oldoverflow are only used if key and elem do not contain pointers.
    // overflow contains overflow buckets for hmap.buckets.
    // oldoverflow contains overflow buckets for hmap.oldbuckets.
    // The indirection allows to store a pointer to the slice in hiter.
    overflow    *[]*bmap
    oldoverflow *[]*bmap

    // nextOverflow holds a pointer to a free overflow bucket.
    nextOverflow *bmap
}

bucket

真正存储键值对数据的是 bucket

每个 bucket 中存储的是 Hash 值低 bit 位数值相同的元素，默认的元素个数为 BUCKETSIZE - 8 = 2^3

cmd/compile/internal/gc/reflect.go

const (
    BUCKETSIZE  = 8
    MAXKEYSIZE  = 128
    MAXELEMSIZE = 128
)

runtime/map.go

const (
    // Maximum number of key/elem pairs a bucket can hold.
    bucketCntBits = 3
    bucketCnt     = 1 << bucketCntBits

    // Maximum average load of a bucket that triggers growth is 6.5.
    // Represent as loadFactorNum/loadFactorDen, to allow integer math.
    loadFactorNum = 13
    loadFactorDen = 2

    // Maximum key or elem size to keep inline (instead of mallocing per element).
    // Must fit in a uint8.
    // Fast versions cannot handle big elems - the cutoff size for
    // fast versions in cmd/compile/internal/gc/walk.go must be at most this elem.
    maxKeySize  = 128
    maxElemSize = 128
)

当某个 bucket 的 8 个 slot 都填满了，且 map 尚未达到扩容的条件，Runtime 会建立 overflow bucket
并将这个 overflow bucket 挂在上面 bucket 末尾的 overflow 指针上
这样多个 bucket 就形成了一个链表，直到下一次 map 扩容

tophash

向 map 插入数据或者从 map 按 key 查询数据
- Runtime 都会使用哈希函数对 key 做哈希运算，并得到一个 hashcode
hashcode 的低位区用于选定 bucket，而 hashcode 的高位区用于在某个 bucket 中确定 key 的位置
每个 bucket 的 tophash 用来快速定位 key 的位置，避免逐个 key 比较（代价很大） - 以空间换时间

key

存储这个 bucket 承载的所有 key 数据
Runtime 在分配 bucket 时需要知道 key size

当声明一个 map 类型变量
Runtime 会为这个变量对应的特定 map 类型，生成一个 runtime.maptype 实例（已存在则复用）
该 runtime.maptype 实例包含了该 map 类型中所有元数据

type maptype struct {
    typ    _type
    key    *_type
    elem   *_type
    bucket *_type // internal type representing a hash bucket
    // function for hashing keys (ptr to key, seed) -> hash
    hasher     func(unsafe.Pointer, uintptr) uintptr
    keysize    uint8  // size of key slot
    elemsize   uint8  // size of elem slot
    bucketsize uint16 // size of bucket
    flags      uint32
}

Runtime 通过 runtime.maptype 实例确定 Key 的类型和大小

runtime.maptype 使得所有 map 类型都共享一套 runtime map 的操作函数

value

存储 key 对应的 value
与 key 一样，该区域的创建也借助了 runtime.maptype 中的信息

Runtime 将 Key 和 Value 分开存储，而不是采用 kv-kv 的紧邻方式存储

增加了算法复杂性
减少因内存对齐带来的内存浪费

Go Runtime 的内存利用率很高

如果 Key 或者 Value 的数据长度超过了 128，那么 Runtime 不会在 bucket 中直接存储数据，而是存储数据的指针

runtime/map.go

const (    
    // Maximum key or elem size to keep inline (instead of mallocing per element).
    // Must fit in a uint8.
    // Fast versions cannot handle big elems - the cutoff size for
    // fast versions in cmd/compile/internal/gc/walk.go must be at most this elem.
    maxKeySize  = 128
    maxElemSize = 128
)

扩容

当 count > LoadFactor * 2 ^ B 或者 overflow bucket 过多时，Runtime 会对 map 进行自动扩容

6.5 / 8 = 0.8125

runtime/map.go

const (
    // Maximum average load of a bucket that triggers growth is 6.5.
    // Represent as loadFactorNum/loadFactorDen, to allow integer math.
    loadFactorNum = 13
    loadFactorDen = 2
)

// Like mapaccess, but allocates a slot for the key if it is not present in the map.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
  ...
    // If we hit the max load factor or we have too many overflow buckets,
    // and we're not already in the middle of growing, start growing.
    if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
        hashGrow(t, h)
        goto again // Growing the table invalidates everything, so try again
    }
  ...
}

由于 overflow bucket 过多导致的扩容 - 分配不均

Runtime 会新建一个与现有规模一样的 bucket 数组
然后在 assign 和 delete 时做排空和迁移

由于当前数据量超过 LoadFactor 指定水位而导致的扩容 - 容量不足

Runtime 会新建一个两倍于现有规模的 bucket 数组
但真正的排空和迁移工作，也是在 assign 和 delete 时逐步进行
原 bucket 数组会挂在 hmap 的 oldbuckets 指针下
- 直到原 bucket 数组中所有的数据都迁移到新数组，原 bucket 数组才会被释放

map 可以自动扩容，value 的位置可能会发生变化，因此不允许获取 map value 的地址，编译阶段就会报错

1
2
3

m := make(map[string]int)
p := &m["a"] // cannot take the address of m["a"]
fmt.Println(p)

并发

充当 map 描述符的 hmap 实例本身是有状态的（hmap.flags），而且对状态的读写是没有并发保护
即 map 实例并不是并发安全的，也不支持并发读写

并发读写

func read(m map[int]int) {
    for k, v := range m {
        _ = fmt.Sprintf("[%d %d]", k, v)
    }
}

func write(m map[int]int) {
    for k, v := range m {
        m[k] = v + 1
    }
}

// fatal error: concurrent map iteration and map write
func main() {
    m := map[int]int{1: 1, 2: 4, 3: 9}

    go func() {
        for i := 0; i < 1<<10; i++ {
            read(m)
        }
    }()

    go func() {
        for i := 0; i < 1<<10; i++ {
            write(m)
        }
    }()

    time.Sleep((1 << 2) * time.Second)
}

只是并发读

func read(m map[int]int) {
    for k, v := range m {
        _ = fmt.Sprintf("[%d %d]", k, v)
    }
}

// ok
func main() {
    m := map[int]int{1: 1, 2: 4, 3: 9}

    go func() {
        for i := 0; i < 1<<10; i++ {
            read(m)
        }
    }()

    go func() {
        for i := 0; i < 1<<10; i++ {
            read(m)
        }
    }()

    time.Sleep((1 << 2) * time.Second)
}