Golang的内存管理(中篇)-木庄网络博客

本文摘自网络，作者，侵删。

3.2 虚拟地址申请
主要是下面这段代码。

//尝试从不同地址开始申请
for i := 0; i <= 0x7f; i++ {    
    switch {
    case GOARCH == "arm64" && GOOS == "darwin":
        p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
    case GOARCH == "arm64":
        p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)    
    default:
        p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
    }
    pSize = bitmapSize + spansSize + arenaSize + _PageSize
    //向 OS 申请大小为 pSize 的连续的虚拟地址空间
    p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))    
    if p != 0 {        
        break
    }
}

初始化的时候，Golang 向操作系统申请一段连续的地址空间，就是上面的 spans + bitmap + arena。p 就是这段连续地址空间的开始地址，不同平台的 p 取值不一样。像 OS 申请的时候视不同的 OS 版本，调用不同的系统调用，比如 Unix 系统调用 mmap (mmap 想操作系统内核申请新的虚拟地址区间，可指定起始地址和长度)，Windows 系统调用 VirtualAlloc （类似 mmap）。


//bsd
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {    
    if sys.PtrSize == 8 && uint64(n) > 1<<32 || sys.GoosNacl != 0 {
        *reserved = false
        return v
    }

    p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
    if uintptr(p) < 4096 {
       return nil
    }
    *reserved = true
    return p
}
//darwin
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
    *reserved = true
    p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)    
    if uintptr(p) < 4096 {        
       return nil
    }   
    return p
}
//linux
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
    ...
    p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
    if uintptr(p) < 4096 {
       return nil
    }
    *reserved = true
    return p
}
//windows
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
    *reserved = true
    // v is just a hint.
    // First try at v.
    v = unsafe.Pointer(stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_RESERVE, _PAGE_READWRITE))
    if v != nil {    
        return v
    }    // Next let the kernel choose the address.
    return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_RESERVE, _PAGE_READWRITE))
}

3.3 mheap 初始化
我们上面介绍 mheap 结构的时候知道 spans, bitmap, arena 都是存在于 mheap 中的，从操作系统申请完地址之后就是初始化 mheap 了。


func mallocinit() {
  ...
    p1 := round(p, _PageSize)

    spansStart := p1
    mheap_.bitmap = p1 + spansSize + bitmapSize
    if sys.PtrSize == 4 { 
        // Set arena_start such that we can accept memory
        // reservations located anywhere in the 4GB virtual space.
        mheap_.arena_start = 0
    } else {
        mheap_.arena_start = p1 + (spansSize + bitmapSize)
    }
    mheap_.arena_end = p + pSize
    mheap_.arena_used = p1 + (spansSize + bitmapSize)
    mheap_.arena_reserved = reserved
    if mheap_.arena_start&(_PageSize-1) != 0 {
        println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
        throw("misrounded allocation in mallocinit")
    }    // Initialize the rest of the allocator.
    mheap_.init(spansStart, spansSize)  //获取当前 G
    _g_ := getg()  // 获取 G 上绑定的 M 的 mcache
    _g_.m.mcache = allocmcache()
}

p 是从连续虚拟地址的起始地址，先进行对齐，然后初始化 arena，bitmap，spans 地址。mheap_.init()会初始化 fixalloc 等相关的成员，还有 mcentral 的初始化。


func (h *mheap) init(spansStart, spansBytes uintptr) {
    h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
    h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
    h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
    h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)

    h.spanalloc.zero = false

    // h->mapcache needs no init
    for i := range h.free {
        h.free[i].init()
        h.busy[i].init()
    }

    h.freelarge.init()
    h.busylarge.init()
    for i := range h.central {
        h.central[i].mcentral.init(int32(i))
    }

    sp := (*slice)(unsafe.Pointer(&h.spans))
    sp.array = unsafe.Pointer(spansStart)
    sp.len = 0
    sp.cap = int(spansBytes / sys.PtrSize)
}

mheap 初始化之后，对当前的线程也就是 M 进行初始化。


//获取当前 G
_g_ := getg()
// 获取 G 上绑定的 M 的 mcache
_g_.m.mcache = allocmcache()
3.4 per-P mcache 初始化

上面好像并没有说到针对 P 的 mcache 初始化，因为这个时候还没有初始化 P。我们看一下 bootstrap 的代码。


func schedinit() {
  ...
  mallocinit()
  ...  if procs > _MaxGomaxprocs {
  procs = _MaxGomaxprocs
  }  if procresize(procs) != nil {
  ...
  }
}

其中 mallocinit() 上面说过了。对 P 的初始化在函数 procresize() 中执行，我们下面只看内存相关的部分。

func procresize(nprocs int32) *p {
    ...    // initialize new P's
    for i := int32(0); i < nprocs; i++ {
        pp := allp[i]        if pp == nil {
            pp = new(p)
            pp.id = i
            pp.status = _Pgcstop
            pp.sudogcache = pp.sudogbuf[:0]
            for i := range pp.deferpool {
                pp.deferpool[i] = pp.deferpoolbuf[i][:0]
            }
            atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
        }  // P mcache 初始化
        if pp.mcache == nil {
            if old == 0 && i == 0 {        
                if getg().m.mcache == nil {
                    throw("missing mcache?")
                }  // P[0] 分配给主 Goroutine
                pp.mcache = getg().m.mcache // bootstrap
            } else {  // P[0] 之外的 P 申请 mcache
                pp.mcache = allocmcache()
            }
        }
        ...
    }
  ...
}

所有的 P 都存放在一个全局数组 allp 中，procresize() 的目的就是将 allp 中用到的 P 进行初始化，同时对多余 P 的资源剥离。我们看一下 allocmcache。

func allocmcache() *mcache {
    lock(&mheap_.lock)
    c := (*mcache)(mheap_.cachealloc.alloc())
    unlock(&mheap_.lock)
    for i := 0; i < _NumSizeClasses; i++ {
        c.alloc[i] = &emptymspan
    }
    c.next_sample = nextSample()    return c
}

allocmcache 是调用 mheap 中特定分配器 cachealloc 来分配的。我们前面说过 fixalloc 是一个特定大小内存块的 free list。那么 cachealloc 就是 mcache 的 free list，每次分配也很简单，直接取 list 表头就可以了。mcache.alloc 现在并没有分配，是从 mcentral 中分配的。

4. 内存分配
先说一下给对象 object 分配内存的主要流程：

object size > 32K，则使用 mheap 直接分配。
object size < 16 byte，使用 mcache 的小对象分配器 tiny 直接分配。（其实 tiny 就是一个指针，暂且这么说吧。）
object size > 16 byte && size <=32K byte 时，先使用 mcache 中对应的 size class 分配。
如果 mcache 对应的 size class 的 span 已经没有可用的块，则向 mcentral 请求。
如果 mcentral 也没有可用的块，则向 mheap 申请，并切分。
如果 mheap 也没有合适的 span，则想操作系统申请。

我们看一下在堆上，也就是 arena 区分配内存的相关函数。

package main
func foo() *int {
  x := 1
  return &x
}
func main() {
  x := foo()  println(*x)
}

根据之前介绍的逃逸分析，foo() 中的 x 会被分配到堆上。把上面代码保存为 test1.go 看一下汇编代码。

$ go build -gcflags '-l' -o test1 test1.go
$ go tool objdump -s "main\.foo" test1
TEXT main.foo(SB) /Users/didi/code/go/malloc_example/test2.go
    test2.go:3  0x2040  65488b0c25a0080000  GS MOVQ GS:0x8a0, CX
    test2.go:3  0x2049  483b6110        CMPQ 0x10(CX), SP
    test2.go:3  0x204d  762a            JBE 0x2079
    test2.go:3  0x204f  4883ec10        SUBQ $0x10, SP
    test2.go:4  0x2053  488d1d66460500      LEAQ 0x54666(IP), BX
    test2.go:4  0x205a  48891c24        MOVQ BX, 0(SP)
    test2.go:4  0x205e  e82d8f0000      CALL runtime.newobject(SB)
    test2.go:4  0x2063  488b442408      MOVQ 0x8(SP), AX
    test2.go:4  0x2068  48c70001000000      MOVQ $0x1, 0(AX)
    test2.go:5  0x206f  4889442418      MOVQ AX, 0x18(SP)
    test2.go:5  0x2074  4883c410        ADDQ $0x10, SP
    test2.go:5  0x2078  c3          RET
    test2.go:3  0x2079  e8a28d0400      CALL runtime.morestack_noctxt(SB)
    test2.go:3  0x207e  ebc0            JMP main.foo(SB)

堆上内存分配调用了 runtime 包的 newobject 函数。

func newobject(typ *_type) unsafe.Pointer {
    return mallocgc(typ.size, typ, true)
}
func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
    ... 
  c := gomcache()
  var x unsafe.Pointer
    noscan := typ == nil || typ.kind&kindNoPointers != 0
    if size <= maxSmallSize {  // object size <= 32K
        if noscan && size < maxTinySize {  // 小于 16 byte 的小对象分配
            off := c.tinyoffset             
            // Align tiny pointer for required (conservative) alignment.
            if size&7 == 0 {
                off = round(off, 8)
            } else if size&3 == 0 {
                off = round(off, 4)
            } else if size&1 == 0 {
                off = round(off, 2)
            }            
            if off+size <= maxTinySize && c.tiny != 0 {                
            // The object fits into existing tiny block.
                x = unsafe.Pointer(c.tiny + off)
                c.tinyoffset = off + size
                c.local_tinyallocs++
                mp.mallocing = 0
                releasem(mp)                
                return x
            }            // Allocate a new maxTinySize block.
            span := c.alloc[tinySizeClass]
            v := nextFreeFast(span)   
            if v == 0 {
                v, _, shouldhelpgc = c.nextFree(tinySizeClass)
            }
            x = unsafe.Pointer(v)
            (*[2]uint64)(x)[0] = 0
            (*[2]uint64)(x)[1] = 0
            // See if we need to replace the existing tiny block with the new one
            // based on amount of remaining free space.
            if size < c.tinyoffset || c.tiny == 0 {
                c.tiny = uintptr(x)
                c.tinyoffset = size
            }
            size = maxTinySize
        } else {  // object size >= 16 byte  && object size <= 32K byte
            var sizeclass uint8
            if size <= smallSizeMax-8 {
                sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]
            } else {
                sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]
            }
            size = uintptr(class_to_size[sizeclass])
            span := c.alloc[sizeclass]
            v := nextFreeFast(span)            
            if v == 0 {
                v, span, shouldhelpgc = c.nextFree(sizeclass)
            }
            x = unsafe.Pointer(v)            
            if needzero && span.needzero != 0 {
                memclrNoHeapPointers(unsafe.Pointer(v), size)
            }
        }
} else {  //object size > 32K byte
        var s *mspan
        shouldhelpgc = true
        systemstack(func() {
            s = largeAlloc(size, needzero)
        })
        s.freeindex = 1
        s.allocCount = 1
        x = unsafe.Pointer(s.base())
        size = s.elemsize
    }
}

整个分配过程可以根据 object size 拆解成三部分：size < 16 byte, 16 byte <= size <= 32 K byte, size > 32 K byte。

4.1 size 小于 16 byte
对于小于 16 byte 的内存块，mcache 有个专门的内存区域 tiny 用来分配，tiny 是指针，指向开始地址。

func mallocgc(...) {
  ...
          off := c.tinyoffset          // 地址对齐
          if size&7 == 0 {
                off = round(off, 8)
            } else if size&3 == 0 {
                off = round(off, 4)
            } else if size&1 == 0 {
                off = round(off, 2)
            }          // 分配
            if off+size <= maxTinySize && c.tiny != 0 {                
                // The object fits into existing tiny block.
                x = unsafe.Pointer(c.tiny + off)
                c.tinyoffset = off + size
                c.local_tinyallocs++
                mp.mallocing = 0
                releasem(mp)                
                return x
            }            // tiny 不够了，为其重新分配一个 16 byte 内存块
            span := c.alloc[tinySizeClass]
            v := nextFreeFast(span)            
            if v == 0 {
                v, _, shouldhelpgc = c.nextFree(tinySizeClass)
            }
            x = unsafe.Pointer(v)          //将申请的内存块全置为 0
            (*[2]uint64)(x)[0] = 0
            (*[2]uint64)(x)[1] = 0
            // See if we need to replace the existing tiny block with the new one
            // based on amount of remaining free space.
          // 如果申请的内存块用不完，则将剩下的给 tiny，用 tinyoffset 记录分配了多少。
            if size < c.tinyoffset || c.tiny == 0 {
                c.tiny = uintptr(x)
                c.tinyoffset = size
            }
            size = maxTinySize
}

如上所示，tinyoffset 表示 tiny 当前分配到什么地址了，之后的分配根据 tinyoffset 寻址。先根据要分配的对象大小进行地址对齐，比如 size 是 8 的倍数，tinyoffset 和 8 对齐。然后就是进行分配。如果 tiny 剩余的空间不够用，则重新申请一个 16 byte 的内存块，并分配给 object。如果有结余，则记录在 tiny 上。

4.2 size 大于 32 K byte
对于大于 32 Kb 的内存分配，直接跳过 mcache 和 mcentral，通过 mheap 分配。

func mallocgc(...) {
    } else {        
        var s *mspan
        shouldhelpgc = true
        systemstack(func() {
            s = largeAlloc(size, needzero)
        })
        s.freeindex = 1
        s.allocCount = 1
        x = unsafe.Pointer(s.base())
        size = s.elemsize
    }
    ...
}
func largeAlloc(size uintptr, needzero bool) *mspan {
    ...
    npages := size >> _PageShift    
    if size&_PageMask != 0 {
        npages++
    }
    ...
    s := mheap_.alloc(npages, 0, true, needzero)    
    if s == nil {
        throw("out of memory")
    }
    s.limit = s.base() + size
    heapBitsForSpan(s.base()).initSpan(s)    
    return s
}

对于大于 32 K 的内存分配都是分配整数页，先右移然后低位与计算需要的页数。

本文来自：51CTO博客

感谢作者：mob604756f0bbf4

查看原文：Golang的内存管理(中篇)