segment .text
        global  popcnt_array           ; let the linker know about popcnt_array

;-----------------------------------------------------------------------
; popcnt_array - total number of set bits in an array of 64-bit words
;
; In:    rcx = address of the first qword
;        rdx = number of qwords, compared as a signed 64-bit value;
;              zero or negative yields 0 and reads no memory
; Out:   rax = sum of the population counts of the first rdx qwords
; Clobb: rdx, r8, r9, r10, r11, flags (rcx is left unchanged);
;        r12-r15 are used but saved and restored on the stack
; Note:  rcx/rdx as first/second argument matches the Microsoft x64
;        calling convention - NOTE(review): confirm against the caller.
;        Requires a CPU that implements the POPCNT instruction.
;
; The main loop handles four qwords per pass and feeds four separate
; accumulators, so the additions do not form a single serial chain.
; A one-qword-at-a-time loop then covers the 0-3 leftover qwords, so
; nothing beyond the first rdx qwords is ever read.
;-----------------------------------------------------------------------
popcnt_array:
        push    r12                     ; r12-r15 are scratch below; restored before ret
        push    r13
        push    r14
        push    r15
        xor     eax, eax                ; rax = partial sum 0 (and the final result)
        xor     r8d, r8d                ; r8  = partial sum 1
        xor     r9d, r9d                ; r9  = index of the next qword to count
        xor     r14d, r14d              ; r14 = partial sum 2
        xor     r15d, r15d              ; r15 = partial sum 3
        cmp     rdx, 4
        jl      .tail                   ; fewer than four qwords: skip the unrolled loop

.count_more:                            ; invariant: rdx >= 4 qwords remain at index r9
        popcnt  r10, [rcx+r9*8]
        add     rax, r10
        popcnt  r11, [rcx+r9*8+8]
        add     r8, r11
        popcnt  r12, [rcx+r9*8+16]
        add     r14, r12
        popcnt  r13, [rcx+r9*8+24]
        add     r15, r13
        add     r9, 4
        sub     rdx, 4                  ; rdx = qwords still to be counted
        cmp     rdx, 4
        jge     .count_more             ; another full group of four is available

.tail:
        test    rdx, rdx
        jle     .total                  ; nothing left over (or count was <= 0)
.count_one:                             ; invariant: 1..3 qwords remain at index r9
        popcnt  r10, [rcx+r9*8]
        add     rax, r10
        inc     r9
        dec     rdx
        jnz     .count_one

.total:
        add     rax, r8                 ; fold the four partial sums into rax
        add     rax, r14
        add     rax, r15
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret