Skip to content

Commit 7fbce6d

Browse files
committed
better reading for bitpack
1 parent 1d4657f commit 7fbce6d

File tree

5 files changed

+29
-30
lines changed

5 files changed

+29
-30
lines changed

cp-algo/structures/bit_array.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ namespace cp_algo::structures {
1414
void set(size_t x) {
1515
data[x / width] |= 1ULL << (x % width);
1616
}
17+
void reset(size_t x) {
18+
data[x / width] &= ~(1ULL << (x % width));
19+
}
1720
void flip(size_t x) {
1821
data[x / width] ^= 1ULL << (x % width);
1922
}

cp-algo/structures/bitpack.hpp

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,24 @@
11
#ifndef CP_ALGO_STRUCTURES_BITPACK_HPP
22
#define CP_ALGO_STRUCTURES_BITPACK_HPP
33
#include "../structures/bit_array.hpp"
4+
#include "../util/simd.hpp"
45
#include <cstdint>
56
#include <cstddef>
67
#include <string>
78
#include <array>
89
namespace cp_algo::structures {
9-
template<size_t n, typename Int = uint64_t>
10-
struct bitpack: bit_array<n, Int> {
11-
using Base = bit_array<n, Int>;
10+
template<size_t n>
11+
struct bitpack: bit_array<n, uint64_t> {
12+
using Base = bit_array<n, uint64_t>;
1213
using Base::width, Base::blocks, Base::data;
14+
using Base::set, Base::reset;
1315
auto operator <=> (bitpack const& t) const = default;
1416

1517
bitpack() {}
16-
bitpack(std::string bits) {
17-
size_t rem = size(bits) % width;
18-
if(rem) {
19-
bits += std::string(width - rem, '0');
20-
}
21-
for(size_t i = 0, pos = 0; pos < size(bits); i++, pos += width) {
22-
for(size_t j = width; j; j--) {
23-
data[i] *= 2;
24-
data[i] ^= bits[pos + j - 1] == '1';
25-
}
18+
bitpack(std::string &bits) {
19+
bits.resize((size(bits) + width - 1) / width * width);
20+
for(size_t i = 0; i < blocks; i++) {
21+
data[i] = read_bits64(bits.data() + i * width);
2622
}
2723
}
2824

@@ -42,7 +38,7 @@ namespace cp_algo::structures {
4238
std::string to_string() const {
4339
std::string res(blocks * width, '0');
4440
for(size_t i = 0, pos = 0; i < blocks; i++, pos += width) {
45-
Int block = data[i];
41+
auto block = data[i];
4642
for(size_t j = 0; j < width; j++) {
4743
res[pos + j] = '0' + block % 2;
4844
block /= 2;

cp-algo/util/bit.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#ifndef CP_ALGO_UTIL_BIT_HPP
22
#define CP_ALGO_UTIL_BIT_HPP
3-
#include <immintrin.h>
3+
#include "../util/simd.hpp"
44
#include <cstdint>
55
#include <array>
66
#include <bit>
@@ -25,5 +25,12 @@ namespace cp_algo {
2525
callback.template operator()<1ULL << fl>();
2626
}
2727
}
28+
29+
[[gnu::target("avx2"), gnu::always_inline]] inline uint32_t read_bits(char const* p) {
30+
return _mm256_movemask_epi8(__m256i(vector_cast<u8x32 const>(p[0]) + (127 - '0')));
31+
}
32+
[[gnu::always_inline]] inline uint64_t read_bits64(char const* p) {
33+
return read_bits(p) | (uint64_t(read_bits(p + 32)) << 32);
34+
}
2835
}
2936
#endif // CP_ALGO_UTIL_BIT_HPP

cp-algo/util/simd.hpp

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
namespace cp_algo {
88
template<typename T, size_t len>
99
using simd [[gnu::vector_size(len * sizeof(T))]] = T;
10-
using u32x8 = simd<uint32_t, 8>;
1110
using i64x4 = simd<int64_t, 4>;
1211
using u64x4 = simd<uint64_t, 4>;
12+
using u32x8 = simd<uint32_t, 8>;
1313
using i32x4 = simd<int32_t, 4>;
1414
using u32x4 = simd<uint32_t, 4>;
1515
using i16x4 = simd<int16_t, 4>;
16+
using u8x32 = simd<uint8_t, 32>;
1617
using dx4 = simd<double, 4>;
1718

1819
[[gnu::always_inline]] inline dx4 abs(dx4 a) {
@@ -44,23 +45,14 @@ namespace cp_algo {
4445
[[gnu::always_inline]] inline auto swap_bytes(auto x) {
4546
return decltype(x)(__builtin_shufflevector(u32x8(x), u32x8(x), 1, 0, 3, 2, 5, 4, 7, 6));
4647
}
47-
[[gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
48-
#ifdef __AVX2__
48+
[[gnu::target("avx2"), gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
4949
auto x_ninv = u64x4(_mm256_mul_epu32(__m256i(x), __m256i() + imod));
5050
x += u64x4(_mm256_mul_epu32(__m256i(x_ninv), __m256i() + mod));
51-
#else
52-
auto x_ninv = x * imod;
53-
x += low32(x_ninv) * mod;
54-
#endif
5551
return swap_bytes(x);
5652
}
5753

58-
[[gnu::always_inline]] inline u64x4 montgomery_mul(u64x4 x, u64x4 y, uint32_t mod, uint32_t imod) {
59-
#ifdef __AVX2__
54+
[[gnu::target("avx2"), gnu::always_inline]] inline u64x4 montgomery_mul(u64x4 x, u64x4 y, uint32_t mod, uint32_t imod) {
6055
return montgomery_reduce(u64x4(_mm256_mul_epu32(__m256i(x), __m256i(y))), mod, imod);
61-
#else
62-
return montgomery_reduce(low32(x) * low32(y), mod, imod);
63-
#endif
6456
}
6557
[[gnu::always_inline]] inline u32x8 montgomery_mul(u32x8 x, u32x8 y, uint32_t mod, uint32_t imod) {
6658
return u32x8(montgomery_mul(u64x4(x), u64x4(y), mod, imod)) |

verify/structures/bitpack/prod_mod_2.test.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,12 @@ void solve() {
4040
b[i] = row;
4141
}
4242
cp_algo::checkpoint("read");
43-
for(int j = 0; j < m; j += 64) {
44-
for(int z = 0; z < 64 / K; z++) {
43+
const int width = bitpack<maxn>::width;
44+
for(int j = 0; j < m; j += width) {
45+
for(int z = 0; z < width / K; z++) {
4546
process_precalc(j / K + z);
4647
for(int i = 0; i < n; i++) {
47-
c[i] ^= precalc[uint8_t(a[i].word(j / 64) >> K * z)];
48+
c[i] ^= precalc[uint8_t(a[i].word(j / width) >> K * z)];
4849
}
4950
}
5051
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy