b13a68e129
c521b3ac6 Merge #11: fixup define checks. Cleans up some oopses from #5. 8b1cd3753 fixup define checks. Cleans up some oopses from #5. 6b1508d6d Merge #6: Fixes typo fceb80542 Merge #10: Clean up compile-time warnings (gcc 7.1) 0ec2a343f Clean up compile-time warnings (gcc 7.1) d4c268a35 Merge #5: Move helper functions out of sse4.2 object 8d4eb0847 Add HasAcceleratedCRC32C to port_win.h 77cfbfd25 crc32: move helper functions out of port_posix_sse.cc 4c1e9e016 silence compiler warnings about uninitialized variables 495316485 Merge #2: Prefer std::atomic over MemoryBarrier 2953978ef Fixes typo f134284a1 Merge #1: Merge upstream LevelDB 1.20 ba8a445fd Prefer std::atomic over MemoryBarrier git-subtree-dir: src/leveldb git-subtree-split: c521b3ac654cfbe009c575eacf7e5a6e189bb5bb
110 lines
3 KiB
C++
110 lines
3 KiB
C++
// Copyright 2016 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
//
|
|
// An implementation of crc32c accelerated with x86 SSE 4.2 instructions,
// handling up to eight bytes at a time on x64 (four bytes on 32-bit x86).
|
|
//
|
|
// In a separate source file to allow this accelerated CRC32C function to be
|
|
// compiled with the appropriate compiler flags to enable x86 SSE 4.2
|
|
// instructions.
|
|
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include "port/port.h"
|
|
|
|
#if defined(LEVELDB_PLATFORM_POSIX_SSE)
|
|
|
|
#if defined(_MSC_VER)
|
|
#include <intrin.h>
|
|
#elif defined(__GNUC__) && defined(__SSE4_2__)
|
|
#include <nmmintrin.h>
|
|
#endif
|
|
|
|
#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
|
|
|
|
namespace leveldb {
|
|
namespace port {
|
|
|
|
#if defined(LEVELDB_PLATFORM_POSIX_SSE)
|
|
|
|
// Reads the four bytes starting at |p| and returns them as a 32-bit word.
// The bytes are copied verbatim (no byte swapping), so the result is in the
// host's native byte order.
static inline uint32_t LE_LOAD32(const uint8_t *p) {
  // SSE is x86 only, so the native order is already little-endian.
  uint32_t value;
  memcpy(&value, p, sizeof(uint32_t));
  return value;
}
|
|
|
|
#if defined(_M_X64) || defined(__x86_64__) // LE_LOAD64 is only used on x64.
|
|
|
|
// Reads the eight bytes starting at |p| and returns them as a 64-bit word.
// The bytes are copied verbatim (no byte swapping), so the result is in the
// host's native byte order.
static inline uint64_t LE_LOAD64(const uint8_t *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(uint64_t));
  return value;
}
|
|
|
|
#endif // defined(_M_X64) || defined(__x86_64__)
|
|
|
|
#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
|
|
|
|
// Computes a CRC32C over the |size| bytes at |buf| with the SSE 4.2 crc32
// instructions, starting from the running checksum |crc|.
//
// |crc| is XORed with 0xffffffff before the first step and the accumulator is
// XORed with 0xffffffff again before it is returned.
//
// When LEVELDB_PLATFORM_POSIX_SSE is not defined, no work is done and 0 is
// returned.
//
// For further improvements see Intel publication at:
// http://download.intel.com/design/intarch/papers/323405.pdf
uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if !defined(LEVELDB_PLATFORM_POSIX_SSE)
  return 0;
#else

  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
  const uint8_t *e = p + size;
  uint32_t l = crc ^ 0xffffffffu;

// Each STEPn macro folds the next n bytes at |p| into |l| and advances |p|.
#define STEP1 do {                              \
    l = _mm_crc32_u8(l, *p++);                  \
} while (0)
#define STEP4 do {                              \
    l = _mm_crc32_u32(l, LE_LOAD32(p));         \
    p += 4;                                     \
} while (0)
#define STEP8 do {                              \
    l = _mm_crc32_u64(l, LE_LOAD64(p));         \
    p += 8;                                     \
} while (0)

  if (size > 16) {
    // Consume single bytes until |p| sits on an 8-byte boundary. That takes
    // (8 - addr % 8) % 8 bytes, i.e. at most 7, which the size > 16 guard
    // guarantees are available. (Consuming addr % 8 bytes instead would leave
    // |p| misaligned for every offset other than 0 and 4.)
    const unsigned int misalignment =
        static_cast<unsigned int>(reinterpret_cast<uintptr_t>(p) % 8);
    for (unsigned int i = (8 - misalignment) % 8; i; --i) {
      STEP1;
    }

    // _mm_crc32_u64 is only available on x64.
#if defined(_M_X64) || defined(__x86_64__)
    // Process 8 bytes at a time
    while ((e-p) >= 8) {
      STEP8;
    }
    // Fewer than 8 bytes remain, so at most one 4-byte step is possible.
    if ((e-p) >= 4) {
      STEP4;
    }
#else  // !(defined(_M_X64) || defined(__x86_64__))
    // Process 4 bytes at a time
    while ((e-p) >= 4) {
      STEP4;
    }
#endif  // defined(_M_X64) || defined(__x86_64__)
  }
  // Process the last few bytes (or the whole input when size <= 16).
  while (p != e) {
    STEP1;
  }
#undef STEP8
#undef STEP4
#undef STEP1
  return l ^ 0xffffffffu;
#endif  // defined(LEVELDB_PLATFORM_POSIX_SSE)
}
|
|
|
|
} // namespace port
|
|
} // namespace leveldb
|