牛奶还是奶牛 · 位域与字节序

在计算机的世界里，数据的存储方式比我们想象的要复杂得多。同样的数字 0x12345678，在不同的机器上可能以完全不同的方式存储。这就像"牛奶"和"奶牛"——虽然是同样的字，但顺序不同，意义完全不一样。

让我们深入探索位域与字节序的奥秘。

位域：精确到比特的控制

位域（Bit Field）允许我们精确控制结构体成员占用的位数：

#include <iostream>

// Four flag fields declared as bit-fields of `unsigned int`.
// The declared widths add up to 1 + 3 + 4 + 24 = 32 bits.
struct PackedFlags
{
    unsigned int isActive : 1;    // 1 bit
    unsigned int priority : 3;    // 3 bits (0-7)
    unsigned int category : 4;    // 4 bits (0-15)
    unsigned int reserved : 24;   // 24 bits
};

int main()
{
    std::cout << "Size of PackedFlags: " << sizeof(PackedFlags) << " bytes" << std::endl;
    
    PackedFlags flags;
    flags.isActive = 1;
    flags.priority = 5;
    flags.category = 10;
    flags.reserved = 0;
    
    std::cout << "isActive: " << flags.isActive << std::endl;
    std::cout << "priority: " << flags.priority << std::endl;
    std::cout << "category: " << flags.category << std::endl;
    
    // 注意：赋值超出范围会被截断
    flags.priority = 15;  // 只保留低 3 位，结果为 7
    std::cout << "priority after overflow: " << flags.priority << std::endl;
    
    return 0;
}

位域的实际应用

#include <iostream>
#include <cstdint>

// Network protocol header (a simplified TCP header).
// NOTE(review): how the bit-fields below are packed inside their bytes is
// decided by the compiler/ABI; confirm the layout per target before using
// this struct to overlay real packet bytes.
struct TCPHeader
{
    uint16_t sourcePort;
    uint16_t destPort;
    uint32_t sequenceNumber;
    uint32_t ackNumber;
    
    // Bit-field section: two 4-bit fields
    uint8_t reserved : 4;
    uint8_t dataOffset : 4;
    
    // Flag bits: eight 1-bit fields
    uint8_t fin : 1;
    uint8_t syn : 1;
    uint8_t rst : 1;
    uint8_t psh : 1;
    uint8_t ack : 1;
    uint8_t urg : 1;
    uint8_t ece : 1;
    uint8_t cwr : 1;
    
    uint16_t windowSize;
    uint16_t checksum;
    uint16_t urgentPointer;
};

// Hardware register mapping.
// Eight 1-bit pin fields, four 2-bit mode fields and a 16-bit reserved field
// (8 + 8 + 16 = 32 bits declared in total).
struct GPIORegister
{
    uint32_t pin0  : 1;
    uint32_t pin1  : 1;
    uint32_t pin2  : 1;
    uint32_t pin3  : 1;
    uint32_t pin4  : 1;
    uint32_t pin5  : 1;
    uint32_t pin6  : 1;
    uint32_t pin7  : 1;
    uint32_t mode0 : 2;  // 2-bit mode per pin
    uint32_t mode1 : 2;
    uint32_t mode2 : 2;
    uint32_t mode3 : 2;
    uint32_t reserved : 16;
};

int main()
{
    std::cout << "Size of TCPHeader: " << sizeof(TCPHeader) << " bytes" << std::endl;
    std::cout << "Size of GPIORegister: " << sizeof(GPIORegister) << " bytes" << std::endl;
    
    TCPHeader header = {};
    header.sourcePort = 80;
    header.destPort = 443;
    header.syn = 1;
    header.ack = 1;
    
    std::cout << "SYN: " << (int)header.syn << std::endl;
    std::cout << "ACK: " << (int)header.ack << std::endl;
    
    return 0;
}

字节序：大端与小端

字节序（Endianness）描述了多字节数据在内存中的存储顺序：

大端序（Big Endian）：高位字节在前（网络字节序）
小端序（Little Endian）：低位字节在前（x86/x64）

数值 0x12345678 在内存中的存储：

大端序（Big Endian）：
地址    内容
0x00    0x12  (最高位字节)
0x01    0x34
0x02    0x56
0x03    0x78  (最低位字节)

小端序（Little Endian）：
地址    内容
0x00    0x78  (最低位字节)
0x01    0x56
0x02    0x34
0x03    0x12  (最高位字节)

检测字节序

#include <iostream>
#include <cstdint>

// Byte orders that detectEndianness() can report.
enum class Endianness { Little, Big, Unknown };

// Determines the byte order at run time by looking at the byte stored at the
// lowest address of the 32-bit value 0x01020304:
//   0x04 -> Little, 0x01 -> Big, anything else -> Unknown.
Endianness detectEndianness()
{
    const uint32_t probe = 0x01020304;
    const uint8_t* lowestAddress = reinterpret_cast<const uint8_t*>(&probe);

    switch (*lowestAddress)
    {
        case 0x04:
            return Endianness::Little;
        case 0x01:
            return Endianness::Big;
        default:
            return Endianness::Unknown;
    }
}

// C++20 approach
#if __cplusplus >= 202002L
#include <bit>

// Prints which byte order std::endian::native reports; the choice is made at
// compile time. Anything that is neither little nor big is reported as mixed.
void checkEndianness20()
{
    const char* description = "Mixed endian (C++20)";
    if constexpr (std::endian::native == std::endian::little)
        description = "Little endian (C++20)";
    else if constexpr (std::endian::native == std::endian::big)
        description = "Big endian (C++20)";

    std::cout << description << std::endl;
}
#endif

int main()
{
    Endianness endian = detectEndianness();
    
    switch (endian)
    {
        case Endianness::Little:
            std::cout << "This machine is Little Endian" << std::endl;
            break;
        case Endianness::Big:
            std::cout << "This machine is Big Endian" << std::endl;
            break;
        default:
            std::cout << "Unknown endianness" << std::endl;
    }
    
    // 可视化存储
    uint32_t value = 0x12345678;
    uint8_t* bytes = reinterpret_cast<uint8_t*>(&value);
    
    std::cout << "\nValue: 0x" << std::hex << value << std::endl;
    std::cout << "Bytes in memory: ";
    for (int i = 0; i < 4; ++i)
    {
        std::cout << "0x" << (int)bytes[i] << " ";
    }
    std::cout << std::endl;
    
    return 0;
}

字节序转换

在网络编程中，需要在主机字节序和网络字节序之间转换：

#include <iostream>
#include <cstdint>

#ifdef _WIN32
    #include <winsock2.h>
#else
    #include <arpa/inet.h>
#endif

// Hand-written byte-order conversion helpers.

// Returns `value` with its two bytes exchanged.
uint16_t swapBytes16(uint16_t value)
{
    const uint16_t lowToHigh = static_cast<uint16_t>(value << 8);
    const uint16_t highToLow = static_cast<uint16_t>(value >> 8);
    return static_cast<uint16_t>(lowToHigh | highToLow);
}

// Returns `value` with the order of its four bytes reversed.
uint32_t swapBytes32(uint32_t value)
{
    // Pull out each byte, numbered from least significant (byte0) upwards.
    const uint32_t byte0 = value & 0xFFu;
    const uint32_t byte1 = (value >> 8) & 0xFFu;
    const uint32_t byte2 = (value >> 16) & 0xFFu;
    const uint32_t byte3 = (value >> 24) & 0xFFu;

    // Reassemble them in the opposite order.
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}

// Returns `value` with the order of its eight bytes reversed.
uint64_t swapBytes64(uint64_t value)
{
    uint64_t reversed = 0;

    // Peel bytes off the low end of `value` and push them onto the low end of
    // `reversed`; the first byte taken therefore ends up most significant.
    for (int taken = 0; taken < 8; ++taken)
    {
        reversed = (reversed << 8) | (value & 0xFFu);
        value >>= 8;
    }

    return reversed;
}

// Template version: reverses the byte order of `value` for any type T that
// can be initialized from 0, by copying its bytes back to front.
template<typename T>
T swapBytes(T value)
{
    T swapped = 0;
    const uint8_t* in = reinterpret_cast<const uint8_t*>(&value);
    // Start one past the last output byte and fill towards the front.
    uint8_t* out = reinterpret_cast<uint8_t*>(&swapped) + sizeof(T);

    for (size_t remaining = sizeof(T); remaining > 0; --remaining)
    {
        *--out = *in++;
    }

    return swapped;
}

// Demo: converts a value to network byte order and back with htonl/ntohl,
// then prints the results of the hand-written swap helpers on the same value.
int main()
{
    const uint32_t original = 0x12345678;
    const uint32_t wireOrder = htonl(original);   // host to network long
    const uint32_t roundTrip = ntohl(wireOrder);  // network to host long

    std::cout << std::hex;
    std::cout << "Host value:    0x" << original << std::endl
              << "Network value: 0x" << wireOrder << std::endl
              << "Back to host:  0x" << roundTrip << std::endl;

    // Hand-written implementations
    const uint32_t viaFixedWidth = swapBytes32(original);
    const uint32_t viaTemplate = swapBytes(original);
    std::cout << "\nManual swap:" << std::endl
              << "Original:  0x" << original << std::endl
              << "Swapped:   0x" << viaFixedWidth << std::endl
              << "Template:  0x" << viaTemplate << std::endl;

    return 0;
}

C++23 的 std::byteswap

#if __cplusplus >= 202302L
#include <bit>
#include <cstdint>   // uint32_t: this snippet must not rely on transitive includes
#include <iostream>

// Demo (C++23 only): reverses the bytes of a 32-bit value with std::byteswap
// and prints the value before and after, in hexadecimal.
int main()
{
    const std::uint32_t value = 0x12345678;
    const std::uint32_t swapped = std::byteswap(value);

    std::cout << std::hex;
    std::cout << "Original: 0x" << value << std::endl;
    std::cout << "Swapped:  0x" << swapped << std::endl;

    return 0;
}
#endif

位操作技巧

#include <iostream>
#include <cstdint>
#include <bitset>

// Common bit-manipulation helpers for 32-bit unsigned values.
//
// Bit positions are numbered from 0 (least significant) to 31. The per-bit
// helpers accept any int: a position outside [0, 31] names no bit, so it is
// treated as a no-op (or "not set") instead of triggering the undefined
// behavior of an out-of-range shift.
class BitOps
{
public:
    // Returns `value` with bit n set; unchanged if n is outside [0, 31].
    static uint32_t setBit(uint32_t value, int n)
    {
        return value | maskFor(n);
    }

    // Returns `value` with bit n cleared; unchanged if n is outside [0, 31].
    static uint32_t clearBit(uint32_t value, int n)
    {
        return value & ~maskFor(n);
    }

    // Returns `value` with bit n inverted; unchanged if n is outside [0, 31].
    static uint32_t toggleBit(uint32_t value, int n)
    {
        return value ^ maskFor(n);
    }

    // Returns true when bit n of `value` is set; false if n is outside [0, 31].
    static bool testBit(uint32_t value, int n)
    {
        return (value & maskFor(n)) != 0;
    }

    // Returns a value containing only the lowest set bit of `value`
    // (0 when `value` is 0).
    static uint32_t lowestSetBit(uint32_t value)
    {
        // ~value + 1 is the two's-complement negation, written without unary
        // minus on an unsigned operand (which some compilers warn about).
        return value & (~value + 1u);
    }

    // Returns `value` with its lowest set bit cleared (0 stays 0).
    static uint32_t clearLowestSetBit(uint32_t value)
    {
        return value & (value - 1);
    }

    // Counts the set bits (population count) by inspecting every bit position
    // up to the highest set one.
    static int popcount(uint32_t value)
    {
        int count = 0;
        while (value)
        {
            count += value & 1;
            value >>= 1;
        }
        return count;
    }

    // Faster population count (Brian Kernighan's algorithm): each iteration
    // clears the lowest set bit, so the loop runs once per set bit.
    static int popcountFast(uint32_t value)
    {
        int count = 0;
        while (value)
        {
            value &= (value - 1);
            ++count;
        }
        return count;
    }

    // Returns true when `value` is a power of two. Zero is not.
    static bool isPowerOfTwo(uint32_t value)
    {
        return value && !(value & (value - 1));
    }

    // Rounds `value` up to the nearest power of two (a power of two maps to
    // itself). 0 maps to 1, the smallest power of two. Inputs above 2^31 have
    // no representable answer; the result wraps around to 0 for them.
    static uint32_t nextPowerOfTwo(uint32_t value)
    {
        if (value == 0)
        {
            return 1;
        }

        // Smear the highest set bit of (value - 1) into every lower position,
        // then add one to reach the next power of two.
        --value;
        value |= value >> 1;
        value |= value >> 2;
        value |= value >> 4;
        value |= value >> 8;
        value |= value >> 16;
        return value + 1;
    }

private:
    // Returns a mask with only bit n set, or 0 when n is outside [0, 31].
    static uint32_t maskFor(int n)
    {
        return (n >= 0 && n < 32) ? (uint32_t{1} << n) : uint32_t{0};
    }
};

int main()
{
    uint32_t value = 0b10110100;
    
    std::cout << "Original: " << std::bitset<8>(value) << std::endl;
    std::cout << "Set bit 0: " << std::bitset<8>(BitOps::setBit(value, 0)) << std::endl;
    std::cout << "Clear bit 2: " << std::bitset<8>(BitOps::clearBit(value, 2)) << std::endl;
    std::cout << "Toggle bit 7: " << std::bitset<8>(BitOps::toggleBit(value, 7)) << std::endl;
    std::cout << "Test bit 4: " << BitOps::testBit(value, 4) << std::endl;
    std::cout << "Popcount: " << BitOps::popcount(value) << std::endl;
    
    std::cout << "\n7 is power of 2: " << BitOps::isPowerOfTwo(7) << std::endl;
    std::cout << "8 is power of 2: " << BitOps::isPowerOfTwo(8) << std::endl;
    std::cout << "Next power of 2 after 100: " << BitOps::nextPowerOfTwo(100) << std::endl;
    
    return 0;
}

位域的可移植性问题

位域的内存布局是实现定义的，不同编译器可能有不同的行为：

#include <iostream>
#include <cstdint>
#include <cstring>

// Non-portable bit-fields: the declared widths are 5 + 3 + 8 + 16 = 32 bits,
// but where each field lands inside the underlying storage is
// implementation-defined and may differ between compilers.
struct NonPortable
{
    uint32_t a : 5;
    uint32_t b : 3;
    uint32_t c : 8;
    uint32_t d : 16;
};

// Portable alternative: four fields packed into one uint32_t with explicit
// shifts and masks, so the bit positions are fixed by this code:
//   a = bits 0-4, b = bits 5-7, c = bits 8-15, d = bits 16-31.
// Setters keep only as many low bits of their argument as the field holds.
class PortableFlags
{
private:
    uint32_t data;

    // Mask covering the lowest `width` bits (only used with width < 32).
    static uint32_t lowMask(unsigned width)
    {
        return (uint32_t{1} << width) - 1u;
    }

    // Reads the `width`-bit field that starts at bit `shift`.
    uint32_t load(unsigned shift, unsigned width) const
    {
        return (data >> shift) & lowMask(width);
    }

    // Overwrites the `width`-bit field at bit `shift` with the low bits of v,
    // leaving every other bit of `data` untouched.
    void store(unsigned shift, unsigned width, uint32_t v)
    {
        const uint32_t fieldMask = lowMask(width) << shift;
        data = (data & ~fieldMask) | ((v << shift) & fieldMask);
    }

public:
    PortableFlags() : data(0) {}

    // a: bits 0-4 (5 bits)
    uint32_t getA() const { return load(0, 5); }
    void setA(uint32_t v) { store(0, 5, v); }

    // b: bits 5-7 (3 bits)
    uint32_t getB() const { return load(5, 3); }
    void setB(uint32_t v) { store(5, 3, v); }

    // c: bits 8-15 (8 bits)
    uint32_t getC() const { return load(8, 8); }
    void setC(uint32_t v) { store(8, 8, v); }

    // d: bits 16-31 (16 bits)
    uint32_t getD() const { return load(16, 16); }
    void setD(uint32_t v) { store(16, 16, v); }

    // Direct access to the packed 32-bit word
    uint32_t getRaw() const { return data; }
    void setRaw(uint32_t v) { data = v; }
};

// Demo: stores the same four values in a bit-field struct and in
// PortableFlags, then prints the raw 32-bit word behind each so the two
// layouts can be compared.
int main()
{
    // The bit-field version: its layout may differ between compilers.
    NonPortable np;
    std::memset(&np, 0, sizeof(np));
    np.a = 10;
    np.b = 5;
    np.c = 200;
    np.d = 50000;

    // Copy the object representation out with memcpy. Dereferencing a
    // uint32_t* that points at the struct would violate the strict-aliasing
    // rules; memcpy is the well-defined way to inspect the bytes.
    static_assert(sizeof(NonPortable) == sizeof(uint32_t),
                  "NonPortable is expected to occupy exactly one 32-bit word");
    uint32_t raw = 0;
    std::memcpy(&raw, &np, sizeof(raw));
    std::cout << "NonPortable raw: 0x" << std::hex << raw << std::endl;

    // The portable version: bit positions are fixed by explicit shifts/masks.
    PortableFlags pf;
    pf.setA(10);
    pf.setB(5);
    pf.setC(200);
    pf.setD(50000);

    std::cout << "PortableFlags raw: 0x" << std::hex << pf.getRaw() << std::endl;

    // Switch back to decimal before printing the individual field values.
    std::cout << std::dec;
    std::cout << "\nPortableFlags values:" << std::endl;
    std::cout << "a: " << pf.getA() << std::endl;
    std::cout << "b: " << pf.getB() << std::endl;
    std::cout << "c: " << pf.getC() << std::endl;
    std::cout << "d: " << pf.getD() << std::endl;

    return 0;
}

序列化与反序列化

处理跨平台数据交换时，字节序至关重要：

#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Builds a byte buffer in memory. Multi-byte integers are stored most
// significant byte first (big-endian / network byte order), whatever the
// byte order of the host.
class BinarySerializer
{
private:
    std::vector<uint8_t> buffer;

    // Appends `value` as sizeof(T) bytes, most significant byte first.
    template<typename T>
    void writeValue(T value)
    {
        // Walk from the top byte down to the bottom byte.
        for (size_t bytesLeft = sizeof(T); bytesLeft > 0; --bytesLeft)
        {
            const size_t shift = (bytesLeft - 1) * 8;
            buffer.push_back(static_cast<uint8_t>((value >> shift) & 0xFF));
        }
    }

public:
    void writeUint8(uint8_t value) { buffer.push_back(value); }
    void writeUint16(uint16_t value) { writeValue(value); }
    void writeUint32(uint32_t value) { writeValue(value); }
    void writeUint64(uint64_t value) { writeValue(value); }

    // Writes the string length as a 32-bit integer, followed by the raw
    // characters (no terminator).
    void writeString(const std::string& str)
    {
        const uint32_t length = static_cast<uint32_t>(str.size());
        writeUint32(length);
        buffer.insert(buffer.end(), str.begin(), str.end());
    }

    // Read-only view of everything written so far.
    const std::vector<uint8_t>& getData() const { return buffer; }

    // Writes the buffer contents to `filename` in binary mode.
    void saveToFile(const std::string& filename) const
    {
        std::ofstream out(filename, std::ios::binary);
        const char* bytes = reinterpret_cast<const char*>(buffer.data());
        out.write(bytes, buffer.size());
    }
};

// Reads values back from a byte buffer in which multi-byte integers are
// stored most significant byte first (the format BinarySerializer writes).
//
// The buffer is borrowed, not copied, so it must outlive this object.
// Every read is checked against the buffer size: a read that would run past
// the end throws std::out_of_range and leaves the read position unchanged.
class BinaryDeserializer
{
private:
    const uint8_t* data;
    size_t size;
    size_t pos = 0;

    // Throws std::out_of_range unless `count` more bytes are available.
    void requireBytes(size_t count) const
    {
        // pos never exceeds size, so the subtraction cannot wrap around.
        if (count > size - pos)
        {
            throw std::out_of_range("BinaryDeserializer: read past end of buffer");
        }
    }

    // Reads sizeof(T) bytes, most significant first, and assembles them
    // into a T.
    template<typename T>
    T readValue()
    {
        requireBytes(sizeof(T));

        T value = 0;
        for (size_t i = 0; i < sizeof(T); ++i)
        {
            value = static_cast<T>((value << 8) | data[pos++]);
        }
        return value;
    }

public:
    // Wraps `size` bytes starting at `data`. A null `data` pointer is treated
    // as an empty buffer regardless of `size`.
    BinaryDeserializer(const uint8_t* data, size_t size)
        : data(data), size(data != nullptr ? size : 0) {}

    uint8_t readUint8()
    {
        requireBytes(1);
        return data[pos++];
    }
    uint16_t readUint16() { return readValue<uint16_t>(); }
    uint32_t readUint32() { return readValue<uint32_t>(); }
    uint64_t readUint64() { return readValue<uint64_t>(); }

    // Reads a 32-bit length followed by that many raw characters.
    // The length comes from the data itself, so it is validated against the
    // bytes that actually remain before anything is copied.
    std::string readString()
    {
        const size_t start = pos;
        const uint32_t length = readUint32();
        if (length > size - pos)
        {
            pos = start;  // undo the length read so a failed call consumes nothing
            throw std::out_of_range("BinaryDeserializer: string length exceeds buffer");
        }

        std::string result(reinterpret_cast<const char*>(data + pos), length);
        pos += length;
        return result;
    }
};

int main()
{
    // 序列化
    BinarySerializer ser;
    ser.writeUint8(42);
    ser.writeUint16(1000);
    ser.writeUint32(100000);
    ser.writeString("Hello, World!");
    
    // 查看序列化结果
    std::cout << "Serialized bytes: ";
    for (uint8_t b : ser.getData())
    {
        std::cout << std::hex << (int)b << " ";
    }
    std::cout << std::dec << std::endl;
    
    // 反序列化
    const auto& data = ser.getData();
    BinaryDeserializer des(data.data(), data.size());
    
    std::cout << "\nDeserialized:" << std::endl;
    std::cout << "uint8: " << (int)des.readUint8() << std::endl;
    std::cout << "uint16: " << des.readUint16() << std::endl;
    std::cout << "uint32: " << des.readUint32() << std::endl;
    std::cout << "string: " << des.readString() << std::endl;
    
    return 0;
}

总结

| 主题 | 要点 |
| --- | --- |
| 位域 | 精确控制位数，节省空间 |
| 大端序 | 高位字节在前，网络标准 |
| 小端序 | 低位字节在前，x86/x64 |
| 字节序转换 | htonl/ntohl 等函数 |
| 位操作 | 高效的低级数据处理 |
| 可移植性 | 位域布局是实现定义的 |
| 序列化 | 跨平台时统一使用大端序 |

理解位域和字节序，你就能像区分"牛奶"和"奶牛"一样，准确地控制数据在内存中的每一个比特！

Previous类型身份证 · RTTI

Last updated 3 months ago

#include <iostream> #include <cstdint> #include <bitset> // 常用位操作 class BitOps { public: // 设置第 n 位 static uint32_t setBit(uint32_t value, int n) { return value | (1u << n); } // 清除第 n 位 static uint32_t clearBit(uint32_t value, int n) { return value & ~(1u << n); } // 翻转第 n 位 static uint32_t toggleBit(uint32_t value, int n) { return value ^ (1u << n); } // 检查第 n 位是否设置 static bool testBit(uint32_t value, int n) { return (value & (1u << n)) != 0; } // 获取最低设置位 static uint32_t lowestSetBit(uint32_t value) { return value & (-value); } // 清除最低设置位 static uint32_t clearLowestSetBit(uint32_t value) { return value & (value - 1); } // 计算设置位数量（人口计数） static int popcount(uint32_t value) { int count = 0; while (value) { count += value & 1; value >>= 1; } return count; } // 更快的 popcount（Brian Kernighan 算法） static int popcountFast(uint32_t value) { int count = 0; while (value) { value &= (value - 1); ++count; } return count; } // 判断是否为 2 的幂 static bool isPowerOfTwo(uint32_t value) { return value && !(value & (value - 1)); } // 向上取整到 2 的幂 static uint32_t nextPowerOfTwo(uint32_t value) { --value; value |= value >> 1; value |= value >> 2; value |= value >> 4; value |= value >> 8; value |= value >> 16; return ++value; } }; int main() { uint32_t value = 0b10110100; std::cout << "Original: " << std::bitset<8>(value) << std::endl; std::cout << "Set bit 0: " << std::bitset<8>(BitOps::setBit(value, 0)) << std::endl; std::cout << "Clear bit 2: " << std::bitset<8>(BitOps::clearBit(value, 2)) << std::endl; std::cout << "Toggle bit 7: " << std::bitset<8>(BitOps::toggleBit(value, 7)) << std::endl; std::cout << "Test bit 4: " << BitOps::testBit(value, 4) << std::endl; std::cout << "Popcount: " << BitOps::popcount(value) << std::endl; std::cout << "\n7 is power of 2: " << BitOps::isPowerOfTwo(7) << std::endl; std::cout << "8 is power of 2: " << BitOps::isPowerOfTwo(8) << std::endl; std::cout << "Next power of 2 after 100: " << BitOps::nextPowerOfTwo(100) << std::endl; return 0; }

#include <iostream> #include <fstream> #include <cstdint> #include <vector> #include <cstring> class BinarySerializer { private: std::vector<uint8_t> buffer; // 写入时转换为大端序（网络字节序） template<typename T> void writeValue(T value) { // 转换为大端序 uint8_t bytes[sizeof(T)]; for (size_t i = 0; i < sizeof(T); ++i) { bytes[sizeof(T) - 1 - i] = static_cast<uint8_t>(value & 0xFF); value >>= 8; } for (size_t i = 0; i < sizeof(T); ++i) { buffer.push_back(bytes[i]); } } public: void writeUint8(uint8_t value) { buffer.push_back(value); } void writeUint16(uint16_t value) { writeValue(value); } void writeUint32(uint32_t value) { writeValue(value); } void writeUint64(uint64_t value) { writeValue(value); } void writeString(const std::string& str) { writeUint32(static_cast<uint32_t>(str.size())); for (char c : str) { buffer.push_back(static_cast<uint8_t>(c)); } } const std::vector<uint8_t>& getData() const { return buffer; } void saveToFile(const std::string& filename) const { std::ofstream file(filename, std::ios::binary); file.write(reinterpret_cast<const char*>(buffer.data()), buffer.size()); } }; class BinaryDeserializer { private: const uint8_t* data; size_t size; size_t pos = 0; template<typename T> T readValue() { T value = 0; for (size_t i = 0; i < sizeof(T); ++i) { value = (value << 8) | data[pos++]; } return value; } public: BinaryDeserializer(const uint8_t* data, size_t size) : data(data), size(size) {} uint8_t readUint8() { return data[pos++]; } uint16_t readUint16() { return readValue<uint16_t>(); } uint32_t readUint32() { return readValue<uint32_t>(); } uint64_t readUint64() { return readValue<uint64_t>(); } std::string readString() { uint32_t length = readUint32(); std::string result(reinterpret_cast<const char*>(&data[pos]), length); pos += length; return result; } }; int main() { // 序列化 BinarySerializer ser; ser.writeUint8(42); ser.writeUint16(1000); ser.writeUint32(100000); ser.writeString("Hello, World!"); // 查看序列化结果 std::cout << "Serialized bytes: "; for (uint8_t b : 
ser.getData()) { std::cout << std::hex << (int)b << " "; } std::cout << std::dec << std::endl; // 反序列化 const auto& data = ser.getData(); BinaryDeserializer des(data.data(), data.size()); std::cout << "\nDeserialized:" << std::endl; std::cout << "uint8: " << (int)des.readUint8() << std::endl; std::cout << "uint16: " << des.readUint16() << std::endl; std::cout << "uint32: " << des.readUint32() << std::endl; std::cout << "string: " << des.readString() << std::endl; return 0; }