zl程序教程

您现在的位置是:首页 >  其他

当前栏目

proto-buf模型格式测试一例

测试 模型 格式 一例 proto Buf
2023-09-11 14:15:47 时间

深度学习神经网络的输出模型有多种格式,这其中有一种格式使用比较广泛,并且背景深厚,它就是protobuf格式,关于这个格式的介绍请参考这篇博客:

参考博客:《ONNX格式解析之google protobuf解析》(papaofdoudou的博客,CSDN),链接:https://blog.csdn.net/tugouxp/article/details/120583308

摘要:ONNX模型是按照google protobuf格式保存的,模型训练的目的就是为了得到变量的权值,只不过是纯数字罢了,但是我们也不能就这样把这些数字一个一个地写入文件,因为在要保存的模型文件里,不光要保存权值,也要告诉之后用这个模型的人,模型结构是怎么样的,所以需要合理地设计保存文件的格式。不同的机器学习框架都有自己的模型保存格式,例如 Keras 的模型格式是 h5,而 Tensorflow 和 onnx 的保存格式就是 protobuf。其实 protobuf 使用起来非常简单方便,就是自己先定义一……

本文是在这篇博客的基础上开发一个简单的数据模型,在模型上进行序列化和反序列化操作,并检验数据的正确性。

1.编写数据格式描述文件

需要注意的是,为了增加难度,我定义了具有嵌套关系的结构体结构形式。

syntax = "proto3";
package hello;

// Nested message; UserInfo embeds it through its `zilong` field.
// Two int32 fields and one string field, using field numbers 6, 7 and 8
// (numbering here starts at 6 rather than 1).
message NestObj
{
    int32 zilong1 = 6;
    string zilong2 = 7;
    int32 zilong3 = 8;
}
// Top-level message used by the test program.
// Mixes scalar (int32), string, bytes and nested-message fields so that
// each wire representation is exercised once; field numbers run 1..5.
message UserInfo
{
    int32 id = 1;
    int32 age = 2;
    string name = 3;
    bytes raw_data = 4;
    // Sub-message of type NestObj (the nested structure under test).
    NestObj zilong = 5;
}

2.使用protoc-c编译模型,产生模型格式C代码

protoc-c --c_out=./ zilong.proto

生成的头文件和源文件如下

/* Generated by the protocol buffer compiler.  DO NOT EDIT! */
/* Generated from: zilong.proto */

/* Do not generate deprecated warnings for self */
#ifndef PROTOBUF_C__NO_DEPRECATED
#define PROTOBUF_C__NO_DEPRECATED
#endif

#include "zilong.pb-c.h"
/*
 * Reset *message to the default state given by HELLO__NEST_OBJ__INIT.
 * The whole struct is overwritten by a single structure assignment.
 */
void hello__nest_obj__init(Hello__NestObj *message)
{
  static Hello__NestObj defaults = HELLO__NEST_OBJ__INIT;

  *message = defaults;
}
/*
 * Typed wrapper around protobuf_c_message_get_packed_size().
 * Asserts that message carries the NestObj descriptor, then returns
 * whatever the generic library routine reports for it.
 */
size_t hello__nest_obj__get_packed_size(const Hello__NestObj *message)
{
  const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

  assert(message->base.descriptor == &hello__nest_obj__descriptor);
  return protobuf_c_message_get_packed_size(generic);
}
/*
 * Typed wrapper around protobuf_c_message_pack(): serializes message into
 * the caller-supplied buffer out and returns the library routine's result.
 * Asserts that message carries the NestObj descriptor.
 */
size_t hello__nest_obj__pack(const Hello__NestObj *message, uint8_t *out)
{
  const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

  assert(message->base.descriptor == &hello__nest_obj__descriptor);
  return protobuf_c_message_pack(generic, out);
}
/*
 * Typed wrapper around protobuf_c_message_pack_to_buffer(): hands message
 * and the ProtobufCBuffer to the generic routine and returns its result.
 * Asserts that message carries the NestObj descriptor.
 */
size_t hello__nest_obj__pack_to_buffer(const Hello__NestObj *message,
                                       ProtobufCBuffer *buffer)
{
  const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

  assert(message->base.descriptor == &hello__nest_obj__descriptor);
  return protobuf_c_message_pack_to_buffer(generic, buffer);
}
/*
 * Typed wrapper around protobuf_c_message_unpack(): decodes len bytes at
 * data using the NestObj descriptor and the given allocator, and returns
 * the result cast to Hello__NestObj *.
 */
Hello__NestObj *hello__nest_obj__unpack(ProtobufCAllocator *allocator,
                                        size_t len,
                                        const uint8_t *data)
{
  ProtobufCMessage *decoded =
      protobuf_c_message_unpack(&hello__nest_obj__descriptor,
                                allocator, len, data);

  return (Hello__NestObj *)decoded;
}
/*
 * Release a message previously returned by hello__nest_obj__unpack().
 *
 * message:   message to free; NULL is accepted and ignored, so callers can
 *            pass the result of a failed unpack without a separate check.
 * allocator: forwarded unchanged to protobuf_c_message_free_unpacked().
 */
void   hello__nest_obj__free_unpacked
                     (Hello__NestObj *message,
                      ProtobufCAllocator *allocator)
{
  /* Guard before the assert below, which dereferences message. */
  if (!message)
    return;
  assert(message->base.descriptor == &hello__nest_obj__descriptor);
  protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator);
}
/*
 * Reset *message to the default state given by HELLO__USER_INFO__INIT.
 * The whole struct is overwritten by a single structure assignment.
 */
void hello__user_info__init(Hello__UserInfo *message)
{
  static Hello__UserInfo defaults = HELLO__USER_INFO__INIT;

  *message = defaults;
}
/*
 * Typed wrapper around protobuf_c_message_get_packed_size().
 * Asserts that message carries the UserInfo descriptor, then returns
 * whatever the generic library routine reports for it.
 */
size_t hello__user_info__get_packed_size(const Hello__UserInfo *message)
{
  const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

  assert(message->base.descriptor == &hello__user_info__descriptor);
  return protobuf_c_message_get_packed_size(generic);
}
/*
 * Typed wrapper around protobuf_c_message_pack(): serializes message into
 * the caller-supplied buffer out and returns the library routine's result.
 * Asserts that message carries the UserInfo descriptor.
 */
size_t hello__user_info__pack(const Hello__UserInfo *message, uint8_t *out)
{
  const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

  assert(message->base.descriptor == &hello__user_info__descriptor);
  return protobuf_c_message_pack(generic, out);
}
/*
 * Typed wrapper around protobuf_c_message_pack_to_buffer(): hands message
 * and the ProtobufCBuffer to the generic routine and returns its result.
 * Asserts that message carries the UserInfo descriptor.
 */
size_t hello__user_info__pack_to_buffer(const Hello__UserInfo *message,
                                        ProtobufCBuffer *buffer)
{
  const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

  assert(message->base.descriptor == &hello__user_info__descriptor);
  return protobuf_c_message_pack_to_buffer(generic, buffer);
}
/*
 * Typed wrapper around protobuf_c_message_unpack(): decodes len bytes at
 * data using the UserInfo descriptor and the given allocator, and returns
 * the result cast to Hello__UserInfo *.
 */
Hello__UserInfo *hello__user_info__unpack(ProtobufCAllocator *allocator,
                                          size_t len,
                                          const uint8_t *data)
{
  ProtobufCMessage *decoded =
      protobuf_c_message_unpack(&hello__user_info__descriptor,
                                allocator, len, data);

  return (Hello__UserInfo *)decoded;
}
/*
 * Release a message previously returned by hello__user_info__unpack().
 *
 * message:   message to free; NULL is accepted and ignored, so callers can
 *            pass the result of a failed unpack without a separate check.
 * allocator: forwarded unchanged to protobuf_c_message_free_unpacked().
 */
void   hello__user_info__free_unpacked
                     (Hello__UserInfo *message,
                      ProtobufCAllocator *allocator)
{
  /* Guard before the assert below, which dereferences message. */
  if (!message)
    return;
  assert(message->base.descriptor == &hello__user_info__descriptor);
  protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator);
}
/*
 * Per-field metadata for Hello__NestObj, one entry per field of the
 * `NestObj` message in zilong.proto, listed in field-number order (6, 7, 8).
 * Each entry gives the field name, its number, label, wire type, and the
 * offsets of the presence flag (where one exists) and of the value inside
 * the C struct.
 * NOTE(review): the meaning of each positional slot follows
 * ProtobufCFieldDescriptor in protobuf-c.h — confirm against the installed
 * header before editing by hand.
 */
static const ProtobufCFieldDescriptor hello__nest_obj__field_descriptors[3] =
{
  /* int32 zilong1 = 6; presence tracked by has_zilong1 */
  {
    "zilong1",
    6,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_INT32,
    offsetof(Hello__NestObj, has_zilong1),
    offsetof(Hello__NestObj, zilong1),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
  /* string zilong2 = 7; no separate presence flag (quantifier offset 0) */
  {
    "zilong2",
    7,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_STRING,
    0,   /* quantifier_offset */
    offsetof(Hello__NestObj, zilong2),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
  /* int32 zilong3 = 8; presence tracked by has_zilong3 */
  {
    "zilong3",
    8,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_INT32,
    offsetof(Hello__NestObj, has_zilong3),
    offsetof(Hello__NestObj, zilong3),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
};
/*
 * Indices into hello__nest_obj__field_descriptors ordered by field name.
 * For NestObj the alphabetical order (zilong1, zilong2, zilong3) happens to
 * equal the declaration order, so the table is 0, 1, 2.
 */
static const unsigned hello__nest_obj__field_indices_by_name[] = {
  0,   /* field[0] = zilong1 */
  1,   /* field[1] = zilong2 */
  2,   /* field[2] = zilong3 */
};
/*
 * Field-number range table for NestObj: a single contiguous run of field
 * numbers starting at 6 that maps to descriptor index 0, followed by a
 * closing entry whose second value equals the field count (3).
 * NOTE(review): member semantics follow ProtobufCIntRange in protobuf-c.h —
 * confirm against the installed header.
 */
static const ProtobufCIntRange hello__nest_obj__number_ranges[1 + 1] =
{
  { 6, 0 },
  { 0, 3 }
};
/*
 * Message descriptor for hello.NestObj. This is the object the typed
 * wrappers compare against in their assert() and pass to
 * protobuf_c_message_unpack(); UserInfo's `zilong` field descriptor also
 * points at it. It ties together the names, the C struct size, the field
 * table, the by-name index table, the number-range table and the init
 * function defined in this file.
 */
const ProtobufCMessageDescriptor hello__nest_obj__descriptor =
{
  PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
  /* fully-qualified name, short name, C type name, package */
  "hello.NestObj",
  "NestObj",
  "Hello__NestObj",
  "hello",
  sizeof(Hello__NestObj),
  /* number of entries in hello__nest_obj__field_descriptors */
  3,
  hello__nest_obj__field_descriptors,
  hello__nest_obj__field_indices_by_name,
  /* one number range (the closing entry is not counted) */
  1,  hello__nest_obj__number_ranges,
  (ProtobufCMessageInit) hello__nest_obj__init,
  NULL,NULL,NULL    /* reserved[123] */
};
/*
 * Per-field metadata for Hello__UserInfo, one entry per field of the
 * `UserInfo` message in zilong.proto, listed in field-number order (1..5).
 * Each entry gives the field name, its number, label, wire type, and the
 * offsets of the presence flag (where one exists) and of the value inside
 * the C struct. The last entry additionally references the NestObj
 * descriptor because the field is a sub-message.
 * NOTE(review): the meaning of each positional slot follows
 * ProtobufCFieldDescriptor in protobuf-c.h — confirm against the installed
 * header before editing by hand.
 */
static const ProtobufCFieldDescriptor hello__user_info__field_descriptors[5] =
{
  /* int32 id = 1; presence tracked by has_id */
  {
    "id",
    1,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_INT32,
    offsetof(Hello__UserInfo, has_id),
    offsetof(Hello__UserInfo, id),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
  /* int32 age = 2; presence tracked by has_age */
  {
    "age",
    2,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_INT32,
    offsetof(Hello__UserInfo, has_age),
    offsetof(Hello__UserInfo, age),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
  /* string name = 3; no separate presence flag (quantifier offset 0) */
  {
    "name",
    3,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_STRING,
    0,   /* quantifier_offset */
    offsetof(Hello__UserInfo, name),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
  /* bytes raw_data = 4; presence tracked by has_raw_data */
  {
    "raw_data",
    4,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_BYTES,
    offsetof(Hello__UserInfo, has_raw_data),
    offsetof(Hello__UserInfo, raw_data),
    NULL,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
  /* NestObj zilong = 5; sub-message stored as a pointer in the struct */
  {
    "zilong",
    5,
    PROTOBUF_C_LABEL_OPTIONAL,
    PROTOBUF_C_TYPE_MESSAGE,
    0,   /* quantifier_offset */
    offsetof(Hello__UserInfo, zilong),
    &hello__nest_obj__descriptor,
    NULL,
    0,             /* flags */
    0,NULL,NULL    /* reserved1,reserved2, etc */
  },
};
/*
 * Indices into hello__user_info__field_descriptors ordered by field name
 * (age, id, name, raw_data, zilong). "age" sorts before "id", which is why
 * the first two entries are swapped relative to declaration order.
 */
static const unsigned hello__user_info__field_indices_by_name[] = {
  1,   /* field[1] = age */
  0,   /* field[0] = id */
  2,   /* field[2] = name */
  3,   /* field[3] = raw_data */
  4,   /* field[4] = zilong */
};
/*
 * Field-number range table for UserInfo: a single contiguous run of field
 * numbers starting at 1 that maps to descriptor index 0, followed by a
 * closing entry whose second value equals the field count (5).
 * NOTE(review): member semantics follow ProtobufCIntRange in protobuf-c.h —
 * confirm against the installed header.
 */
static const ProtobufCIntRange hello__user_info__number_ranges[1 + 1] =
{
  { 1, 0 },
  { 0, 5 }
};
/*
 * Message descriptor for hello.UserInfo. This is the object the typed
 * wrappers compare against in their assert() and pass to
 * protobuf_c_message_unpack(). It ties together the names, the C struct
 * size, the field table, the by-name index table, the number-range table
 * and the init function defined in this file.
 */
const ProtobufCMessageDescriptor hello__user_info__descriptor =
{
  PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
  /* fully-qualified name, short name, C type name, package */
  "hello.UserInfo",
  "UserInfo",
  "Hello__UserInfo",
  "hello",
  sizeof(Hello__UserInfo),
  /* number of entries in hello__user_info__field_descriptors */
  5,
  hello__user_info__field_descriptors,
  hello__user_info__field_indices_by_name,
  /* one number range (the closing entry is not counted) */
  1,  hello__user_info__number_ranges,
  (ProtobufCMessageInit) hello__user_info__init,
  NULL,NULL,NULL    /* reserved[123] */
};
/* Generated by the protocol buffer compiler.  DO NOT EDIT! */
/* Generated from: zilong.proto */

#ifndef PROTOBUF_C_zilong_2eproto__INCLUDED
#define PROTOBUF_C_zilong_2eproto__INCLUDED

#include <protobuf-c/protobuf-c.h>

PROTOBUF_C__BEGIN_DECLS

#if PROTOBUF_C_VERSION_NUMBER < 1000000
# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers.
#elif 1002001 < PROTOBUF_C_MIN_COMPILER_VERSION
# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c.
#endif


typedef struct _Hello__NestObj Hello__NestObj;
typedef struct _Hello__UserInfo Hello__UserInfo;


/* --- enums --- */


/* --- messages --- */

/*
 * C representation of the `NestObj` message from zilong.proto.
 * Member order must stay in sync with HELLO__NEST_OBJ__INIT and with the
 * offsetof() entries in the generated field descriptor table.
 */
struct  _Hello__NestObj
{
  /* Common message header; its descriptor member is checked by the wrappers. */
  ProtobufCMessage base;
  /* Presence flag paired with zilong1 in the field descriptor table. */
  protobuf_c_boolean has_zilong1;
  int32_t zilong1;
  /* String field; initialised to NULL by HELLO__NEST_OBJ__INIT. */
  char *zilong2;
  /* Presence flag paired with zilong3 in the field descriptor table. */
  protobuf_c_boolean has_zilong3;
  int32_t zilong3;
};
/*
 * Static initializer for Hello__NestObj. After the base header, the values
 * map positionally to: has_zilong1, zilong1, zilong2, has_zilong3, zilong3.
 */
#define HELLO__NEST_OBJ__INIT \
 { PROTOBUF_C_MESSAGE_INIT (&hello__nest_obj__descriptor) \
    , 0,0, NULL, 0,0 }


/*
 * C representation of the `UserInfo` message from zilong.proto.
 * Member order must stay in sync with HELLO__USER_INFO__INIT and with the
 * offsetof() entries in the generated field descriptor table.
 */
struct  _Hello__UserInfo
{
  /* Common message header; its descriptor member is checked by the wrappers. */
  ProtobufCMessage base;
  /* Presence flag paired with id in the field descriptor table. */
  protobuf_c_boolean has_id;
  int32_t id;
  /* Presence flag paired with age in the field descriptor table. */
  protobuf_c_boolean has_age;
  int32_t age;
  /* String field; initialised to NULL by HELLO__USER_INFO__INIT. */
  char *name;
  /* Presence flag paired with raw_data in the field descriptor table. */
  protobuf_c_boolean has_raw_data;
  /* Length + pointer pair (the test program sets .len and .data). */
  ProtobufCBinaryData raw_data;
  /* Nested NestObj sub-message, held by pointer; NULL by default. */
  Hello__NestObj *zilong;
};
/*
 * Static initializer for Hello__UserInfo. After the base header, the values
 * map positionally to: has_id, id, has_age, age, name, has_raw_data,
 * raw_data {len, data}, zilong.
 */
#define HELLO__USER_INFO__INIT \
 { PROTOBUF_C_MESSAGE_INIT (&hello__user_info__descriptor) \
    , 0,0, 0,0, NULL, 0,{0,NULL}, NULL }


/* Hello__NestObj methods */

/* Overwrite *message with the HELLO__NEST_OBJ__INIT defaults. */
void hello__nest_obj__init(Hello__NestObj *message);

/* Wrapper over protobuf_c_message_get_packed_size() for a NestObj. */
size_t hello__nest_obj__get_packed_size(const Hello__NestObj *message);

/* Wrapper over protobuf_c_message_pack(): serialize message into out. */
size_t hello__nest_obj__pack(const Hello__NestObj *message, uint8_t *out);

/* Wrapper over protobuf_c_message_pack_to_buffer(). */
size_t hello__nest_obj__pack_to_buffer(const Hello__NestObj *message,
                                       ProtobufCBuffer *buffer);

/* Wrapper over protobuf_c_message_unpack() using the NestObj descriptor. */
Hello__NestObj *hello__nest_obj__unpack(ProtobufCAllocator *allocator,
                                        size_t len,
                                        const uint8_t *data);

/* Wrapper over protobuf_c_message_free_unpacked() for a NestObj. */
void hello__nest_obj__free_unpacked(Hello__NestObj *message,
                                    ProtobufCAllocator *allocator);
/* Hello__UserInfo methods */

/* Overwrite *message with the HELLO__USER_INFO__INIT defaults. */
void hello__user_info__init(Hello__UserInfo *message);

/* Wrapper over protobuf_c_message_get_packed_size() for a UserInfo. */
size_t hello__user_info__get_packed_size(const Hello__UserInfo *message);

/* Wrapper over protobuf_c_message_pack(): serialize message into out. */
size_t hello__user_info__pack(const Hello__UserInfo *message, uint8_t *out);

/* Wrapper over protobuf_c_message_pack_to_buffer(). */
size_t hello__user_info__pack_to_buffer(const Hello__UserInfo *message,
                                        ProtobufCBuffer *buffer);

/* Wrapper over protobuf_c_message_unpack() using the UserInfo descriptor. */
Hello__UserInfo *hello__user_info__unpack(ProtobufCAllocator *allocator,
                                          size_t len,
                                          const uint8_t *data);

/* Wrapper over protobuf_c_message_free_unpacked() for a UserInfo. */
void hello__user_info__free_unpacked(Hello__UserInfo *message,
                                     ProtobufCAllocator *allocator);
/* --- per-message closures --- */

/*
 * Callback pointer types, one per message: each receives a read-only
 * message plus an opaque closure_data pointer. Nothing else in this
 * listing references them (the .proto file declares no services).
 */
typedef void (*Hello__NestObj_Closure)(const Hello__NestObj *message,
                                       void *closure_data);
typedef void (*Hello__UserInfo_Closure)(const Hello__UserInfo *message,
                                        void *closure_data);

/* --- services --- */


/* --- descriptors --- */

/*
 * Message descriptors defined in zilong.pb-c.c. The *_INIT macros above
 * take their addresses, so they must be visible to every user of this header.
 */
extern const ProtobufCMessageDescriptor hello__nest_obj__descriptor;
extern const ProtobufCMessageDescriptor hello__user_info__descriptor;

PROTOBUF_C__END_DECLS


#endif  /* PROTOBUF_C_zilong_2eproto__INCLUDED */

测试用例:

编译

protoc-c --c_out=./ zilong.proto
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/caozilong/Workspace/proto-c/install/lib
gcc zilong.pb-c.c main.c -I/home/caozilong/Workspace/proto-c/install/include -L/home/caozilong/Workspace/proto-c/install/lib -lprotobuf-c

运行验证:

 不明白为什么只有字符串才会序列化和反序列化成功,其他的数字类型为0,暂时到这里吧,有时间再查。

问题根因

上面的数字类型输出为0的问题,一次偶然发现了根因:原来上面生成代码时用的protoc-c是系统环境里的工具,它是通过apt-get安装的,而GCC链接的库是用另一份protobuf-c源码编译出的SDK,所以很可能是因为环境中的protoc-c和源码环境的protobuf-c库版本不一致造成的。

知道了原因,解决方法就简单了,用源码编译出的protoc-c重新编译zilong.proto即可

/home/caozilong/Workspace/proto-c/install/bin/protoc-c --c_out=./ zilong.proto

此时我们加重测试用例,将nest_obj也填充上。

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include "zilong.pb-c.h"

static size_t pack_data(unsigned char *out)
{
	unsigned char data[256];
	memset(data, 0x00, 256);
	int i = 0;
	for(i = 0; i < 256; i ++)
		data[i] = i;

	static Hello__UserInfo usr_obj;
	static Hello__NestObj nest_obj;
   
	memset(&usr_obj, 0x00, sizeof(Hello__UserInfo));
	memset(&nest_obj, 0x00, sizeof(Hello__NestObj));

	hello__user_info__init(&usr_obj);
	hello__nest_obj__init(&nest_obj);

	usr_obj.name = "zilongc";
	usr_obj.id = 12;
	usr_obj.age = 37;
	usr_obj.raw_data.data = data;
	usr_obj.raw_data.len = 256;
	usr_obj.zilong = &nest_obj;
	usr_obj.zilong->zilong1 = 0x5a5a5a5a;
	usr_obj.zilong->zilong2 = "zilongcao";
	usr_obj.zilong->zilong3 = 0xa5a5a5a5;

	return hello__user_info__pack(&usr_obj, out);
}

/*
 * Decode a serialized Hello__UserInfo and print every field to stdout.
 *
 * len:  number of valid bytes in data.
 * data: serialized message.
 *
 * Returns 0 after a successful decode and dump, or (size_t)-1 when
 * hello__user_info__unpack() yields NULL.
 */
static size_t unpack_data(size_t len, const unsigned char *data)
{
	Hello__UserInfo *msg = hello__user_info__unpack(NULL, len, data);

	/* The unpack result was previously dereferenced without a check. */
	if (msg == NULL) {
		fprintf(stderr, "unpack failed (%zu bytes of input).\n", len);
		return (size_t)-1;
	}

	/* Passing NULL to %s is undefined, so substitute a marker. */
	printf("name %s.\n", msg->name ? msg->name : "(null)");
	printf("id %d.\n", msg->id);
	printf("age %d.\n", msg->age);

	/* The nested message is held by pointer and may be absent. */
	if (msg->zilong != NULL) {
		printf("zlong1 %x.\n", (unsigned int)msg->zilong->zilong1);
		printf("zlong2 %s.\n",
		       msg->zilong->zilong2 ? msg->zilong->zilong2 : "(null)");
		printf("zlong3 %x.\n", (unsigned int)msg->zilong->zilong3);
	} else {
		printf("zilong not present.\n");
	}

	/* raw_data.len is unsigned; index with size_t to avoid a mixed compare. */
	for (size_t i = 0; i < msg->raw_data.len; i++)
		printf("[%3zu]->%3d  ", i, msg->raw_data.data[i]);

	printf("\n");
	hello__user_info__free_unpacked(msg, NULL);
	return 0;
}

/*
 * Round-trip test: serialize the sample message into a stack buffer, report
 * the encoded size, then decode and print it.
 *
 * Returns 0 on success, EXIT_FAILURE when unpack_data() reports an error.
 */
int main(void)
{
	/* Zero-initialised scratch buffer for the encoded message. */
	unsigned char buff[1024] = {0};

	size_t pack_size = pack_data(buff);
	/* size_t must be printed with %zu; %ld is a format/argument mismatch. */
	printf("%s line %d, packsize %zu.\n", __func__, __LINE__, pack_size);

	if (unpack_data(pack_size, buff) != 0)
		return EXIT_FAILURE;
	return 0;
}

重新编译

gcc zilong.pb-c.c main.c -I/home/caozilong/Workspace/proto-c/install/include -L/home/caozilong/Workspace/proto-c/install/lib -lprotobuf-c -static

测试发现,反序列化的数据,每个域都是对的。

至此,我了解了ONNX,PB之类的格式的一些原理。

dump memory bin.

通过GDB将序列化的数据导出来。

得到序列化后的数据内容

对于嵌套MESSAGE 的处理逻辑,重点关注下面的函数实现,可以看到prefixed_message_pack其实是在protobuf_c_message_pack的环境中运行的,但是在其中又调用了protobuf_c_message_pack,这是一种递归调用,递归的退出依赖于你定义的proto中不存在结构体的循环嵌套定义,这里rv_packed_size是子MESSAGE的长度,由于打包格式是 LEN DATA,所以计算出LEN之后,需要将DATA搬动rv_packed_size大小,目的是为了给LEN域留下空间。

很自然想到,如果MESSAGE的指针为空怎么办,为了避免作无谓的序列化操作,代码中用field_is_zeroish作了检测,当遇到指针为NULL,则退出。

又很自然的想到,如果刻意制造一个循环指向,看protobuf能否处理,还是会被戏耍? 说干就干,修改proto ,在NestObj中增加一个对UserInfo的指向。

 代码也很好修改,增加33行的代码即可。

 重新生成proto c代码之后,编译程序,运行CRASH,由于我们引入了递归,怀疑是爆栈导致的。

为了确认,我们用GDB抓取CRASH时的现场:

可以看到,死机时候,调用堆栈深达160多层,并且protobuf_c_message_pack被调用了很多次,确认是堆栈溢出导致。

看来谷歌的工程师并未对这种循环嵌套增加检测处理,这样我们很容易“戏耍” protobuf导致其栈溢出。

总结:

除了ONNX格式之外,CAFFE模型也用了PROTOBUF来作为格式描述,这个可以从NCNN代码中看出来,在PROTOBUF版本不对导致编译失败的情况下,去掉对ONNX和CAFFE的编译就可以通过。

结束