// Protocol description for Tesseract

// Compile this file with the Protocol Compiler protoc to generate
// the files tfnetwork.pb.cc and tfnetwork.pb.h.
// This requires the protocol descriptions for TensorFlow
// (included in the TensorFlow sources).

// With TensorFlow sources installed in /usr/src/tensorflow/tensorflow-1.10.1,
// this command was used on Debian to generate the files:

// protoc --cpp_out=$PWD --proto_path=/usr/src/tensorflow/tensorflow-1.10.1 \
// --proto_path=$PWD src/lstm/tfnetwork.proto

syntax = "proto3";

package tesseract;

import "tensorflow/core/framework/graph.proto";

// This proto is the interface between a python TF graph builder/trainer and
// the C++ world. The writer of this proto must provide fields as documented
// by the comments below.
// The graph must have a placeholder for NetworkIO, Widths and Heights. The
// following python code creates the appropriate placeholders:
//
// input_layer = tf.placeholder(tf.float32,
//                              shape=[batch_size, xsize, ysize, depth_dim],
//                              name='NetworkIO')
// widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths')
// heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights')
// # Flip x and y to the TF convention.
// input_layer = tf.transpose(input_layer, [0, 2, 1, 3])
//
// The widths and heights will be set to indicate the post-scaling size of the
// input image(s).
// For now batch_size is ignored and set to 1.
// The graph should return a 2-dimensional float32 tensor called 'softmax' of
// shape [sequence_length, num_classes], where sequence_length is allowed to
// be variable, given by the tensor itself.
// TODO(rays) determine whether it is worth providing for batch_size > 1 and if
// so, how.
message TFNetworkModel {
  // NOTE(review): field number 7 is not assigned anywhere in this message.
  // It is reserved here so that it cannot be picked up by accident for a new
  // field; confirm whether it was used by an earlier revision.
  reserved 7;

  // The TF graph definition. Required.
  tensorflow.GraphDef graph = 1;

  // The training index. Required to be > 0.
  int64 global_step = 2;

  // The original network definition for reference. Optional.
  string spec = 3;

  // Input tensor parameters.

  // Values per pixel. Required to be 1 or 3. Inputs assumed to be float32.
  int32 depth = 4;

  // Image size. Required. Zero implies flexible sizes, fixed if non-zero.
  // If x_size > 0, images will be cropped/padded to the given size, after
  // any scaling required by the y_size.
  // If y_size > 0, images will be scaled isotropically to the given height.
  int32 x_size = 5;
  int32 y_size = 6;

  // Number of images in a batch. Optional.
  int32 batch_size = 8;

  // Output tensor parameters.

  // Number of output classes. Required to match the depth of the softmax.
  int32 num_classes = 9;

  // True if this network needs CTC-like decoding, dropping duplicated labels.
  // The decoder always drops the null character.
  bool using_ctc = 10;

  // Name of input image tensor.
  string image_input = 11;

  // Names of the image width and height tensors.
  string image_widths = 12;
  string image_heights = 13;

  // Name of output (softmax) tensor.
  string output_layer = 14;
}