Commit dd321f0

Use vectors instead of tflite temp buffers (#247)
* Use vectors instead of tflite temp buffers
1 parent a48034d commit dd321f0
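To orient readers before the diff: the commit moves the op's scratch data out of TF Lite temporary tensors (ids and indices requested during Prepare) and into std::vector members of the op's user data struct, which are filled lazily on the first Eval call. Below is a minimal, standalone sketch of that pattern; the names ExampleParams, EvalSketch and packed_weights are illustrative only and are not the actual kernel code.

// Standalone sketch of the buffer-ownership pattern used in this commit
// (illustrative names, not the actual bconv2d kernel code).
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct ExampleParams {
  // Persistent scratch data owned by the op itself instead of being
  // requested as TF Lite temporary tensors.
  std::vector<float> padding_buffer;
  bool is_padding_correction_cached = false;

  std::vector<std::uint8_t> bitpacked_weights_buffer;
  bool is_weight_bitpacked = false;
};

// Called on every inference; the expensive copy/packing work runs only once.
void EvalSketch(ExampleParams* params, const std::uint8_t* packed_weights,
                std::size_t num_bytes) {
  if (!params->is_weight_bitpacked) {
    params->bitpacked_weights_buffer.resize(num_bytes);
    std::memcpy(params->bitpacked_weights_buffer.data(), packed_weights,
                num_bytes);
    params->is_weight_bitpacked = true;
  }
  // ... the convolution then reads params->bitpacked_weights_buffer.data() ...
}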

File tree

1 file changed: +30 -107 lines changed

larq_compute_engine/tflite/kernels/bconv2d.cc

Lines changed: 30 additions & 107 deletions
@@ -67,17 +67,16 @@ typedef struct {
   // IDs are the arbitrary identifiers used by TF Lite to identify and access
   // memory buffers. They are unique in the entire TF Lite context.
   int im2col_id = kTensorNotAllocated;
-  int padding_buffer_id = kTensorNotAllocated;
-  int bitpacked_weights_buffer_id = kTensorNotAllocated;
   // In node->temporaries there is a list of tensor id's that are part
   // of this node in particular. The indices below are offsets into this array.
   // So in pseudo-code: `node->temporaries[index] = id;`
   int32_t im2col_index;
-  int32_t padding_buffer_index;
-  int32_t bitpacked_weights_buffer_index;

-  bool padding_cache_filled = false;
-  bool bitpacked_weights = false;
+  std::vector<float> padding_buffer;
+  bool is_padding_correction_cached = false;
+
+  std::vector<std::uint8_t> bitpacked_weights_buffer;
+  bool is_weight_bitpacked = false;

 } TfLiteBConv2DParams;

@@ -228,10 +227,6 @@ TfLiteStatus Prepare(KernelType kernel_type, const int bitwidth,
   if (conv_params->need_im2col) {
     conv_params->im2col_index = temporaries_count++;
   }
-  if (conv_params->padding_type == TfLitePadding::kTfLitePaddingSame) {
-    conv_params->padding_buffer_index = temporaries_count++;
-  }
-  conv_params->bitpacked_weights_buffer_index = temporaries_count++;

   // Allocate int array of that size
   TfLiteIntArrayFree(node->temporaries);
@@ -245,20 +240,6 @@ TfLiteStatus Prepare(KernelType kernel_type, const int bitwidth,
             conv_params->im2col_id;
       }
     }
-
-    if (conv_params->padding_type == TfLitePadding::kTfLitePaddingSame) {
-      if (conv_params->padding_buffer_id == kTensorNotAllocated) {
-        context->AddTensors(context, 1, &conv_params->padding_buffer_id);
-        node->temporaries->data[conv_params->padding_buffer_index] =
-            conv_params->padding_buffer_id;
-      }
-    }
-    if (conv_params->bitpacked_weights_buffer_id == kTensorNotAllocated) {
-      context->AddTensors(context, 1,
-                          &conv_params->bitpacked_weights_buffer_id);
-      node->temporaries->data[conv_params->bitpacked_weights_buffer_index] =
-          conv_params->bitpacked_weights_buffer_id;
-    }
   }

   // Resize the im2col tensor
@@ -299,68 +280,8 @@ TfLiteStatus Prepare(KernelType kernel_type, const int bitwidth,
                       context->ResizeTensor(context, im2col, im2col_size));
   }

-  // Resize the padding buffer tensor and pre-compute the cache
-  if (conv_params->padding_type == TfLitePadding::kTfLitePaddingSame) {
-    TfLiteTensor* padding_buffer =
-        GetTemporary(context, node, conv_params->padding_buffer_index);
-
-    using PaddingFunctor =
-        ce::core::PaddingFunctor<float, float, ce::core::FilterFormat::OHWI>;
-    PaddingFunctor padding_functor;
-
-    // Allocate it as a 1-D array
-    TfLiteIntArray* padding_size = TfLiteIntArrayCreate(1);
-    padding_size->data[0] = padding_functor.get_cache_size(
-        conv_params->filter_height, conv_params->filter_width,
-        conv_params->channels_out, conv_params->dilations[1],
-        conv_params->dilations[2]);
-
-    padding_buffer->type = input->type;  // currently still float
-    padding_buffer->allocation_type = kTfLiteArenaRw;
-    TF_LITE_ENSURE_OK(
-        context, context->ResizeTensor(context, padding_buffer, padding_size));
-
-    // Ideally we would like to fill the cache now
-    // However the padding_buffer is not ready yet, because the `ResizeTensor`
-    // function only *requests* a resize but does not actually do it yet.
-    // So we do it in Eval but only once.
-  }
-
-  // Resize the packed weight tensor
-  if (kernel_type == KernelType::kGenericOptimized) {
-    TfLiteTensor* bitpacked_weights_buffer = GetTemporary(
-        context, node, conv_params->bitpacked_weights_buffer_index);
-
-    TfLiteIntArray* bitpacked_weights_shape = TfLiteIntArrayCreate(2);
-    if (conv_params->bitpack_before_im2col) {
-      bitpacked_weights_shape->data[0] =
-          filter->dims->data[0] * filter->dims->data[1] * filter->dims->data[2];
-      const auto num_floats = filter->dims->data[3];
-      const auto num_packed_elements = (num_floats + bitwidth - 1) / bitwidth;
-      bitpacked_weights_shape->data[1] = num_packed_elements;
-    } else {
-      bitpacked_weights_shape->data[0] = filter->dims->data[0];
-      const auto num_floats =
-          filter->dims->data[1] * filter->dims->data[2] * filter->dims->data[3];
-      const auto num_packed_elements = (num_floats + bitwidth - 1) / bitwidth;
-      bitpacked_weights_shape->data[1] = num_packed_elements;
-    }
-
-    if (bitwidth == 8)
-      bitpacked_weights_buffer->type = kTfLiteInt8;
-    else if (bitwidth == 32)
-      bitpacked_weights_buffer->type = kTfLiteInt32;
-    else if (bitwidth == 64)
-      bitpacked_weights_buffer->type = kTfLiteInt64;
-    else
-      TF_LITE_ENSURE(context, false);
-
-    bitpacked_weights_buffer->allocation_type = kTfLiteArenaRw;
-
-    TF_LITE_ENSURE_OK(context,
-                      context->ResizeTensor(context, bitpacked_weights_buffer,
-                                            bitpacked_weights_shape));
-  }
+  conv_params->is_weight_bitpacked = false;
+  conv_params->is_padding_correction_cached = false;

   return kTfLiteOk;
 }
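Note that in the Prepare hunk above all of the temporary-tensor setup collapses to resetting the two flags. A tiny illustrative sketch of why that is sufficient (reusing the ExampleParams struct from the sketch near the top of the page; PrepareSketch is a placeholder name, not the actual kernel code):

// If the interpreter re-runs Prepare (for example after an input resize),
// clearing the flags forces the next Eval to rebuild the cached data, so the
// op never reads stale packed weights or padding corrections.
void PrepareSketch(ExampleParams* params) {
  params->is_weight_bitpacked = false;
  params->is_padding_correction_cached = false;
}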
@@ -457,28 +378,32 @@ void EvalOpt(TfLiteContext* context, TfLiteNode* node,
                              ? GetTemporary(context, node, params->im2col_index)
                              : nullptr;

-  TfLiteTensor* padding_buffer =
-      params->padding_type == TfLitePadding::kTfLitePaddingSame
-          ? GetTemporary(context, node, params->padding_buffer_index)
-          : nullptr;
-
-  if (!params->padding_cache_filled &&
+  if (!params->is_padding_correction_cached &&
       params->padding_type == TfLitePadding::kTfLitePaddingSame) {
     // In the first run, fill the cache
     using PaddingFunctor =
         ce::core::PaddingFunctor<T, T, ce::core::FilterFormat::OHWI>;
     PaddingFunctor padding_functor;
+
+    std::size_t padding_cache_size = padding_functor.get_cache_size(
+        params->filter_height, params->filter_width, params->channels_out,
+        params->dilations[1], params->dilations[2]);
+
+    params->padding_buffer.resize(padding_cache_size);
+
     padding_functor.cache_correction_values(
         GetTensorData<T>(filter), params->filter_height, params->filter_width,
         params->channels_out, params->channels_in, params->dilations[1],
         params->dilations[2], GetTensorData<T>(fused_multiply),
-        GetTensorData<T>(padding_buffer));
-    params->padding_cache_filled = true;
+        params->padding_buffer.data());
+    params->is_padding_correction_cached = true;
   }

-  TfLiteTensor* bitpacked_weights =
-      GetTemporary(context, node, params->bitpacked_weights_buffer_index);
-  if (!params->bitpacked_weights) {
+  // Only in the first run:
+  // Allocate the packed weight buffer and bitpack the weights.
+  // Ideally we would like to use the filter buffer itself,
+  // but this is stored in read-only memory-mapped-files..
+  if (!params->is_weight_bitpacked) {
     // The filters have shape
     // [output channels, height, width, input channels]
     // and we now view it as a matrix of shape
@@ -504,14 +429,11 @@ void EvalOpt(TfLiteContext* context, TfLiteNode* node,

     size_t num_bytes = filter_data_bp.size() * sizeof(TBitpacked);

-    if (num_bytes != bitpacked_weights->bytes) {
-      context->ReportError(context,
-                           "Error in computation of filter bitpacking size.");
-    } else {
-      memcpy(GetTensorData<TBitpacked>(bitpacked_weights),
-             filter_data_bp.data(), num_bytes);
-    }
-    params->bitpacked_weights = true;
+    params->bitpacked_weights_buffer.resize(num_bytes);
+    memcpy(params->bitpacked_weights_buffer.data(), filter_data_bp.data(),
+           num_bytes);
+
+    params->is_weight_bitpacked = true;
   }

   // Using the standard TF Lite ConvParams struct.
@@ -526,11 +448,12 @@ void EvalOpt(TfLiteContext* context, TfLiteNode* node,
   // weights data
   BConv2D<T, TBitpacked>(
       op_params, GetTensorShape(input), GetTensorData<T>(input),
-      GetTensorShape(filter), GetTensorData<TBitpacked>(bitpacked_weights),
+      GetTensorShape(filter),
+      reinterpret_cast<TBitpacked*>(params->bitpacked_weights_buffer.data()),
       GetTensorData<float>(fused_multiply), GetTensorData<float>(fused_add),
       GetTensorShape(output), GetTensorData<T>(output), GetTensorShape(im2col),
       GetTensorData<T>(im2col), params->bitpack_before_im2col,
-      GetTensorData<T>(padding_buffer),
+      params->padding_buffer.data(),
       CpuBackendContext::GetFromContext(context));
 }

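The last hunk hands the byte-backed vector to the kernel by reinterpreting its data pointer as the bitpacked word type. A self-contained sketch of that consumption step, with TBitpacked fixed to a 64-bit word purely for illustration (the real kernel templates on it):

#include <cstdint>
#include <vector>

// Stand-in for the kernel's bitpacked word type.
using TBitpacked = std::uint64_t;

// The buffer is owned as raw bytes; the convolution reads it as packed words.
const TBitpacked* AsPackedWords(const std::vector<std::uint8_t>& buffer) {
  return reinterpret_cast<const TBitpacked*>(buffer.data());
}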
0 commit comments