@@ -67,17 +67,16 @@ typedef struct {
   // IDs are the arbitrary identifiers used by TF Lite to identify and access
   // memory buffers. They are unique in the entire TF Lite context.
   int im2col_id = kTensorNotAllocated;
-  int padding_buffer_id = kTensorNotAllocated;
-  int bitpacked_weights_buffer_id = kTensorNotAllocated;
   // In node->temporaries there is a list of tensor id's that are part
   // of this node in particular. The indices below are offsets into this array.
   // So in pseudo-code: `node->temporaries[index] = id;`
   int32_t im2col_index;
-  int32_t padding_buffer_index;
-  int32_t bitpacked_weights_buffer_index;

-  bool padding_cache_filled = false;
-  bool bitpacked_weights = false;
+  std::vector<float> padding_buffer;
+  bool is_padding_correction_cached = false;
+
+  std::vector<std::uint8_t> bitpacked_weights_buffer;
+  bool is_weight_bitpacked = false;

 } TfLiteBConv2DParams;

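For context, each new vector/bool pair implements a fill-once cache that the kernel checks on every Eval call. Below is a minimal, self-contained sketch of that pattern, assuming nothing beyond the standard library; `ExampleParams` and `EvalOnce` are illustrative names (the real members live in `TfLiteBConv2DParams` and are filled in `EvalOpt`), and the buffer sizes are placeholders.

```cpp
// Sketch of the first-run caching pattern: buffers live in the params object
// and are filled only once, on the first evaluation.
#include <cstdint>
#include <cstdio>
#include <vector>

struct ExampleParams {  // hypothetical stand-in for TfLiteBConv2DParams
  std::vector<float> padding_buffer;
  bool is_padding_correction_cached = false;

  std::vector<std::uint8_t> bitpacked_weights_buffer;
  bool is_weight_bitpacked = false;
};

void EvalOnce(ExampleParams* params) {
  // Expensive pre-computations run only on the first invocation; later calls
  // reuse the vectors, which live as long as the params object itself.
  if (!params->is_padding_correction_cached) {
    params->padding_buffer.resize(128);  // size would come from get_cache_size()
    params->is_padding_correction_cached = true;
  }
  if (!params->is_weight_bitpacked) {
    params->bitpacked_weights_buffer.resize(256);  // num_bytes of packed filters
    params->is_weight_bitpacked = true;
  }
}

int main() {
  ExampleParams params;
  EvalOnce(&params);  // first call: buffers are allocated and filled
  EvalOnce(&params);  // subsequent calls: cached data is reused
  std::printf("padding cache: %zu floats, packed weights: %zu bytes\n",
              params.padding_buffer.size(),
              params.bitpacked_weights_buffer.size());
  return 0;
}
```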
@@ -228,10 +227,6 @@ TfLiteStatus Prepare(KernelType kernel_type, const int bitwidth,
   if (conv_params->need_im2col) {
     conv_params->im2col_index = temporaries_count++;
   }
-  if (conv_params->padding_type == TfLitePadding::kTfLitePaddingSame) {
-    conv_params->padding_buffer_index = temporaries_count++;
-  }
-  conv_params->bitpacked_weights_buffer_index = temporaries_count++;

   // Allocate int array of that size
   TfLiteIntArrayFree(node->temporaries);
@@ -245,20 +240,6 @@ TfLiteStatus Prepare(KernelType kernel_type, const int bitwidth,
           conv_params->im2col_id;
     }
   }
-
-  if (conv_params->padding_type == TfLitePadding::kTfLitePaddingSame) {
-    if (conv_params->padding_buffer_id == kTensorNotAllocated) {
-      context->AddTensors(context, 1, &conv_params->padding_buffer_id);
-      node->temporaries->data[conv_params->padding_buffer_index] =
-          conv_params->padding_buffer_id;
-    }
-  }
-  if (conv_params->bitpacked_weights_buffer_id == kTensorNotAllocated) {
-    context->AddTensors(context, 1,
-                        &conv_params->bitpacked_weights_buffer_id);
-    node->temporaries->data[conv_params->bitpacked_weights_buffer_index] =
-        conv_params->bitpacked_weights_buffer_id;
-  }
 }

 // Resize the im2col tensor
@@ -299,68 +280,8 @@ TfLiteStatus Prepare(KernelType kernel_type, const int bitwidth,
         context->ResizeTensor(context, im2col, im2col_size));
   }

-  // Resize the padding buffer tensor and pre-compute the cache
-  if (conv_params->padding_type == TfLitePadding::kTfLitePaddingSame) {
-    TfLiteTensor* padding_buffer =
-        GetTemporary(context, node, conv_params->padding_buffer_index);
-
-    using PaddingFunctor =
-        ce::core::PaddingFunctor<float, float, ce::core::FilterFormat::OHWI>;
-    PaddingFunctor padding_functor;
-
-    // Allocate it as a 1-D array
-    TfLiteIntArray* padding_size = TfLiteIntArrayCreate(1);
-    padding_size->data[0] = padding_functor.get_cache_size(
-        conv_params->filter_height, conv_params->filter_width,
-        conv_params->channels_out, conv_params->dilations[1],
-        conv_params->dilations[2]);
-
-    padding_buffer->type = input->type;  // currently still float
-    padding_buffer->allocation_type = kTfLiteArenaRw;
-    TF_LITE_ENSURE_OK(
-        context, context->ResizeTensor(context, padding_buffer, padding_size));
-
-    // Ideally we would like to fill the cache now
-    // However the padding_buffer is not ready yet, because the `ResizeTensor`
-    // function only *requests* a resize but does not actually do it yet.
-    // So we do it in Eval but only once.
-  }
-
-  // Resize the packed weight tensor
-  if (kernel_type == KernelType::kGenericOptimized) {
-    TfLiteTensor* bitpacked_weights_buffer = GetTemporary(
-        context, node, conv_params->bitpacked_weights_buffer_index);
-
-    TfLiteIntArray* bitpacked_weights_shape = TfLiteIntArrayCreate(2);
-    if (conv_params->bitpack_before_im2col) {
-      bitpacked_weights_shape->data[0] =
-          filter->dims->data[0] * filter->dims->data[1] * filter->dims->data[2];
-      const auto num_floats = filter->dims->data[3];
-      const auto num_packed_elements = (num_floats + bitwidth - 1) / bitwidth;
-      bitpacked_weights_shape->data[1] = num_packed_elements;
-    } else {
-      bitpacked_weights_shape->data[0] = filter->dims->data[0];
-      const auto num_floats =
-          filter->dims->data[1] * filter->dims->data[2] * filter->dims->data[3];
-      const auto num_packed_elements = (num_floats + bitwidth - 1) / bitwidth;
-      bitpacked_weights_shape->data[1] = num_packed_elements;
-    }
-
-    if (bitwidth == 8)
-      bitpacked_weights_buffer->type = kTfLiteInt8;
-    else if (bitwidth == 32)
-      bitpacked_weights_buffer->type = kTfLiteInt32;
-    else if (bitwidth == 64)
-      bitpacked_weights_buffer->type = kTfLiteInt64;
-    else
-      TF_LITE_ENSURE(context, false);
-
-    bitpacked_weights_buffer->allocation_type = kTfLiteArenaRw;
-
-    TF_LITE_ENSURE_OK(context,
-                      context->ResizeTensor(context, bitpacked_weights_buffer,
-                                            bitpacked_weights_shape));
-  }
+  conv_params->is_weight_bitpacked = false;
+  conv_params->is_padding_correction_cached = false;

   return kTfLiteOk;
 }
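The removed Prepare code sized the packed buffer with the ceiling division `(num_floats + bitwidth - 1) / bitwidth`, and the same arithmetic still determines how many packed words a group of filter values needs. A small stand-alone sketch of that computation; the concrete numbers below are made up for illustration and are not taken from the kernel.

```cpp
// Packed-size arithmetic: `bitwidth` float signs fit in one packed word, so the
// number of packed words is the ceiling of num_floats / bitwidth.
#include <cstdio>

int main() {
  const int bitwidth = 32;    // bits per packed word (8, 32 or 64 in the kernel)
  const int num_floats = 70;  // e.g. height * width * input channels of one filter
  const int num_packed_elements = (num_floats + bitwidth - 1) / bitwidth;
  std::printf("%d floats -> %d packed words of %d bits\n",
              num_floats, num_packed_elements, bitwidth);  // 70 -> 3 words
  return 0;
}
```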
@@ -457,28 +378,32 @@ void EvalOpt(TfLiteContext* context, TfLiteNode* node,
           ? GetTemporary(context, node, params->im2col_index)
           : nullptr;

-  TfLiteTensor* padding_buffer =
-      params->padding_type == TfLitePadding::kTfLitePaddingSame
-          ? GetTemporary(context, node, params->padding_buffer_index)
-          : nullptr;
-
-  if (!params->padding_cache_filled &&
+  if (!params->is_padding_correction_cached &&
       params->padding_type == TfLitePadding::kTfLitePaddingSame) {
     // In the first run, fill the cache
     using PaddingFunctor =
         ce::core::PaddingFunctor<T, T, ce::core::FilterFormat::OHWI>;
     PaddingFunctor padding_functor;
+
+    std::size_t padding_cache_size = padding_functor.get_cache_size(
+        params->filter_height, params->filter_width, params->channels_out,
+        params->dilations[1], params->dilations[2]);
+
+    params->padding_buffer.resize(padding_cache_size);
+
     padding_functor.cache_correction_values(
         GetTensorData<T>(filter), params->filter_height, params->filter_width,
         params->channels_out, params->channels_in, params->dilations[1],
         params->dilations[2], GetTensorData<T>(fused_multiply),
-        GetTensorData<T>(padding_buffer));
-    params->padding_cache_filled = true;
+        params->padding_buffer.data());
+    params->is_padding_correction_cached = true;
   }

-  TfLiteTensor* bitpacked_weights =
-      GetTemporary(context, node, params->bitpacked_weights_buffer_index);
-  if (!params->bitpacked_weights) {
+  // Only in the first run:
+  // Allocate the packed weight buffer and bitpack the weights.
+  // Ideally we would like to use the filter buffer itself,
+  // but it is stored in read-only memory-mapped files.
+  if (!params->is_weight_bitpacked) {
     // The filters have shape
     // [output channels, height, width, input channels]
     // and we now view it as a matrix of shape
@@ -504,14 +429,11 @@ void EvalOpt(TfLiteContext* context, TfLiteNode* node,

     size_t num_bytes = filter_data_bp.size() * sizeof(TBitpacked);

-    if (num_bytes != bitpacked_weights->bytes) {
-      context->ReportError(context,
-                           "Error in computation of filter bitpacking size.");
-    } else {
-      memcpy(GetTensorData<TBitpacked>(bitpacked_weights),
-             filter_data_bp.data(), num_bytes);
-    }
-    params->bitpacked_weights = true;
+    params->bitpacked_weights_buffer.resize(num_bytes);
+    memcpy(params->bitpacked_weights_buffer.data(), filter_data_bp.data(),
+           num_bytes);
+
+    params->is_weight_bitpacked = true;
   }

   // Using the standard TF Lite ConvParams struct.
@@ -526,11 +448,12 @@ void EvalOpt(TfLiteContext* context, TfLiteNode* node,
   // weights data
   BConv2D<T, TBitpacked>(
       op_params, GetTensorShape(input), GetTensorData<T>(input),
-      GetTensorShape(filter), GetTensorData<TBitpacked>(bitpacked_weights),
+      GetTensorShape(filter),
+      reinterpret_cast<TBitpacked*>(params->bitpacked_weights_buffer.data()),
       GetTensorData<float>(fused_multiply), GetTensorData<float>(fused_add),
       GetTensorShape(output), GetTensorData<T>(output), GetTensorShape(im2col),
       GetTensorData<T>(im2col), params->bitpack_before_im2col,
-      GetTensorData<T>(padding_buffer),
+      params->padding_buffer.data(),
       CpuBackendContext::GetFromContext(context));
 }

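Since the packed weights now live in a `std::vector<std::uint8_t>` sized in bytes, the call above views them through a `reinterpret_cast` to `TBitpacked*`. A stand-alone sketch of that byte-buffer-as-packed-words pattern follows; the `TBitpacked` alias and the element count are assumptions made for the example, not values from the kernel.

```cpp
// A byte buffer backing packed words: allocate num_bytes, fill via memcpy
// (as the kernel does with filter_data_bp), then reinterpret the bytes as
// TBitpacked when passing them on.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

using TBitpacked = std::uint32_t;  // assumed packed word type for this sketch

int main() {
  constexpr std::size_t num_packed_elements = 4;
  const std::size_t num_bytes = num_packed_elements * sizeof(TBitpacked);

  std::vector<std::uint8_t> buffer(num_bytes);

  // Fill the buffer from a packed source via memcpy.
  const TBitpacked src[num_packed_elements] = {0x1u, 0x2u, 0x4u, 0x8u};
  std::memcpy(buffer.data(), src, num_bytes);

  // View the raw bytes as packed words when handing them to the conv routine.
  const TBitpacked* packed = reinterpret_cast<const TBitpacked*>(buffer.data());
  std::printf("first packed word: 0x%x\n", static_cast<unsigned>(packed[0]));
  return 0;
}
```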