// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
#ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
#include "structural_svm_object_detection_problem_abstract.h"
#include "../image_processing/object_detector_abstract.h"
#include "../image_processing/box_overlap_testing_abstract.h"
#include "../image_processing/full_object_detection_abstract.h"
#include <chrono>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename image_scanner_type
>
class structural_object_detection_trainer : noncopyable
{
/*!
REQUIREMENTS ON image_scanner_type
image_scanner_type must be an implementation of
dlib/image_processing/scan_fhog_pyramid_abstract.h or
dlib/image_processing/scan_image_custom_abstract.h or
dlib/image_processing/scan_image_pyramid_abstract.h or
dlib/image_processing/scan_image_boxes_abstract.h
WHAT THIS OBJECT REPRESENTS
This object is a tool for learning to detect objects in images based on a
set of labeled images. The training procedure produces an object_detector
which can be used to predict the locations of objects in new images.
Note that this is just a convenience wrapper around the structural_svm_object_detection_problem
to make it look similar to all the other trainers in dlib.
!*/
public:
typedef double scalar_type;
typedef default_memory_manager mem_manager_type;
typedef object_detector<image_scanner_type> trained_function_type;
explicit structural_object_detection_trainer (
const image_scanner_type& scanner
);
/*!
requires
- scanner.get_num_detection_templates() > 0
ensures
- #get_c() == 1
- this object isn't verbose
- #get_epsilon() == 0.1
- #get_num_threads() == 2
- #get_max_cache_size() == 5
- #get_match_eps() == 0.5
- #get_loss_per_missed_target() == 1
- #get_loss_per_false_alarm() == 1
- This object will attempt to learn a model for the given
scanner object when train() is called.
- #get_scanner() == scanner
(note that only the "configuration" of scanner is copied.
I.e. the copy is done using copy_configuration())
- #auto_set_overlap_tester() == true
- #get_max_runtime() == std::chrono::hours(24*356*290)
(i.e. 290 years, so basically forever)
!*/
const image_scanner_type& get_scanner (
) const;
/*!
ensures
- returns the image scanner used by this object.
!*/
bool auto_set_overlap_tester (
) const;
/*!
ensures
- if (this object will automatically determine an appropriate
state for the overlap tester used for non-max suppression.) then
- returns true
- In this case, it is determined using the find_tight_overlap_tester()
routine based on the truth_object_detections given to the
structural_object_detection_trainer::train() method.
- else
- returns false
!*/
void set_overlap_tester (
const test_box_overlap& tester
);
/*!
ensures
- #get_overlap_tester() == tester
- #auto_set_overlap_tester() == false
!*/
test_box_overlap get_overlap_tester (
) const;
/*!
requires
- auto_set_overlap_tester() == false
ensures
- returns the overlap tester object which will be used to perform non-max suppression.
In particular, this function returns the overlap tester which will populate the
object_detector returned by train().
!*/
void set_num_threads (
unsigned long num
);
/*!
ensures
- #get_num_threads() == num
!*/
unsigned long get_num_threads (
) const;
/*!
ensures
- returns the number of threads used during training. You should
usually set this equal to the number of processing cores on your
machine.
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer
to train. You can think of this epsilon value as saying "solve the
optimization problem until the average loss per sample is within epsilon
of its optimal value".
!*/
void set_max_runtime (
const std::chrono::nanoseconds& max_runtime
);
/*!
ensures
- #get_max_runtime() == max_runtime
!*/
std::chrono::nanoseconds get_max_runtime (
) const;
/*!
ensures
- returns the maximum amount of time we will let .train() run before
making it terminate.
!*/
void set_max_cache_size (
unsigned long max_size
);
/*!
ensures
- #get_max_cache_size() == max_size
!*/
unsigned long get_max_cache_size (
) const;
/*!
ensures
- During training, this object basically runs the object detector on
each image, over and over. To speed this up, it is possible to cache
the results of these detector invocations. This function returns the
number of cache elements per training sample kept in the cache. Note
that a value of 0 means caching is not used at all. Note also that
each cache element takes up about sizeof(double)*scanner.get_num_dimensions()
memory (where scanner is the scanner given to this object's constructor).
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
void set_oca (
const oca& item
);
/*!
ensures
- #get_oca() == item
!*/
const oca get_oca (
) const;
/*!
ensures
- returns a copy of the optimizer used to solve the structural SVM problem.
!*/
void set_c (
scalar_type C
);
/*!
requires
- C > 0
ensures
- #get_c() = C
!*/
const scalar_type get_c (
) const;
/*!
ensures
- returns the SVM regularization parameter. It is the parameter
that determines the trade-off between trying to fit the training
data (i.e. minimize the loss) or allowing more errors but hopefully
improving the generalization of the resulting detector. Larger
values encourage exact fitting while smaller values of C may encourage
better generalization.
!*/
void set_match_eps (
double eps
);
/*!
requires
- 0 < eps < 1
ensures
- #get_match_eps() == eps
!*/
double get_match_eps (
) const;
/*!
ensures
- returns the amount of alignment necessary for a detection to be considered
as matching with a ground truth rectangle. If it doesn't match then
it is considered to be a false alarm. To define this precisely, let
A and B be two rectangles, then A and B match if and only if:
A.intersect(B).area()/(A+B).area() > get_match_eps()
!*/
double get_loss_per_missed_target (
) const;
/*!
ensures
- returns the amount of loss experienced for failing to detect one of the
targets. If you care more about finding targets than having a low false
alarm rate then you can increase this value.
!*/
void set_loss_per_missed_target (
double loss
);
/*!
requires
- loss > 0
ensures
- #get_loss_per_missed_target() == loss
!*/
double get_loss_per_false_alarm (
) const;
/*!
ensures
- returns the amount of loss experienced for emitting a false alarm detection.
Or in other words, the loss for generating a detection that doesn't correspond
to one of the truth rectangles. If you care more about having a low false
alarm rate than finding all the targets then you can increase this value.
!*/
void set_loss_per_false_alarm (
double loss
);
/*!
requires
- loss > 0
ensures
- #get_loss_per_false_alarm() == loss
!*/
template <
typename image_array_type
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<full_object_detection> >& truth_object_detections
) const;
/*!
requires
- is_learning_problem(images, truth_object_detections) == true
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- for all valid i, j:
- truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
- all_parts_in_rect(truth_object_detections[i][j]) == true
ensures
- Uses the structural_svm_object_detection_problem to train an object_detector
on the given images and truth_object_detections.
- returns a function F with the following properties:
- F(new_image) == A prediction of what objects are present in new_image. This
is a set of rectangles indicating their positions.
!*/
template <
typename image_array_type
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_object_detections
) const;
/*!
requires
- is_learning_problem(images, truth_object_detections) == true
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- get_scanner().get_num_movable_components_per_detection_template() == 0
ensures
- This function is identical to the above train(), except that it converts
each element of truth_object_detections into a full_object_detection by
passing it to full_object_detection's constructor taking only a rectangle.
Therefore, this version of train() is a convenience function for for the
case where you don't have any movable components of the detection templates.
!*/
template <
typename image_array_type
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<full_object_detection> >& truth_object_detections,
const std::vector<std::vector<rectangle> >& ignore,
const test_box_overlap& ignore_overlap_tester = test_box_overlap()
) const;
/*!
requires
- is_learning_problem(images, truth_object_detections) == true
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- ignore.size() == images.size()
- for all valid i, j:
- truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
- all_parts_in_rect(truth_object_detections[i][j]) == true
ensures
- Uses the structural_svm_object_detection_problem to train an object_detector
on the given images and truth_object_detections.
- for all valid i:
- Within images[i] any detections that match against a rectangle in
ignore[i], according to ignore_overlap_tester, are ignored. That is,
the optimizer doesn't care if the detector outputs a detection that
matches any of the ignore rectangles or if it fails to output a
detection for an ignore rectangle. Therefore, if there are objects
in your dataset that you are unsure if you want to detect or otherwise
don't care if the detector gets or doesn't then you can mark them
with ignore rectangles and the optimizer will simply ignore them.
- returns a function F with the following properties:
- F(new_image) == A prediction of what objects are present in new_image. This
is a set of rectangles indicating their positions.
!*/
template <
typename image_array_type
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_object_detections,
const std::vector<std::vector<rectangle> >& ignore,
const test_box_overlap& ignore_overlap_tester = test_box_overlap()
) const;
/*!
requires
- is_learning_problem(images, truth_object_detections) == true
- ignore.size() == images.size()
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- get_scanner().get_num_movable_components_per_detection_template() == 0
ensures
- This function is identical to the above train(), except that it converts
each element of truth_object_detections into a full_object_detection by
passing it to full_object_detection's constructor taking only a rectangle.
Therefore, this version of train() is a convenience function for for the
case where you don't have any movable components of the detection templates.
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_