In the last few decades, significant achievements have been attained in predicting where humans look at images through different computational models. However, how to determine contributions of different visual features to overall saliency still remains an open problem. To overcome this issue, a recent class of models formulates saliency estimation as a supervised learning problem and accordingly applies machine learning techniques. In this paper, we also address this challenging problem and propose to use multiple kernel learning (MKL) to combine information coming from different feature dimensions and to perform integration at an intermediate level. Besides, we suggest using responses of a recently proposed filterbank of object detectors, known as Object-Bank, as additional semantic high-level features. Here we show that our MKL-based framework together with the proposed object-specific features provides state-of-the-art performance as compared to SVM or AdaBoost-based saliency models.
% arXiv preprint 1307.5693, indexed by DBLP under the CoRR pseudo-journal.
% journal/volume keep the DBLP convention so the article type has its required
% fields; eprint/eprinttype expose the arXiv identifier to eprint-aware styles.
% The link is stored in url because the DBLP-specific ee field is not a
% standard field and is ignored by standard styles.
@article{DBLP:journals/corr/KavakEE13,
  author     = {Kavak, Yasin and Erdem, Erkut and Erdem, Aykut},
  title      = {Visual saliency estimation by integrating features using multiple kernel learning},
  journal    = {CoRR},
  volume     = {abs/1307.5693},
  year       = {2013},
  eprint     = {1307.5693},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1307.5693},
  bibsource  = {DBLP, http://dblp.uni-trier.de},
}