Skip to content

Commit 9d14472

Browse files
committed
remove additional encode method in vision encoder
1 parent 19e8b28 commit 9d14472

File tree

14 files changed

+25
-20
lines changed

14 files changed

+25
-20
lines changed

src/cpp/src/visual_language/internvl_chat/classes.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ ov::Tensor get_pixel_values_internvl(const ov::Tensor& image, const ProcessorCon
128128

129129
} // namespace
130130

131-
EncodedImage VisionEncoderInternVLChat::encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) {
131+
EncodedImage VisionEncoderInternVLChat::encode(const ov::Tensor& image, const ov::AnyMap& config_map) {
132+
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
133+
ov::InferRequest& encoder = infer_request_guard.get();
132134
ProcessorConfig config = utils::from_any_map(config_map, m_processor_config);
133135

134136
ov::Tensor pixel_values = get_pixel_values_internvl(image, config);

src/cpp/src/visual_language/internvl_chat/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class VisionEncoderInternVLChat : public VisionEncoder {
1616
public:
1717
using VisionEncoder::VisionEncoder;
1818

19-
EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) override;
19+
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map) override;
2020
};
2121

2222
class InputsEmbedderInternVLChat : public InputsEmbedder::IInputsEmbedder {

src/cpp/src/visual_language/llava/classes.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ ov::Tensor get_pixel_values_llava(const ov::Tensor& image, const ProcessorConfig
6868

6969
} // namespace
7070

71-
EncodedImage VisionEncoderLLaVA::encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) {
71+
EncodedImage VisionEncoderLLaVA::encode( const ov::Tensor& image, const ov::AnyMap& config_map) {
72+
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
73+
ov::InferRequest& encoder = infer_request_guard.get();
7274
ProcessorConfig config = utils::from_any_map(config_map, m_processor_config);
7375

7476
ov::Tensor pixel_values = get_pixel_values_llava(image, config);

src/cpp/src/visual_language/llava/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class VisionEncoderLLaVA : public VisionEncoder {
1616
public:
1717
using VisionEncoder::VisionEncoder;
1818

19-
EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) override;
19+
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map) override;
2020
};
2121

2222
class InputsEmbedderLLaVA : public InputsEmbedder::IInputsEmbedder {

src/cpp/src/visual_language/llava_next/classes.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ ov::Tensor get_pixel_values_llava_next(const ov::Tensor& image, const ProcessorC
4949

5050
} // namespace
5151

52-
EncodedImage VisionEncoderLLaVANext::encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) {
52+
EncodedImage VisionEncoderLLaVANext::encode(const ov::Tensor& image, const ov::AnyMap& config_map) {
53+
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
54+
ov::InferRequest& encoder = infer_request_guard.get();
5355
ProcessorConfig config = utils::from_any_map(config_map, m_processor_config);
5456

5557
ov::Tensor pixel_values = get_pixel_values_llava_next(image, config);

src/cpp/src/visual_language/llava_next/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class VisionEncoderLLaVANext : public VisionEncoder {
1515
public:
1616
using VisionEncoder::VisionEncoder;
1717

18-
EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) override;
18+
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map) override;
1919
};
2020

2121
class InputsEmbedderLLaVANext : public InputsEmbedderLLaVA {

src/cpp/src/visual_language/minicpm/classes.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,9 @@ EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const o
403403

404404
} // namespace
405405

406-
EncodedImage VisionEncoderMiniCPM::encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) {
406+
EncodedImage VisionEncoderMiniCPM::encode(const ov::Tensor& image, const ov::AnyMap& config_map) {
407+
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
408+
ov::InferRequest& encoder = infer_request_guard.get();
407409
ProcessorConfig config = utils::from_any_map(config_map, m_processor_config);
408410

409411
clip_ctx ctx_clip;

src/cpp/src/visual_language/minicpm/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class VisionEncoderMiniCPM : public VisionEncoder {
1616
public:
1717
using VisionEncoder::VisionEncoder;
1818

19-
EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) override;
19+
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map) override;
2020
};
2121

2222
class InputsEmbedderMiniCPM : public InputsEmbedder::IInputsEmbedder {

src/cpp/src/visual_language/phi3_vision/classes.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ std::tuple<ov::Tensor, ImageSize> get_pixel_values_phi3_v(const ov::Tensor& imag
211211

212212
} // namespace
213213

214-
EncodedImage VisionEncoderPhi3V::encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) {
214+
EncodedImage VisionEncoderPhi3V::encode(const ov::Tensor& image, const ov::AnyMap& config_map) {
215+
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
216+
ov::InferRequest& encoder = infer_request_guard.get();
215217
ProcessorConfig config = utils::from_any_map(config_map, m_processor_config);
216218

217219
const auto& [pixel_values, image_size] = get_pixel_values_phi3_v(image, config);

src/cpp/src/visual_language/phi3_vision/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class VisionEncoderPhi3V : public VisionEncoder {
1616
public:
1717
using VisionEncoder::VisionEncoder;
1818

19-
EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) override;
19+
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map) override;
2020
};
2121

2222
class InputsEmbedderPhi3V : public InputsEmbedder::IInputsEmbedder {

src/cpp/src/visual_language/qwen2vl/classes.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,9 @@ ov::Tensor transpose_image_patches_qwen2vl(const ov::Tensor& reshaped_patches) {
169169

170170
} // namespace
171171

172-
EncodedImage VisionEncoderQwen2VL::encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) {
172+
EncodedImage VisionEncoderQwen2VL::encode(const ov::Tensor& image, const ov::AnyMap& config_map) {
173+
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
174+
ov::InferRequest& encoder = infer_request_guard.get();
173175
ProcessorConfig config = utils::from_any_map(config_map, m_processor_config);
174176

175177
ov::Shape image_shape = image.get_shape();

src/cpp/src/visual_language/qwen2vl/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class VisionEncoderQwen2VL : public VisionEncoder {
1616
public:
1717
using VisionEncoder::VisionEncoder;
1818

19-
EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map) override;
19+
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map) override;
2020
};
2121

2222
class InputsEmbedderQwen2VL : public InputsEmbedder::IInputsEmbedder {

src/cpp/src/visual_language/vision_encoder.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,6 @@ VisionEncoder::VisionEncoder(
4141
m_processor_config = utils::from_config_json_if_exists<ProcessorConfig>(config_dir_path, "preprocessor_config.json");
4242
}
4343

44-
EncodedImage VisionEncoder::encode(const ov::Tensor& image, const ov::AnyMap& config_map) {
45-
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_vision_encoder.get());
46-
return encode(infer_request_guard.get(), image, config_map);
47-
}
48-
4944
ProcessorConfig VisionEncoder::get_processor_config() const {
5045
return m_processor_config;
5146
}

src/cpp/src/visual_language/vision_encoder.hpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class VisionEncoder {
8787
/// instead of the config obtained in constructors.
8888
/// @return Resulting embeddings for the resized source image and
8989
/// its slices.
90-
EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map = {});
90+
virtual EncodedImage encode(const ov::Tensor& image, const ov::AnyMap& config_map = {}) = 0;
9191

9292
/// @brief Gets processor config
9393
/// @return Processor config
@@ -100,8 +100,6 @@ class VisionEncoder {
100100
/// @brief A config to follow.
101101
ProcessorConfig m_processor_config;
102102

103-
virtual EncodedImage encode(ov::InferRequest& encoder, const ov::Tensor& image, const ov::AnyMap& config_map = {}) = 0;
104-
105103
public:
106104
VisionEncoder(
107105
const std::filesystem::path& model_dir,

0 commit comments

Comments
 (0)