Pārlūkot izejas kodu

Broken video stabilization.

jcsyshc 1 gadu atpakaļ
vecāks
revīzija
2fab8aa84d

+ 1 - 1
CMakeLists.txt

@@ -116,7 +116,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LE
 
 # OpenCV config
 cmake_policy(SET CMP0146 OLD)
-find_package(OpenCV REQUIRED COMPONENTS cudaimgproc cudawarping calib3d imgcodecs opencv_cudastereo)
+find_package(OpenCV REQUIRED COMPONENTS cudaimgproc cudawarping calib3d imgcodecs opencv_cudastereo cudaoptflow)
 target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
 

+ 3 - 3
src/core/cuda_helper.hpp

@@ -62,9 +62,9 @@ inline cudaStream_t cuda_stream(smart_cuda_stream *stream = nullptr) {
     return (stream == nullptr) ? nullptr : stream->cuda;
 }
 
-inline cv::cuda::Stream &cv_stream(smart_cuda_stream *stream = nullptr) {
-    return (stream == nullptr) ? cv::cuda::Stream::Null() : stream->cv;
-}
+// inline cv::cuda::Stream &cv_stream(smart_cuda_stream *stream = nullptr) {
+//     return (stream == nullptr) ? cv::cuda::Stream::Null() : stream->cv;
+// }
 
 template<typename T>
 inline void extend_pointer_life(const std::shared_ptr<T> &ptr,

+ 1 - 0
src/image_process_v5/CMakeLists.txt

@@ -4,6 +4,7 @@ target_sources(${PROJECT_NAME} PRIVATE
         image_process.cpp
         osg_helper.cpp
         sp_image.cpp
+        video_stabilization.cpp
         process_python/fast_sam.cpp
         process_python/rotate_registration.cpp
 )

+ 5 - 5
src/image_process_v5/image_enhance.cpp

@@ -65,11 +65,11 @@ struct image_enhance_ui::impl {
         auto disable_guard = imgui_disable_guard(ui_conf->passthrough);
         ImGui::DragFloat("Luminance Target", &ui_conf->opts.luminance_target,
                          0.002, 0, 2, "%.3f");
-        float eps_db = 10 * log10f(ui_conf->opts.black_eps_factor);
-        if (ImGui::DragFloat("Black Level (dB)", &eps_db,
-                             0.5, -70, -10, "%.1f")) {
-            ui_conf->opts.black_eps_factor = exp10f(eps_db / 10);
-        }
+        // float eps_db = 10 * log10f(ui_conf->opts.black_eps_factor);
+        // if (ImGui::DragFloat("Black Level (dB)", &eps_db,
+        //                      0.5, -70, -10, "%.1f")) {
+        //     ui_conf->opts.black_eps_factor = exp10f(eps_db / 10);
+        // }
     }
 
     explicit impl(const create_config _conf) : conf(_conf) {

+ 1 - 1
src/image_process_v5/image_enhance.h

@@ -4,7 +4,7 @@
 #include "sp_image.h"
 
 struct image_enhance_options {
-    float black_eps_factor = 0.01;
+    float [[deprecated]] black_eps_factor = 0.01;
     float luminance_target = 0.36;
 };
 

+ 56 - 2
src/image_process_v5/image_process.cpp

@@ -53,17 +53,28 @@ sp_image nv12_chrome_view(const sp_image &img) {
     return img_chrome.cast_view(CV_8UC2);
 }
 
+cv_stream_guard::cv_stream_guard() {
+    push_cuda_stream((cudaStream_t) get_cv_stream().cudaPtr());
+}
+
+cv_stream_guard::~cv_stream_guard() {
+    pop_cuda_stream();
+}
+
+cv::cuda::Stream &cv_stream_guard::cv_stream() {
+    return get_cv_stream();
+}
+
 namespace {
     struct image_opencv_cuda_helper {
         const sp_image *read;
         sp_image *write;
         using proxy_type = auto_memory_info::cuda_proxy;
-        cuda_stream_guard stream_guard;
+        cv_stream_guard stream_guard;
         pair_access_helper<proxy_type, proxy_type> access_helper;
 
         image_opencv_cuda_helper(const sp_image &src, sp_image &dst)
             : read(&src), write(&dst),
-              stream_guard((cudaStream_t) get_cv_stream().cudaPtr()),
               access_helper(read->cuda(), write->cuda()) { (void) 0; }
 
         [[nodiscard]] cv::cuda::GpuMat input() const {
@@ -148,6 +159,39 @@ sp_image image_translate(const sp_image &img, const glm::vec2 offset) {
     return image_warp_affine(img, matrix);
 }
 
+image_stereo_pair image_stereo_split(const sp_image &img) {
+    assert(img.width() % 2 == 0);
+    const auto mono_size = cv::Size(img.width() / 2, img.height());
+    auto ret_left = sp_image::create(img.cv_type(), mono_size);
+    const auto img_left = img.sub_view(mono_size);
+    copy_sp_image(img_left, ret_left);
+    auto ret_right = sp_image::create(img.cv_type(), mono_size);
+    const auto img_right = img.sub_view(mono_size,
+                                        cv::Size(img.width() / 2, 0));
+    copy_sp_image(img_right, ret_right);
+    return std::make_tuple(ret_left, ret_right);
+}
+
+image_stereo_pair image_stereo_split_view(const sp_image &img) {
+    assert(img.width() % 2 == 0);
+    const auto mono_size = cv::Size(img.width() / 2, img.height());
+    const auto img_left = img.sub_view(mono_size);
+    const auto img_right = img.sub_view(mono_size,
+                                        cv::Size(img.width() / 2, 0));
+    return std::make_tuple(img_left, img_right);
+}
+
+sp_image image_stereo_combine(const sp_image &left, const sp_image &right) {
+    assert(left.cv_type() == right.cv_type());
+    assert(left.shape_array() == right.shape_array());
+    const auto stereo_shape = cv::Size(left.width() * 2, left.height());
+    auto ret_img = sp_image::create(left.cv_type(), stereo_shape);
+    auto [left_view, right_view] = image_stereo_split_view(ret_img);
+    copy_sp_image(left, left_view);
+    copy_sp_image(right, right_view);
+    return ret_img;
+}
+
 #include "image_process/cuda_impl/pixel_convert.cuh"
 
 namespace {
@@ -192,6 +236,16 @@ sp_image image_rgb_to_bgra(const sp_image &img) {
     return ret;
 }
 
+sp_image image_rgb_to_gray(const sp_image &img) {
+    assert(img.cv_type() == CV_8UC3);
+    auto ret = sp_image::create<uchar1>(img.cv_size());
+    const auto helper = image_opencv_cuda_helper(img, ret);
+    cv::cuda::cvtColor(helper.input(), helper.output(),
+                       cv::COLOR_RGB2GRAY, 1, get_cv_stream());
+    ret.merge_meta(img);
+    return ret;
+}
+
 sp_image image_rgb_to_nv12(const sp_image &img) {
     assert(img.cv_type() == CV_8UC3);
     auto ret = sp_image::create(CV_8UC1, normal_size_to_nv12(img.cv_size()));

+ 12 - 0
src/image_process_v5/image_process.h

@@ -5,6 +5,12 @@
 
 #include <glm/glm.hpp>
 
+struct cv_stream_guard {
+    cv_stream_guard();
+    ~cv_stream_guard();
+    cv::cuda::Stream &cv_stream();
+};
+
 size_t normal_height_to_nv12(size_t height);
 size_t nv12_height_to_normal(size_t height);
 cv::Size normal_size_to_nv12(cv::Size size);
@@ -19,6 +25,7 @@ sp_image image_resize(const sp_image &img, cv::Size size);
 sp_image image_flip_y(const sp_image &img);
 sp_image image_rgb_to_bgr(const sp_image &img);
 sp_image image_rgb_to_bgra(const sp_image &img);
+sp_image image_rgb_to_gray(const sp_image &img);
 sp_image image_rgb_to_nv12(const sp_image &img);
 sp_image image_nv12_to_rgb(const sp_image &img);
 sp_image image_yuyv_to_rgb(const sp_image &img);
@@ -27,6 +34,11 @@ sp_image image_warp_affine(const sp_image &img, const glm::mat3 &matrix);
 sp_image image_rotate(const sp_image &img, float angle, std::optional<glm::vec2> center);
 sp_image image_translate(const sp_image &img, glm::vec2 offset);
 
+using image_stereo_pair = std::tuple<sp_image, sp_image>;
+image_stereo_pair image_stereo_split(const sp_image &img);
+image_stereo_pair image_stereo_split_view(const sp_image &img);
+sp_image image_stereo_combine(const sp_image &left, const sp_image &right);
+
 void image_save_jpg(const sp_image &img, const std::string &filename); // filename without extension
 void image_save_png(const sp_image &img, const std::string &filename);
 

+ 402 - 0
src/image_process_v5/video_stabilization.cpp

@@ -0,0 +1,402 @@
+#include "video_stabilization.h"
+#include "image_process.h"
+#include "third_party/scope_guard.hpp"
+
+#include <opencv2/calib3d.hpp>
+#include <opencv2/cudaoptflow.hpp>
+#include <opencv2/cudafeatures2d.hpp>
+#include <opencv2/cudaimgproc.hpp>
+
+#include <Eigen/Dense>
+
+#include <cmath>
+#include <deque>
+#include <ranges>
+#include <vector>
+
+namespace {
+    template<typename T>
+    std::vector<T> download(const cv::cuda::GpuMat &d_mat, int type) {
+        auto ret = std::vector<T>(d_mat.cols);
+        auto mat = cv::Mat(1, d_mat.cols, type, ret.data());
+        d_mat.download(mat);
+        return ret;
+    }
+
+    struct point_tracker {
+        cv::Ptr<cv::cuda::CornersDetector> detector;
+        cv::Ptr<cv::cuda::SparsePyrLKOpticalFlow> tracker;
+        sp_image prev;
+
+        using points_type = std::vector<cv::Point2f>;
+
+        struct result_type {
+            points_type prev_pts;
+            points_type curr_pts;
+        };
+
+        using create_config = video_stabilization::point_tracker_config;
+        create_config conf;
+
+        explicit point_tracker(const create_config _conf)
+            : conf(_conf) {
+            detector = cv::cuda::createGoodFeaturesToTrackDetector(
+                CV_8UC1, conf.max_feature_points,
+                0.3, 7, 7);
+            tracker = cv::cuda::SparsePyrLKOpticalFlow::create();
+        }
+
+        // Input RGB image
+        std::optional<result_type> process(const sp_image &img) {
+            auto closer = sg::make_scope_guard([&] { prev = img; });
+            if (prev.empty()) return {};
+
+            auto stream_guard = cv_stream_guard();
+            cv::cuda::GpuMat d_prev_pts;
+            if (true) {
+                const auto prev_gray = image_rgb_to_gray(prev);
+                const auto helper = read_access_helper(prev_gray.cuda());
+                detector->detect(prev_gray.cv_gpu_mat(helper.ptr()), d_prev_pts,
+                                 cv::noArray(), stream_guard.cv_stream());
+            }
+
+            cv::cuda::GpuMat d_curr_pts, d_status;
+            if (true) {
+                const auto prev_helper = read_access_helper(prev.cuda());
+                const auto curr_helper = read_access_helper(img.cuda());
+                tracker->calc(prev.cv_gpu_mat(prev_helper.ptr()),
+                              img.cv_gpu_mat(curr_helper.ptr()),
+                              d_prev_pts, d_curr_pts, d_status);
+            }
+
+            auto status = download<uchar>(d_status, CV_8UC1);
+            auto valid_indices = std::views::iota(0ull, status.size())
+                                 | std::views::filter([&](auto i) { return status[i]; });
+            auto prev_pts = download<cv::Point2f>(d_prev_pts, CV_32FC2);
+            auto curr_pts = download<cv::Point2f>(d_curr_pts, CV_32FC2);
+            auto valid_prev_pts = valid_indices
+                                  | std::views::transform([&](auto i) { return prev_pts[i]; });
+            auto valid_curr_pts = valid_indices
+                                  | std::views::transform([&](auto i) { return curr_pts[i]; });
+            auto ret = result_type();
+            std::ranges::copy(valid_prev_pts, std::back_inserter(ret.prev_pts));
+            std::ranges::copy(valid_curr_pts, std::back_inserter(ret.curr_pts));
+            return ret;
+        }
+    };
+
+    struct transform_stabilization {
+        using matrix_type = Eigen::Matrix<float, 2, 3>;
+        using matrix_param_type = Eigen::Vector4f;
+        using record_type = Eigen::VectorXf;
+        using trajectory_type = Eigen::Matrix4Xf;
+
+        static record_type optimize_simple(const record_type &traj) {
+            const auto num = traj.rows();
+            auto ret = record_type(num);
+            ret.setConstant(traj.mean());
+            return ret;
+        }
+
+        [[nodiscard]] record_type optimize(
+            const record_type &traj,
+            const record_type &prev_traj = {}) const {
+            const auto num = traj.rows();
+            Eigen::MatrixXf mat_Q = Eigen::MatrixXf::Identity(num, num);
+            Eigen::VectorXf vec_b = -2 * traj;
+
+            auto gaussian = [=, this](const float x) {
+                return expf(-9 * powf(x, 2)) / powf(conf.smooth_radius, 2);
+            };
+            if (true) {
+                auto mat_D = Eigen::MatrixXf(num, num);
+                for (auto i = 0; i < num; ++i)
+                    for (auto j = 0; j < num; ++j)
+                        mat_D(i, j) = gaussian(i - j);
+                for (auto i = 0; i < num; ++i)
+                    mat_D(i, i) -= mat_D.col(i).sum();
+                mat_Q += conf.smooth_factor * (mat_D * mat_D.transpose());
+            }
+
+            if (prev_traj.rows() > 0) {
+                assert(prev_traj.rows() == num);
+                Eigen::MatrixXf mat_D = Eigen::MatrixXf::Identity(num, num);
+                mat_D(num - 1, num - 1) = 0;
+                mat_Q += conf.beta * mat_D;
+                vec_b += conf.beta * (-2 * prev_traj);
+            }
+
+            mat_Q *= 2;
+            record_type ret = mat_Q.partialPivLu().solve(-vec_b);
+            return ret;
+        }
+
+        [[nodiscard]] trajectory_type optimize_trajectory(
+            const trajectory_type &traj,
+            const trajectory_type &prev_traj = {}) const {
+            const auto num_dims = traj.rows();
+            auto ret = trajectory_type(num_dims, traj.cols());
+            for (auto i = 0; i < num_dims; ++i) {
+                ret.row(i) = optimize(traj.row(i), prev_traj.row(i));
+            }
+            return ret;
+        }
+
+        static matrix_param_type decompose_transform(const matrix_type &mat) {
+            const auto scale = sqrtf(mat.block<2, 2>(0, 0).determinant());
+            const auto angle = atan2f(mat(1, 0), mat(0, 0));
+            const auto tx = mat(0, 2);
+            const auto ty = mat(1, 2);
+            return {logf(scale), angle, tx, ty};
+        }
+
+        static matrix_type compose_transform(const matrix_param_type &param) {
+            const auto scale = expf(param(0)), angle = param(1);
+            const auto tx = param(2), ty = param(3);
+            auto ret = matrix_type();
+            ret << cosf(angle), -sinf(angle), tx,
+                    sinf(angle), cosf(angle), ty;
+            ret.block<2, 2>(0, 0) *= scale;
+            return ret;
+        }
+
+        using create_config = video_stabilization::transform_stabilization_config;
+        create_config conf;
+
+        size_t buffer_length = 0;
+        std::deque<matrix_param_type> transform_buf;
+        trajectory_type last_traj;
+
+        explicit transform_stabilization(const create_config &_conf)
+            : conf(_conf) {
+            buffer_length = conf.future_frames + conf.history_frames + 1;
+        }
+
+        std::optional<matrix_type> process(const matrix_type &mat) {
+            const auto param = decompose_transform(mat);
+            transform_buf.push_back(param);
+            if (transform_buf.size() <= conf.future_frames) return {};
+            if (transform_buf.size() > buffer_length) {
+                transform_buf.pop_front();
+            }
+
+            if (last_traj.cols() > 0) {
+                last_traj.conservativeResize(4, last_traj.cols() + 1);
+                last_traj.col(last_traj.cols() - 1).setZero();
+                if (last_traj.cols() > buffer_length) {
+                    last_traj.block(0, 0, 4, buffer_length)
+                            = last_traj.block(0, 1, 4, buffer_length);
+                    last_traj.conservativeResize(4, buffer_length);
+                }
+            }
+
+            auto curr_traj = trajectory_type(4, transform_buf.size());
+            for (int i = 0; i < transform_buf.size(); i++) {
+                curr_traj.col(i) = transform_buf[i];
+                if (i > 0) {
+                    curr_traj.col(i) += curr_traj.col(i - 1);
+                }
+            }
+
+            const auto traj_opt = optimize_trajectory(curr_traj, last_traj);
+            last_traj = traj_opt;
+
+            const auto curr_pos = conf.future_frames;
+            const auto diff = traj_opt.col(curr_pos) - curr_traj.col(curr_pos);
+            const auto curr = transform_buf[curr_pos] + diff;
+            // const Eigen::Vector4f diff = traj_opt.col(curr_pos) - curr_traj.col(curr_pos);
+            // const Eigen::Vector4f curr = transform_buf[curr_pos] + diff;
+            // SPDLOG_DEBUG("Transform: {}, {}, {}, {}", curr.x(), curr.y(), curr.z(), curr.w());
+            return compose_transform(curr);
+        }
+    };
+}
+
+struct video_stabilization::impl {
+    using matrix_type = transform_stabilization::matrix_type;
+
+    static matrix_type combine_transform(const matrix_type &mat1, const matrix_type &mat2) {
+        auto ret = matrix_type();
+        const auto r1_mat = mat1.leftCols<2>();
+        ret.leftCols<2>() = r1_mat * mat2.leftCols<2>();
+        ret.rightCols<1>() = r1_mat * mat2.rightCols<1>() + mat1.rightCols<1>();
+        return ret;
+    }
+
+    static matrix_type identity_transform() {
+        auto ret = matrix_type();
+        ret.setZero();
+        ret.leftCols<2>().setIdentity();
+        return ret;
+    }
+
+    static glm::mat3 to_mat3(const matrix_type &mat) {
+        auto ret = glm::mat3();
+        for (auto i = 0; i < 2; ++i)
+            for (auto j = 0; j < 3; ++j) {
+                ret[j][i] = mat(i, j);
+            }
+        return ret;
+    }
+
+    static matrix_type to_transform(const cv::Mat &mat) {
+        assert(mat.rows == 2 && mat.cols == 3);
+        assert(mat.type() == CV_64FC1);
+        auto ret = matrix_type();
+        for (auto i = 0; i < 2; ++i)
+            for (auto j = 0; j < 3; ++j) {
+                ret(i, j) = mat.at<double>(i, j);
+            }
+        return ret;
+    }
+
+    static constexpr auto min_tracked_points = 10;
+
+    create_config conf;
+    std::optional<point_tracker> track_left, track_right;
+    std::optional<transform_stabilization> transform_stab;
+    std::deque<sp_image> img_buf;
+
+    [[nodiscard]] sp_image transform_image(const sp_image &img, matrix_type mat) const {
+        const auto center = cv::Point2f(img.width() / 2.f, img.height() / 2.f);
+        const auto fix_mat = cv::getRotationMatrix2D(center, 0, 1.f + conf.enlarge_factor);
+        mat = combine_transform(to_transform(fix_mat), mat);
+        return image_warp_affine(img, to_mat3(mat));
+    }
+
+    std::optional<sp_image> process_mono(const sp_image &img) {
+        assert(!conf.stereo_mode);
+        const auto pts = track_left->process(img);
+        if (!pts) return {};
+        matrix_type mat;
+        if (pts->prev_pts.size() < min_tracked_points) [[unlikely]] {
+            mat = identity_transform();
+        } else {
+            const auto cv_mat = cv::estimateAffinePartial2D(
+                pts->prev_pts, pts->curr_pts);
+            mat = to_transform(cv_mat);
+        }
+
+        const auto cur_mat = transform_stab->process(mat);
+        img_buf.push_back(img);
+        if (!cur_mat) return {};
+
+        const auto cur_img = img_buf.front();
+        img_buf.pop_front();
+        return transform_image(cur_img, *cur_mat);
+    }
+
+    std::optional<sp_image> process_stereo(const sp_image &img) {
+        assert(conf.stereo_mode);
+        auto [img_left, img_right] = image_stereo_split_view(img);
+        auto pts_left = track_left->process(img_left);
+        auto pts_right = track_right->process(img_right);
+        if (!pts_left || !pts_right) return {};
+
+        auto num_points = pts_left->prev_pts.size() + pts_right->prev_pts.size();
+        matrix_type mat;
+        if (num_points < min_tracked_points) [[unlikely]] {
+            mat = identity_transform();
+        } else {
+            auto prev_pts = std::move(pts_left->prev_pts);
+            prev_pts.reserve(num_points);
+            std::ranges::copy(pts_right->prev_pts, std::back_inserter(prev_pts));
+            auto curr_pts = std::move(pts_left->curr_pts);
+            curr_pts.reserve(num_points);
+            std::ranges::copy(pts_right->curr_pts, std::back_inserter(curr_pts));
+            const auto cv_mat = cv::estimateAffinePartial2D(prev_pts, curr_pts);
+            mat = to_transform(cv_mat);
+        }
+
+        const auto cur_mat = transform_stab->process(mat);
+        img_buf.push_back(img);
+        if (!cur_mat) return {};
+
+        const auto cur_img = img_buf.front();
+        img_buf.pop_front();
+        auto [cur_left, cur_right] = image_stereo_split_view(cur_img);
+        cur_left = transform_image(cur_left, *cur_mat);
+        cur_right = transform_image(cur_right, *cur_mat);
+        return image_stereo_combine(cur_left, cur_right);
+    }
+
+    auto process(const sp_image &img) {
+        if (conf.stereo_mode) {
+            return process_stereo(img);
+        } else {
+            return process_mono(img);
+        }
+    }
+
+    explicit impl(const create_config &_conf) : conf(_conf) {
+        track_left.emplace(conf.pt_conf);
+        if (conf.stereo_mode) {
+            track_right.emplace(conf.pt_conf);
+        }
+        transform_stab.emplace(conf.st_conf);
+    }
+};
+
+video_stabilization::video_stabilization(create_config conf)
+    : pimpl(std::make_unique<impl>(conf)) { (void) 0; }
+
+video_stabilization::~video_stabilization() = default;
+
+std::optional<sp_image> video_stabilization::process(const sp_image &img) const {
+    return pimpl->process(img);
+}
+
+#include "core/imgui_utility.hpp"
+
+struct video_stabilization_ui::impl {
+    create_config conf;
+    obj_conn_type conn;
+
+    bool passthrough = false;
+    using stab_ptr_type = std::shared_ptr<video_stabilization>;
+    stab_ptr_type video_stab;
+
+    static void image_callback_impl(create_config conf, const stab_ptr_type &stab) {
+        const auto img = OBJ_QUERY(sp_image, conf.in_name);
+        if (const auto ret = stab->process(img)) {
+            OBJ_SAVE(conf.out_name, *ret);
+        }
+    }
+
+    void image_callback() {
+        if (passthrough) {
+            OBJ_SAVE(conf.out_name, OBJ_QUERY(sp_image, conf.in_name));
+            return;
+        }
+
+        auto task = [conf = conf, stab = video_stab] {
+            try {
+                image_callback_impl(conf, stab);
+            } catch (...) { (void) 0; }
+        };
+        TP_DETACH(task);
+    }
+
+    void show_ui() {
+        ImGui::Checkbox("Passthrough", &passthrough);
+    }
+
+    explicit impl(const create_config &_conf) : conf(_conf) {
+        video_stab = std::make_shared<video_stabilization>(conf.opts);
+        conn = OBJ_SIG(conf.in_name)->connect(
+            [this](auto _) { image_callback(); });
+    }
+
+    ~impl() {
+        conn.disconnect();
+    }
+};
+
+video_stabilization_ui::video_stabilization_ui(create_config conf)
+    : pimpl(std::make_unique<impl>(conf)) { (void) 0; }
+
+video_stabilization_ui::~video_stabilization_ui() = default;
+
+void video_stabilization_ui::show_ui() const {
+    pimpl->show_ui();
+}

+ 56 - 0
src/image_process_v5/video_stabilization.h

@@ -0,0 +1,56 @@
+#ifndef VIDEO_STABILIZATION_H
+#define VIDEO_STABILIZATION_H
+
+#include "sp_image.h"
+
+class video_stabilization {
+public:
+    struct point_tracker_config {
+        size_t max_feature_points = 200;
+    };
+
+    struct transform_stabilization_config {
+        size_t future_frames = 20;
+        size_t history_frames = 20;
+        float smooth_radius = 50;
+        float smooth_factor = 1e5;
+        float beta = 1; // for prev_traj
+    };
+
+    struct create_config {
+        bool stereo_mode = false;
+        float enlarge_factor = 0.1f;
+        point_tracker_config pt_conf = {};
+        transform_stabilization_config st_conf = {};
+    };
+
+    explicit video_stabilization(create_config conf);
+
+    ~video_stabilization();
+
+    [[nodiscard]] std::optional<sp_image> process(const sp_image &img) const;
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+class video_stabilization_ui {
+public:
+    struct create_config {
+        obj_name_type in_name = 0, out_name = 0;
+        video_stabilization::create_config opts = {};
+    };
+
+    explicit video_stabilization_ui(create_config conf);
+
+    ~video_stabilization_ui();
+
+    void show_ui() const;
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#endif //VIDEO_STABILIZATION_H

+ 22 - 5
src/impl/apps/remote_ar/remote_ar_v2.cpp

@@ -43,6 +43,13 @@ app_remote_ar_v2::app_remote_ar_v2(create_config _conf)
         img_isp.emplace(sub_conf);
     }
 
+    if (true) {
+        auto sub_conf = video_stabilization_ui::create_config{
+            .in_name = left_img_fix_id, .out_name = left_img_stab_id
+        };
+        video_stab.emplace(sub_conf);
+    }
+
     if (true) {
         auto sub_conf = stereo_output_helper::create_config();
         sub_conf.left_name = left_img_id;
@@ -56,7 +63,7 @@ app_remote_ar_v2::app_remote_ar_v2(create_config _conf)
     if (true) {
         auto sub_conf = image_viewer_v2::create_config();
         sub_conf.items.emplace_back(uvc_img_id, "Endoscope", true);
-        sub_conf.items.emplace_back(left_img_fix_id, "Left", true);
+        sub_conf.items.emplace_back(left_img_stab_id, "Left", true);
         sub_conf.items.emplace_back(right_img_id, "Right", true);
         bg_viewer.emplace(sub_conf);
     }
@@ -104,10 +111,20 @@ void app_remote_ar_v2::show_ui() {
         ImGui::PushItemWidth(200);
 
         if (ImGui::CollapsingHeader("MVS Camera")) {
-            auto id_guard = imgui_id_guard("mvs_camera");
-            mvs_cam->show();
-            ImGui::SeparatorText("Enhance");
-            img_isp->show_ui();
+            if (true) {
+                auto id_guard = imgui_id_guard("mvs_camera");
+                mvs_cam->show();
+            }
+            if (true) {
+                auto id_guard = imgui_id_guard("image_enhance");
+                ImGui::SeparatorText("Enhance");
+                img_isp->show_ui();
+            }
+            if (true) {
+                auto id_guard = imgui_id_guard("video_stabilization");
+                ImGui::SeparatorText("Stabilization");
+                video_stab->show_ui();
+            }
         }
 
         if (ImGui::CollapsingHeader("UVC Camera")) {

+ 4 - 1
src/impl/apps/remote_ar/remote_ar_v2.h

@@ -7,6 +7,7 @@
 #include "image_process_v5/image_viewer.h"
 #include "image_process_v5/image_process.h"
 #include "image_process_v5/image_enhance.h"
+#include "image_process_v5/video_stabilization.h"
 #include "module/image_streamer.h"
 #include "module_v5/versatile_saver.h"
 #include "module_v5/oblique_calibrator.h"
@@ -32,11 +33,13 @@ private:
             right_img_id = 2,
             output_img_id = 3,
             uvc_img_id = 4,
-            left_img_fix_id = 5;
+            left_img_fix_id = 5,
+            left_img_stab_id = 6;
 
     std::optional<mvs_camera_ui> mvs_cam;
     std::optional<uvc_camera_ui> uvc_cam;
     std::optional<image_enhance_ui> img_isp;
+    std::optional<video_stabilization_ui> video_stab;
     std::optional<stereo_output_helper> output_helper;
     std::optional<image_viewer_v2> bg_viewer;
     std::optional<image_streamer> streamer;

+ 1 - 1
src/impl/main_impl.cpp

@@ -148,7 +148,7 @@ void init_all() {
     main_ob = new object_manager_v2({.ctx = main_ctx});
     g_memory_manager = new memory_manager();
 
-    constexpr auto background_thread_count = 6; // TODO: load this in config file
+    constexpr auto background_thread_count = 1; // TODO: load this in config file
     g_thread_pool = new BS::thread_pool(background_thread_count);
 
     auto app_conf = app_selector::create_config();