пре 2 година · 055e146e5c
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,96 @@
 
															+cmake_minimum_required(VERSION 3.26)
															+project(TinyPlayer3 LANGUAGES C CXX)
															+
															+# Build all C++ sources as C++20 and fail at configure time if the compiler
															+# cannot provide it, instead of silently decaying to an older standard.
															+set(CMAKE_CXX_STANDARD 20)
															+set(CMAKE_CXX_STANDARD_REQUIRED ON)
															+
															+add_executable(${PROJECT_NAME}
															+        src/main.cpp
															+        src/frame_decoder/decoder_base.cpp
															+        src/frame_receiver/receiver_base.cpp
															+        src/frame_receiver/receiver_tcp.cpp
															+        src/frame_receiver/receiver_udp_fec.cpp
															+        src/main_controller.cpp
															+        src/main_player.cpp
															+        src/simple_mq.cpp
															+        src/third_party/rs.c)
															+
															+target_include_directories(${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
															+
															+# NOTE: every target_link_libraries() call below uses the PRIVATE keyword.
															+# CMake rejects a target that mixes the keyword and the plain signature,
															+# so keep new calls in the same form.
															+
															+# CUDA config
															+# The CUDA:: imported targets carry the toolkit's include and library
															+# locations, so no hard-coded link directory is required.
															+find_package(CUDAToolkit REQUIRED)
															+target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::cudart CUDA::cuda_driver)
															+
															+# spdlog config
															+find_package(spdlog REQUIRED)
															+target_link_libraries(${PROJECT_NAME} PRIVATE spdlog::spdlog)
															+target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
															+
															+# OpenCV config
															+find_package(OpenCV REQUIRED COMPONENTS imgcodecs)
															+target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
															+target_link_libraries(${PROJECT_NAME} PRIVATE ${OpenCV_LIBS})
															+
															+# glfw config
															+# The Windows locations are cache entries: the defaults match the original
															+# developer machine and can be overridden with -DGLFW_INCLUDE_DIR=... etc.
															+if (WIN32)
															+    set(GLFW_INCLUDE_DIR "C:/BuildEssentials/VS2019Libs/include" CACHE PATH "Directory containing the GLFW headers")
															+    set(GLFW_LIB_DIR "C:/BuildEssentials/VS2019Libs/lib" CACHE PATH "Directory containing the glfw3 library")
															+    find_library(GLFW_LIB glfw3 HINTS "${GLFW_LIB_DIR}" REQUIRED)
															+    target_include_directories(${PROJECT_NAME} PRIVATE "${GLFW_INCLUDE_DIR}")
															+    target_link_libraries(${PROJECT_NAME} PRIVATE "${GLFW_LIB}")
															+else ()
															+    find_package(glfw3 REQUIRED)
															+    target_link_libraries(${PROJECT_NAME} PRIVATE glfw)
															+endif ()
															+
															+# glad config
															+# glad is compiled from source; GLAD_DIR must contain include/ and src/gl.c.
															+if (WIN32)
															+    set(GLAD_DIR "C:/BuildEssentials/Library/glad" CACHE PATH "Root of the generated glad loader")
															+else ()
															+    set(GLAD_DIR "/home/tpx/src/glad" CACHE PATH "Root of the generated glad loader")
															+endif ()
															+target_include_directories(${PROJECT_NAME} PRIVATE "${GLAD_DIR}/include")
															+target_sources(${PROJECT_NAME} PRIVATE "${GLAD_DIR}/src/gl.c")
															+target_sources(${PROJECT_NAME} PRIVATE src/simple_opengl.cpp)
															+
															+# imgui config
															+# Dear ImGui is compiled from source together with its GLFW/OpenGL3 backends.
															+if (WIN32)
															+    set(IMGUI_DIR "C:/BuildEssentials/Library/imgui-1.89.5" CACHE PATH "Root of the Dear ImGui source tree")
															+else ()
															+    set(IMGUI_DIR "/home/tpx/src/imgui-1.90" CACHE PATH "Root of the Dear ImGui source tree")
															+endif ()
															+set(IMGUI_BACKENDS_DIR "${IMGUI_DIR}/backends")
															+target_include_directories(${PROJECT_NAME} PRIVATE "${IMGUI_DIR}" "${IMGUI_BACKENDS_DIR}")
															+target_sources(${PROJECT_NAME} PRIVATE
															+        "${IMGUI_DIR}/imgui.cpp"
															+        "${IMGUI_DIR}/imgui_draw.cpp"
															+        "${IMGUI_DIR}/imgui_tables.cpp"
															+        "${IMGUI_DIR}/imgui_widgets.cpp"
															+        "${IMGUI_BACKENDS_DIR}/imgui_impl_glfw.cpp"
															+        "${IMGUI_BACKENDS_DIR}/imgui_impl_opengl3.cpp")
															+target_compile_definitions(${PROJECT_NAME} PRIVATE HAVE_IMGUI)
															+
															+# NvDec config
															+# REQUIRED makes a missing nvcuvid library a configure error instead of
															+# a "NVDEC_LIB-NOTFOUND" entry on the link line.
															+if (WIN32)
															+    set(NVCODEC_DIR "C:/BuildEssentials/CUDA/Video_Codec_SDK_12.0.16" CACHE PATH "Root of the NVIDIA Video Codec SDK")
															+    find_library(NVDEC_LIB nvcuvid HINTS "${NVCODEC_DIR}/Lib/x64" REQUIRED)
															+else ()
															+    set(NVCODEC_DIR "/home/tpx/src/Video_Codec_SDK_12.0.16" CACHE PATH "Root of the NVIDIA Video Codec SDK")
															+    find_library(NVDEC_LIB nvcuvid REQUIRED)
															+endif ()
															+set(NVCODEC_INCLUDE_DIR "${NVCODEC_DIR}/Interface")
															+target_include_directories(${PROJECT_NAME} PRIVATE "${NVCODEC_INCLUDE_DIR}")
															+target_link_libraries(${PROJECT_NAME} PRIVATE "${NVDEC_LIB}")
															+target_sources(${PROJECT_NAME} PRIVATE src/frame_decoder/decoder_nvdec.cpp)
															+
															+# nvJPEG config
															+# Link the imported target located by find_package(CUDAToolkit) rather than a
															+# bare library name resolved through a hard-coded directory.
															+target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::nvjpeg)
															+target_sources(${PROJECT_NAME} PRIVATE src/frame_decoder/decoder_nvjpeg.cpp)
															+
															+# Boost config
															+# Boost::iostreams also provides the Boost header search path.
															+find_package(Boost REQUIRED COMPONENTS iostreams)
															+target_link_libraries(${PROJECT_NAME} PRIVATE Boost::iostreams)
															+
															+# JSON config
															+find_package(nlohmann_json REQUIRED)
															+target_link_libraries(${PROJECT_NAME} PRIVATE nlohmann_json::nlohmann_json)
														
--- a/src/cuda_helper.hpp
+++ b/src/cuda_helper.hpp
@@ -0,0 +1,59 @@
 
															+#ifndef REMOTEAR3_CUDA_HELPER_H
														
 
															+#define REMOTEAR3_CUDA_HELPER_H
														
 
															+
														
 
															+#include "utility.hpp"
														
 
															+
														
 
															+#include <cuda.h>
														
 
															+#include <cuda_runtime.h>
														
 
															+#include <nppdefs.h>
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															+// Checks the status of a CUDA driver API call.
															+// Returns true when api_ret is CUDA_SUCCESS. Otherwise logs the call text,
															+// call site, numeric code, error name and error description, then leaves
															+// through RET_ERROR_B (defined outside this header).
															+inline bool check_cuda_api_call(CUresult api_ret, unsigned int line_number,
															+                                const char *file_name, const char *api_call_str) {
															+    if (api_ret == CUDA_SUCCESS) [[likely]] {
															+        return true;
															+    }
															+
															+    // Fall back to a placeholder when the driver cannot translate the code.
															+    const char *name_text = nullptr;
															+    if (cuGetErrorName(api_ret, &name_text) != CUDA_SUCCESS) [[unlikely]] {
															+        name_text = "Unknown";
															+    }
															+    const char *detail_text = nullptr;
															+    if (cuGetErrorString(api_ret, &detail_text) != CUDA_SUCCESS) [[unlikely]] {
															+        detail_text = "Unknown";
															+    }
															+
															+    SPDLOG_ERROR("CUDA driver api call {} failed at {}:{} with error 0x{:x}({}):{}.",
															+                 api_call_str, file_name, line_number,
															+                 (int) api_ret, name_text, detail_text);
															+    RET_ERROR_B;
															+}
														
 
															+
														
 
															+// Checks the status of a CUDA runtime API call.
															+// Returns true when api_ret is cudaSuccess. Otherwise logs the call text,
															+// call site, numeric code, error name and error description (the same
															+// layout as the driver-API overload), then leaves through RET_ERROR_B
															+// (defined outside this header).
															+inline bool check_cuda_api_call(cudaError api_ret, unsigned int line_number,
															+                                const char *file_name, const char *api_call_str) {
															+    if (api_ret == cudaSuccess) [[likely]] return true;
															+    // The runtime translates the code itself, so the log carries the
															+    // name and description rather than only a bare number.
															+    SPDLOG_ERROR("CUDA runtime api call {} failed at {}:{} with error 0x{:x}({}):{}.",
															+                 api_call_str, file_name, line_number, (int) api_ret,
															+                 cudaGetErrorName(api_ret), cudaGetErrorString(api_ret));
															+    RET_ERROR_B;
															+}
														
 
															+
														
 
															+// Checks the status of an NPP call.
															+// Returns true when api_ret is NPP_SUCCESS. Otherwise logs the call text,
															+// call site and status code, then leaves through RET_ERROR_B (defined
															+// outside this header).
															+inline bool check_cuda_api_call(NppStatus api_ret, unsigned int line_number,
															+                                const char *file_name, const char *api_call_str) {
															+    if (api_ret == NPP_SUCCESS) [[likely]] return true;
															+    // NPP status codes are signed and its error codes are negative; a hex
															+    // format would print them as "0x-...", so the code is logged in decimal.
															+    SPDLOG_ERROR("NPP api call {} failed at {}:{} with error {}.",
															+                 api_call_str, file_name, line_number, (int) api_ret);
															+    RET_ERROR_B;
															+}
														
 
															+
														
 
															+#define CUDA_API_CHECK(api_call) \
														
 
															+    check_cuda_api_call( \
														
 
															+        api_call, __LINE__, __FILE__, #api_call)
														
 
															+
														
 
															+#define CUDA_API_CHECK_P(api_call) \
														
 
															+    if (!check_cuda_api_call( \
														
 
															+        api_call, __LINE__, __FILE__, #api_call)) [[unlikely]] \
														
 
															+        return nullptr
														
 
															+
														
 
															+// Retains the primary context of CUDA device 0 and pushes it onto the
															+// calling thread's context stack. Failures are logged by CUDA_API_CHECK.
															+// Each step runs only if the previous one succeeded, so an uninitialized
															+// device or context handle is never passed to the driver.
															+// The retain is not paired with a release inside this function.
															+inline void use_primary_cuda_ctx() {
															+    CUdevice cuda_device;
															+    if (!CUDA_API_CHECK(cuDeviceGet(&cuda_device, 0))) return;
															+    CUcontext cuda_ctx;
															+    if (!CUDA_API_CHECK(cuDevicePrimaryCtxRetain(&cuda_ctx, cuda_device))) return;
															+    CUDA_API_CHECK(cuCtxPushCurrent(cuda_ctx));
															+}
														
 
															+
														
 
															+#endif //REMOTEAR3_CUDA_HELPER_H
														
--- a/src/frame_decoder/decoder_base.cpp
+++ b/src/frame_decoder/decoder_base.cpp
@@ -0,0 +1,33 @@
 
															+#include "decoder_base.h"
														
 
															+#include "simple_mq.h"
														
 
															+#include "variable_defs.h"
														
 
															+
														
 
															+using namespace simple_mq_singleton;
														
 
															+
														
 
															+// Replaces the owned buffer with a fresh allocation of _length bytes.
															+//   _ptr    - data to copy into the new buffer; when nullptr the buffer
															+//             is allocated but its contents are left uninitialized
															+//   _length - size of the new buffer in bytes
															+//   _idr    - stored unchanged in the idr flag
															+// If the allocation fails (or yields nullptr for a zero-byte request),
															+// ptr is nullptr and length is 0, so the two fields never disagree.
															+void video_nal::create(void *_ptr, size_t _length, bool _idr) {
															+    // Allocate and fill the new buffer before releasing the old one, so
															+    // a source that points into the current buffer is still readable.
															+    auto new_ptr = (uint8_t *) malloc(_length);
															+    if (new_ptr != nullptr && _ptr != nullptr) {
															+        memcpy(new_ptr, _ptr, _length);
															+    }
															+    free(ptr);
															+    ptr = new_ptr;
															+    length = (new_ptr != nullptr) ? _length : 0;
															+    idr = _idr;
															+}
														
 
															+
														
 
															+video_nal::~video_nal() {
															+    free(ptr); // releases the buffer allocated by create(); ptr defaults to nullptr, for which free() does nothing
															+}
														
 
															+
														
 
															+// Blocks until the RENDER_BUSY variable in the message queue reads false.
															+// Each query also refreshes the counter that is then passed to
															+// wait_variable() for the same variable.
															+void decoder_base::wait_render_idle() {
															+    uint64_t seen_cnt = 0;
															+    while (mq().query_variable<bool>(RENDER_BUSY, &seen_cnt)) {
															+        mq().wait_variable(RENDER_BUSY, seen_cnt);
															+    }
															+}
														
 
															+
														
 
															+void decoder_base::commit_frame(const cv::cuda::GpuMat &img) {
															+    mq().update_variable(FRAME_OUT, img); // publishes img as the FRAME_OUT variable of the queue returned by mq()
															+}
														
--- a/src/frame_decoder/decoder_base.h
+++ b/src/frame_decoder/decoder_base.h
@@ -0,0 +1,38 @@
 
															+#ifndef TINYPLAYER3_DECODER_BASE_H
														
 
															+#define TINYPLAYER3_DECODER_BASE_H
														
 
															+
														
 
															+#include <opencv2/core/cuda.hpp>
														
 
															+
														
 
															+enum decoder_type { // identifies a decoder implementation
															+    DECODER_NVDEC, // presumably selects decoder_nvdec - confirm against the code that reads this enum
															+    DECODER_JPEG // presumably selects decoder_nvjpeg - confirm against the code that reads this enum
															+};
														
 
															+
														
 
															+// Owning holder for one encoded input buffer handed to a decoder.
															+struct video_nal {
															+    uint8_t *ptr = nullptr; // buffer allocated by create() with malloc(), freed by the destructor
															+    size_t length = 0;      // size of the buffer in bytes
															+    bool idr = false;       // flag stored by create(); presumably marks an IDR picture - confirm with the producers
															+
															+    video_nal() = default;
															+
															+    // The destructor frees ptr, so an implicit copy would make two objects
															+    // free the same buffer. Copying is therefore disabled; share or
															+    // transfer instances through frame_ptr_type (std::unique_ptr).
															+    video_nal(const video_nal &) = delete;
															+
															+    video_nal &operator=(const video_nal &) = delete;
															+
															+    // Replaces the buffer with a new allocation of `length` bytes, copying
															+    // from `ptr` when it is not nullptr, and stores `idr`.
															+    void create(void *ptr, size_t length, bool idr);
															+
															+    ~video_nal();
															+};
														
 
															+
														
 
															+class decoder_base { // abstract decoder interface; decoder_nvdec and decoder_nvjpeg derive from it
															+public:
															+
															+    virtual ~decoder_base() = default; // virtual so a decoder can be destroyed through a decoder_base pointer
															+
															+    using frame_ptr_type = std::unique_ptr<video_nal>; // owning handle to one encoded input buffer
															+
															+    virtual void decode(frame_ptr_type &&frame) = 0; // decodes one encoded buffer; implemented by each concrete decoder
															+
															+protected:
															+
															+    static void wait_render_idle(); // blocks while the RENDER_BUSY message-queue variable reads true
															+
															+    static void commit_frame(const cv::cuda::GpuMat &img); // publishes img as the FRAME_OUT message-queue variable
															+
															+};
														
 
															+
														
 
															+#endif //TINYPLAYER3_DECODER_BASE_H
														
--- a/src/frame_decoder/decoder_nvdec.cpp
+++ b/src/frame_decoder/decoder_nvdec.cpp
@@ -0,0 +1,156 @@
 
															+#include "decoder_nvdec.h"
														
 
															+#include "cuda_helper.hpp"
														
 
															+
														
 
															+#include <nvcuvid.h>
														
 
															+
														
 
															+// Private implementation of decoder_nvdec: an NVDEC (cuvid) parser plus
															+// decoder for 8-bit 4:2:0 HEVC. Each decoded picture is copied into an
															+// NV12 GpuMat and published through commit_frame().
															+struct decoder_nvdec::impl {
															+
															+    CUvideoparser parser = nullptr;
															+    CUvideodecoder decoder = nullptr; // created lazily by the first sequence callback
															+
															+    uint8_t decode_surface = 0; // number of decode surfaces, set by the first sequence callback
															+    cv::Size frame_size;
															+    cv::cuda::GpuMat frame; // nv12 image
															+
															+    // Verifies that the device can decode the expected format and creates
															+    // the parser. The decoder itself is created once the stream format is
															+    // known (see sequence_callback_impl).
															+    impl() {
															+        // query decoder capability
															+        CUVIDDECODECAPS caps = {};
															+        caps.eCodecType = cudaVideoCodec_HEVC;
															+        caps.eChromaFormat = cudaVideoChromaFormat_420;
															+        caps.nBitDepthMinus8 = 0; // 8-bit
															+        CUDA_API_CHECK(cuvidGetDecoderCaps(&caps));
															+
															+        // check decoder capability
															+        CALL_CHECK(caps.bIsSupported == 1);
															+        CALL_CHECK(caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12));
															+
															+        // create parser
															+        CUVIDPARSERPARAMS params = {};
															+        params.CodecType = cudaVideoCodec_HEVC;
															+        params.ulMaxNumDecodeSurfaces = 1; // dummy value according to document
															+        params.ulMaxDisplayDelay = 0; // no delay
															+        params.pUserData = this;
															+        params.pfnSequenceCallback = sequence_callback;
															+        params.pfnDecodePicture = ready_decode;
															+        assert(parser == nullptr);
															+        CUDA_API_CHECK(cuvidCreateVideoParser(&parser, &params));
															+    }
															+
															+    // Releases the parser and the decoder; without this both objects leak
															+    // every time a decoder_nvdec is destroyed.
															+    // NOTE(review): assumes the CUDA context used to create the decoder is
															+    // current on the destroying thread - confirm against the callers.
															+    ~impl() {
															+        if (parser != nullptr) {
															+            CUDA_API_CHECK(cuvidDestroyVideoParser(parser));
															+        }
															+        if (decoder != nullptr) {
															+            CUDA_API_CHECK(cuvidDestroyDecoder(decoder));
															+        }
															+    }
															+
															+    // Creates the hardware decoder for the current frame_size and
															+    // decode_surface count.
															+    void create_decoder() {
															+        CUVIDDECODECREATEINFO decoder_info = {};
															+        decoder_info.ulWidth = frame_size.width;
															+        decoder_info.ulHeight = frame_size.height;
															+        decoder_info.ulNumDecodeSurfaces = decode_surface;
															+        decoder_info.CodecType = cudaVideoCodec_HEVC;
															+        decoder_info.ChromaFormat = cudaVideoChromaFormat_420;
															+        decoder_info.ulCreationFlags = cudaVideoCreate_PreferCUVID;
															+        decoder_info.bitDepthMinus8 = 0; // 8-bit
															+        decoder_info.OutputFormat = cudaVideoSurfaceFormat_NV12;
															+        decoder_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
															+        decoder_info.ulTargetWidth = frame_size.width;
															+        decoder_info.ulTargetHeight = frame_size.height;
															+        decoder_info.ulNumOutputSurfaces = 2; // TODO; learn more about this
															+
															+        CUDA_API_CHECK(cuvidCreateDecoder(&decoder, &decoder_info));
															+        assert(decoder != nullptr);
															+    }
															+
															+    // Allocates the output image: `height` luma rows followed by
															+    // `height / 2` rows of interleaved chroma, one byte per element.
															+    void create_frame() {
															+        assert((frame_size.width % 2) == 0);
															+        assert((frame_size.height % 2) == 0);
															+        auto frame_height = frame_size.height + (frame_size.height >> 1);
															+        frame.create(frame_height, frame_size.width, CV_8UC1);
															+    }
															+
															+    // Parser sequence callback. The first call sizes and creates the
															+    // decoder and the output image; later calls only assert that the
															+    // stream format did not change. Returns the decode surface count.
															+    int sequence_callback_impl(CUVIDEOFORMAT *format) {
															+        // ensure consistency
															+        assert(format->codec == cudaVideoCodec_HEVC);
															+        assert(format->progressive_sequence == 1); // progressive
															+        assert(format->bit_depth_luma_minus8 == 0); // 8-bit
															+        assert(format->bit_depth_chroma_minus8 == 0); // 8-bit
															+        assert(format->chroma_format == cudaVideoChromaFormat_420);
															+
															+        if (decoder == nullptr) {
															+            frame_size.width = format->coded_width;
															+            frame_size.height = format->coded_height;
															+            decode_surface = format->min_num_decode_surfaces + 4;
															+            create_decoder();
															+            create_frame();
															+        } else {
															+            assert(format->coded_width == frame_size.width);
															+            assert(format->coded_height == frame_size.height);
															+        }
															+
															+        return decode_surface;
															+    }
															+
															+    // C-style trampoline registered with the parser; ptr is the impl.
															+    static int sequence_callback(void *ptr, CUVIDEOFORMAT *format) {
															+        assert(ptr != nullptr);
															+        return ((impl *) ptr)->sequence_callback_impl(format);
															+    }
															+
															+    // Parser decode callback: decodes the picture, maps the result, copies
															+    // the luma and chroma planes into `frame` and publishes it.
															+    // Returns 1 on success and 0 when the picture could not be decoded or
															+    // mapped, so the copy never runs on an unset device pointer.
															+    int ready_decode_impl(CUVIDPICPARAMS *pic) {
															+        // decode image
															+        assert(decoder != nullptr);
															+        if (!CUDA_API_CHECK(cuvidDecodePicture(decoder, pic))) return 0;
															+
															+        // map frame
															+        CUdeviceptr ptr_in = 0;
															+        unsigned int pitch_in = 0;
															+        CUVIDPROCPARAMS proc_params = {};
															+        proc_params.progressive_frame = 1; // progressive frame
															+        proc_params.second_field = 1;
															+        assert(decoder != nullptr);
															+        if (!CUDA_API_CHECK(cuvidMapVideoFrame(decoder, pic->CurrPicIdx, &ptr_in, &pitch_in, &proc_params))) return 0;
															+        assert(ptr_in != 0);
															+
															+        // check decode status
															+        CUVIDGETDECODESTATUS status = {};
															+        CUDA_API_CHECK(cuvidGetDecodeStatus(decoder, pic->CurrPicIdx, &status));
															+        CALL_CHECK(status.decodeStatus == cuvidDecodeStatus_Success);
															+
															+        // avoid frame overwritten
															+        wait_render_idle();
															+
															+        // copy frame
															+        auto luma_in = (void *) ptr_in;
															+        auto luma_out = frame.cudaPtr();
															+        CUDA_API_CHECK(cudaMemcpy2D(luma_out, frame.step, luma_in, pitch_in,
															+                                    frame_size.width, frame_size.height, cudaMemcpyDeviceToDevice));
															+        // the chroma plane starts after the luma rows, rounded up to an even row count
															+        auto chroma_in = (char *) ptr_in + pitch_in * ((frame_size.height + 1) & ~1);
															+        auto chroma_out = frame.ptr(frame_size.height);
															+        CUDA_API_CHECK(cudaMemcpy2D(chroma_out, frame.step, chroma_in, pitch_in,
															+                                    frame_size.width, frame_size.height >> 1, cudaMemcpyDeviceToDevice));
															+
															+        // unmap frame
															+        CUDA_API_CHECK(cuvidUnmapVideoFrame(decoder, ptr_in));
															+
															+        // commit frame
															+        commit_frame(frame);
															+
															+        return 1; // success
															+    }
															+
															+    // C-style trampoline registered with the parser; ptr is the impl.
															+    static int ready_decode(void *ptr, CUVIDPICPARAMS *pic) {
															+        assert(ptr != nullptr);
															+        return ((impl *) ptr)->ready_decode_impl(pic);
															+    }
															+
															+    // Feeds one encoded buffer to the parser as a complete picture; the
															+    // parser invokes the callbacks above from inside this call.
															+    void decode(frame_ptr_type &&ptr) {
															+        CUVIDSOURCEDATAPACKET packet = {};
															+        packet.flags = CUVID_PKT_ENDOFPICTURE;
															+        packet.payload_size = ptr->length;
															+        packet.payload = ptr->ptr;
															+        assert(parser != nullptr);
															+        CUDA_API_CHECK(cuvidParseVideoData(parser, &packet));
															+    }
															+};
														
 
															+
														
 
															+decoder_nvdec::decoder_nvdec()
															+        : pimpl(std::make_unique<impl>()) {} // all state lives in impl, whose constructor creates the parser
															+
															+decoder_nvdec::~decoder_nvdec() = default; // defined here, where impl is a complete type, so unique_ptr<impl> can delete it
															+
															+void decoder_nvdec::decode(decoder_base::frame_ptr_type &&frame) {
															+    pimpl->decode(std::move(frame)); // forwards the encoded buffer to impl::decode
															+}
														
--- a/src/frame_decoder/decoder_nvdec.h
+++ b/src/frame_decoder/decoder_nvdec.h
@@ -0,0 +1,22 @@
 
															+#ifndef TINYPLAYER3_DECODER_NVDEC_H
														
 
															+#define TINYPLAYER3_DECODER_NVDEC_H
														
 
															+
														
 
															+#include "decoder_base.h"
														
 
															+
														
 
															+#include <memory>
														
 
															+
														
 
															+class decoder_nvdec : public decoder_base { // decoder_base implementation whose details are hidden behind impl
															+public:
															+    decoder_nvdec();
															+
															+    ~decoder_nvdec() override; // declared here and defined in the .cpp because impl is incomplete in this header
															+
															+    void decode(decoder_base::frame_ptr_type &&frame) override; // forwards the buffer to the private implementation
															+
															+private:
															+    struct impl; // defined in decoder_nvdec.cpp
															+    std::unique_ptr<impl> pimpl; // sole owner of the implementation object
															+};
														
 
															+
														
 
															+
														
 
															+#endif //TINYPLAYER3_DECODER_NVDEC_H
														
--- a/src/frame_decoder/decoder_nvjpeg.cpp
+++ b/src/frame_decoder/decoder_nvjpeg.cpp
@@ -0,0 +1,78 @@
 
															+#include "decoder_nvjpeg.h"
														
 
															+#include "cuda_helper.hpp"
														
 
															+#include "simple_mq.h"
														
 
															+#include "variable_defs.h"
														
 
															+
														
 
															+#include <nvjpeg.h>
														
 
															+
														
 
															+// Checks the status of an nvJPEG call.
															+// Returns true when api_ret is NVJPEG_STATUS_SUCCESS. Otherwise logs the
															+// call text, call site and numeric status, then leaves through
															+// RET_ERROR_B (defined outside this file).
															+bool check_nvjpeg_api_call(nvjpegStatus_t api_ret, unsigned int line_number,
															+                           const char *file_name, const char *api_call_str) {
															+    if (api_ret == NVJPEG_STATUS_SUCCESS) [[likely]] {
															+        return true;
															+    }
															+    const auto status_code = (int) api_ret;
															+    SPDLOG_ERROR("nvJPEG api call {} failed at {}:{} with error 0x{:x}.",
															+                 api_call_str, file_name, line_number, status_code);
															+    RET_ERROR_B;
															+}
														
 
															+
														
 
															+#define API_CHECK(api_call) \
															+    check_nvjpeg_api_call( \
															+        api_call, __LINE__, __FILE__, #api_call) // evaluates api_call and checks its status, passing the call site and the call's source text; yields the bool result
														
 
															+
														
 
															+namespace decoder_nvjpeg_impl {
															+    nvjpegHandle_t handle = nullptr; // library handle shared by every decoder_nvjpeg::impl; created by the first impl constructed and not destroyed in the visible code
															+}
														
 
															+
														
 
															+using namespace decoder_nvjpeg_impl;
														
 
															+using namespace simple_mq_singleton;
														
 
															+
														
 
															+// Private implementation of decoder_nvjpeg: decodes one JPEG buffer into
															+// an interleaved 8-bit RGB GpuMat with nvJPEG and publishes it through
															+// commit_frame().
															+struct decoder_nvjpeg::impl {
															+
															+    nvjpegJpegState_t dec_state = nullptr; // stays nullptr when initialization failed
															+    cv::cuda::GpuMat img_rgb;
															+
															+    // Creates the shared nvJPEG handle on first use, then this decoder's
															+    // state object. Stops early when the handle cannot be created, so a
															+    // null handle is never passed on to the library.
															+    impl() {
															+        if (handle == nullptr) [[unlikely]] {
															+            if (!API_CHECK(nvjpegCreateSimple(&handle))) return;
															+        }
															+        API_CHECK(nvjpegJpegStateCreate(handle, &dec_state));
															+    }
															+
															+    // Destroys the decode state if one was created.
															+    ~impl() {
															+        if (dec_state != nullptr) {
															+            API_CHECK(nvjpegJpegStateDestroy(dec_state));
															+        }
															+    }
															+
															+    // Decodes `frame` and commits the result. Nothing is committed when
															+    // the decoder failed to initialize, the header cannot be parsed or the
															+    // decode itself fails; the failing call is logged by API_CHECK.
															+    void decode(frame_ptr_type &&frame) {
															+        if (dec_state == nullptr) [[unlikely]] return;
															+
															+        // decode image info
															+        int channels;
															+        nvjpegChromaSubsampling_t subsampling;
															+        int width[NVJPEG_MAX_COMPONENT];
															+        int height[NVJPEG_MAX_COMPONENT];
															+        // the outputs above are only meaningful when the query succeeds
															+        if (!API_CHECK(nvjpegGetImageInfo(handle, frame->ptr, frame->length,
															+                                          &channels, &subsampling, width, height))) return;
															+
															+        // avoid frame overwritten
															+        wait_render_idle();
															+
															+        // prepare buffer
															+        assert(channels == 3);
															+        img_rgb.create(height[0], width[0], CV_8UC3);
															+        nvjpegImage_t nv_image{};
															+        nv_image.channel[0] = (uint8_t *) img_rgb.cudaPtr();
															+        nv_image.pitch[0] = img_rgb.step;
															+
															+        // decode image
															+        if (!API_CHECK(nvjpegDecode(handle, dec_state, frame->ptr, frame->length,
															+                                    NVJPEG_OUTPUT_RGBI, &nv_image, nullptr))) return;
															+
															+        // commit frame
															+        commit_frame(img_rgb);
															+    }
															+};
														
 
															+
														
 
															+decoder_nvjpeg::decoder_nvjpeg()
															+        : pimpl(std::make_unique<impl>()) {} // all state lives in impl, whose constructor creates the nvJPEG state
															+
															+decoder_nvjpeg::~decoder_nvjpeg() = default; // defined here, where impl is a complete type, so unique_ptr<impl> can delete it
															+
															+void decoder_nvjpeg::decode(decoder_base::frame_ptr_type &&frame) {
															+    pimpl->decode(std::move(frame)); // forwards the encoded buffer to impl::decode
															+}
														
--- a/src/frame_decoder/decoder_nvjpeg.h
+++ b/src/frame_decoder/decoder_nvjpeg.h
@@ -0,0 +1,20 @@
 
															+#ifndef TINYPLAYER3_DECODER_NVJPEG_H
														
 
															+#define TINYPLAYER3_DECODER_NVJPEG_H
														
 
															+
														
 
															+#include "decoder_base.h"
														
 
															+
														
 
															+// Decoder whose implementation is hidden behind a pimpl.
															+class decoder_nvjpeg : public decoder_base {
															+public:
															+    // Allocates the private implementation.
															+    decoder_nvjpeg();
															+
															+    // Declared here, defined in the source file because `impl` is incomplete in this header.
															+    ~decoder_nvjpeg() override;
															+
															+    // Overrides decoder_base::decode; the frame is passed on to the implementation.
															+    void decode(decoder_base::frame_ptr_type &&frame) override;
															+
															+private:
															+    // Forward declaration only; the definition lives in the source file.
															+    struct impl;
															+    std::unique_ptr<impl> pimpl;
															+};
														
 
															+
														
 
															+
														
 
															+#endif //TINYPLAYER3_DECODER_NVJPEG_H
														
--- a/src/frame_receiver/receiver_base.cpp
+++ b/src/frame_receiver/receiver_base.cpp
@@ -0,0 +1,32 @@
 
															+#include "receiver_base.h"
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															+using boost::asio::io_context;
														
 
															+
														
 
															+// Private state of receiver_base.
															+struct receiver_base::impl {
															+
															+    // The io_context returned by receiver_base::get_ctx() and driven by receiver_base::run().
															+    std::unique_ptr<io_context> ctx;
															+
															+    // Creates the io_context up front so it exists for the whole lifetime of the receiver.
															+    impl() : ctx(std::make_unique<io_context>()) {}
															+};
														
 
															+
														
 
															+// Allocates the private state, which in turn creates the io_context.
															+receiver_base::receiver_base() : pimpl{std::make_unique<impl>()} {
															+}
															+
															+// Defaulted in the source file; the header only forward-declares `impl`.
															+receiver_base::~receiver_base() = default;
														
 
															+
														
 
															+// Runs the receiver's io_context on the calling thread and logs when it starts and stops.
															+void receiver_base::run() {
															+    auto &context = *pimpl->ctx;
															+
															+    // The work guard keeps io_context::run() from returning just because no handler
															+    // is pending; it is released when this function leaves scope.
															+    [[maybe_unused]] auto keep_alive = boost::asio::make_work_guard(context);
															+
															+    SPDLOG_INFO("Receiver started.");
															+    context.run();
															+    SPDLOG_INFO("Receiver stopped.");
															+}
														
 
															+
														
 
															+// Returns a non-owning pointer to the io_context held by this receiver.
															+boost::asio::io_context *receiver_base::get_ctx() {
															+    const auto &owned = pimpl->ctx;
															+    return owned.get();
															+}
														
--- a/src/frame_receiver/receiver_base.h
+++ b/src/frame_receiver/receiver_base.h
@@ -0,0 +1,37 @@
 
															+#ifndef TINYPLAYER3_RECEIVER_BASE_H
														
 
															+#define TINYPLAYER3_RECEIVER_BASE_H
														
 
															+
														
 
															+#include "frame_decoder/decoder_base.h"
														
 
															+
														
 
															+#include <boost/asio/io_context.hpp>
														
 
															+
														
 
															+#include <memory>
														
 
															+
														
 
															+// Transport variants a receiver can be built for.
															+// NOTE(review): the consumer of these values is not shown next to this declaration;
															+// confirm the mapping to the concrete receiver classes against the caller.
															+enum receiver_type {
															+    RECEIVER_TCP = 0,
															+    RECEIVER_UDP = 1,
															+    RECEIVER_UDP_FEC = 2
															+};
														
 
															+
														
 
															+// Parameters handed to the receiver factories (receiver_tcp::create, receiver_udp_fec::create).
															+//
															+// The scalar members carry default initializers so a default-constructed config holds
															+// well-defined values; in particular `decoder` is then reliably null, which is what the
															+// factories assert against. The struct remains an aggregate, so existing brace or
															+// designated initialisation keeps working.
															+struct receiver_config {
															+    // Textual IP address of the server; parsed with address::from_string() by the factories.
															+    std::string server_addr;
															+    // Port of the server.
															+    uint16_t server_port = 0;
															+    // Decoder that receives completed frames; stored as a raw pointer by the receivers.
															+    decoder_base *decoder = nullptr;
															+};
														
 
															+
														
 
															+// Common base of all receivers: owns the io_context the concrete receivers run on.
															+class receiver_base {
															+public:
															+    // Creates the private state, including the io_context.
															+    receiver_base();
															+
															+    // Virtual so receivers can be destroyed through a base pointer; defined in the
															+    // source file because `impl` is incomplete in this header.
															+    virtual ~receiver_base();
															+
															+    // Runs the io_context on the calling thread; returns once the context stops.
															+    void run();
															+
															+    // Non-owning access to the io_context, used by derived receivers to create sockets
															+    // and post work.
															+    boost::asio::io_context *get_ctx();
															+
															+private:
															+    // Forward declaration only; the definition lives in the source file.
															+    struct impl;
															+    std::unique_ptr<impl> pimpl;
															+};
														
 
															+
														
 
															+#endif //TINYPLAYER3_RECEIVER_BASE_H
														
--- a/src/frame_receiver/receiver_tcp.cpp
+++ b/src/frame_receiver/receiver_tcp.cpp
@@ -0,0 +1,73 @@
 
															+#include "receiver_tcp.h"
														
 
															+#include "receiver_utility.hpp"
														
 
															+#include "utility.hpp"
														
 
															+
														
 
															+#include <boost/asio/ip/tcp.hpp>
														
 
															+#include <boost/asio/read.hpp>
														
 
															+#include <boost/asio/post.hpp>
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															+using namespace boost::asio::ip;
														
 
															+using boost::asio::buffer;
														
 
															+using boost::asio::io_context;
														
 
															+using boost::asio::post;
														
 
															+using boost::asio::read;
														
 
															+
														
 
															+// Private implementation of receiver_tcp: reads length-prefixed frames from a TCP
															+// connection with blocking calls and hands each one to the decoder.
															+struct receiver_tcp::impl {
															+
															+    // Owning receiver; supplies the io_context.
															+    receiver_tcp *q_this = nullptr;
															+    // Socket connected to the server in create().
															+    std::unique_ptr<tcp::socket> socket;
															+    // Sink for received frames (not owned). Null until create() assigns it.
															+    decoder_base *decoder = nullptr;
															+
															+    // Scratch buffer for the length prefix of a frame.
															+    smart_buffer<uint8_t> in_buf;
															+
															+    // Reads one frame: first the length prefix, then that many payload bytes, and
															+    // passes the result to the decoder. The blocking read() calls throw on failure.
															+    void receive_one_frame() {
															+        auto frame = std::make_unique<video_nal>();
															+        in_buf.create(sizeof(frame->length));
															+        read(*socket, buffer(in_buf.ptr, in_buf.length));
															+        read_binary_number(in_buf.ptr, &frame->length);
															+        frame->create(nullptr, frame->length, true);
															+        read(*socket, buffer(frame->ptr, frame->length));
															+        decoder->decode(std::move(frame));
															+    }
															+
															+    // Receives frames until an exception ends the loop.
															+    void receive_frames() {
															+        try {
															+            for (;;) {
															+                receive_one_frame();
															+            }
															+        } catch (const std::exception &e) {
															+            // Any exception ends the loop, not only a closed connection, so log the
															+            // actual reason next to the generic message instead of discarding it.
															+            SPDLOG_INFO("Receive loop terminated: {}", e.what());
															+            SPDLOG_INFO("Server closed.");
															+        }
															+    }
															+
															+    // Queues the blocking receive loop on the io_context and stops the context once
															+    // the loop has ended.
															+    void run() {
															+        post(*q_this->get_ctx(), [this] {
															+            receive_frames();
															+            q_this->get_ctx()->stop();
															+        });
															+    }
															+
															+    // Builds the implementation for `q_this`, connects to the configured server and
															+    // queues the receive loop. The caller takes ownership of the returned pointer.
															+    static impl *create(const receiver_config &conf, receiver_tcp *q_this) {
															+        auto ret = std::make_unique<impl>();
															+        assert(conf.decoder != nullptr);
															+        ret->q_this = q_this;
															+        ret->decoder = conf.decoder;
															+        auto server_ep = tcp::endpoint{address::from_string(conf.server_addr), conf.server_port};
															+        ret->socket = std::make_unique<tcp::socket>(*q_this->get_ctx());
															+        EXCEPTION_CHECK_P(ret->socket->connect(server_ep));
															+        ret->run();
															+        return ret.release();
															+    }
															+};
														
 
															+
														
 
															+// Defaulted in the source file; the header only forward-declares `impl`.
															+receiver_tcp::~receiver_tcp() = default;
															+
															+// Factory: builds a receiver and its implementation from `conf`.
															+// Returns an owning raw pointer, or nullptr when the implementation could not be created.
															+receiver_tcp *receiver_tcp::create(const receiver_config &conf) {
															+    auto receiver = std::make_unique<receiver_tcp>();
															+    std::unique_ptr<impl> state{impl::create(conf, receiver.get())};
															+    if (!state) return nullptr;
															+    receiver->pimpl = std::move(state);
															+    return receiver.release();
															+}
														
--- a/src/frame_receiver/receiver_tcp.h
+++ b/src/frame_receiver/receiver_tcp.h
@@ -0,0 +1,19 @@
 
															+#ifndef TINYPLAYER3_RECEIVER_TCP_H
														
 
															+#define TINYPLAYER3_RECEIVER_TCP_H
														
 
															+
														
 
															+#include "receiver_base.h"
														
 
															+
														
 
															+// Receiver that obtains frames over a TCP connection.
															+class receiver_tcp : public receiver_base {
															+public:
															+
															+    // Defined in the source file because `impl` is incomplete in this header.
															+    ~receiver_tcp() override;
															+
															+    // Factory; returns an owning raw pointer, or nullptr on failure.
															+    static receiver_tcp *create(const receiver_config &conf);
															+
															+private:
															+    // Forward declaration only; the definition lives in the source file.
															+    struct impl;
															+    std::unique_ptr<impl> pimpl;
															+};
														
 
															+
														
 
															+
														
 
															+#endif //TINYPLAYER3_RECEIVER_TCP_H
														
--- a/src/frame_receiver/receiver_udp_fec.cpp
+++ b/src/frame_receiver/receiver_udp_fec.cpp
@@ -0,0 +1,339 @@
 
															+#include "receiver_udp_fec.h"
														
 
															+#include "receiver_utility.hpp"
														
 
															+#include "third_party/scope_guard.hpp"
														
 
															+
														
 
															+extern "C" {
														
 
															+#include "third_party/rs.h"
														
 
															+}
														
 
															+
														
 
															+#include <boost/asio/io_context.hpp>
														
 
															+#include <boost/asio/ip/udp.hpp>
														
 
															+#include <boost/asio/post.hpp>
														
 
															+#include <boost/crc.hpp>
														
 
															+#include <boost/endian.hpp>
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															+using namespace boost::asio::ip;
														
 
															+using boost::asio::buffer;
														
 
															+using boost::asio::io_context;
														
 
															+using boost::asio::post;
														
 
															+using boost::system::error_code;
														
 
															+
														
 
															+namespace receiver_udp_fec_impl {
															+
															+    // Progress marker used for both chunks and frames.
															+    enum status_type {
															+        NOT_INIT,
															+        WAITING,
															+        READY
															+    };
															+
															+    // Buffers and Reed-Solomon codec for one FEC-protected chunk of a frame.
															+    struct smart_chunk {
															+        // Backing storage for all blocks of the chunk, laid out back to back.
															+        smart_buffer<uint8_t> block_data;
															+        // block_ptrs.ptr[i] points at block i inside block_data.
															+        smart_buffer<uint8_t *> block_ptrs;
															+        // block_miss.ptr[i] is true until block i has been stored.
															+        smart_buffer<bool> block_miss;
															+        // Number of blocks stored so far; incremented by the user of the chunk.
															+        uint8_t ready_blocks = 0;
															+        status_type status = NOT_INIT;
															+
															+        // Releases the codec; previously it was only freed when the geometry changed,
															+        // so it leaked when the chunk itself went away.
															+        // NOTE(review): this only takes effect if the container holding the chunk
															+        // runs element destructors — confirm against smart_buffer.
															+        ~smart_chunk() {
															+            deallocate();
															+        }
															+
															+        // Marks the chunk unused. Buffers and codec are kept so create() can reuse them.
															+        void reset() {
															+            ready_blocks = 0;
															+            status = NOT_INIT;
															+        }
															+
															+        // Prepares the chunk for a new round of blocks. Storage and codec are rebuilt
															+        // only when the geometry differs from the previous use; afterwards every
															+        // block is flagged missing and the chunk moves to WAITING.
															+        void create(uint8_t total_blocks, uint8_t parity_blocks, uint16_t block_size) {
															+            if (total_blocks != block_ptrs.length ||
															+                parity_blocks != last_parity_blocks ||
															+                block_size != last_block_size) [[unlikely]] {
															+                deallocate();
															+                allocate(total_blocks, parity_blocks, block_size);
															+            }
															+            memset(block_miss.ptr, true, block_miss.length * sizeof(bool));
															+            assert(status == NOT_INIT);
															+            status = WAITING;
															+        }
															+
															+        // Tries to rebuild the missing blocks. Returns true and moves the chunk to
															+        // READY on success; returns false when too few blocks are present, when no
															+        // codec is available, or when the codec reports failure.
															+        bool reconstruct() {
															+            if (ready_blocks + last_parity_blocks < block_ptrs.length) return false;
															+            // NOTE(review): reed_solomon_new() presumably returns null for shard counts
															+            // it rejects — confirm against third_party/rs.c. Never hand a null codec
															+            // to the reconstruction routine.
															+            if (rs == nullptr) return false;
															+            auto ret = reed_solomon_reconstruct(rs, block_ptrs.ptr, (uint8_t *) block_miss.ptr,
															+                                                block_ptrs.length, last_block_size);
															+            if (ret != 0) return false;
															+            assert(status == WAITING);
															+            status = READY;
															+            return true;
															+        }
															+
															+    private:
															+        // Codec for the current geometry; null while none has been created.
															+        reed_solomon *rs = nullptr;
															+        // Geometry the buffers and codec were last built for.
															+        uint8_t last_parity_blocks = 0;
															+        uint16_t last_block_size = 0;
															+
															+        // Frees the codec, if any.
															+        void deallocate() {
															+            if (rs == nullptr) return;
															+            reed_solomon_release(rs);
															+            rs = nullptr;
															+        }
															+
															+        // Creates the codec and sizes the buffers for the given geometry, then points
															+        // every entry of block_ptrs at its slice of block_data.
															+        void allocate(uint8_t total_blocks, uint8_t parity_blocks, uint16_t block_size) {
															+            assert(rs == nullptr);
															+            auto data_blocks = total_blocks - parity_blocks;
															+            rs = reed_solomon_new(data_blocks, parity_blocks);
															+            block_data.create(total_blocks * block_size);
															+            block_ptrs.create(total_blocks);
															+            block_miss.create(total_blocks);
															+            for (int i = 0; i < total_blocks; ++i) {
															+                block_ptrs.ptr[i] = block_data.ptr + block_size * i;
															+            }
															+            last_parity_blocks = parity_blocks;
															+            last_block_size = block_size;
															+        }
															+    };
															+
															+}
														
 
															+
														
 
															+using namespace receiver_udp_fec_impl;
														
 
															+
														
 
															+// Private implementation of receiver_udp_fec: receives FEC-protected frame fragments
															+// over UDP, reassembles them into frames, feeds the decoder and sends control
															+// requests back to the server.
															+struct receiver_udp_fec::impl {
															+
															+    // Header in front of the payload of every fragment; read field by field in
															+    // read_frag_header() and occupying frag_header_size bytes on the wire.
															+    struct frag_header {
															+        uint32_t frag_checksum; // CRC-32 over the datagram bytes that follow this field
															+        uint8_t frame_type; // 'I' or 'P'
															+        uint32_t frame_id;
															+        uint32_t frame_length;
															+        uint8_t chunk_count;
															+        uint8_t chunk_id;
															+        uint32_t chunk_offset;
															+        uint32_t chunk_length;
															+        uint16_t block_size;
															+        uint8_t block_count;
															+        uint8_t chunk_decode_block_count;
															+        uint8_t block_id;
															+    };
															+
															+    // Control message sent to the server (request_size bytes on the wire).
															+    // All members are zero-initialised so no indeterminate value is ever serialised,
															+    // e.g. the frame_id of an exit request.
															+    struct request_type {
															+        uint32_t request_checksum = 0; // placeholder; send_request() writes the real CRC-32
															+        uint8_t request_type = 0; // 'I' (new IDR frame), 'C' (frame confirm) or 'X' (exit)
															+        uint32_t frame_id = 0;
															+    };
															+
															+    // Reassembly state of the frame currently being received.
															+    struct frame_info {
															+        smart_buffer<smart_chunk> chunks;
															+        smart_buffer<uint8_t> data;
															+        uint32_t id = 0;
															+        uint8_t ready_chunks = 0;
															+        status_type status = NOT_INIT;
															+
															+        // Starts collecting a new frame: sizes the buffers, resets every chunk and
															+        // moves the frame to WAITING.
															+        void create(uint32_t frame_id, uint8_t chunk_count, size_t length) {
															+            chunks.create(chunk_count);
															+            data.create(length);
															+            for (auto k = 0; k < chunk_count; ++k) {
															+                chunks.ptr[k].reset();
															+            }
															+            id = frame_id;
															+            ready_chunks = 0;
															+            status = WAITING;
															+        }
															+    };
															+
															+    static constexpr auto frag_header_size = 28;
															+    static constexpr auto request_size = 9;
															+    static constexpr auto max_package_size = 64 * 1024; // 64KiB
															+    static constexpr auto udp_buffer_size = 10 * 1024 * 1024; // 10MiB
															+
															+    std::unique_ptr<udp::socket> socket;
															+    // Sink for completed frames (not owned). Null until create() assigns it.
															+    decoder_base *decoder = nullptr;
															+
															+    frame_info frame_cache;
															+    uint32_t last_frame_id = 0;
															+
															+    udp::endpoint server_ep;
															+    smart_buffer<uint8_t> in_buf, out_buf;
															+
															+    // Parses a fragment header starting at `ptr`; returns the position after it.
															+    static uint8_t *read_frag_header(uint8_t *ptr, frag_header *header) {
															+#define READ(member) ptr = read_binary_number(ptr, &header->member)
															+        READ(frag_checksum);
															+        READ(frame_type);
															+        READ(frame_id);
															+        READ(frame_length);
															+        READ(chunk_count);
															+        READ(chunk_id);
															+        READ(chunk_offset);
															+        READ(chunk_length);
															+        READ(block_size);
															+        READ(block_count);
															+        READ(chunk_decode_block_count);
															+        READ(block_id);
															+#undef READ
															+        return ptr;
															+    }
															+
															+    // Serialises a request starting at `ptr`; returns the position after it.
															+    static uint8_t *write_request(uint8_t *ptr, const request_type &req) {
															+#define WRITE(member) ptr = write_binary_number(ptr, req.member)
															+        WRITE(request_checksum);
															+        WRITE(request_type);
															+        WRITE(frame_id);
															+#undef WRITE
															+        return ptr;
															+    }
															+
															+    // Tells the server that this receiver is going away. create() can fail before
															+    // the socket exists or is open, and a destructor must never throw, so the
															+    // notification is best effort only.
															+    ~impl() {
															+        if (socket == nullptr || !socket->is_open()) return;
															+        try {
															+            request_exit();
															+        } catch (const std::exception &e) {
															+            SPDLOG_WARN("Failed to send exit request: {}", e.what());
															+        }
															+    }
															+
															+    // Discards the cached frame and starts collecting the frame described by `header`.
															+    void refresh_frame(const frag_header &header) {
															+        frame_cache.create(header.frame_id, header.chunk_count, header.frame_length);
															+    }
															+
															+    // Serialises `req`, fills in its CRC-32 and sends it to the server.
															+    void send_request(const request_type &req) {
															+        out_buf.create(request_size);
															+        write_request(out_buf.ptr, req);
															+
															+        // The checksum covers everything after the checksum field itself.
															+        auto crc = boost::crc_32_type{};
															+        crc.process_bytes(out_buf.ptr + sizeof(uint32_t),
															+                          request_size - sizeof(uint32_t));
															+        write_binary_number(out_buf.ptr, crc.checksum());
															+
															+        // send packet
															+        assert(socket != nullptr);
															+        auto buf = buffer(out_buf.ptr, request_size);
															+        socket->send_to(buf, server_ep);
															+    }
															+
															+    // Asks the server for a fresh IDR frame after frame `frame_id` could not be used.
															+    void request_idr_frame(uint32_t frame_id) {
															+        request_type req;
															+        req.request_type = 'I';
															+        req.frame_id = frame_id;
															+        send_request(req);
															+        SPDLOG_WARN("Receive frame {} error, request new IDR frame.", frame_id);
															+    }
															+
															+    // Confirms to the server that frame `frame_id` was received and decoded.
															+    void request_frame_confirm(uint32_t frame_id) {
															+        request_type req;
															+        req.request_type = 'C';
															+        req.frame_id = frame_id;
															+        send_request(req);
															+    }
															+
															+    // Announces that this receiver exits; frame_id is sent as zero.
															+    void request_exit() {
															+        request_type req;
															+        req.request_type = 'X';
															+        send_request(req);
															+    }
															+
															+    // Arms one asynchronous receive whose completion is handled by handle_package().
															+    void async_handle_package() {
															+        in_buf.create(max_package_size);
															+        auto buf = buffer(in_buf.ptr, max_package_size);
															+        socket->async_receive(buf, [this](const error_code &ec, size_t length) {
															+            handle_package(ec, length);
															+        });
															+    }
															+
															+    // Completion handler for one datagram: validates it, stores its block, and once
															+    // a chunk and then the whole frame are complete, passes the frame to the decoder.
															+    // Every field of the header is untrusted network input and is checked before it
															+    // is used as an index or a size.
															+    void handle_package(const error_code &ec, size_t length) {
															+        // Re-arm the receive on every exit path of this function.
															+        auto rearm = sg::make_scope_guard([this] {
															+            async_handle_package();
															+        });
															+
															+        // handle errors
															+        if (ec) {
															+            SPDLOG_ERROR("Error while receiving request: {}", ec.what());
															+            return;
															+        }
															+
															+        // A datagram shorter than the header cannot be parsed, and the unsigned
															+        // subtraction in the checksum computation below would wrap around.
															+        if (length < static_cast<size_t>(frag_header_size)) {
															+            SPDLOG_WARN("Dropped undersized fragment ({} bytes).", length);
															+            return;
															+        }
															+
															+        // parse package
															+        frag_header header;
															+        read_frag_header(in_buf.ptr, &header);
															+        auto crc = boost::crc_32_type{};
															+        crc.process_bytes(in_buf.ptr + sizeof(uint32_t),
															+                          length - sizeof(uint32_t));
															+        if (crc.checksum() != header.frag_checksum) { // checksum failed
															+            SPDLOG_WARN("Dropped fragment with bad checksum.");
															+            return;
															+        }
															+
															+        // Reject headers that are inconsistent in themselves. These used to be
															+        // asserts (or unchecked), which vanish in release builds.
															+        if (length != static_cast<size_t>(frag_header_size) + header.block_size ||
															+            header.block_count == 0 ||
															+            header.chunk_decode_block_count > header.block_count ||
															+            header.block_id >= header.block_count ||
															+            header.chunk_id >= header.chunk_count ||
															+            uint64_t{header.chunk_offset} + header.chunk_length > header.frame_length) {
															+            SPDLOG_WARN("Dropped malformed fragment of frame {}.", header.frame_id);
															+            return;
															+        }
															+
															+        if (header.frame_id < frame_cache.id) return; // old package
															+        bool is_idr_frame = header.frame_type == 'I';
															+        if (frame_cache.status == READY) { // last frame has already been decoded
															+            if (header.frame_id == frame_cache.id) return; // redundant package
															+            if (is_idr_frame || // new IDR frame or correct next P frame
															+                header.frame_id == last_frame_id + 1) {
															+                refresh_frame(header);
															+            } else {
															+                request_idr_frame(header.frame_id);
															+                return;
															+            }
															+        } else {
															+            if (header.frame_id > frame_cache.id) {
															+                if (is_idr_frame) {
															+                    refresh_frame(header);
															+                } else {
															+                    request_idr_frame(header.frame_id);
															+                    return;
															+                }
															+            }
															+        }
															+
															+        // The fragment must belong to the frame being collected and fit the buffers
															+        // sized when that frame was opened. This also covers a fragment arriving
															+        // before any frame has been opened.
															+        if (frame_cache.status != WAITING ||
															+            frame_cache.id != header.frame_id ||
															+            header.chunk_id >= frame_cache.chunks.length ||
															+            uint64_t{header.chunk_offset} + header.chunk_length > frame_cache.data.length) {
															+            SPDLOG_WARN("Dropped fragment that does not fit frame {}.", header.frame_id);
															+            return;
															+        }
															+
															+        auto &chunk = frame_cache.chunks.ptr[header.chunk_id];
															+        if (chunk.status == NOT_INIT) {
															+            auto parity_blocks = header.block_count - header.chunk_decode_block_count;
															+            chunk.create(header.block_count, parity_blocks, header.block_size);
															+        } else if (chunk.status == READY) {
															+            return;
															+        }
															+
															+        // The chunk was sized by its first fragment; later fragments must agree
															+        // with that geometry or the copies below would run out of bounds.
															+        if (chunk.block_ptrs.length != header.block_count ||
															+            chunk.block_data.length != size_t{header.block_count} * header.block_size ||
															+            chunk.block_data.length < header.chunk_length) {
															+            SPDLOG_WARN("Dropped fragment with inconsistent block layout in frame {}.",
															+                        header.frame_id);
															+            return;
															+        }
															+
															+        assert(chunk.status == WAITING);
															+        auto data_ptr = in_buf.ptr + frag_header_size;
															+        memcpy(chunk.block_ptrs.ptr[header.block_id], data_ptr, header.block_size);
															+        // Count each block only once; UDP may deliver the same datagram twice and a
															+        // duplicate must not bring the chunk closer to the reconstruction threshold.
															+        if (chunk.block_miss.ptr[header.block_id]) {
															+            chunk.block_miss.ptr[header.block_id] = false;
															+            ++chunk.ready_blocks;
															+        }
															+        if (!chunk.reconstruct()) [[likely]] return; // need more blocks
															+
															+        assert(chunk.status == READY);
															+        memcpy(frame_cache.data.ptr + header.chunk_offset, chunk.block_data.ptr, header.chunk_length);
															+        ++frame_cache.ready_chunks;
															+        if (frame_cache.ready_chunks < frame_cache.chunks.length) return; // need more chunks
															+
															+        // decode frame
															+        frame_cache.status = READY;
															+        auto frame = std::make_unique<video_nal>();
															+        frame->create(frame_cache.data.ptr, frame_cache.data.length, is_idr_frame);
															+        decoder->decode(std::move(frame));
															+        SPDLOG_TRACE("Frame {} decoded.", frame_cache.id);
															+        last_frame_id = frame_cache.id;
															+        request_frame_confirm(frame_cache.id);
															+    }
															+
															+    // Builds the implementation for `q_this`: connects the UDP socket, arms the first
															+    // receive, initialises the FEC library and queues an initial IDR request.
															+    // The caller takes ownership of the returned pointer.
															+    static impl *create(const receiver_config &conf, receiver_udp_fec *q_this) {
															+        auto ret = std::make_unique<impl>();
															+        assert(conf.decoder != nullptr);
															+        ret->decoder = conf.decoder;
															+        ret->server_ep = udp::endpoint{address::from_string(conf.server_addr), conf.server_port};
															+        ret->socket = std::make_unique<udp::socket>(*q_this->get_ctx());
															+        ret->socket->connect(ret->server_ep);
															+        ret->socket->set_option(udp::socket::receive_buffer_size{udp_buffer_size});
															+        ret->async_handle_package();
															+
															+        // initialize reed solomon
															+        fec_init();
															+
															+        // notify sender
															+        post(*q_this->get_ctx(), [ptr = ret.get()] {
															+            ptr->request_idr_frame(0);
															+        });
															+
															+        return ret.release();
															+    }
															+};
														
 
															+
														
 
															+// Defaulted in the source file; the header only forward-declares `impl`.
															+receiver_udp_fec::~receiver_udp_fec() = default;
															+
															+// Factory: builds a receiver and its implementation from `conf`.
															+// Returns an owning raw pointer, or nullptr when the implementation could not be created.
															+receiver_udp_fec *receiver_udp_fec::create(const receiver_config &conf) {
															+    auto receiver = std::make_unique<receiver_udp_fec>();
															+    std::unique_ptr<impl> state{impl::create(conf, receiver.get())};
															+    if (!state) return nullptr;
															+    receiver->pimpl = std::move(state);
															+    return receiver.release();
															+}
														
--- a/src/frame_receiver/receiver_udp_fec.h
+++ b/src/frame_receiver/receiver_udp_fec.h
@@ -0,0 +1,21 @@
 
															+#ifndef TINYPLAYER2_FRAME_RECEIVER2_H
														
 
															+#define TINYPLAYER2_FRAME_RECEIVER2_H
														
 
															+
														
 
															+#include "receiver_base.h"
														
 
															+
														
 
															+#include <memory>
														
 
															+
														
 
															+class receiver_udp_fec : public receiver_base {
														
 
															+public:
														
 
															+
														
 
															+    ~receiver_udp_fec() override;
														
 
															+
														
 
															+    static receiver_udp_fec *create(const receiver_config &conf);
														
 
															+
														
 
															+private:
														
 
															+    struct impl;
														
 
															+    std::unique_ptr<impl> pimpl;
														
 
															+};
														
 
															+
														
 
															+
														
 
															+#endif //TINYPLAYER2_FRAME_RECEIVER2_H
														
--- a/src/frame_receiver/receiver_utility.hpp
+++ b/src/frame_receiver/receiver_utility.hpp
@@ -0,0 +1,57 @@
 
															+#ifndef TINYPLAYER3_RECEIVER_UTILITY_HPP
														
 
															+#define TINYPLAYER3_RECEIVER_UTILITY_HPP
														
 
															+
														
 
															#include <boost/endian.hpp>

															#include <cstdint>
															#include <cstdlib>
															#include <cstring>
														
 
															+
														
 
															/**
															 * Reusable, grow-only array of T.
															 *
															 * create() makes sure at least the requested number of elements is available
															 * and records that number in `length`. Storage is only reallocated when the
															 * request exceeds the current capacity; the previous contents are NOT copied
															 * into the new storage.
															 *
															 * NOTE(review): the implicitly generated copy operations copy the raw
															 * pointer, so copying an instance would release the same array twice.
															 */
															template<typename T>
															struct smart_buffer {
															    T *ptr = nullptr;   // start of the storage; nullptr until create() first allocates
															    size_t length = 0;  // element count requested by the most recent create()

															    ~smart_buffer() {
															        // storage comes from new T[], so it has to be released with delete[]
															        // (delete[] on nullptr is a no-op)
															        delete[] ptr;
															    }

															    /**
															     * Ensures room for req_length elements and sets `length` accordingly.
															     *
															     * @param req_length number of elements required
															     */
															    void create(size_t req_length) {
															        if (req_length > capacity) [[unlikely]] {
															            // allocate first: if new[] throws, the old storage stays intact
															            auto ptr_next = new T[req_length];
															            delete[] ptr;
															            ptr = ptr_next;
															            capacity = req_length;
															        }
															        length = req_length;
															    }

															private:
															    size_t capacity = 0;  // number of elements the current storage can hold
															};
														
 
															+
														
 
															+template<typename T>
														
 
															+static uint8_t *write_binary_number(uint8_t *ptr, T val) {
														
 
															+    static constexpr auto need_swap =
														
 
															+            (boost::endian::order::native != boost::endian::order::big);
														
 
															+    auto real_ptr = (T *) ptr;
														
 
															+    if constexpr (need_swap) {
														
 
															+        *real_ptr = boost::endian::endian_reverse(val);
														
 
															+    } else {
														
 
															+        *real_ptr = val;
														
 
															+    }
														
 
															+    return ptr + sizeof(T);
														
 
															+}
														
 
															+
														
 
															+template<typename T>
														
 
															+static uint8_t *read_binary_number(uint8_t *ptr, T *val) {
														
 
															+    static constexpr auto need_swap =
														
 
															+            (boost::endian::order::native != boost::endian::order::big);
														
 
															+    *val = *(T *) ptr;
														
 
															+    if constexpr (need_swap) {
														
 
															+        boost::endian::endian_reverse_inplace(*val);
														
 
															+    }
														
 
															+    return ptr + sizeof(T);
														
 
															+}
														
 
															+
														
 
															+#endif //TINYPLAYER3_RECEIVER_UTILITY_HPP
														
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -0,0 +1,92 @@
 
															+#include <glad/gl.h>
														
 
															+#include <GLFW/glfw3.h>
														
 
															+
														
 
															+#include <imgui.h>
														
 
															+#include <imgui_impl_glfw.h>
														
 
															+#include <imgui_impl_opengl3.h>
														
 
															+
														
 
															+#include <nlohmann/json.hpp>
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															+#include <cassert>
														
 
															+
														
 
															+using namespace nlohmann;
														
 
															+
														
 
															+GLFWwindow *create_controller_window();
														
 
															+
														
 
															+GLFWwindow *create_player_window(const json &conf);
														
 
															+
														
 
															+void controller_main(const char *this_name);
														
 
															+
														
 
															+void player_main(const json &config);
														
 
															+
														
 
															+int main(int argc, char *argv[]) {
														
 
															+
														
 
															+#ifndef NDEBUG
														
 
															+    spdlog::set_level(spdlog::level::trace);
														
 
															+#endif
														
 
															+
														
 
															+    // determine application type
														
 
															+    bool is_controller = (argc == 1);
														
 
															+    json player_conf;
														
 
															+    if (!is_controller) {
														
 
															+        assert(argc == 2);
														
 
															+        player_conf = json::parse(argv[1]);
														
 
															+    }
														
 
															+
														
 
															+    // set GLFW error handler
														
 
															+    glfwSetErrorCallback([](int error, const char *desc) {
														
 
															+        SPDLOG_ERROR("GLFW error: code = {}, description = {}", error, desc);
														
 
															+        assert(false);
														
 
															+    });
														
 
															+
														
 
															+    // create main window
														
 
															+    auto ret = glfwInit();
														
 
															+    assert(ret == GLFW_TRUE);
														
 
															+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
														
 
															+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
														
 
															+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
														
 
															+    glfwWindowHint(GLFW_REFRESH_RATE, 60);
														
 
															+    GLFWwindow *main_window;
														
 
															+    if (is_controller) {
														
 
															+        main_window = create_controller_window();
														
 
															+    } else {
														
 
															+        main_window = create_player_window(player_conf);
														
 
															+    }
														
 
															+    assert(main_window != nullptr);
														
 
															+    glfwMakeContextCurrent(main_window);
														
 
															+    glfwSwapInterval(0);
														
 
															+
														
 
															+    // load opengl functions
														
 
															+    auto version = gladLoadGL(glfwGetProcAddress);
														
 
															+    assert(version > 0);
														
 
															+    SPDLOG_INFO("Loaded OpenGL {}.{}", GLAD_VERSION_MAJOR(version), GLAD_VERSION_MINOR(version));
														
 
															+
														
 
															+#ifndef NDEBUG
														
 
															+    // log opengl error
														
 
															+    glEnable(GL_DEBUG_OUTPUT);
														
 
															+    glDebugMessageCallback([](GLenum source, GLenum type, GLuint id, GLenum severity,
														
 
															+                              GLsizei length, const GLchar *message, const void *user_data) {
														
 
															+        if (type == GL_DEBUG_TYPE_ERROR) {
														
 
															+            SPDLOG_ERROR("OpenGL error: type = {}, severity = {}, message = {}", type, severity, message);
														
 
															+            assert(false);
														
 
															+        }
														
 
															+    }, nullptr);
														
 
															+#endif
														
 
															+
														
 
															+    // setup imgui context
														
 
															+    IMGUI_CHECKVERSION();
														
 
															+    ImGui::CreateContext();
														
 
															+    auto io = ImGui::GetIO();
														
 
															+    io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;
														
 
															+    ImGui::StyleColorsDark();
														
 
															+    ImGui_ImplGlfw_InitForOpenGL(main_window, true);
														
 
															+    ImGui_ImplOpenGL3_Init();
														
 
															+
														
 
															+    if (is_controller) {
														
 
															+        controller_main(argv[0]);
														
 
															+    } else {
														
 
															+        player_main(player_conf);
														
 
															+    }
														
 
															+}
														
--- a/src/main_controller.cpp
+++ b/src/main_controller.cpp
@@ -0,0 +1,196 @@
 
															+#include "frame_decoder/decoder_base.h"
														
 
															+#include "frame_receiver/receiver_base.h"
														
 
															+
														
 
															+#include <glad/gl.h>
														
 
															+#include <GLFW/glfw3.h>
														
 
															+
														
 
															+#include <imgui.h>
														
 
															+#include <imgui_impl_glfw.h>
														
 
															+#include <imgui_impl_opengl3.h>
														
 
															+
														
 
															+#include <boost/process/child.hpp>
														
 
															+
														
 
															+#include <nlohmann/json.hpp>
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															+#include <memory>
														
 
															+
														
 
															+struct imgui_disable_guard {
														
 
															+    explicit imgui_disable_guard(bool enable = true) {
														
 
															+        is_disabled = enable;
														
 
															+        if (is_disabled) {
														
 
															+            ImGui::BeginDisabled();
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    ~imgui_disable_guard() {
														
 
															+        if (is_disabled) {
														
 
															+            ImGui::EndDisabled();
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+private:
														
 
															+    bool is_disabled;
														
 
															+};
														
 
															+
														
 
															+namespace bp = boost::process;
														
 
															+using namespace nlohmann;
														
 
															+
														
 
															+namespace controller_impl {
														
 
															+    constexpr auto window_width = 800;
														
 
															+    constexpr auto window_height = 600;
														
 
															+    constexpr auto server_address_length = 256;
														
 
															+
														
 
															+    GLFWwindow *main_window;
														
 
															+
														
 
															+    bool full_screen = false;
														
 
															+    int monitor_index = 0;
														
 
															+    char server_address[server_address_length] = "10.0.0.2";
														
 
															+    uint16_t server_port = 5279;
														
 
															+    decoder_type chosen_decoder = DECODER_NVDEC;
														
 
															+    receiver_type chosen_receiver = RECEIVER_TCP;
														
 
															+    std::unique_ptr<bp::child> worker;
														
 
															+
														
 
															+    void stop_worker() {
														
 
															+        worker.reset();
														
 
															+    }
														
 
															+
														
 
															+    bool is_working() {
														
 
															+        if (worker == nullptr) return false;
														
 
															+        if (!worker->running()) {
														
 
															+            stop_worker();
														
 
															+            return false;
														
 
															+        }
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    void start_worker(const char *this_name) {
														
 
															+        // generate config
														
 
															+        json server_config;
														
 
															+        server_config["ip"] = server_address;
														
 
															+        server_config["port"] = server_port;
														
 
															+        json config;
														
 
															+        config["full_screen"] = full_screen;
														
 
															+        config["monitor"] = monitor_index;
														
 
															+        config["server"] = server_config;
														
 
															+        config["receiver"] = (int) chosen_receiver;
														
 
															+        config["decoder"] = (int) chosen_decoder;
														
 
															+
														
 
															+        // create worker
														
 
															+        auto config_str = config.dump();
														
 
															+        worker = std::make_unique<bp::child>(this_name, config_str);
														
 
															+        SPDLOG_INFO("Worker started with config: {}", config_str);
														
 
															+    }
														
 
															+
														
 
															+}
														
 
															+
														
 
															+using namespace controller_impl;
														
 
															+
														
 
															+GLFWwindow *create_controller_window() {
														
 
															+    main_window = glfwCreateWindow(window_width, window_height, "TinyPlayer Control", nullptr, nullptr);
														
 
															+    return main_window;
														
 
															+}
														
 
															+
														
 
															+void controller_main(const char *this_name) {
														
 
															+    while (!glfwWindowShouldClose(main_window)) {
														
 
															+        glfwPollEvents();
														
 
															+        ImGui_ImplOpenGL3_NewFrame();
														
 
															+        ImGui_ImplGlfw_NewFrame();
														
 
															+        ImGui::NewFrame();
														
 
															+
														
 
															+        if (ImGui::Begin("TinyPlayer Control")) {
														
 
															+            ImGui::PushItemWidth(150);
														
 
															+
														
 
															+            ImGui::SeparatorText("Actions");
														
 
															+            if (!is_working()) {
														
 
															+                if (ImGui::Button("Start")) {
														
 
															+                    start_worker(this_name);
														
 
															+                }
														
 
															+            } else {
														
 
															+                if (ImGui::Button("Stop")) {
														
 
															+                    stop_worker();
														
 
															+                }
														
 
															+            }
														
 
															+
														
 
															+            ImGui::SeparatorText("Configs");
														
 
															+            {
														
 
															+                auto guard = imgui_disable_guard(is_working());
														
 
															+
														
 
															+                ImGui::Checkbox("Full Screen", &full_screen);
														
 
															+
														
 
															+                // display monitor
														
 
															+                if (full_screen) {
														
 
															+                    int monitor_count;
														
 
															+                    auto monitors = glfwGetMonitors(&monitor_count);
														
 
															+                    if (monitor_index >= monitor_count) {
														
 
															+                        monitor_index = 0;
														
 
															+                    }
														
 
															+                    auto monitor_name_preview = glfwGetMonitorName(monitors[monitor_index]);
														
 
															+                    if (ImGui::BeginCombo("Monitor", monitor_name_preview)) { // let user select monitors
														
 
															+                        for (int k = 0; k < monitor_count; ++k) {
														
 
															+                            auto is_selected = (monitor_index == k);
														
 
															+                            auto monitor_name = fmt::format("{} - {}", k, glfwGetMonitorName(monitors[k]));
														
 
															+                            if (ImGui::Selectable(monitor_name.c_str(), is_selected)) {
														
 
															+                                monitor_index = k;
														
 
															+                            }
														
 
															+                            if (is_selected) {
														
 
															+                                ImGui::SetItemDefaultFocus();
														
 
															+                            }
														
 
															+                        }
														
 
															+                        ImGui::EndCombo();
														
 
															+                    }
														
 
															+                }
														
 
															+
														
 
															+                // server info
														
 
															+                ImGui::InputText("Server IP", server_address, server_address_length);
														
 
															+                ImGui::InputScalar("Server Port", ImGuiDataType_U16, &server_port);
														
 
															+
														
 
															+                // decoder type
														
 
															+                ImGui::AlignTextToFramePadding();
														
 
															+                ImGui::Text("Decoder Method:");
														
 
															+                ImGui::SameLine();
														
 
															+                if (ImGui::RadioButton("NvDec", chosen_decoder == DECODER_NVDEC)) {
														
 
															+                    chosen_decoder = DECODER_NVDEC;
														
 
															+                    if (chosen_receiver == RECEIVER_UDP) {
														
 
															+                        chosen_receiver = RECEIVER_TCP;
														
 
															+                    }
														
 
															+                }
														
 
															+                ImGui::SameLine();
														
 
															+                if (ImGui::RadioButton("nvJPEG", chosen_decoder == DECODER_JPEG)) {
														
 
															+                    chosen_decoder = DECODER_JPEG;
														
 
															+                }
														
 
															+
														
 
															+                // receiver type
														
 
															+                ImGui::AlignTextToFramePadding();
														
 
															+                ImGui::Text("Receiver Method:");
														
 
															+                ImGui::SameLine();
														
 
															+                if (ImGui::RadioButton("TCP", chosen_receiver == RECEIVER_TCP)) {
														
 
															+                    chosen_receiver = RECEIVER_TCP;
														
 
															+                }
														
 
															+                if (chosen_decoder != DECODER_NVDEC) {
														
 
															+                    ImGui::SameLine();
														
 
															+                    if (ImGui::RadioButton("UDP", chosen_receiver == RECEIVER_UDP)) {
														
 
															+                        chosen_receiver = RECEIVER_UDP;
														
 
															+                    }
														
 
															+                }
														
 
															+                ImGui::SameLine();
														
 
															+                if (ImGui::RadioButton("UDP (FEC)", chosen_receiver == RECEIVER_UDP_FEC)) {
														
 
															+                    chosen_receiver = RECEIVER_UDP_FEC;
														
 
															+                }
														
 
															+            }
														
 
															+            ImGui::PopItemWidth();
														
 
															+        }
														
 
															+        ImGui::End();
														
 
															+        ImGui::Render();
														
 
															+
														
 
															+        int frame_width, frame_height;
														
 
															+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
														
 
															+        glfwGetFramebufferSize(main_window, &frame_width, &frame_height);
														
 
															+        glViewport(0, 0, frame_width, frame_height);
														
 
															+        glClear(GL_COLOR_BUFFER_BIT);
														
 
															+
														
 
															+        ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
														
 
															+        glfwSwapBuffers(main_window);
														
 
															+    }
														
 
															+}
														
--- a/src/main_player.cpp
+++ b/src/main_player.cpp
@@ -0,0 +1,155 @@
 
															+#include "frame_decoder/decoder_nvdec.h"
														
 
															+#include "frame_decoder/decoder_nvjpeg.h"
														
 
															+#include "frame_receiver/receiver_tcp.h"
														
 
															+#include "frame_receiver/receiver_udp_fec.h"
														
 
															+#include "simple_mq.h"
														
 
															+#include "simple_opengl.h"
														
 
															+#include "utility.hpp"
														
 
															+#include "variable_defs.h"
														
 
															+
														
 
															+#include <glad/gl.h>
														
 
															+#include <GLFW/glfw3.h>
														
 
															+
														
 
															+#include <nlohmann/json.hpp>
														
 
															+
														
 
															+#include <thread>
														
 
															+
														
 
															+using namespace nlohmann;
														
 
															+
														
 
															+namespace player_impl {
														
 
															+    constexpr auto window_width = 800;
														
 
															+    constexpr auto window_height = 600;
														
 
															+
														
 
															+    GLFWwindow *main_window;
														
 
															+}
														
 
															+
														
 
															+using namespace player_impl;
														
 
															+using namespace simple_mq_singleton;
														
 
															+
														
 
															+GLFWwindow *create_player_window(const json &config) {
														
 
															+    if (config["full_screen"].get<bool>()) {
														
 
															+        int monitor_count;
														
 
															+        auto monitor_index = config["monitor"].get<int>();
														
 
															+        auto monitors = glfwGetMonitors(&monitor_count);
														
 
															+        assert(monitor_count > monitor_index);
														
 
															+        auto monitor = monitors[monitor_index];
														
 
															+        auto video_mode = glfwGetVideoMode(monitor);
														
 
															+        main_window = glfwCreateWindow(video_mode->width, video_mode->height, "TinyPlayer", monitor, nullptr);
														
 
															+    } else {
														
 
															+        main_window = glfwCreateWindow(window_width, window_height, "TinyPlayer", nullptr, nullptr);
														
 
															+    }
														
 
															+    return main_window;
														
 
															+}
														
 
															+
														
 
															+void player_main(const json &config) {
														
 
															+    // initialize simple mq
														
 
															+    mq();
														
 
															+
														
 
															+    // initialize cuda
														
 
															+    CUDA_API_CHECK(cuInit(0));
														
 
															+    use_primary_cuda_ctx();
														
 
															+
														
 
															+    // create decoder
														
 
															+    std::unique_ptr<decoder_base> decoder;
														
 
															+    auto chosen_decoder = (decoder_type) config["decoder"].get<int>();
														
 
															+    switch (chosen_decoder) {
														
 
															+        case DECODER_NVDEC: {
														
 
															+            decoder = std::make_unique<decoder_nvdec>();
														
 
															+            break;
														
 
															+        }
														
 
															+        case DECODER_JPEG: {
														
 
															+            decoder = std::make_unique<decoder_nvjpeg>();
														
 
															+            break;
														
 
															+        }
														
 
															+        default: {
														
 
															+            RET_ERROR;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    // create receiver config
														
 
															+    receiver_config recv_conf;
														
 
															+    auto &server_conf = config["server"];
														
 
															+    recv_conf.decoder = decoder.get();
														
 
															+    recv_conf.server_addr = server_conf["ip"].get<std::string>();
														
 
															+    recv_conf.server_port = server_conf["port"].get<int>();
														
 
															+
														
 
															+    // create receiver
														
 
															+    std::unique_ptr<receiver_base> receiver;
														
 
															+    auto chosen_receiver = (receiver_type) config["receiver"].get<int>();
														
 
															+    switch (chosen_receiver) {
														
 
															+        case RECEIVER_TCP: {
														
 
															+            receiver.reset(receiver_tcp::create(recv_conf));
														
 
															+            break;
														
 
															+        }
														
 
															+        case RECEIVER_UDP_FEC: {
														
 
															+            receiver.reset(receiver_udp_fec::create(recv_conf));
														
 
															+            break;
														
 
															+        }
														
 
															+        default: {
														
 
															+            RET_ERROR;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    // working thread
														
 
															+    mq().update_variable(RECEIVER_STOPPED, false);
														
 
															+    mq().update_variable(RENDER_BUSY, false);
														
 
															+    mq().update_variable_ptr<cv::cuda::GpuMat>(FRAME_OUT, nullptr);
														
 
															+    auto worker = std::make_unique<std::thread>([&] {
														
 
															+        // initialize cuda
														
 
															+        use_primary_cuda_ctx();
														
 
															+
														
 
															+        receiver->run();
														
 
															+
														
 
															+        // notify receiver stop
														
 
															+        mq().update_variable_ptr<cv::cuda::GpuMat>(FRAME_OUT, nullptr);
														
 
															+    });
														
 
															+
														
 
															+    // create render
														
 
															+    auto render = std::make_unique<simple_render>();
														
 
															+
														
 
															+    uint64 last_cnt = 1; // FRAME_OUT has already been set
														
 
															+    while (!glfwWindowShouldClose(main_window)) {
														
 
															+
														
 
															+        glfwPollEvents();
														
 
															+
														
 
															+        // retrieve new image
														
 
															+        mq().wait_variable(FRAME_OUT, last_cnt);
														
 
															+        uint64_t cur_cnt;
														
 
															+        auto img = mq().query_variable_ptr<cv::cuda::GpuMat>(FRAME_OUT, &cur_cnt);
														
 
															+        assert(cur_cnt > last_cnt);
														
 
															+        last_cnt = cur_cnt;
														
 
															+
														
 
															+        // check for stop
														
 
															+        if (img == nullptr) return;
														
 
															+
														
 
															+        // adjust window
														
 
															+        int fbo_width, fbo_height;
														
 
															+        glfwGetFramebufferSize(main_window, &fbo_width, &fbo_height);
														
 
															+        glViewport(0, 0, fbo_width, fbo_height);
														
 
															+//        auto fbo_wh_ratio = 1.0f * fbo_width / fbo_height;
														
 
															+
														
 
															+        // draw image
														
 
															+        auto rect = simple_rect{-1, -1, 2, 2};
														
 
															+        mq().update_variable(RENDER_BUSY, true);
														
 
															+        switch (chosen_decoder) {
														
 
															+            case DECODER_NVDEC: {
														
 
															+                render->render_nv12(*img, rect, true);
														
 
															+                break;
														
 
															+            }
														
 
															+            case DECODER_JPEG: {
														
 
															+                render->render_rgb(*img, rect, true);
														
 
															+                break;
														
 
															+            }
														
 
															+            default: {
														
 
															+                RET_ERROR;
														
 
															+            }
														
 
															+        }
														
 
															+        glFinish();
														
 
															+        mq().update_variable(RENDER_BUSY, false);
														
 
															+
														
 
															+        glfwSwapBuffers(main_window);
														
 
															+    }
														
 
															+
														
 
															+    // ignore thread exception
														
 
															+    exit(0);
														
 
															+}
														
--- a/src/simple_mq.cpp
+++ b/src/simple_mq.cpp
@@ -0,0 +1,142 @@
 
															+#include "simple_mq.h"
														
 
															+
														
 
															+#include <cassert>
														
 
															+#include <condition_variable>
														
 
															+#include <map>
														
 
															+#include <mutex>
														
 
															+#include <shared_mutex>
														
 
															+
														
 
															+struct simple_mq::impl {
														
 
															+
														
 
															+    struct variable_info {
														
 
															+        std::shared_ptr<void> ptr;
														
 
															+        std::type_index type;
														
 
															+        uint64_t update_cnt = 0;
														
 
															+        std::shared_mutex mu;
														
 
															+        std::condition_variable_any cv;
														
 
															+    };
														
 
															+    using pool_type = std::map<index_type, variable_info *>;
														
 
															+
														
 
															+    pool_type pool;
														
 
															+    std::shared_mutex pool_mu;
														
 
															+    std::condition_variable_any pool_cv;
														
 
															+
														
 
															+    ~impl() {
														
 
															+        for (auto &info: pool) {
														
 
															+            delete info.second;
														
 
															+        }
														
 
															+    };
														
 
															+
														
 
															+    bool update_variable(index_type index,
														
 
															+                         const std::shared_ptr<void> &ptr,
														
 
															+                         std::type_index type) {
														
 
															+        auto iter = pool_type::iterator{};
														
 
															+        {
														
 
															+            auto lock_pool = std::shared_lock{pool_mu};
														
 
															+            iter = pool.find(index);
														
 
															+        }
														
 
															+        if (iter == pool.end()) return false;
														
 
															+        auto &info = iter->second;
														
 
															+        {
														
 
															+            auto lock_variable = std::unique_lock{info->mu};
														
 
															+            info->ptr = ptr;
														
 
															+            info->type = type;
														
 
															+            ++info->update_cnt;
														
 
															+        }
														
 
															+        info->cv.notify_all();
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    void create_variable(index_type index,
														
 
															+                         const std::shared_ptr<void> &ptr,
														
 
															+                         std::type_index type) {
														
 
															+        auto info = new variable_info{ptr, type};
														
 
															+        {
														
 
															+            auto lock_pool = std::unique_lock{pool_mu};
														
 
															+            pool.emplace(index, info);
														
 
															+            ++info->update_cnt;
														
 
															+        }
														
 
															+        pool_cv.notify_all();
														
 
															+    }
														
 
															+
														
 
															+    std::shared_ptr<void> query_variable(index_type index,
														
 
															+                                         std::type_index type,
														
 
															+                                         uint64_t *update_cnt) {
														
 
															+        auto iter = pool_type::iterator{};
														
 
															+        {
														
 
															+            auto lock_pool = std::shared_lock{pool_mu};
														
 
															+            iter = pool.find(index);
														
 
															+        }
														
 
															+        if (iter == pool.end()) {
														
 
															+            if (update_cnt != nullptr) {
														
 
															+                *update_cnt = 0;
														
 
															+            }
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+        auto &info = iter->second;
														
 
															+        {
														
 
															+            auto lock_variable = std::shared_lock{info->mu};
														
 
															+            assert(info->type == type);
														
 
															+            if (update_cnt != nullptr) {
														
 
															+                *update_cnt = info->update_cnt;
														
 
															+            }
														
 
															+            return info->ptr;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    bool wait_variable_update(index_type index, uint64_t old_cnt) {
														
 
															+        auto iter = pool_type::iterator{};
														
 
															+        {
														
 
															+            auto lock_pool = std::shared_lock{pool_mu};
														
 
															+            iter = pool.find(index);
														
 
															+        }
														
 
															+        if (iter == pool.end()) return false;
														
 
															+        auto &info = iter->second;
														
 
															+        {
														
 
															+            auto lock_variable = std::shared_lock{info->mu};
														
 
															+            info->cv.wait(lock_variable, [=] {
														
 
															+                return info->update_cnt > old_cnt;
														
 
															+            });
														
 
															+        }
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    void wait_variable_create(index_type index) {
														
 
															+        auto lock_pool = std::shared_lock{pool_mu};
														
 
															+        pool_cv.wait(lock_pool, [=, this] {
														
 
															+            return pool.contains(index);
														
 
															+        });
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															+simple_mq::simple_mq()
														
 
															+        : pimpl(std::make_unique<impl>()) {}
														
 
															+
														
 
															+simple_mq::~simple_mq() = default;
														
 
															+
														
 
															+void simple_mq::update_variable_impl(index_type index,
														
 
															+                                     const std::shared_ptr<void> &ptr,
														
 
															+                                     std::type_index type) {
														
 
															+    auto ret = pimpl->update_variable(index, ptr, type);
														
 
															+    if (ret) [[likely]] return;
														
 
															+    pimpl->create_variable(index, ptr, type);
														
 
															+}
														
 
															+
														
 
															+std::shared_ptr<void> simple_mq::query_variable_impl(index_type index,
														
 
															+                                                     std::type_index type,
														
 
															+                                                     uint64_t *update_cnt) {
														
 
															+    return pimpl->query_variable(index, type, update_cnt);
														
 
															+}
														
 
															+
														
 
															+void simple_mq::wait_variable(index_type index, uint64_t old_cnt) {
														
 
															+    auto ret = pimpl->wait_variable_update(index, old_cnt);
														
 
															+    if (ret)[[likely]] return;
														
 
															+    pimpl->wait_variable_create(index);
														
 
															+}
														
 
															+
														
 
															+namespace simple_mq_singleton {
														
 
															+    simple_mq &mq() {
														
 
															+        static simple_mq instance;
														
 
															+        return instance;
														
 
															+    }
														
 
															+}
														
--- a/src/simple_mq.h
+++ b/src/simple_mq.h
@@ -0,0 +1,63 @@
 
															+#ifndef REMOTEAR3_SIMPLE_MQ_H
														
 
															+#define REMOTEAR3_SIMPLE_MQ_H
														
 
															+
														
 
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <typeindex>
#include <typeinfo>
#include <utility>
														
 
															+
														
 
															+class simple_mq {
														
 
															+public:
														
 
															+    using index_type = int;
														
 
															+
														
 
															+    simple_mq();
														
 
															+
														
 
															+    ~simple_mq();
														
 
															+
														
 
															+    template<typename T>
														
 
															+    void update_variable_ptr(index_type index,
														
 
															+                             const std::shared_ptr<T> &ptr) {
														
 
															+        update_variable_impl(index,
														
 
															+                             std::static_pointer_cast<void>(ptr),
														
 
															+                             typeid(T));
														
 
															+    }
														
 
															+
														
 
															+    template<typename T>
														
 
															+    void update_variable(index_type index, T &&value) {
														
 
															+        using RT = typename std::remove_cvref_t<T>;
														
 
															+        update_variable_ptr(index,
														
 
															+                            std::make_shared<RT>(std::forward<T>(value)));
														
 
															+    }
														
 
															+
														
 
															+    template<typename T>
														
 
															+    std::shared_ptr<T> query_variable_ptr(index_type index, uint64_t *update_cnt = nullptr) {
														
 
															+        return std::static_pointer_cast<T>(
														
 
															+                query_variable_impl(index, typeid(T), update_cnt));
														
 
															+    }
														
 
															+
														
 
															+    template<typename T>
														
 
															+    T query_variable(index_type index, uint64_t *update_cnt = nullptr) {
														
 
															+        return *query_variable_ptr<T>(index, update_cnt);
														
 
															+    }
														
 
															+
														
 
															+    void wait_variable(index_type index, uint64_t old_cnt);
														
 
															+
														
 
															+private:
														
 
															+    struct impl;
														
 
															+    std::unique_ptr<impl> pimpl;
														
 
															+
														
 
															+    void update_variable_impl(index_type index,
														
 
															+                              const std::shared_ptr<void> &ptr,
														
 
															+                              std::type_index type);
														
 
															+
														
 
															+    std::shared_ptr<void> query_variable_impl(index_type index,
														
 
															+                                              std::type_index type,
														
 
															+                                              uint64_t *update_cnt);
														
 
															+};
														
 
															+
														
 
															+namespace simple_mq_singleton {
														
 
															+    simple_mq &mq();
														
 
															+}
														
 
															+
														
 
															+#endif //REMOTEAR3_SIMPLE_MQ_H
														
--- a/src/simple_opengl.cpp
+++ b/src/simple_opengl.cpp
@@ -0,0 +1,365 @@
 
															+#include "simple_opengl.h"
														
 
															+
														
 
															+#include <cuda_gl_interop.h>
														
 
															+
														
 
															+namespace simple_opengl_impl {
														
 
															+
														
 
															+    constexpr auto simple_vert_shader_source = R"(
														
 
															+        #version 460
														
 
															+        layout (location = 0) in vec2 pos_in;
														
 
															+        layout (location = 1) in vec2 tex_coord_in;
														
 
															+        out vec2 tex_coord;
														
 
															+        void main() {
														
 
															+            gl_Position = vec4(pos_in, 0, 1);
														
 
															+            tex_coord = tex_coord_in;
														
 
															+        }
														
 
															+    )";
														
 
															+
														
 
															+    constexpr auto rgb_frag_shader_source = R"(
														
 
															+        #version 460
														
 
															+        layout (location = 0) out vec4 color_out;
														
 
															+        in vec2 tex_coord;
														
 
															+        uniform sampler2D tex_sampler;
														
 
															+        void main() {
														
 
															+            color_out = texture(tex_sampler, tex_coord);
														
 
															+        }
														
 
															+    )";
														
 
															+
														
 
															+    constexpr auto nv12_frag_shader_source = R"(
														
 
															+        #version 460
														
 
															+        layout (location = 0) out vec4 color_out;
														
 
															+        in vec2 tex_coord;
														
 
															+        uniform sampler2D luma_tex;
														
 
															+        uniform sampler2D chroma_tex;
														
 
															+        void main() {
														
 
															+            vec3 yuv, rgb;
														
 
															+            yuv.x = texture(luma_tex, tex_coord).x;
														
 
															+            yuv.yz = texture(chroma_tex, tex_coord).xy - vec2(0.5, 0.5);
														
 
															+            rgb = mat3(1, 1, 1,
														
 
															+                       0, -0.39465, 2.03211,
														
 
															+                       1.13983, -0.5806, 0) * yuv;
														
 
															+            color_out = vec4(rgb, 1.0);
														
 
															+        }
														
 
															+    )";
														
 
															+
														
 
															+    constexpr GLuint rect_indices[] = {
														
 
															+            0, 1, 3, // first triangle
														
 
															+            1, 2, 3 // second triangle
														
 
															+    };
														
 
															+
														
 
															+    struct smart_pixel_buffer {
														
 
															+        GLuint id = 0;
														
 
															+        cudaGraphicsResource *res = nullptr;
														
 
															+
														
 
															+        ~smart_pixel_buffer() {
														
 
															+            deallocate();
														
 
															+        }
														
 
															+
														
 
															+        void create(GLenum target, GLenum flags, GLsizeiptr size) {
														
 
															+            if (size == last_size) [[likely]] return;
														
 
															+            deallocate();
														
 
															+            allocate(target, flags, size);
														
 
															+        }
														
 
															+
														
 
															+        void *map_pointer(cudaStream_t stream) {
														
 
															+            void *ptr;
														
 
															+            size_t size;
														
 
															+            CUDA_API_CHECK_P(cudaGraphicsMapResources(1, &res, stream));
														
 
															+            CUDA_API_CHECK_P(cudaGraphicsResourceGetMappedPointer(&ptr, &size, res));
														
 
															+            assert(size == last_size);
														
 
															+            last_stream = stream;
														
 
															+            return ptr;
														
 
															+        }
														
 
															+
														
 
															+        void unmap_pointer() {
														
 
															+            CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &res, last_stream));
														
 
															+        }
														
 
															+
														
 
															+    private:
														
 
															+        GLsizeiptr last_size = 0;
														
 
															+        cudaStream_t last_stream = nullptr;
														
 
															+
														
 
															+        void allocate(GLenum target, GLenum flags, GLsizeiptr size) {
														
 
															+            glGenBuffers(1, &id);
														
 
															+            glBindBuffer(target, id);
														
 
															+            glBufferStorage(target, size, nullptr, flags);
														
 
															+            glBindBuffer(target, 0);
														
 
															+            last_size = size;
														
 
															+
														
 
															+            // register CUDA resource
														
 
															+            if (target == GL_PIXEL_UNPACK_BUFFER) {
														
 
															+                CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
														
 
															+                        &res, id, cudaGraphicsRegisterFlagsWriteDiscard));
														
 
															+            } else {
														
 
															+                assert(target == GL_PIXEL_PACK_BUFFER);
														
 
															+                CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
														
 
															+                        &res, id, cudaGraphicsRegisterFlagsReadOnly));
														
 
															+            }
														
 
															+        }
														
 
															+
														
 
															+        void deallocate() {
														
 
															+            if (id == 0) return;
														
 
															+            glDeleteBuffers(1, &id);
														
 
															+            id = 0;
														
 
															+            last_size = 0;
														
 
															+
														
 
															+            // unregister CUDA resource
														
 
															+            CUDA_API_CHECK(cudaGraphicsUnregisterResource(res));
														
 
															+            res = nullptr;
														
 
															+        }
														
 
															+    };
														
 
															+
														
 
															+}
														
 
															+
														
 
															+using namespace simple_opengl_impl;
														
 
															+
														
 
															+struct simple_render::impl {
														
 
															+    GLuint vao = 0, vbo = 0, ebo = 0;
														
 
															+    GLuint rgb_program = 0, nv12_program = 0;
														
 
															+    GLint luma_tex_loc, chroma_tex_loc;
														
 
															+
														
 
															+    smart_pixel_buffer pbo;
														
 
															+    smart_texture main_tex, extra_tex;
														
 
															+
														
 
															+    impl() {
														
 
															+        create_program();
														
 
															+    }
														
 
															+
														
 
															+    ~impl() {
														
 
															+        glDeleteVertexArrays(1, &vao);
														
 
															+        glDeleteBuffers(1, &vbo);
														
 
															+        glDeleteBuffers(1, &ebo);
														
 
															+    }
														
 
															+
														
 
															+    static void compile_shader(GLuint shader, const char *source, const char *name) {
														
 
															+        glShaderSource(shader, 1, &source, nullptr);
														
 
															+        glCompileShader(shader);
														
 
															+        GLint status, log_length;
														
 
															+        glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
														
 
															+        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
														
 
															+        auto info_log = (GLchar *) malloc(log_length);
														
 
															+        glGetShaderInfoLog(shader, log_length, nullptr, info_log);
														
 
															+        if (status == GL_TRUE) {
														
 
															+            SPDLOG_INFO("Compile {} shader succeeded: {}", name, info_log);
														
 
															+        } else {
														
 
															+            SPDLOG_ERROR("Compile {} shader failed: {}", name, info_log);
														
 
															+            RET_ERROR;
														
 
															+        }
														
 
															+        free(info_log);
														
 
															+    }
														
 
															+
														
 
															+    static void check_program(GLuint program) {
														
 
															+        GLint status, log_length;
														
 
															+        glGetProgramiv(program, GL_LINK_STATUS, &status);
														
 
															+        glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
														
 
															+        auto info_log = (GLchar *) malloc(log_length);
														
 
															+        glGetProgramInfoLog(program, log_length, nullptr, info_log);
														
 
															+        if (status == GL_TRUE) {
														
 
															+            SPDLOG_INFO("Link program succeeded: {}", info_log);
														
 
															+        } else {
														
 
															+            SPDLOG_ERROR("Link program failed: {}", info_log);
														
 
															+            RET_ERROR;
														
 
															+        }
														
 
															+        free(info_log);
														
 
															+    }
														
 
															+
														
 
															+    void create_program() {
														
 
															+        auto simple_vert_shader = glCreateShader(GL_VERTEX_SHADER);
														
 
															+        auto rgb_frag_shader = glCreateShader(GL_FRAGMENT_SHADER);
														
 
															+        auto nv12_frag_shader = glCreateShader(GL_FRAGMENT_SHADER);
														
 
															+        compile_shader(simple_vert_shader, simple_vert_shader_source, "simple_vertex");
														
 
															+        compile_shader(rgb_frag_shader, rgb_frag_shader_source, "rgb_fragment");
														
 
															+        compile_shader(nv12_frag_shader, nv12_frag_shader_source, "nv12_fragment");
														
 
															+
														
 
															+        rgb_program = glCreateProgram();
														
 
															+        glAttachShader(rgb_program, simple_vert_shader);
														
 
															+        glAttachShader(rgb_program, rgb_frag_shader);
														
 
															+        glLinkProgram(rgb_program);
														
 
															+        check_program(rgb_program);
														
 
															+
														
 
															+        nv12_program = glCreateProgram();
														
 
															+        glAttachShader(nv12_program, simple_vert_shader);
														
 
															+        glAttachShader(nv12_program, nv12_frag_shader);
														
 
															+        glLinkProgram(nv12_program);
														
 
															+        check_program(nv12_program);
														
 
															+
														
 
															+        // get texture location
														
 
															+        luma_tex_loc = glGetUniformLocation(nv12_program, "luma_tex");
														
 
															+        chroma_tex_loc = glGetUniformLocation(nv12_program, "chroma_tex");
														
 
															+
														
 
															+        glDeleteShader(simple_vert_shader);
														
 
															+        glDeleteShader(rgb_frag_shader);
														
 
															+        glDeleteShader(nv12_frag_shader);
														
 
															+
														
 
															+        // create buffers
														
 
															+        glGenBuffers(1, &vbo);
														
 
															+        glGenBuffers(1, &ebo);
														
 
															+
														
 
															+        // config vertex buffer
														
 
															+        glBindBuffer(GL_ARRAY_BUFFER, vbo);
														
 
															+        glBufferStorage(GL_ARRAY_BUFFER, 16 * sizeof(GLfloat), nullptr, GL_DYNAMIC_STORAGE_BIT);
														
 
															+
														
 
															+        // fill element buffer
														
 
															+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
														
 
															+        glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(rect_indices), rect_indices, GL_STATIC_DRAW);
														
 
															+
														
 
															+        // config vertex array
														
 
															+        glGenVertexArrays(1, &vao);
														
 
															+        glBindVertexArray(vao);
														
 
															+        glEnableVertexAttribArray(0);
														
 
															+        glEnableVertexAttribArray(1);
														
 
															+        glVertexAttribPointer(0, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) 0);
														
 
															+        glVertexAttribPointer(1, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) (2 * sizeof(GLfloat)));
														
 
															+    }
														
 
															+
														
 
															+    void render(const simple_rect &rect, bool flip_y, bool is_rgb) {
														
 
															+        // bind buffers
														
 
															+        glUseProgram(is_rgb ? rgb_program : nv12_program);
														
 
															+        glBindVertexArray(vao);
														
 
															+        glBindBuffer(GL_ARRAY_BUFFER, vbo);
														
 
															+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
														
 
															+
														
 
															+        // bind textures
														
 
															+        if (is_rgb) {
														
 
															+            glActiveTexture(GL_TEXTURE0 + 0);
														
 
															+            glBindTexture(GL_TEXTURE_2D, main_tex.id);
														
 
															+        } else { // nv12
														
 
															+            glUniform1i(luma_tex_loc, 0);
														
 
															+            glUniform1i(chroma_tex_loc, 1);
														
 
															+            glActiveTexture(GL_TEXTURE0 + 0);
														
 
															+            glBindTexture(GL_TEXTURE_2D, main_tex.id);
														
 
															+            glActiveTexture(GL_TEXTURE0 + 1);
														
 
															+            glBindTexture(GL_TEXTURE_2D, extra_tex.id);
														
 
															+        }
														
 
															+
														
 
															+        // fill vertex buffer
														
 
															+        GLfloat tex_top = flip_y ? 0 : 1;
														
 
															+        GLfloat tex_bottom = flip_y ? 1 : 0;
														
 
															+        GLfloat vertices[] = {
														
 
															+                // 2 for position; 2 for texture
														
 
															+                rect.x + rect.width, rect.y + rect.height, 1, tex_top, // top right
														
 
															+                rect.x + rect.width, rect.y, 1, tex_bottom, // bottom right
														
 
															+                rect.x, rect.y, 0, tex_bottom, // bottom left
														
 
															+                rect.x, rect.y + rect.height, 0, tex_top // top left
														
 
															+        };
														
 
															+        static_assert(sizeof(vertices) == 16 * sizeof(GLfloat));
														
 
															+        glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices);
														
 
															+
														
 
															+        // draw texture
														
 
															+        glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, nullptr);
														
 
															+    }
														
 
															+
														
 
															+    void upload_gpu_mat(const cv::cuda::GpuMat &img, bool is_rgb) {
														
 
															+        // allocate memory if needed
														
 
															+        auto pbo_pitch = img.size().width * img.elemSize() * sizeof(uint8_t);
														
 
															+        auto img_bytes = img.size().height * pbo_pitch;
														
 
															+        pbo.create(GL_PIXEL_PACK_BUFFER, GL_DYNAMIC_STORAGE_BIT, (GLsizeiptr) img_bytes);
														
 
															+
														
 
															+        // copy image to pixel buffer
														
 
															+        auto ptr = pbo.map_pointer(nullptr);
														
 
															+        CUDA_API_CHECK(cudaMemcpy2D(ptr, pbo_pitch, img.cudaPtr(), img.step, pbo_pitch,
														
 
															+                                    img.size().height, cudaMemcpyDeviceToDevice));
														
 
															+        pbo.unmap_pointer();
														
 
															+
														
 
															+        cv::Size real_size;
														
 
															+        if (is_rgb) {
														
 
															+            assert(img.type() == CV_8UC3);
														
 
															+            real_size = img.size();
														
 
															+            main_tex.create(GL_RGBA8, real_size);
														
 
															+        } else { // nv12
														
 
															+            assert(img.type() == CV_8UC1);
														
 
															+            assert((img.rows % 3) == 0);
														
 
															+            assert((img.cols % 2) == 0);
														
 
															+            real_size = {img.cols, img.rows / 3 * 2};
														
 
															+            main_tex.create(GL_R8, real_size);
														
 
															+            extra_tex.create(GL_RG8, {real_size.width >> 1,
														
 
															+                                      real_size.height >> 1});
														
 
															+        }
														
 
															+
														
 
															+        // unpack pbo to texture
														
 
															+        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.id);
														
 
															+        if (is_rgb) {
														
 
															+            glBindTexture(GL_TEXTURE_2D, main_tex.id);
														
 
															+            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, real_size.width, real_size.height,
														
 
															+                            GL_RGB, GL_UNSIGNED_BYTE, nullptr);
														
 
															+            glBindTexture(GL_TEXTURE_2D, 0);
														
 
															+        } else { // nv12
														
 
															+            glBindTexture(GL_TEXTURE_2D, main_tex.id);
														
 
															+            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, main_tex.size().width, main_tex.size().height,
														
 
															+                            GL_RED, GL_UNSIGNED_BYTE, nullptr);
														
 
															+            glBindTexture(GL_TEXTURE_2D, 0);
														
 
															+
														
 
															+            auto chroma_offset = real_size.height * pbo_pitch;
														
 
															+            glBindTexture(GL_TEXTURE_2D, extra_tex.id);
														
 
															+            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, extra_tex.size().width, extra_tex.size().height,
														
 
															+                            GL_RG, GL_UNSIGNED_BYTE, (void *) chroma_offset);
														
 
															+            glBindTexture(GL_TEXTURE_2D, 0);
														
 
															+        }
														
 
															+        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
														
 
															+
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															+simple_render::simple_render()
														
 
															+        : pimpl(std::make_unique<impl>()) {}
														
 
															+
														
 
															+simple_render::~simple_render() = default; // defaulted out of line, in the file that uses impl's members, so unique_ptr<impl> sees a complete type
														
 
															+
														
 
															+// Upload `img` through the implementation's RGB path, then draw it into `rect`.
															+// `flip_y` is forwarded unchanged to the draw call.
															+void simple_render::render_rgb(const cv::cuda::GpuMat &img, const simple_rect &rect, bool flip_y) {
															+    constexpr bool is_rgb = true;
															+    auto &self = *pimpl;
															+    self.upload_gpu_mat(img, is_rgb);
															+    self.render(rect, flip_y, is_rgb);
															+}
														
 
															+
														
 
															+// Upload `img` through the implementation's non-RGB (NV12) path, then draw it
															+// into `rect`. `flip_y` is forwarded unchanged to the draw call.
															+void simple_render::render_nv12(const cv::cuda::GpuMat &img, const simple_rect &rect, bool flip_y) {
															+    constexpr bool is_rgb = false;
															+    auto &self = *pimpl;
															+    self.upload_gpu_mat(img, is_rgb);
															+    self.render(rect, flip_y, is_rgb);
															+}
														
 
															+
														
 
															+// Private bookkeeping for smart_texture. It remembers the format and size of
															+// the storage currently attached to the owner's `id` so that create() can
															+// skip redundant reallocations, and it releases the texture on destruction.
															+struct smart_texture::impl {
															+
															+    smart_texture *q_this = nullptr; // owning object; assigned by smart_texture's constructor
															+    GLenum last_format = 0;          // internal format of the live storage; 0 while nothing is allocated
															+    cv::Size last_size = {};         // size of the live storage; empty while nothing is allocated
															+    smart_pixel_buffer pbo;
															+
															+    // Delete the GL texture together with its owner so the texture name is
															+    // not leaked. deallocate() is a no-op when no texture was ever created,
															+    // so no GL call is made in that case. When a texture does exist, the GL
															+    // context it belongs to must still be current.
															+    ~impl() {
															+        if (q_this != nullptr) deallocate();
															+    }
															+
															+    // Make sure the owner's texture has storage of the given internal
															+    // `format` and `size`. If both match the live storage nothing happens
															+    // (in particular the filter arguments are NOT re-applied); otherwise
															+    // the old texture is deleted and a new one is allocated.
															+    void create(GLenum format, cv::Size size, GLint min_filter, GLint max_filter) {
															+        if (format == last_format && size == last_size) [[likely]] return;
															+        deallocate();
															+        allocate(format, size, min_filter, max_filter);
															+    }
															+
															+    // Generate a new 2D texture, set its filters and give it immutable
															+    // single-level storage. `max_filter` is applied as the magnification
															+    // filter (GL_TEXTURE_MAG_FILTER). The texture is left unbound.
															+    void allocate(GLenum format, cv::Size size,
															+                  GLint min_filter, GLint max_filter) {
															+        glGenTextures(1, &q_this->id);
															+        glBindTexture(GL_TEXTURE_2D, q_this->id);
															+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, min_filter);
															+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, max_filter);
															+        glTexStorage2D(GL_TEXTURE_2D, 1, format, size.width, size.height);
															+        glBindTexture(GL_TEXTURE_2D, 0);
															+        last_format = format;
															+        last_size = size;
															+    }
															+
															+    // Delete the owner's texture, if any, and forget the cached format and
															+    // size so that a later create() always reallocates.
															+    void deallocate() {
															+        if (q_this->id == 0) return;
															+        glDeleteTextures(1, &q_this->id);
															+        q_this->id = 0;
															+        last_format = 0;
															+        last_size = {};
															+    }
															+
															+};
														
 
															+
														
 
															+// Allocate the private state and give it a back-pointer to this object,
															+// through which it reads and writes the public `id` member.
															+smart_texture::smart_texture()
															+        : pimpl(std::make_unique<impl>()) {
															+    impl &state = *pimpl;
															+    state.q_this = this;
															+}
														
 
															+
														
 
															+smart_texture::~smart_texture() = default; // defaulted out of line, after impl is defined, so unique_ptr<impl> can destroy a complete type
														
 
															+
														
 
															+// Public entry point: forwards all arguments unchanged to impl::create().
															+void smart_texture::create(GLenum format, cv::Size size, GLint min_filter, GLint max_filter) {
															+    impl &state = *pimpl;
															+    state.create(format, size, min_filter, max_filter);
															+}
														
 
															+
														
 
															+// Size recorded by the most recent allocation (empty after a deallocation).
															+cv::Size smart_texture::size() const {
															+    const impl &state = *pimpl;
															+    return state.last_size;
															+}
														
--- a/src/simple_opengl.h
+++ b/src/simple_opengl.h
@@ -0,0 +1,54 @@
 
															+#ifndef REMOTEAR3_SIMPLE_OPENGL_H
														
 
															+#define REMOTEAR3_SIMPLE_OPENGL_H
														
 
															+
														
 
															+#include "cuda_helper.hpp"
														
 
															+
														
 
															+#include <opencv2/core/cuda.hpp>
														
 
															+#include <opencv2/core/mat.hpp>
														
 
															+
														
 
															+#include <glad/gl.h>
														
 
															+
														
 
															+#include <memory>
														
 
															+
														
 
															+struct simple_rect { // rectangle handed to simple_render::render_nv12 / render_rgb as the draw target
															+    GLfloat x, y; // origin of the rectangle; NOTE(review): coordinate space is decided by the render code, not visible here
															+    GLfloat width, height; // extent of the rectangle
															+};
														
 
															+
														
 
															+struct smart_texture { // wrapper around one 2D OpenGL texture name, with bookkeeping kept behind a pimpl
															+    GLuint id = 0; // GL texture name; 0 means no texture is currently allocated
															+    // Allocates the private state and points it back at this object.
															+    smart_texture();
															+    // Defined in the implementation file, where impl is a complete type.
															+    ~smart_texture();
															+    // Ensures storage of `format` and `size` exists; a call with the same format and size as the live storage is a no-op (filters are only applied on reallocation). `max_filter` is used as the magnification filter.
															+    void create(GLenum format, cv::Size size,
															+                GLint min_filter = GL_NEAREST, GLint max_filter = GL_NEAREST);
															+    // Size recorded by the most recent allocation; empty when nothing is allocated.
															+    cv::Size size() const;
															+
															+private:
															+    struct impl;
															+    std::unique_ptr<impl> pimpl;
															+};
														
 
															+
														
 
															+class simple_render { // uploads a GPU image and draws it into a rectangle; all state lives behind a pimpl
															+public:
															+    simple_render();
															+    // Defined in the implementation file, where impl is a complete type.
															+    ~simple_render();
															+    // Draws an NV12 frame; the upload code asserts `img` is CV_8UC1 with rows divisible by 3 and an even column count (picture height = rows / 3 * 2).
															+    void render_nv12(const cv::cuda::GpuMat &img,
															+                     const simple_rect &rect,
															+                     bool flip_y = false);
															+    // Draws an RGB frame; the upload code asserts `img` is CV_8UC3. Note that flip_y defaults to true here but to false for render_nv12.
															+    void render_rgb(const cv::cuda::GpuMat &img,
															+                    const simple_rect &rect,
															+                    bool flip_y = true);
															+
															+private:
															+    struct impl;
															+    std::unique_ptr<impl> pimpl;
															+};
														
 
															+
														
 
															+#endif //REMOTEAR3_SIMPLE_OPENGL_H
														
--- a/src/third_party/rs.c
+++ b/src/third_party/rs.c
@@ -0,0 +1,998 @@
 
															+/*#define PROFILE*/
														
 
															+/*
														
 
															+ * fec.c -- forward error correction based on Vandermonde matrices
														
 
															+ * 980624
														
 
															+ * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
														
 
															+ * (C) 2001 Alain Knaff (alain@knaff.lu)
														
 
															+ *
														
 
															+ * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
														
 
															+ * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
														
 
															+ * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
														
 
															+ *
														
 
															+ * Redistribution and use in source and binary forms, with or without
														
 
															+ * modification, are permitted provided that the following conditions
														
 
															+ * are met:
														
 
															+ *
														
 
															+ * 1. Redistributions of source code must retain the above copyright
														
 
															+ *    notice, this list of conditions and the following disclaimer.
														
 
															+ * 2. Redistributions in binary form must reproduce the above
														
 
															+ *    copyright notice, this list of conditions and the following
														
 
															+ *    disclaimer in the documentation and/or other materials
														
 
															+ *    provided with the distribution.
														
 
															+ *
														
 
															+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
														
 
															+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
														
 
															+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
														
 
															+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
														
 
															+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
														
 
															+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
														
 
															+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
														
 
															+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
														
 
															+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
														
 
															+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
														
 
															+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
														
 
															+ * OF SUCH DAMAGE.
														
 
															+ *
														
 
															+ * Reimplement by Jannson (20161018): compatible for golang version of https://github.com/klauspost/reedsolomon
														
 
															+ */
														
 
															+
														
 
															+/*
														
 
															+ * The following parameter defines how many bits are used for
														
 
															+ * field elements. The code supports any value from 2 to 16
														
 
															+ * but fastest operation is achieved with 8 bit elements
														
 
															+ * This is the only parameter you may want to change.
														
 
															+ */
														
 
															+#define GF_BITS  8  /* code over GF(2**GF_BITS) - change to suit */
														
 
															+
														
 
															+#include <stdio.h>
														
 
															+#include <stdlib.h>
														
 
															+#include <string.h>
														
 
															+
														
 
															+#include <assert.h>
														
 
															+#include "rs.h"
														
 
															+
														
 
															+/*
														
 
															+ * stuff used for testing purposes only
														
 
															+ */
														
 
															+
														
 
															+#ifdef  TEST
														
 
															+#define DEB(x)
														
 
															+#define DDB(x) x
														
 
															+#define DEBUG   0   /* minimal debugging */
														
 
															+
														
 
															+#include <sys/time.h>
														
 
															+#define DIFF_T(a,b) \
														
 
															+    (1+ 1000000*(a.tv_sec - b.tv_sec) + (a.tv_usec - b.tv_usec) )
														
 
															+
														
 
															+#define TICK(t) \
														
 
															+    {struct timeval x ; \
														
 
															+    gettimeofday(&x, NULL) ; \
														
 
															+    t = x.tv_usec + 1000000* (x.tv_sec & 0xff ) ; \
														
 
															+    }
														
 
															+#define TOCK(t) \
														
 
															+    { u_long t1 ; TICK(t1) ; \
														
 
															+      if (t1 < t) t = 256000000 + t1 - t ; \
														
 
															+      else t = t1 - t ; \
														
 
															+      if (t == 0) t = 1 ;}
														
 
															+
														
 
															+u_long ticks[10];   /* vars for timekeeping */
														
 
															+#else
														
 
															+#define DEB(x)
														
 
															+#define DDB(x)
														
 
															+#define TICK(x)
														
 
															+#define TOCK(x)
														
 
															+#endif /* TEST */
														
 
															+
														
 
															+/*
														
 
															+ * You should not need to change anything beyond this point.
														
 
															+ * The first part of the file implements linear algebra in GF.
														
 
															+ *
														
 
															+ * gf is the type used to store an element of the Galois Field.
														
 
															+ * Must contain at least GF_BITS bits.
														
 
															+ *
														
 
															+ * Note: unsigned char will work up to GF(256) but int seems to run
														
 
															+ * faster on the Pentium. We use int whenever have to deal with an
														
 
															+ * index, since they are generally faster.
														
 
															+ */
														
 
															+/*
														
 
															+ * AK: Udpcast only uses GF_BITS=8. Remove other possibilities
														
 
															+ */
														
 
															+#if (GF_BITS != 8)
														
 
															+#error "GF_BITS must be 8"
														
 
															+#endif
														
 
															+typedef unsigned char gf;
														
 
															+
														
 
															+#define GF_SIZE ((1 << GF_BITS) - 1)    /* powers of \alpha */
														
 
															+
														
 
															+/*
														
 
															+ * Primitive polynomials - see Lin & Costello, Appendix A,
														
 
															+ * and  Lee & Messerschmitt, p. 453.
														
 
															+ */
														
 
															+static char *allPp[] = {    /* GF_BITS  polynomial (string lists coefficients, x^0 first) */
															+        NULL,           /*  0   no code         */
															+        NULL,           /*  1   no code         */
															+        "111",          /*  2   1+x+x^2         */
															+        "1101",         /*  3   1+x+x^3         */
															+        "11001",            /*  4   1+x+x^4         */
															+        "101001",           /*  5   1+x^2+x^5       */
															+        "1100001",          /*  6   1+x+x^6         */
															+        "10010001",         /*  7   1 + x^3 + x^7       */
															+        "101110001",        /*  8   1+x^2+x^3+x^4+x^8 (the entry selected here: GF_BITS is fixed at 8) */
															+        "1000100001",       /*  9   1+x^4+x^9       */
															+        "10010000001",      /* 10   1+x^3+x^10      */
															+        "101000000001",     /* 11   1+x^2+x^11      */
															+        "1100101000001",        /* 12   1+x+x^4+x^6+x^12    */
															+        "11011000000001",       /* 13   1+x+x^3+x^4+x^13    */
															+        "110000100010001",      /* 14   1+x+x^6+x^10+x^14   */
															+        "1100000000000001",     /* 15   1+x+x^15        */
															+        "11010000000010001"     /* 16   1+x+x^3+x^12+x^16   */
															+};
														
 
															+
														
 
															+
														
 
															+/*
														
 
															+ * To speed up computations, we have tables for logarithm, exponent
														
 
															+ * and inverse of a number. If GF_BITS <= 8, we use a table for
														
 
															+ * multiplication as well (it takes 64K, no big deal even on a PDA,
														
 
															+ * especially because it can be pre-initialized and put into a ROM!),
														
 
															+ * otherwise we use a table of logarithms.
														
 
															+ * In any case the macro gf_mul(x,y) takes care of multiplications.
														
 
															+ */
														
 
															+
														
 
															+static gf gf_exp[2*GF_SIZE];    /* index->poly form conversion table    */
														
 
															+static int gf_log[GF_SIZE + 1]; /* Poly->index form conversion table    */
														
 
															+static gf inverse[GF_SIZE+1];   /* inverse of field elem.       */
														
 
															+/* inv[\alpha**i]=\alpha**(GF_SIZE-i) */
														
 
															+
														
 
															+/*
														
 
															+ * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
														
 
															+ * without a slow divide.
														
 
															+ */
														
 
															+/*
															+ * Reduce x modulo GF_SIZE by repeated subtract-and-fold instead of a
															+ * division. Values already below GF_SIZE are returned unchanged
															+ * (converted to gf).
															+ */
															+static inline gf
															+modnn(int x)
															+{
															+    for (;;) {
															+        if (x < GF_SIZE)
															+            return x;
															+        x -= GF_SIZE;
															+        x = (x >> GF_BITS) + (x & GF_SIZE);
															+    }
															+}
														
 
															+
														
 
															+#define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}
														
 
															+
														
 
															+/*
														
 
															+ * gf_mul(x,y) multiplies two numbers. If GF_BITS<=8, it is much
														
 
															+ * faster to use a multiplication table.
														
 
															+ *
														
 
															+ * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
														
 
															+ * many numbers by the same constant. In this case the first
														
 
															+ * call sets the constant, and others perform the multiplications.
														
 
															+ * A value related to the multiplication is held in a local variable
														
 
															+ * declared with USE_GF_MULC . See usage in addmul1().
														
 
															+ */
														
 
															+/* 256 x 256 product table, 16-byte aligned; filled by init_mul_table(). */
															+#ifdef _MSC_VER
															+__declspec(align(16))
															+#else
															+_Alignas(16)
															+#endif
															+static gf gf_mul_table[(GF_SIZE + 1) * (GF_SIZE + 1)];
															+
															+/*
															+ * Product of x and y: row x, column y of the table. Both arguments are
															+ * parenthesized so that compound expressions (e.g. a ^ b) index the
															+ * table correctly instead of being split by operator precedence.
															+ */
															+#define gf_mul(x,y) gf_mul_table[((x)<<8)+(y)]
															+
															+/*
															+ * Multiply-by-constant helpers: USE_GF_MULC declares the local row
															+ * pointer, GF_MULC0(c) points it at row c of the table, and
															+ * GF_ADDMULC / GF_MULC then xor-accumulate or store c * x.
															+ */
															+#define USE_GF_MULC register gf * __gf_mulc_
															+#define GF_MULC0(c) __gf_mulc_ = &gf_mul_table[(c)<<8]
															+#define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
															+#define GF_MULC(dst, x) dst = __gf_mulc_[x]
														
 
															+
														
 
															+/*
															+ * Fill gf_mul_table from the log/exp tables, then force every product
															+ * involving zero to zero.
															+ */
															+static void
															+init_mul_table(void)
															+{
															+    int row, col;
															+
															+    /* entry (row, col) = exp(log(row) + log(col)), exponent reduced by modnn */
															+    for (row = 0; row <= GF_SIZE; row++) {
															+        gf *line = &gf_mul_table[row << 8];
															+
															+        for (col = 0; col <= GF_SIZE; col++)
															+            line[col] = gf_exp[modnn(gf_log[row] + gf_log[col])];
															+    }
															+
															+    /* clear column 0 and row 0 */
															+    for (col = 0; col <= GF_SIZE; col++) {
															+        gf_mul_table[col << 8] = 0;
															+        gf_mul_table[col] = 0;
															+    }
															+}
														
 
															+
														
 
															+/*
														
 
															+ * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
														
 
															+ * Lookup tables:
														
 
															+ *     index->polynomial form       gf_exp[] contains j= \alpha^i;
														
 
															+ *     polynomial form -> index form    gf_log[ j = \alpha^i ] = i
														
 
															+ * \alpha=x is the primitive element of GF(2^m)
														
 
															+ *
														
 
															+ * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
														
 
															+ * multiplication of two numbers can be resolved without calling modnn
														
 
															+ */
														
 
															+
														
 
															+
														
 
															+
														
 
															+/*
															+ * Initialize the data structures used for computations in GF:
															+ * fills gf_exp[], gf_log[] and inverse[] from the primitive
															+ * polynomial allPp[GF_BITS]. init_mul_table() reads gf_exp[] and
															+ * gf_log[], so this has to run first.
															+ */
															+static void
															+generate_gf(void)
															+{
															+    int i;
															+    gf mask;
															+    char *Pp =  allPp[GF_BITS] ;
															+
															+    mask = 1;   /* x ** 0 = 1 */
															+    gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
															+    /*
															+     * first, generate the (polynomial representation of) powers of \alpha,
															+     * which are stored in gf_exp[i] = \alpha ** i .
															+     * At the same time build gf_log[gf_exp[i]] = i .
															+     * The first GF_BITS powers are simply bits shifted to the left.
															+     */
															+    for (i = 0; i < GF_BITS; i++, mask <<= 1 ) {
															+        gf_exp[i] = mask;
															+        gf_log[gf_exp[i]] = i;
															+        /*
															+         * If Pp[i] == 1 then \alpha ** i occurs in poly-repr
															+         * gf_exp[GF_BITS] = \alpha ** GF_BITS
															+         */
															+        if ( Pp[i] == '1' )
															+            gf_exp[GF_BITS] ^= mask;
															+    }
															+    /*
															+     * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so we can
															+     * also record its logarithm.
															+     */
															+    gf_log[gf_exp[GF_BITS]] = GF_BITS;
															+    /*
															+     * Poly-repr of \alpha ** (i+1) is given by poly-repr of
															+     * \alpha ** i shifted left one-bit and accounting for any
															+     * \alpha ** GF_BITS term that may occur when poly-repr of
															+     * \alpha ** i is shifted.
															+     */
															+    mask = 1 << (GF_BITS - 1 ) ;
															+    for (i = GF_BITS + 1; i < GF_SIZE; i++) {
															+        if (gf_exp[i - 1] >= mask)
															+            gf_exp[i] = gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
															+        else
															+            gf_exp[i] = gf_exp[i - 1] << 1;
															+        gf_log[gf_exp[i]] = i;
															+    }
															+    /*
															+     * log(0) is not defined, so use a special value
															+     */
															+    gf_log[0] = GF_SIZE ;
															+    /* set the extended gf_exp values for fast multiply */
															+    for (i = 0 ; i < GF_SIZE ; i++)
															+        gf_exp[i + GF_SIZE] = gf_exp[i] ;
															+
															+    /*
															+     * again special cases. 0 has no inverse. This used to
															+     * be initialized to GF_SIZE, but it should make no difference
															+     * since no one is supposed to read from here.
															+     */
															+    inverse[0] = 0 ;
															+    inverse[1] = 1;
															+    for (i=2; i<=GF_SIZE; i++)
															+        inverse[i] = gf_exp[GF_SIZE-gf_log[i]];
															+}
														
 
															+
														
 
															+/*
														
 
															+ * Various linear algebra operations that i use often.
														
 
															+ */
														
 
															+
														
 
															+/*
														
 
															+ * addmul() computes dst[] = dst[] + c * src[]
														
 
															+ * This is used often, so better optimize it! Currently the loop is
														
 
															+ * unrolled 16 times, a good value for 486 and pentium-class machines.
														
 
															+ * The case c=0 is also optimized, whereas c=1 is not. These
														
 
															+ * calls are infrequent in my typical apps so I did not bother.
														
 
															+ *
														
 
															+ * Note that gcc on
														
 
															+ */
														
 
															+#if 0
														
 
															+#define addmul(dst, src, c, sz) \
														
 
															+    if (c != 0) addmul1(dst, src, c, sz)
														
 
															+#endif
														
 
															+
														
 
															+
														
 
															+
														
 
															+#define UNROLL 16 /* 1, 4, 8, 16 */
														
 
															+/*
															+ * dst1[i] ^= c * src1[i] for every i in [0, sz), using the row of
															+ * gf_mul_table selected by c.
															+ *
															+ * The bulk of the buffer is processed UNROLL bytes per iteration and the
															+ * remainder one byte at a time. Iteration is index-based so that no
															+ * pointer outside the buffers is ever formed, even when sz is smaller
															+ * than UNROLL (forming such a pointer is undefined behaviour in C).
															+ * A non-positive sz does nothing.
															+ */
															+static void
															+slow_addmul1(gf *dst1, gf *src1, gf c, int sz)
															+{
															+    USE_GF_MULC ;
															+    int i = 0 ;
															+
															+    GF_MULC0(c) ;
															+
															+#if (UNROLL > 1) /* unrolling by 8/16 is quite effective on the pentium */
															+    for (; i <= sz - UNROLL ; i += UNROLL ) {
															+        gf *dst = dst1 + i ;
															+        gf *src = src1 + i ;
															+
															+        GF_ADDMULC( dst[0] , src[0] );
															+        GF_ADDMULC( dst[1] , src[1] );
															+        GF_ADDMULC( dst[2] , src[2] );
															+        GF_ADDMULC( dst[3] , src[3] );
															+#if (UNROLL > 4)
															+        GF_ADDMULC( dst[4] , src[4] );
															+        GF_ADDMULC( dst[5] , src[5] );
															+        GF_ADDMULC( dst[6] , src[6] );
															+        GF_ADDMULC( dst[7] , src[7] );
															+#endif
															+#if (UNROLL > 8)
															+        GF_ADDMULC( dst[8] , src[8] );
															+        GF_ADDMULC( dst[9] , src[9] );
															+        GF_ADDMULC( dst[10] , src[10] );
															+        GF_ADDMULC( dst[11] , src[11] );
															+        GF_ADDMULC( dst[12] , src[12] );
															+        GF_ADDMULC( dst[13] , src[13] );
															+        GF_ADDMULC( dst[14] , src[14] );
															+        GF_ADDMULC( dst[15] , src[15] );
															+#endif
															+    }
															+#endif
															+    for (; i < sz ; i++ )        /* final components */
															+        GF_ADDMULC( dst1[i] , src1[i] );
															+}
														
 
															+
														
 
															+# define addmul1 slow_addmul1
														
 
															+
														
 
															+/*
															+ * dst[] ^= c * src[] over sz bytes. A zero multiplier would add nothing,
															+ * so that case returns without touching dst.
															+ */
															+static void addmul(gf *dst, gf *src, gf c, int sz) {
															+    if (c == 0)
															+        return;
															+    addmul1(dst, src, c, sz);
															+}
														
 
															+
														
 
															+/*
														
 
															+ * mul() computes dst[] = c * src[]
														
 
															+ * This is used often, so better optimize it! Currently the loop is
														
 
															+ * unrolled 16 times, a good value for 486 and pentium-class machines.
														
 
															+ * The case c=0 is also optimized, whereas c=1 is not. These
														
 
															+ * calls are infrequent in my typical apps so I did not bother.
														
 
															+ *
														
 
															+ * Note that gcc on
														
 
															+ */
														
 
															+#if 0
															+/* Disabled macro form of mul(): for c == 0 the destination is cleared over its full length sz. */
															+#define mul(dst, src, c, sz) \
															+    do { if (c != 0) mul1(dst, src, c, sz); else memset(dst, 0, sz); } while(0)
															+#endif
														
 
															+
														
 
															+#define UNROLL 16 /* 1, 4, 8, 16 */
														
 
															+static void
														
 
															+slow_mul1(gf *dst1, gf *src1, gf c, int sz)
														
 
															+{
														
 
															+    USE_GF_MULC ;
														
 
															+    register gf *dst = dst1, *src = src1 ;
														
 
															+    gf *lim = &dst[sz - UNROLL + 1] ;
														
 
															+
														
 
															+    GF_MULC0(c) ;
														
 
															+
														
 
															+#if (UNROLL > 1) /* unrolling by 8/16 is quite effective on the pentium */
														
 
															+    for (; dst < lim ; dst += UNROLL, src += UNROLL ) {
														
 
															+        GF_MULC( dst[0] , src[0] );
														
 
															+        GF_MULC( dst[1] , src[1] );
														
 
															+        GF_MULC( dst[2] , src[2] );
														
 
															+        GF_MULC( dst[3] , src[3] );
														
 
															+#if (UNROLL > 4)
														
 
															+        GF_MULC( dst[4] , src[4] );
														
 
															+        GF_MULC( dst[5] , src[5] );
														
 
															+        GF_MULC( dst[6] , src[6] );
														
 
															+        GF_MULC( dst[7] , src[7] );
														
 
															+#endif
														
 
															+#if (UNROLL > 8)
														
 
															+        GF_MULC( dst[8] , src[8] );
														
 
															+        GF_MULC( dst[9] , src[9] );
														
 
															+        GF_MULC( dst[10] , src[10] );
														
 
															+        GF_MULC( dst[11] , src[11] );
														
 
															+        GF_MULC( dst[12] , src[12] );
														
 
															+        GF_MULC( dst[13] , src[13] );
														
 
															+        GF_MULC( dst[14] , src[14] );
														
 
															+        GF_MULC( dst[15] , src[15] );
														
 
															+#endif
														
 
															+    }
														
 
															+#endif
														
 
															+    lim += UNROLL - 1 ;
														
 
															+    for (; dst < lim; dst++, src++ )        /* final components */
														
 
															+        GF_MULC( *dst , *src );
														
 
															+}
														
 
															+
														
 
															+# define mul1 slow_mul1
														
 
															+
														
 
															+static inline void mul(gf *dst, gf *src, gf c, int sz) {
														
 
															+    /*fprintf(stderr, "%p = %02x * %p\n", dst, c, src);*/
														
 
															+    if (c != 0) mul1(dst, src, c, sz); else memset(dst, 0, c);
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+ * invert_mat() takes a matrix and produces its inverse
														
 
															+ * k is the size of the matrix.
														
 
															+ * (Gauss-Jordan, adapted from Numerical Recipes in C)
														
 
															+ * Return non-zero if singular.
														
 
															+ */
														
 
															+DEB( int pivloops=0; int pivswaps=0 ; /* diagnostic */)
														
 
															+static int
														
 
															+invert_mat(gf *src, int k)
														
 
															+{
														
 
															+    gf c, *p ;
														
 
															+    int irow, icol, row, col, i, ix ;
														
 
															+
														
 
															+    int error = 1 ;
														
 
															+    int *indxc = malloc(k*sizeof(int));
														
 
															+    int *indxr = malloc(k*sizeof(int));
														
 
															+    int *ipiv = malloc(k*sizeof(int));
														
 
															+    gf *id_row = malloc(k*sizeof(gf));
														
 
															+//    int indxc[k];
														
 
															+//    int indxr[k];
														
 
															+//    int ipiv[k];
														
 
															+//    gf id_row[k];
														
 
															+
														
 
															+    memset(id_row, 0, k*sizeof(gf));
														
 
															+    DEB( pivloops=0; pivswaps=0 ; /* diagnostic */ )
														
 
															+    /*
														
 
															+     * ipiv marks elements already used as pivots.
														
 
															+     */
														
 
															+    for (i = 0; i < k ; i++)
														
 
															+        ipiv[i] = 0 ;
														
 
															+
														
 
															+    for (col = 0; col < k ; col++) {
														
 
															+        gf *pivot_row ;
														
 
															+        /*
														
 
															+         * Zeroing column 'col', look for a non-zero element.
														
 
															+         * First try on the diagonal, if it fails, look elsewhere.
														
 
															+         */
														
 
															+        irow = icol = -1 ;
														
 
															+        if (ipiv[col] != 1 && src[col*k + col] != 0) {
														
 
															+            irow = col ;
														
 
															+            icol = col ;
														
 
															+            goto found_piv ;
														
 
															+        }
														
 
															+        for (row = 0 ; row < k ; row++) {
														
 
															+            if (ipiv[row] != 1) {
														
 
															+                for (ix = 0 ; ix < k ; ix++) {
														
 
															+                    DEB( pivloops++ ; )
														
 
															+                    if (ipiv[ix] == 0) {
														
 
															+                        if (src[row*k + ix] != 0) {
														
 
															+                            irow = row ;
														
 
															+                            icol = ix ;
														
 
															+                            goto found_piv ;
														
 
															+                        }
														
 
															+                    } else if (ipiv[ix] > 1) {
														
 
															+                        fprintf(stderr, "singular matrix\n");
														
 
															+                        goto fail ;
														
 
															+                    }
														
 
															+                }
														
 
															+            }
														
 
															+        }
														
 
															+        if (icol == -1) {
														
 
															+            fprintf(stderr, "XXX pivot not found!\n");
														
 
															+            goto fail ;
														
 
															+        }
														
 
															+        found_piv:
														
 
															+        ++(ipiv[icol]) ;
														
 
															+        /*
														
 
															+         * swap rows irow and icol, so afterwards the diagonal
														
 
															+         * element will be correct. Rarely done, not worth
														
 
															+         * optimizing.
														
 
															+         */
														
 
															+        if (irow != icol) {
														
 
															+            for (ix = 0 ; ix < k ; ix++ ) {
														
 
															+                SWAP( src[irow*k + ix], src[icol*k + ix], gf) ;
														
 
															+            }
														
 
															+        }
														
 
															+        indxr[col] = irow ;
														
 
															+        indxc[col] = icol ;
														
 
															+        pivot_row = &src[icol*k] ;
														
 
															+        c = pivot_row[icol] ;
														
 
															+        if (c == 0) {
														
 
															+            fprintf(stderr, "singular matrix 2\n");
														
 
															+            goto fail ;
														
 
															+        }
														
 
															+        if (c != 1 ) { /* otherwhise this is a NOP */
														
 
															+            /*
														
 
															+             * this is done often , but optimizing is not so
														
 
															+             * fruitful, at least in the obvious ways (unrolling)
														
 
															+             */
														
 
															+            DEB( pivswaps++ ; )
														
 
															+            c = inverse[ c ] ;
														
 
															+            pivot_row[icol] = 1 ;
														
 
															+            for (ix = 0 ; ix < k ; ix++ )
														
 
															+                pivot_row[ix] = gf_mul(c, pivot_row[ix] );
														
 
															+        }
														
 
															+        /*
														
 
															+         * from all rows, remove multiples of the selected row
														
 
															+         * to zero the relevant entry (in fact, the entry is not zero
														
 
															+         * because we know it must be zero).
														
 
															+         * (Here, if we know that the pivot_row is the identity,
														
 
															+         * we can optimize the addmul).
														
 
															+         */
														
 
															+        id_row[icol] = 1;
														
 
															+        if (memcmp(pivot_row, id_row, k*sizeof(gf)) != 0) {
														
 
															+            for (p = src, ix = 0 ; ix < k ; ix++, p += k ) {
														
 
															+                if (ix != icol) {
														
 
															+                    c = p[icol] ;
														
 
															+                    p[icol] = 0 ;
														
 
															+                    addmul(p, pivot_row, c, k );
														
 
															+                }
														
 
															+            }
														
 
															+        }
														
 
															+        id_row[icol] = 0;
														
 
															+    } /* done all columns */
														
 
															+    for (col = k-1 ; col >= 0 ; col-- ) {
														
 
															+        if (indxr[col] <0 || indxr[col] >= k)
														
 
															+            fprintf(stderr, "AARGH, indxr[col] %d\n", indxr[col]);
														
 
															+        else if (indxc[col] <0 || indxc[col] >= k)
														
 
															+            fprintf(stderr, "AARGH, indxc[col] %d\n", indxc[col]);
														
 
															+        else
														
 
															+        if (indxr[col] != indxc[col] ) {
														
 
															+            for (row = 0 ; row < k ; row++ ) {
														
 
															+                SWAP( src[row*k + indxr[col]], src[row*k + indxc[col]], gf) ;
														
 
															+            }
														
 
															+        }
														
 
															+    }
														
 
															+    error = 0 ;
														
 
															+    fail:
														
 
															+    free(indxc);
														
 
															+    free(indxr);
														
 
															+    free(ipiv);
														
 
															+    free(id_row);
														
 
															+    return error ;
														
 
															+}
														
 
															+
														
 
															+static int fec_initialized = 0 ;
														
 
															+
														
 
															+void fec_init(void)
														
 
															+{
														
 
															+    TICK(ticks[0]);
														
 
															+    generate_gf();
														
 
															+    TOCK(ticks[0]);
														
 
															+    DDB(fprintf(stderr, "generate_gf took %ldus\n", ticks[0]);)
														
 
															+    TICK(ticks[0]);
														
 
															+    init_mul_table();
														
 
															+    TOCK(ticks[0]);
														
 
															+    DDB(fprintf(stderr, "init_mul_table took %ldus\n", ticks[0]);)
														
 
															+    fec_initialized = 1 ;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+#ifdef PROFILE
														
 
															+#ifdef __x86_64__
														
 
															+static long long rdtsc(void)
														
 
															+{
														
 
															+    unsigned long low, hi;
														
 
															+    asm volatile ("rdtsc" : "=d" (hi), "=a" (low));
														
 
															+    return ( (((long long)hi) << 32) | ((long long) low));
														
 
															+}
														
 
															+#elif __arm__
														
 
															+static long long rdtsc(void)
														
 
															+{
														
 
															+    u64 val;
														
 
															+    asm volatile("mrs %0, cntvct_el0" : "=r" (val));
														
 
															+    return val;
														
 
															+}
														
 
															+#endif
														
 
															+
														
 
															+void print_matrix1(gf* matrix, int nrows, int ncols) {
														
 
															+    int i, j;
														
 
															+    printf("matrix (%d,%d):\n", nrows, ncols);
														
 
															+    for(i = 0; i < nrows; i++) {
														
 
															+        for(j = 0; j < ncols; j++) {
														
 
															+            printf("%6d ", matrix[i*ncols + j]);
														
 
															+        }
														
 
															+        printf("\n");
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+void print_matrix2(gf** matrix, int nrows, int ncols) {
														
 
															+    int i, j;
														
 
															+    printf("matrix (%d,%d):\n", nrows, ncols);
														
 
															+    for(i = 0; i < nrows; i++) {
														
 
															+        for(j = 0; j < ncols; j++) {
														
 
															+            printf("%6d ", matrix[i][j]);
														
 
															+        }
														
 
															+        printf("\n");
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+#endif
														
 
															+
														
 
															+/* y = a**n */
														
 
															+static gf galExp(gf a, gf n) {
														
 
															+    int logA;
														
 
															+    int logResult;
														
 
															+    if(0 == n) {
														
 
															+        return 1;
														
 
															+    }
														
 
															+    if(0 == a) {
														
 
															+        return 0;
														
 
															+    }
														
 
															+    logA = gf_log[a];
														
 
															+    logResult = logA * n;
														
 
															+    while(logResult >= 255) {
														
 
															+        logResult -= 255;
														
 
															+    }
														
 
															+
														
 
															+    return gf_exp[logResult];
														
 
															+}
														
 
															+
														
 
															+static inline gf galMultiply(gf a, gf b) {
														
 
															+    return gf_mul_table[ ((int)a << 8) + (int)b ];
														
 
															+}
														
 
															+
														
 
															+static gf* vandermonde(int nrows, int ncols) {
														
 
															+    int row, col, ptr;
														
 
															+    gf* matrix = (gf*)RS_MALLOC(nrows * ncols);
														
 
															+    if(NULL != matrix) {
														
 
															+        ptr = 0;
														
 
															+        for(row = 0; row < nrows; row++) {
														
 
															+            for(col = 0; col < ncols; col++) {
														
 
															+                matrix[ptr++] = galExp((gf)row, (gf)col);
														
 
															+            }
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    return matrix;
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+ * Not check for input params
														
 
															+ * */
														
 
															+static gf* sub_matrix(gf* matrix, int rmin, int cmin, int rmax, int cmax,  int nrows, int ncols) {
														
 
															+    int i, j, ptr = 0;
														
 
															+    gf* new_m = (gf*)RS_MALLOC( (rmax-rmin) * (cmax-cmin) );
														
 
															+    if(NULL != new_m) {
														
 
															+        for(i = rmin; i < rmax; i++) {
														
 
															+            for(j = cmin; j < cmax; j++) {
														
 
															+                new_m[ptr++] = matrix[i*ncols + j];
														
 
															+            }
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    return new_m;
														
 
															+}
														
 
															+
														
 
															+/* y = a.dot(b) */
														
 
															+static gf* multiply1(gf *a, int ar, int ac, gf *b, int br, int bc) {
														
 
															+    gf *new_m, tg;
														
 
															+    int r, c, i, ptr = 0;
														
 
															+
														
 
															+    assert(ac == br);
														
 
															+    new_m = (gf*)RS_CALLOC(1, ar*bc);
														
 
															+    if(NULL != new_m) {
														
 
															+
														
 
															+        /* this multiply is slow */
														
 
															+        for(r = 0; r < ar; r++) {
														
 
															+            for(c = 0; c < bc; c++) {
														
 
															+                tg = 0;
														
 
															+                for(i = 0; i < ac; i++) {
														
 
															+                    /* tg ^= gf_mul_table[ ((int)a[r*ac+i] << 8) + (int)b[i*bc+c] ]; */
														
 
															+                    tg ^= galMultiply(a[r*ac+i], b[i*bc+c]);
														
 
															+                }
														
 
															+
														
 
															+                new_m[ptr++] = tg;
														
 
															+            }
														
 
															+        }
														
 
															+
														
 
															+    }
														
 
															+
														
 
															+    return new_m;
														
 
															+}
														
 
															+
														
 
															+/* copy from golang rs version */
														
 
															+static inline int code_some_shards(gf* matrixRows, gf** inputs, gf** outputs,
														
 
															+                                   int dataShards, int outputCount, int byteCount) {
														
 
															+    gf* in;
														
 
															+    int iRow, c;
														
 
															+    for(c = 0; c < dataShards; c++) {
														
 
															+        in = inputs[c];
														
 
															+        for(iRow = 0; iRow < outputCount; iRow++) {
														
 
															+            if(0 == c) {
														
 
															+                mul(outputs[iRow], in, matrixRows[iRow*dataShards+c], byteCount);
														
 
															+            } else {
														
 
															+                addmul(outputs[iRow], in, matrixRows[iRow*dataShards+c], byteCount);
														
 
															+            }
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    return 0;
														
 
															+}
														
 
															+
														
 
															+reed_solomon* reed_solomon_new(int data_shards, int parity_shards) {
														
 
															+    gf* vm = NULL;
														
 
															+    gf* top = NULL;
														
 
															+    int err = 0;
														
 
															+    reed_solomon* rs = NULL;
														
 
															+
														
 
															+    /* MUST use fec_init once time first */
														
 
															+    assert(fec_initialized);
														
 
															+
														
 
															+    do {
														
 
															+        rs = (reed_solomon*) RS_MALLOC(sizeof(reed_solomon));
														
 
															+        if(NULL == rs) {
														
 
															+            return NULL;
														
 
															+        }
														
 
															+        rs->data_shards = data_shards;
														
 
															+        rs->parity_shards = parity_shards;
														
 
															+        rs->shards = (data_shards + parity_shards);
														
 
															+        rs->m = NULL;
														
 
															+        rs->parity = NULL;
														
 
															+
														
 
															+        if(rs->shards > DATA_SHARDS_MAX || data_shards <= 0 || parity_shards <= 0) {
														
 
															+            err = 1;
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        vm = vandermonde(rs->shards, rs->data_shards);
														
 
															+        if(NULL == vm) {
														
 
															+            err = 2;
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        top = sub_matrix(vm, 0, 0, data_shards, data_shards, rs->shards, data_shards);
														
 
															+        if(NULL == top) {
														
 
															+            err = 3;
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        err = invert_mat(top, data_shards);
														
 
															+        assert(0 == err);
														
 
															+
														
 
															+        rs->m = multiply1(vm, rs->shards, data_shards, top, data_shards, data_shards);
														
 
															+        if(NULL == rs->m) {
														
 
															+            err = 4;
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        rs->parity = sub_matrix(rs->m, data_shards, 0, rs->shards, data_shards, rs->shards, data_shards);
														
 
															+        if(NULL == rs->parity) {
														
 
															+            err = 5;
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        RS_FREE(vm);
														
 
															+        RS_FREE(top);
														
 
															+        vm = NULL;
														
 
															+        top = NULL;
														
 
															+        return rs;
														
 
															+
														
 
															+    } while(0);
														
 
															+
														
 
															+    fprintf(stderr, "err=%d\n", err);
														
 
															+    if(NULL != vm) {
														
 
															+        RS_FREE(vm);
														
 
															+    }
														
 
															+    if(NULL != top) {
														
 
															+        RS_FREE(top);
														
 
															+    }
														
 
															+    if(NULL != rs) {
														
 
															+        if(NULL != rs->m) {
														
 
															+            RS_FREE(rs->m);
														
 
															+        }
														
 
															+        if(NULL != rs->parity) {
														
 
															+            RS_FREE(rs->parity);
														
 
															+        }
														
 
															+        RS_FREE(rs);
														
 
															+    }
														
 
															+
														
 
															+    return NULL;
														
 
															+}
														
 
															+
														
 
															+void reed_solomon_release(reed_solomon* rs) {
														
 
															+    if(NULL != rs) {
														
 
															+        if(NULL != rs->m) {
														
 
															+            RS_FREE(rs->m);
														
 
															+        }
														
 
															+        if(NULL != rs->parity) {
														
 
															+            RS_FREE(rs->parity);
														
 
															+        }
														
 
															+        RS_FREE(rs);
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+/**
														
 
															+ * encode one shard
														
 
															+ * input:
														
 
															+ * rs
														
 
															+ * data_blocks[rs->data_shards][block_size]
														
 
															+ * fec_blocks[rs->data_shards][block_size]
														
 
															+ * */
														
 
															+int reed_solomon_encode(reed_solomon* rs,
														
 
															+                        unsigned char** data_blocks,
														
 
															+                        unsigned char** fec_blocks,
														
 
															+                        int block_size) {
														
 
															+    assert(NULL != rs && NULL != rs->parity);
														
 
															+
														
 
															+    return code_some_shards(rs->parity, data_blocks, fec_blocks
														
 
															+            , rs->data_shards, rs->parity_shards, block_size);
														
 
															+}
														
 
															+
														
 
															+/**
														
 
															+ * decode one shard
														
 
															+ * input:
														
 
															+ * rs
														
 
															+ * original data_blocks[rs->data_shards][block_size]
														
 
															+ * dec_fec_blocks[nr_fec_blocks][block_size]
														
 
															+ * fec_block_nos: fec pos number in original fec_blocks
														
 
															+ * erased_blocks: erased blocks in original data_blocks
														
 
															+ * nr_fec_blocks: the number of erased blocks
														
 
															+ * */
														
 
															+int reed_solomon_decode(reed_solomon* rs,
														
 
															+                        unsigned char **data_blocks,
														
 
															+                        int block_size,
														
 
															+                        unsigned char **dec_fec_blocks,
														
 
															+                        unsigned int *fec_block_nos,
														
 
															+                        unsigned int *erased_blocks,
														
 
															+                        int nr_fec_blocks) {
														
 
															+    /* use stack instead of malloc, define a small number of DATA_SHARDS_MAX to save memory */
														
 
															+    gf dataDecodeMatrix[DATA_SHARDS_MAX*DATA_SHARDS_MAX];
														
 
															+    unsigned char* subShards[DATA_SHARDS_MAX];
														
 
															+    unsigned char* outputs[DATA_SHARDS_MAX];
														
 
															+    gf* m = rs->m;
														
 
															+    int i, j, c, swap, subMatrixRow, dataShards, nos, nshards;
														
 
															+
														
 
															+    /* the erased_blocks should always sorted
														
 
															+     * if sorted, nr_fec_blocks times to check it
														
 
															+     * if not, sort it here
														
 
															+     * */
														
 
															+    for(i = 0; i < nr_fec_blocks; i++) {
														
 
															+        swap = 0;
														
 
															+        for(j = i+1; j < nr_fec_blocks; j++) {
														
 
															+            if(erased_blocks[i] > erased_blocks[j]) {
														
 
															+                /* the prefix is bigger than the following, swap */
														
 
															+                c = erased_blocks[i];
														
 
															+                erased_blocks[i] = erased_blocks[j];
														
 
															+                erased_blocks[j] = c;
														
 
															+
														
 
															+                swap = 1;
														
 
															+            }
														
 
															+        }
														
 
															+        //printf("swap:%d\n", swap);
														
 
															+        if(!swap) {
														
 
															+            //already sorted or sorted ok
														
 
															+            break;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    j = 0;
														
 
															+    subMatrixRow = 0;
														
 
															+    nos = 0;
														
 
															+    nshards = 0;
														
 
															+    dataShards = rs->data_shards;
														
 
															+    for(i = 0; i < dataShards; i++) {
														
 
															+        if(j < nr_fec_blocks && i == erased_blocks[j]) {
														
 
															+            //ignore the invalid block
														
 
															+            j++;
														
 
															+        } else {
														
 
															+            /* this row is ok */
														
 
															+            for(c = 0; c < dataShards; c++) {
														
 
															+                dataDecodeMatrix[subMatrixRow*dataShards + c] = m[i*dataShards + c];
														
 
															+            }
														
 
															+            subShards[subMatrixRow] = data_blocks[i];
														
 
															+            subMatrixRow++;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    for(i = 0; i < nr_fec_blocks && subMatrixRow < dataShards; i++) {
														
 
															+        subShards[subMatrixRow] = dec_fec_blocks[i];
														
 
															+        j = dataShards + fec_block_nos[i];
														
 
															+        for(c = 0; c < dataShards; c++) {
														
 
															+            dataDecodeMatrix[subMatrixRow*dataShards + c] = m[j*dataShards + c]; //use spefic pos of original fec_blocks
														
 
															+        }
														
 
															+        subMatrixRow++;
														
 
															+    }
														
 
															+
														
 
															+    if(subMatrixRow < dataShards) {
														
 
															+        //cannot correct
														
 
															+        return -1;
														
 
															+    }
														
 
															+
														
 
															+    invert_mat(dataDecodeMatrix, dataShards);
														
 
															+    //printf("invert:\n");
														
 
															+    //print_matrix1(dataDecodeMatrix, dataShards, dataShards);
														
 
															+    //printf("nShards:\n");
														
 
															+    //print_matrix2(subShards, dataShards, block_size);
														
 
															+
														
 
															+    for(i = 0; i < nr_fec_blocks; i++) {
														
 
															+        j = erased_blocks[i];
														
 
															+        outputs[i] = data_blocks[j];
														
 
															+        //data_blocks[j][0] = 0;
														
 
															+        memmove(dataDecodeMatrix+i*dataShards, dataDecodeMatrix+j*dataShards, dataShards);
														
 
															+    }
														
 
															+    //printf("subMatrixRow:\n");
														
 
															+    //print_matrix1(dataDecodeMatrix, nr_fec_blocks, dataShards);
														
 
															+
														
 
															+    //printf("outputs:\n");
														
 
															+    //print_matrix2(outputs, nr_fec_blocks, block_size);
														
 
															+
														
 
															+    return code_some_shards(dataDecodeMatrix, subShards, outputs,
														
 
															+                            dataShards, nr_fec_blocks, block_size);
														
 
															+}
														
 
															+
														
 
															+/**
														
 
															+ * encode a big size of buffer
														
 
															+ * input:
														
 
															+ * rs
														
 
															+ * nr_shards: assert(0 == nr_shards % rs->shards)
														
 
															+ * shards[nr_shards][block_size]
														
 
															+ * */
														
 
															+int reed_solomon_encode2(reed_solomon* rs, unsigned char** shards, int nr_shards, int block_size) {
														
 
															+    unsigned char** data_blocks;
														
 
															+    unsigned char** fec_blocks;
														
 
															+    int i, ds = rs->data_shards, ps = rs->parity_shards, ss = rs->shards;
														
 
															+    i = nr_shards / ss;
														
 
															+    data_blocks = shards;
														
 
															+    fec_blocks = &shards[(i*ds)];
														
 
															+
														
 
															+    for(i = 0; i < nr_shards; i += ss) {
														
 
															+        reed_solomon_encode(rs, data_blocks, fec_blocks, block_size);
														
 
															+        data_blocks += ds;
														
 
															+        fec_blocks += ps;
														
 
															+    }
														
 
															+    return 0;
														
 
															+}
														
 
															+
														
 
															+/**
														
 
															+ * reconstruct a big size of buffer
														
 
															+ * input:
														
 
															+ * rs
														
 
															+ * nr_shards: assert(0 == nr_shards % rs->data_shards)
														
 
															+ * shards[nr_shards][block_size]
														
 
															+ * marks[nr_shards] marks as errors
														
 
															+ * */
														
 
															+int reed_solomon_reconstruct(reed_solomon* rs,
														
 
															+                             unsigned char** shards,
														
 
															+                             unsigned char* marks,
														
 
															+                             int nr_shards,
														
 
															+                             int block_size) {
														
 
															+    unsigned char *dec_fec_blocks[DATA_SHARDS_MAX];
														
 
															+    unsigned int fec_block_nos[DATA_SHARDS_MAX];
														
 
															+    unsigned int erased_blocks[DATA_SHARDS_MAX];
														
 
															+    unsigned char* fec_marks;
														
 
															+    unsigned char **data_blocks, **fec_blocks;
														
 
															+    int i, j, dn, pn, n;
														
 
															+    int ds = rs->data_shards;
														
 
															+    int ps = rs->parity_shards;
														
 
															+    int err = 0;
														
 
															+
														
 
															+    data_blocks = shards;
														
 
															+    n = nr_shards / rs->shards;
														
 
															+    fec_marks = marks + n*ds; //after all data, is't fec marks
														
 
															+    fec_blocks = shards + n*ds;
														
 
															+
														
 
															+    for(j = 0; j < n; j++) {
														
 
															+        dn = 0;
														
 
															+        for(i = 0; i < ds; i++) {
														
 
															+            if(marks[i]) {
														
 
															+                //errors
														
 
															+                erased_blocks[dn++] = i;
														
 
															+            }
														
 
															+        }
														
 
															+        if(dn > 0) {
														
 
															+            pn = 0;
														
 
															+            for(i = 0; i < ps && pn < dn; i++) {
														
 
															+                if(!fec_marks[i]) {
														
 
															+                    //got valid fec row
														
 
															+                    fec_block_nos[pn] = i;
														
 
															+                    dec_fec_blocks[pn] = fec_blocks[i];
														
 
															+                    pn++;
														
 
															+                }
														
 
															+            }
														
 
															+
														
 
															+            if(dn == pn) {
														
 
															+                reed_solomon_decode(rs
														
 
															+                        , data_blocks
														
 
															+                        , block_size
														
 
															+                        , dec_fec_blocks
														
 
															+                        , fec_block_nos
														
 
															+                        , erased_blocks
														
 
															+                        , dn);
														
 
															+            } else {
														
 
															+                //error but we continue
														
 
															+                err = -1;
														
 
															+            }
														
 
															+        }
														
 
															+        data_blocks += ds;
														
 
															+        marks += ds;
														
 
															+        fec_blocks += ps;
														
 
															+        fec_marks += ps;
														
 
															+    }
														
 
															+
														
 
															+    return err;
														
 
															+}
														
 
															+
														
--- a/src/third_party/rs.h
+++ b/src/third_party/rs.h
@@ -0,0 +1,88 @@
 
															#ifndef __RS_H_
															#define __RS_H_

															/* use small value to save memory */
															#ifndef DATA_SHARDS_MAX
															#define DATA_SHARDS_MAX (255)
															#endif

															/* use other memory allocator */
															#ifndef RS_MALLOC
															#define RS_MALLOC(x)    malloc(x)
															#endif

															#ifndef RS_FREE
															#define RS_FREE(x)      free(x)
															#endif

															#ifndef RS_CALLOC
															#define RS_CALLOC(n, x) calloc(n, x)
															#endif

															/* The implementation is C code: give the API C linkage so that C++
															 * translation units can include this header directly. */
															#ifdef __cplusplus
															extern "C" {
															#endif

															typedef struct _reed_solomon {
															    int data_shards;       /* data blocks per group */
															    int parity_shards;     /* parity (fec) blocks per group */
															    int shards;            /* blocks per group; presumably data_shards + parity_shards - confirm in rs.c */
															    unsigned char* m;      /* NOTE(review): looks like the coding matrix - confirm in rs.c */
															    unsigned char* parity; /* NOTE(review): looks like the parity rows of the matrix - confirm in rs.c */
															} reed_solomon;

															/**
															 * MUST be called one time for initialisation
															 * */
															void fec_init(void);

															reed_solomon* reed_solomon_new(int data_shards, int parity_shards);
															void reed_solomon_release(reed_solomon* rs);

															/**
															 * encode one shard
															 * input:
															 * rs
															 * data_blocks[rs->data_shards][block_size]
															 * fec_blocks[rs->parity_shards][block_size]
															 * */
															int reed_solomon_encode(reed_solomon* rs,
															                        unsigned char** data_blocks,
															                        unsigned char** fec_blocks,
															                        int block_size);


															/**
															 * decode one shard
															 * input:
															 * rs
															 * original data_blocks[rs->data_shards][block_size]
															 * dec_fec_blocks[nr_fec_blocks][block_size]
															 * fec_block_nos: fec pos number in original fec_blocks
															 * erased_blocks: erased blocks in original data_blocks
															 * nr_fec_blocks: the number of erased blocks
															 * */
															int reed_solomon_decode(reed_solomon* rs,
															                        unsigned char **data_blocks,
															                        int block_size,
															                        unsigned char **dec_fec_blocks,
															                        unsigned int *fec_block_nos,
															                        unsigned int *erased_blocks,
															                        int nr_fec_blocks);

															/**
															 * encode a big size of buffer
															 * input:
															 * rs
															 * nr_shards: assert(0 == nr_shards % rs->shards)
															 * shards[nr_shards][block_size]
															 * */
															int reed_solomon_encode2(reed_solomon* rs, unsigned char** shards, int nr_shards, int block_size);

															/**
															 * reconstruct a big size of buffer
															 * input:
															 * rs
															 * nr_shards: assert(0 == nr_shards % rs->shards)
															 * shards[nr_shards][block_size]
															 * marks[nr_shards] marks as errors
															 * */
															int reed_solomon_reconstruct(reed_solomon* rs, unsigned char** shards, unsigned char* marks, int nr_shards, int block_size);

															#ifdef __cplusplus
															} /* extern "C" */
															#endif

															#endif
														
 
															+
														
--- a/src/third_party/scope_guard.hpp
+++ b/src/third_party/scope_guard.hpp
@@ -0,0 +1,180 @@
 
															+/*
														
 
															+ *  Created on: 13/02/2018
														
 
															+ *      Author: ricab
														
 
															+ *
														
 
															+ * See README.md for documentation of this header's public interface.
														
 
															+ */
														
 
															+
														
 
															#ifndef SCOPE_GUARD_HPP_
															#define SCOPE_GUARD_HPP_

															#include <type_traits>
															#include <utility>

															// SG_REQUIRE_NOEXCEPT is switched on only when building as C++17 or later
															// AND the user defined SG_REQUIRE_NOEXCEPT_IN_CPP17.
															#if __cplusplus >= 201703L && defined(SG_REQUIRE_NOEXCEPT_IN_CPP17)
															#define SG_REQUIRE_NOEXCEPT
															#endif

															namespace sg {
															    namespace detail {

															        /* ----- helper type traits ----- */

															        // Primary template: by default T is not reported as callable
															        // without arguments.
															        template<typename T, typename = void>
															        struct is_noarg_callable_t : std::false_type {};

															        // Selected only when the expression `T()` is well-formed and its
															        // type equals the defaulted second parameter (void).
															        template<typename T>
															        struct is_noarg_callable_t<T, decltype(std::declval<T &&>()())>
															                : std::true_type {};

															        // True when calling T without arguments yields void.
															        template<typename T>
															        struct returns_void_t
															                : std::is_same<void, decltype(std::declval<T &&>()())> {};

															        // The only place where SG_REQUIRE_NOEXCEPT matters: with the macro
															        // defined the callback must be nothrow invocable, otherwise every
															        // callback passes. (The _r variants of is_nothrow_invocable cannot
															        // confirm a void return, since any return value can be discarded.)
															        template<typename T>
															#ifdef SG_REQUIRE_NOEXCEPT
															        struct is_nothrow_invocable_if_required_t
															                : std::is_nothrow_invocable<T> {};
															#else
															        struct is_nothrow_invocable_if_required_t
															                : std::true_type {};
															#endif

															        // Conjunction of two or more traits. Three or more are folded to
															        // the right onto the two-argument form below.
															        template<typename A, typename B, typename... C>
															        struct and_t : and_t<A, and_t<B, C...>> {};

															        // Two-argument form: inherits from B when A holds, from A otherwise.
															        template<typename A, typename B>
															        struct and_t<A, B> : std::conditional<A::value, B, A>::type {};

															        // All requirements a type has to meet to serve as a scope_guard
															        // callback.
															        template<typename T>
															        struct is_proper_sg_callback_t
															                : and_t<is_noarg_callable_t<T>,
															                        returns_void_t<T>,
															                        is_nothrow_invocable_if_required_t<T>,
															                        std::is_nothrow_destructible<T>> {};


															        /* ----- scope_guard: primary template, declared only ----- */

															        template<typename Callback,
															                typename = typename std::enable_if<
															                        is_proper_sg_callback_t<Callback>::value>::type>
															        class scope_guard;


															        /* ----- the maker, declared early so the class can befriend it ----- */

															        // Kept in the inner namespace because of MSVC bugs that prevent
															        // sg::detail::scope_guard from befriending a sg::make_scope_guard
															        // template instance of the parent namespace
															        // (see https://is.gd/xFfFhE).
															        template<typename Callback>
															        detail::scope_guard<Callback> make_scope_guard(Callback &&callback)
															        noexcept(std::is_nothrow_constructible<Callback, Callback &&>::value);


															        /* ----- the specialization that defines the class ----- */

															        template<typename Callback>
															        class scope_guard<Callback> final {
															        public:
															            using callback_type = Callback;

															            scope_guard() = delete;

															            scope_guard(const scope_guard &) = delete;

															            scope_guard &operator=(const scope_guard &) = delete;

															            scope_guard &operator=(scope_guard &&) = delete;

															            // Takes over the callback of `other` and deactivates `other`.
															            // () rather than {} for m_callback: see the note on the private
															            // constructor.
															            scope_guard(scope_guard &&other)
															            noexcept(std::is_nothrow_constructible<Callback, Callback &&>::value)
															                    : m_callback(std::forward<Callback>(other.m_callback)),
															                      m_active(other.m_active) {
															                other.m_active = false;
															            }

															            // Runs the callback unless the guard was dismissed or moved from.
															            ~scope_guard() noexcept {
															                if (!m_active)
															                    return;
															                m_callback();
															            }

															            // Deactivates the guard: the callback will not run on destruction.
															            void dismiss() noexcept {
															                m_active = false;
															            }

															        private:
															            // Meant for friends only. m_callback uses () instead of {}
															            // because of DR 1467 (https://is.gd/WHmWuo), which still impacts
															            // older compilers (e.g. GCC 4.x and clang <=3.6, see
															            // https://godbolt.org/g/TE9tPJ and https://is.gd/Tsmh8G).
															            explicit scope_guard(Callback &&callback)
															            noexcept(std::is_nothrow_constructible<Callback, Callback &&>::value)
															                    : m_callback(std::forward<Callback>(callback)),
															                      m_active(true) {}

															            // Only make_scope_guard can create scope_guards from scratch
															            // (i.e. other than by moving).
															            friend scope_guard<Callback> make_scope_guard<Callback>(Callback &&)
															            noexcept(std::is_nothrow_constructible<Callback, Callback &&>::value);

															            Callback m_callback;
															            bool m_active;
															        };


															        /* ----- the maker, defined ----- */

															        template<typename Callback>
															        inline scope_guard<Callback> make_scope_guard(Callback &&callback)
															        noexcept(std::is_nothrow_constructible<Callback, Callback &&>::value) {
															            return scope_guard<Callback>{std::forward<Callback>(callback)};
															        }

															    } // namespace detail


															    /* ----- the single public entry point ----- */

															    using detail::make_scope_guard; // see the comment on its declaration

															} // namespace sg

															#endif /* SCOPE_GUARD_HPP_ */
														
--- a/src/utility.hpp
+++ b/src/utility.hpp
@@ -0,0 +1,74 @@
 
															+#ifndef REMOTEAR3_UTILITY_HPP
														
 
															+#define REMOTEAR3_UTILITY_HPP
														
 
															+
														
 
															+#include <spdlog/spdlog.h>
														
 
															+
														
 
															// https://en.cppreference.com/w/cpp/utility/unreachable
															// Marks a point that control flow must never reach; reaching it is
															// undefined behavior.
															[[noreturn]] inline void unreachable() {
															    // Uses compiler specific extensions if possible.
															    // Even if no extension is used, undefined behavior is still raised by
															    // an empty function body and the noreturn attribute.
															#if defined(__GNUC__) // GCC, Clang, ICC
															    __builtin_unreachable();
															#elif defined(_MSC_VER) // MSVC
															    __assume(false);
															#endif
															    // Any other compiler: intentionally empty, __assume is not available there.
															}
														
 
															+
														
 
															// Error-exit helpers. Each one asserts in debug builds and then leaves the
															// enclosing function. They are wrapped in do { } while (0) so that each
															// macro expands to exactly one statement: without the wrapper,
															// `if (cond) RET_ERROR_B;` would guard only the assert and return
															// unconditionally.

															// For code paths that must never be reached.
															#define RET_ERROR \
															    do { \
															        assert(false); \
															        unreachable(); \
															    } while (0)

															// For functions returning bool: returns false.
															#define RET_ERROR_B \
															    do { \
															        assert(false); \
															        return false; \
															    } while (0)

															// For functions returning a pointer: returns nullptr.
															#define RET_ERROR_P \
															    do { \
															        assert(false); \
															        return nullptr; \
															    } while (0)
														
 
															+
														
 
															+inline bool check_function_call(bool function_ret, unsigned int line_number,
														
 
															+                                const char *file_name, const char *function_call_str) {
														
 
															+    if (function_ret) [[likely]] return true;
														
 
															+    SPDLOG_ERROR("Function call {} failed at {}:{}.",
														
 
															+                 function_call_str, file_name, line_number);
														
 
															+    RET_ERROR_B;
														
 
															+}
														
 
															+
														
 
															// Passes the value of `call_expr` to check_function_call() together with the
															// current line, the current file and the source text of the expression;
															// yields whatever check_function_call() returns.
															#define CALL_CHECK(call_expr) \
															    check_function_call(call_expr, __LINE__, __FILE__, #call_expr)
														
 
															+
														
 
															// Runs `call_expr` inside a try block. If it throws something derived from
															// std::exception, the expression text, the location and what() are logged
															// and the enclosing function leaves through RET_ERROR_P. The trailing
															// `(void) 0` lets the invocation be terminated with a semicolon.
															#define EXCEPTION_CHECK_P(call_expr) \
															    try { \
															        call_expr; \
															    } catch (std::exception &ex) { \
															        SPDLOG_ERROR("Function call {} failed at {}:{}, {}.", \
															                     #call_expr, __FILE__, __LINE__, ex.what()); \
															        RET_ERROR_P; \
															    } (void) 0
														
 
															+
														
 
															+struct log_timer {
														
 
															+
														
 
															+    void reset() {
														
 
															+        last_ts = clock_type::now();
														
 
															+    }
														
 
															+
														
 
															+    void record(std::string_view name) {
														
 
															+        SPDLOG_TRACE("{} reached at {}ms", name,
														
 
															+                     std::chrono::duration_cast<time_res>(clock_type::now() - last_ts).count());
														
 
															+    }
														
 
															+
														
 
															+private:
														
 
															+    using clock_type = std::chrono::high_resolution_clock;
														
 
															+    using time_res = std::chrono::milliseconds;
														
 
															+    clock_type::time_point last_ts;
														
 
															+};
														
 
															+
														
 
															+extern log_timer global_timer;
														
 
															+
														
 
															+#define RESET_TIMER global_timer.reset()
														
 
															+#define RECORD_TIME(name) global_timer.record(name)
														
 
															+
														
 
															+#endif //REMOTEAR3_UTILITY_HPP
														
--- a/src/variable_defs.h
+++ b/src/variable_defs.h
@@ -0,0 +1,8 @@
 
															#ifndef TINYPLAYER3_VARIABLE_DEFS_H
															#define TINYPLAYER3_VARIABLE_DEFS_H

															// Small integer identifiers shared across the player.
															// NOTE(review): presumably used as indices/ids of shared state flags -
															// confirm against the code that reads them.
															constexpr int FRAME_OUT = 0;
															constexpr int RENDER_BUSY = 1;
															constexpr int RECEIVER_STOPPED = 2;

															#endif //TINYPLAYER3_VARIABLE_DEFS_H