hamdiboukamcha
diff --git a/‎CMakeLists.txt‎
Lines changed: 84 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 84 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 131 additions & 2 deletions b/‎README.md‎
Lines changed: 131 additions & 2 deletions
diff --git a/‎asset/1693479941020.jpg‎
59 KB b/‎asset/1693479941020.jpg‎
59 KB
diff --git a/‎asset/BiRefNet-Cpp-TensorRT.JPG‎
19 KB b/‎asset/BiRefNet-Cpp-TensorRT.JPG‎
19 KB
diff --git a/‎asset/O-1693479941020.jpg‎
15.2 KB b/‎asset/O-1693479941020.jpg‎
15.2 KB
diff --git a/‎include/birefnet.h‎
Lines changed: 85 additions & 0 deletions b/‎include/birefnet.h‎
Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,84 @@
+cmake_minimum_required(VERSION 3.18)
+
+# Project declaration with C++ and CUDA support
+project(BiRefNetTRT LANGUAGES CXX CUDA)
+
+# Project-wide default: C++17, required, without compiler-specific extensions
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Root of the TensorRT installation.
+# This is a cache entry (not a normal variable, and not option(), which is
+# boolean-only) so that a value given on the command line always wins, e.g.:
+#   cmake -DTENSORRT_PATH="path/to/TensorRT" -DOpenCV_DIR="path/to/OpenCV" ..
+set(TENSORRT_PATH "F:/Program Files/TensorRT-8.6.1.6"
+    CACHE PATH "Path to TensorRT installation")
+
+# REQUIRED makes find_package() stop with an error when a package is missing,
+# so no separate *_FOUND checks are needed afterwards.
+find_package(OpenCV REQUIRED)
+
+# CUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
+# provides imported targets such as CUDA::cudart.
+find_package(CUDAToolkit REQUIRED)
+
+# Locate the TensorRT headers under TENSORRT_PATH (or the system search paths).
+find_path(TENSORRT_INCLUDE_DIR
+    NAMES NvInfer.h
+    HINTS "${TENSORRT_PATH}"
+    PATH_SUFFIXES include
+)
+if(NOT TENSORRT_INCLUDE_DIR)
+    message(FATAL_ERROR
+        "NvInfer.h not found. Set TENSORRT_PATH to the TensorRT installation.")
+endif()
+
+# Locate the TensorRT libraries by name instead of hard-coding "<name>.lib",
+# so the same file works with the Windows and the Linux library naming.
+set(TENSORRT_LIBS "")
+foreach(trt_lib IN ITEMS nvinfer nvonnxparser nvinfer_plugin)
+    find_library(TENSORRT_${trt_lib}_LIBRARY
+        NAMES ${trt_lib}
+        HINTS "${TENSORRT_PATH}"
+        PATH_SUFFIXES lib lib64
+    )
+    if(NOT TENSORRT_${trt_lib}_LIBRARY)
+        message(FATAL_ERROR
+            "TensorRT library '${trt_lib}' not found. "
+            "Set TENSORRT_PATH to the TensorRT installation.")
+    endif()
+    list(APPEND TENSORRT_LIBS "${TENSORRT_${trt_lib}_LIBRARY}")
+endforeach()
+
+# nvparsers is linked when it is present; it is treated as optional because
+# not every TensorRT release ships it.
+find_library(TENSORRT_nvparsers_LIBRARY
+    NAMES nvparsers
+    HINTS "${TENSORRT_PATH}"
+    PATH_SUFFIXES lib lib64
+)
+if(TENSORRT_nvparsers_LIBRARY)
+    list(APPEND TENSORRT_LIBS "${TENSORRT_nvparsers_LIBRARY}")
+endif()
+
+# Executable built from the demo application and the BiRefNet implementation
+add_executable(${PROJECT_NAME}
+    main.cpp
+    src/birefnet.cpp
+)
+
+# Define API_EXPORTS macro
+target_compile_definitions(${PROJECT_NAME} PRIVATE API_EXPORTS)
+
+# Include directories, scoped to this target only.
+# CUDA headers are supplied by the CUDA::cudart imported target below.
+target_include_directories(${PROJECT_NAME} PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}/include"
+    "${CMAKE_CURRENT_SOURCE_DIR}/src"
+    ${OpenCV_INCLUDE_DIRS}
+    "${TENSORRT_INCLUDE_DIR}"
+)
+
+# Link OpenCV, the CUDA runtime and the TensorRT libraries found above
+target_link_libraries(${PROJECT_NAME} PRIVATE
+    ${OpenCV_LIBS}
+    CUDA::cudart
+    ${TENSORRT_LIBS}
+)
+
+# Enable separable compilation for CUDA (optional but recommended)
+set_target_properties(${PROJECT_NAME} PROPERTIES
+    CUDA_SEPARABLE_COMPILATION ON
+)
+
+# (Optional) Specify CUDA architectures based on your GPU hardware
+# set(CMAKE_CUDA_ARCHITECTURES 75)  # Example for Turing architecture
+
+# (Optional) Set output directories for binaries
+# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
@@ -1,2 +1,131 @@
-# BiRefNet-Cpp-TensorRT
-A high-performance C++ implementation of the Bilateral Reference Network (**BiRefNet**) leveraging **TensorRT** and **CUDA**, optimized for real-time, high-resolution dichotomous image segmentation.
+# BiRefNet C++ TENSORRT
+A high-performance C++ implementation of the Bilateral Reference Network (**BiRefNet**) leveraging **TensorRT** and **CUDA**, optimized for real-time, high-resolution dichotomous image segmentation.
+
+<img src="asset/BiRefNet-Cpp-TensorRT.JPG" alt="BiRefNet Banner" width="800"/>
+
+<a href="https://github.com/hamdiboukamcha/BiRefNet-Cpp-TensorRT" style="margin: 0 2px;">
+    <img src="https://img.shields.io/badge/GitHub-Repo-blue?style=flat&logo=GitHub" alt="GitHub">
+</a>
+<a href="https://github.com/hamdiboukamcha/BiRefNet-Cpp-TensorRT?tab=License" style="margin: 0 2px;">
+    <img src="https://img.shields.io/badge/License-MIT-lightgrey?style=flat&logo=license" alt="License">
+</a>
+
+---
+
+## 🌐 Overview
+
+**BiRefNet C++ TENSORRT** is designed to efficiently run bilateral reference segmentation tasks on the GPU. By harnessing TensorRT’s optimizations and CUDA kernels, it aims to deliver state-of-the-art performance with minimal latency.
+
+### Key Features
+
+- **TensorRT Acceleration**: Speed up inference for segmentation tasks using serialized TRT engines.  
+- **CUDA Integration**: Comprehensive GPU-based preprocessing, postprocessing, and memory handling.  
+- **High-Resolution Support**: Out-of-the-box ability to process high-resolution images (e.g., 1024x1024).  
+- **Easy Integration**: C++17 codebase for easy deployment into existing pipelines.  
+
+---
+
+## 📢 What's New
+
+- **Enhanced Bilateral Reference**: Improves dichotomous segmentation outputs by leveraging dual reference guidance.  
+- **Improved Memory Footprint**: Optimized GPU allocation for large-batch or high-resolution workloads.  
+- **Configurable Precision**: Support for **FP16** or **FP32** modes (requires GPU with half-precision support for FP16).  
+- **Flexible I/O**: Easily integrate your own data loaders or pipeline steps thanks to modular design.
+
+---
+
+## 📂 Project Structure
+
+```
+BiRefNet/
+├── include
+│   └── birefnet.h      # Main BiRefNet class definition
+├── src
+│   └── birefnet.cpp    # Implementation of the BiRefNet class
+├── CMakeLists.txt      # CMake configuration
+└── main.cpp            # Demo application
+```
+
+
+- **include/birefnet.h**  
+  Header file defining the `BiRefNet` class, which manages TensorRT engine creation, execution, and memory buffers.
+
+- **src/birefnet.cpp**  
+  Source implementation for loading serialized engines, running inference, and handling output postprocessing.
+
+- **CMakeLists.txt**  
+  Configuration for building the project using CMake. Adjust paths to TensorRT, CUDA, and OpenCV as needed.
+
+- **main.cpp**  
+  A minimal example demonstrating how to load the model, run inference on images or videos, and save the results.
+
+---
+
+## 🚀 Installation
+
+1. **Clone the Repository and Build**
+
+   ```bash
+   git clone https://github.com/hamdiboukamcha/BiRefNet-Cpp-TensorRT.git
+   cd BiRefNet-Cpp-TensorRT
+   mkdir build && cd build
+   cmake ..
+   make -j$(nproc)
+   ```
+
+	
+## 📦 Dependencies
+- **CUDA**: Required for GPU acceleration and kernel launches (e.g., CUDA 11.x or later).
+- **TensorRT**: High-performance deep learning inference library (v8.x or later recommended).
+- **OpenCV**: Needed for image loading, preprocessing, and basic visualization.
+- **C++17**: This project uses modern C++ features. Ensure your compiler supports C++17 or above.
+
+## 🔍 Code Overview
+### Main Components
+
+- **`BiRefNet` class**
+  - Initializes a TensorRT engine from a given engine/model path.
+  - Handles preprocessing (image resizing, mean/std normalization, etc.).
+  - Runs inference and postprocesses outputs into segmentation maps.
+  - Manages CUDA resources and streams.
+- **`Logger` class** (in `main.cpp`)
+  - Implements TensorRT’s `ILogger` interface for custom logging.
+
+### Notable Functions
+
+- **`BiRefNet::BiRefNet(...)`**: Constructor that loads a `.trt` (serialized TensorRT) engine into memory.
+- **`BiRefNet::predict(cv::Mat& image)`**: Main function to run inference; takes an OpenCV `cv::Mat` as input and returns the segmented result as a `cv::Mat`.
+- **`BiRefNet::preprocess(...)`**: Converts an image into normalized floats (mean subtraction, division by std, etc.).
+- **`BiRefNet::postprocess(...)`**: Reshapes the raw output into meaningful image data, typically an 8-bit or 32-bit matrix for segmentation.
+
+## 🎬 Usage
+### Prepare Your Engine
+
+Convert your model to ONNX, then build a TensorRT engine (e.g., using `trtexec` or a custom builder):
+
+```bash
+trtexec --onnx=birefnet_model.onnx --saveEngine=BiRefNet-tiny.engine --fp16
+```
+	
+## 📞 Contact
+For further inquiries or advanced usage discussions:
+
+Email: your.email@example.com
+LinkedIn: Your Name
+
+## 📜 Citation
+If you use BiRefNet C++ TENSORRT in your academic work or research, please cite:
+
+```bibtex
+@misc{Boukamcha2025BiRefNet,
+    author       = {Hamdi Boukamcha},
+    title        = {BiRefNet C++ TENSORRT},
+    year         = {2025},
+    publisher    = {GitHub},
+    howpublished = {\url{https://github.com/hamdiboukamcha/BiRefNet-Cpp-TensorRT}}
+}
+```
@@ -0,0 +1,85 @@
+#pragma once
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <tuple>
+#include <algorithm>
+#include <opencv2/opencv.hpp>
+#include <NvInfer.h>
+
+/// \class BiRefNet
+/// \brief TensorRT wrapper for BiRefNet (Bilateral Reference for High-Resolution
+///        Dichotomous Image Segmentation).
+///
+/// This class is responsible for initializing a TensorRT engine from a given model path,
+/// preparing input data, running inference on input images, and returning the predicted output.
+/// It holds the TensorRT runtime, engine and execution context together with the GPU I/O
+/// buffers and a CUDA stream, and provides private helpers to preprocess input images and
+/// postprocess the inference results.
+///
+/// NOTE(review): the class stores raw pointers and declares a destructor, but its copy
+/// operations are not disabled. Presumably instances must not be copied — confirm against
+/// the destructor in birefnet.cpp.
+class BiRefNet
+{
+public:
+
+	/// \brief Constructor for the BiRefNet class.
+	///
+	/// \param model_path Path to the serialized TensorRT engine file (e.g., .trt).
+	/// \param logger A reference to a TensorRT ILogger implementation for logging.
+	BiRefNet(std::string model_path, nvinfer1::ILogger& logger);
+
+	/// \brief Runs inference on the given input image.
+	///
+	/// This method preprocesses the input image, runs inference on the loaded
+	/// TensorRT engine, and returns the result as a single cv::Mat.
+	///
+	/// \param image The input cv::Mat image on which inference is to be performed.
+	/// \return A cv::Mat representing the inference output.
+	cv::Mat predict(cv::Mat& image);
+
+	/// \brief Destructor for the BiRefNet class.
+	///
+	/// Cleans up all allocated resources, including GPU buffers and CUDA streams.
+	~BiRefNet();
+
+private:
+	int input_w = 1024;  ///< The input width for the model.
+	int input_h = 1024;  ///< The input height for the model.
+
+	float mean[3] = { 123.675, 116.28, 103.53 };  ///< Mean values for preprocessing.
+	/// Standard deviation values for preprocessing. The member is named `std`; qualified
+	/// names such as std::vector below still resolve to the namespace.
+	float std[3] = { 58.395, 57.12, 57.375 };
+
+	std::vector<int> offset;                      ///< Offset values for internal calculations.
+
+	nvinfer1::IRuntime* runtime = nullptr;        ///< Pointer to the TensorRT Runtime.
+	nvinfer1::ICudaEngine* engine = nullptr;      ///< Pointer to the TensorRT Engine.
+	nvinfer1::IExecutionContext* context = nullptr;///< Pointer to the TensorRT Execution Context.
+	nvinfer1::INetworkDefinition* network = nullptr; ///< (Optional) Pointer to the Network definition if needed.
+
+	void* buffer[2] = { nullptr, nullptr };       ///< I/O buffer pointers on the GPU.
+	float* output_Data = nullptr;                 ///< Pointer to the model output data (presumably host-side; confirm in birefnet.cpp).
+	cudaStream_t stream = nullptr;                ///< CUDA stream for asynchronous operations; null until one is created.
+
+	/// \brief Resizes the given image to the specified dimensions and returns extra info.
+	///
+	/// NOTE(review): the name refers to depth maps; presumably inherited from another
+	/// project — confirm whether this helper is still used for segmentation.
+	///
+	/// \param img The input image (e.g., CV_32FC1 or CV_8UC1).
+	/// \param w   The target width for the resized image.
+	/// \param h   The target height for the resized image.
+	/// \return A std::tuple containing:
+	///         - A cv::Mat with the resized image.
+	///         - The resized width (int).
+	///         - The resized height (int).
+	std::tuple<cv::Mat, int, int> resize_depth(cv::Mat& img, int w, int h);
+
+
+	/// \brief Preprocessing function to convert the input image into a suitable tensor format.
+	///
+	/// \param image The input cv::Mat image.
+	/// \return A vector of floats representing the preprocessed image data.
+	std::vector<float> preprocess(cv::Mat& image);
+
+	/// \brief Postprocesses the raw model output into a segmentation map.
+	/// \param output_Data Pointer to the float array containing the model's output.
+	/// \param img_w The width of the output segmentation map.
+	/// \param img_h The height of the output segmentation map.
+	/// \return A cv::Mat representing the postprocessed segmentation map.
+	// Declared without the redundant `BiRefNet::` qualifier: an in-class member
+	// declaration must be unqualified (GCC and Clang reject the qualified form).
+	cv::Mat postprocess(float* output_Data, int img_w, int img_h);
+
+};