1111
1212#include < random>
1313#include < sstream>
14+ #include < fstream>
1415
1516json format_error_response (const std::string & message, const enum error_type type) {
1617 std::string type_str;
@@ -774,6 +775,65 @@ json oaicompat_completion_params_parse(const json & body) {
774775 return llama_params;
775776}
776777
778+ // media_path always end with '/', see arg.cpp
779+ static void handle_media (
780+ std::vector<raw_buffer> & out_files,
781+ json & media_obj,
782+ const std::string & media_path) {
783+ std::string url = json_value (media_obj, " url" , std::string ());
784+ if (string_starts_with (url, " http" )) {
785+ // download remote image
786+ // TODO @ngxson : maybe make these params configurable
787+ common_remote_params params;
788+ params.headers .push_back (" User-Agent: llama.cpp/" + build_info);
789+ params.max_size = 1024 * 1024 * 10 ; // 10MB
790+ params.timeout = 10 ; // seconds
791+ SRV_INF (" downloading image from '%s'\n " , url.c_str ());
792+ auto res = common_remote_get_content (url, params);
793+ if (200 <= res.first && res.first < 300 ) {
794+ SRV_INF (" downloaded %ld bytes\n " , res.second .size ());
795+ raw_buffer data;
796+ data.insert (data.end (), res.second .begin (), res.second .end ());
797+ out_files.push_back (data);
798+ } else {
799+ throw std::runtime_error (" Failed to download image" );
800+ }
801+
802+ } else if (string_starts_with (url, " file://" )) {
803+ if (media_path.empty ()) {
804+ throw std::invalid_argument (" file:// URLs are not allowed unless --media-path is specified" );
805+ }
806+ // load local image file
807+ std::string file_path = url.substr (7 ); // remove "file://"
808+ raw_buffer data;
809+ if (!fs_validate_filename (file_path, true )) {
810+ throw std::invalid_argument (" file path is not allowed: " + file_path);
811+ }
812+ SRV_INF (" loading image from local file '%s'\n " , (media_path + file_path).c_str ());
813+ std::ifstream file (media_path + file_path, std::ios::binary);
814+ if (!file) {
815+ throw std::invalid_argument (" file does not exist or cannot be opened: " + file_path);
816+ }
817+ data.assign ((std::istreambuf_iterator<char >(file)), std::istreambuf_iterator<char >());
818+ out_files.push_back (data);
819+
820+ } else {
821+ // try to decode base64 image
822+ std::vector<std::string> parts = string_split<std::string>(url, /* separator*/ ' ,' );
823+ if (parts.size () != 2 ) {
824+ throw std::runtime_error (" Invalid url value" );
825+ } else if (!string_starts_with (parts[0 ], " data:image/" )) {
826+ throw std::runtime_error (" Invalid url format: " + parts[0 ]);
827+ } else if (!string_ends_with (parts[0 ], " base64" )) {
828+ throw std::runtime_error (" url must be base64 encoded" );
829+ } else {
830+ auto base64_data = parts[1 ];
831+ auto decoded_data = base64_decode (base64_data);
832+ out_files.push_back (decoded_data);
833+ }
834+ }
835+ }
836+
777837// used by /chat/completions endpoint
778838json oaicompat_chat_params_parse (
779839 json & body, /* openai api json semantics */
@@ -860,41 +920,8 @@ json oaicompat_chat_params_parse(
860920 throw std::runtime_error (" image input is not supported - hint: if this is unexpected, you may need to provide the mmproj" );
861921 }
862922
863- json image_url = json_value (p, " image_url" , json::object ());
864- std::string url = json_value (image_url, " url" , std::string ());
865- if (string_starts_with (url, " http" )) {
866- // download remote image
867- // TODO @ngxson : maybe make these params configurable
868- common_remote_params params;
869- params.headers .push_back (" User-Agent: llama.cpp/" + build_info);
870- params.max_size = 1024 * 1024 * 10 ; // 10MB
871- params.timeout = 10 ; // seconds
872- SRV_INF (" downloading image from '%s'\n " , url.c_str ());
873- auto res = common_remote_get_content (url, params);
874- if (200 <= res.first && res.first < 300 ) {
875- SRV_INF (" downloaded %ld bytes\n " , res.second .size ());
876- raw_buffer data;
877- data.insert (data.end (), res.second .begin (), res.second .end ());
878- out_files.push_back (data);
879- } else {
880- throw std::runtime_error (" Failed to download image" );
881- }
882-
883- } else {
884- // try to decode base64 image
885- std::vector<std::string> parts = string_split<std::string>(url, /* separator*/ ' ,' );
886- if (parts.size () != 2 ) {
887- throw std::invalid_argument (" Invalid image_url.url value" );
888- } else if (!string_starts_with (parts[0 ], " data:image/" )) {
889- throw std::invalid_argument (" Invalid image_url.url format: " + parts[0 ]);
890- } else if (!string_ends_with (parts[0 ], " base64" )) {
891- throw std::invalid_argument (" image_url.url must be base64 encoded" );
892- } else {
893- auto base64_data = parts[1 ];
894- auto decoded_data = base64_decode (base64_data);
895- out_files.push_back (decoded_data);
896- }
897- }
923+ json image_url = json_value (p, " image_url" , json::object ());
924+ handle_media (out_files, image_url, opt.media_path );
898925
899926 // replace this chunk with a marker
900927 p[" type" ] = " text" ;
@@ -916,6 +943,8 @@ json oaicompat_chat_params_parse(
916943 auto decoded_data = base64_decode (data); // expected to be base64 encoded
917944 out_files.push_back (decoded_data);
918945
946+ // TODO: add audio_url support by reusing handle_media()
947+
919948 // replace this chunk with a marker
920949 p[" type" ] = " text" ;
921950 p[" text" ] = mtmd_default_marker ();
0 commit comments