/* Copyright (C) 2021 Vladimir Sukhonosov * Copyright (C) 2021 Martins Mozeiko * All Rights Reserved. * * This is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this software; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, * USA. */ #include #include #include #define SAFE_RELEASE(obj) if (obj) { obj->Release(); obj = NULL; } #include #include #include #include using namespace rfb; static LogWriter vlog("H264WinDecoderContext"); bool H264WinDecoderContext::initCodec() { os::AutoMutex lock(&mutex); if (FAILED(MFStartup(MF_VERSION, MFSTARTUP_LITE))) { vlog.error("Could not initialize MediaFoundation"); return false; } if (FAILED(CoCreateInstance(CLSID_CMSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&decoder))) { vlog.error("MediaFoundation H264 codec not found"); return false; } GUID CLSID_VideoProcessorMFT = { 0x88753b26, 0x5b24, 0x49bd, { 0xb2, 0xe7, 0xc, 0x44, 0x5c, 0x78, 0xc9, 0x82 } }; if (FAILED(CoCreateInstance(CLSID_VideoProcessorMFT, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&converter))) { vlog.error("Cannot create MediaFoundation Video Processor (available only on Windows 8+). Trying ColorConvert DMO."); if (FAILED(CoCreateInstance(CLSID_CColorConvertDMO, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&converter))) { decoder->Release(); vlog.error("ColorConvert DMO not found"); return false; } } // if possible, enable low-latency decoding (Windows 8 and up) IMFAttributes* attributes; if (SUCCEEDED(decoder->GetAttributes(&attributes))) { GUID MF_LOW_LATENCY = { 0x9c27891a, 0xed7a, 0x40e1, { 0x88, 0xe8, 0xb2, 0x27, 0x27, 0xa0, 0x24, 0xee } }; if (SUCCEEDED(attributes->SetUINT32(MF_LOW_LATENCY, TRUE))) { vlog.info("Enabled low latency mode"); } attributes->Release(); } // set decoder input type IMFMediaType* input_type; if (FAILED(MFCreateMediaType(&input_type))) { decoder->Release(); converter->Release(); vlog.error("Could not create MF MediaType"); return false; } input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); input_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264); decoder->SetInputType(0, input_type, 0); input_type->Release(); // set decoder output type (NV12) DWORD output_index = 0; IMFMediaType* output_type = NULL; while (SUCCEEDED(decoder->GetOutputAvailableType(0, output_index++, &output_type))) { GUID subtype; if (SUCCEEDED(output_type->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == MFVideoFormat_NV12) { decoder->SetOutputType(0, output_type, 0); output_type->Release(); break; } output_type->Release(); } if (FAILED(decoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0))) { decoder->Release(); converter->Release(); input_type->Release(); vlog.error("Could not start H264 decoder"); return false; } MFT_OUTPUT_STREAM_INFO info; decoder->GetOutputStreamInfo(0, &info); if (FAILED(MFCreateSample(&input_sample)) || FAILED(MFCreateSample(&decoded_sample)) || FAILED(MFCreateSample(&converted_sample)) || FAILED(MFCreateMemoryBuffer(4 * 1024 * 1024, &input_buffer)) || FAILED(MFCreateMemoryBuffer(info.cbSize, &decoded_buffer))) { decoder->Release(); converter->Release(); input_type->Release(); SAFE_RELEASE(input_sample); SAFE_RELEASE(decoded_sample); SAFE_RELEASE(converted_sample); SAFE_RELEASE(input_buffer); SAFE_RELEASE(decoded_buffer); vlog.error("Could not allocate media samples/buffers"); return false; } input_sample->AddBuffer(input_buffer); decoded_sample->AddBuffer(decoded_buffer); initialized = true; return true; } void H264WinDecoderContext::freeCodec() { os::AutoMutex lock(&mutex); if (!initialized) return; SAFE_RELEASE(decoder) SAFE_RELEASE(converter) SAFE_RELEASE(input_sample) SAFE_RELEASE(decoded_sample) SAFE_RELEASE(converted_sample) SAFE_RELEASE(input_buffer) SAFE_RELEASE(decoded_buffer) SAFE_RELEASE(converted_buffer) MFShutdown(); initialized = false; } void H264WinDecoderContext::decode(const uint8_t* h264_buffer, uint32_t len, ModifiablePixelBuffer* pb) { os::AutoMutex lock(&mutex); if (!initialized) return; if (FAILED(input_buffer->SetCurrentLength(len))) { input_buffer->Release(); if (FAILED(MFCreateMemoryBuffer(len, &input_buffer))) { vlog.error("Could not allocate media buffer"); return; } input_buffer->SetCurrentLength(len); input_sample->RemoveAllBuffers(); input_sample->AddBuffer(input_buffer); } BYTE* locked; input_buffer->Lock(&locked, NULL, NULL); memcpy(locked, h264_buffer, len); input_buffer->Unlock(); vlog.debug("Received %u bytes, decoding", len); // extract actual size, including possible cropping ParseSPS(h264_buffer, len); if (FAILED(decoder->ProcessInput(0, input_sample, 0))) { vlog.error("Error sending a packet to decoding"); return; } bool decoded = false; // try to retrieve all decoded output, as input can submit multiple h264 packets in one buffer for (;;) { DWORD curlen; decoded_buffer->GetCurrentLength(&curlen); decoded_buffer->SetCurrentLength(0); MFT_OUTPUT_DATA_BUFFER decoded_data; decoded_data.dwStreamID = 0; decoded_data.pSample = decoded_sample; decoded_data.dwStatus = 0; decoded_data.pEvents = NULL; DWORD status; HRESULT hr = decoder->ProcessOutput(0, 1, &decoded_data, &status); SAFE_RELEASE(decoded_data.pEvents) if (SUCCEEDED(hr)) { vlog.debug("Frame decoded"); // successfully decoded next frame // but do not exit loop, try again if there is next frame decoded = true; } else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { // no more frames to decode if (decoded) { // restore previous buffer length for converter decoded_buffer->SetCurrentLength(curlen); } break; } else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { // something changed (resolution, framerate, h264 properties...) // need to setup output type and try decoding again DWORD output_index = 0; IMFMediaType* output_type = NULL; while (SUCCEEDED(decoder->GetOutputAvailableType(0, output_index++, &output_type))) { GUID subtype; if (SUCCEEDED(output_type->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == MFVideoFormat_NV12) { decoder->SetOutputType(0, output_type, 0); break; } output_type->Release(); output_type = NULL; } // reinitialize output type (NV12) that now has correct properties (width/height/framerate) decoder->SetOutputType(0, output_type, 0); UINT32 width, height; if FAILED(MFGetAttributeSize(output_type, MF_MT_FRAME_SIZE, &width, &height)) { vlog.error("Error getting output type size"); output_type->Release(); break; } // if MFT reports different width or height than calculated cropped width/height if (crop_width != 0 && crop_height != 0 && (width != crop_width || height != crop_height)) { // create NV12/RGB image with full size as we'll do manual cropping width = full_width; height = full_height; } else { // no manual cropping necessary offset_x = offset_y = 0; crop_width = width; crop_height = height; } vlog.debug("Setting up decoded output with %ux%u size", crop_width, crop_height); // input type to converter, BGRX pixel format IMFMediaType* converted_type; if (FAILED(MFCreateMediaType(&converted_type))) { vlog.error("Error creating media type"); } else { converted_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); converted_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32); converted_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); MFSetAttributeSize(converted_type, MF_MT_FRAME_SIZE, full_width, full_height); MFGetStrideForBitmapInfoHeader(MFVideoFormat_RGB32.Data1, full_width, &stride); // bottom-up stride = -stride; converted_type->SetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32)stride); // setup NV12 -> BGRX converter converter->SetOutputType(0, converted_type, 0); converter->SetInputType(0, output_type, 0); converted_type->Release(); // create converter output buffer MFT_OUTPUT_STREAM_INFO info; converter->GetOutputStreamInfo(0, &info); if (FAILED(MFCreateMemoryBuffer(info.cbSize, &converted_buffer))) { vlog.error("Error creating media buffer"); } else { converted_sample->AddBuffer(converted_buffer); } } output_type->Release(); } } // we care only about final image // we ignore previous images if decoded multiple in a row if (decoded) { if (FAILED(converter->ProcessInput(0, decoded_sample, 0))) { vlog.error("Error sending a packet to converter"); return; } MFT_OUTPUT_DATA_BUFFER converted_data; converted_data.dwStreamID = 0; converted_data.pSample = converted_sample; converted_data.dwStatus = 0; converted_data.pEvents = NULL; DWORD status; HRESULT hr = converter->ProcessOutput(0, 1, &converted_data, &status); SAFE_RELEASE(converted_data.pEvents) if (FAILED(hr)) { vlog.error("Error converting to RGB"); } else { vlog.debug("Frame converted to RGB"); BYTE* out; DWORD len; converted_buffer->Lock(&out, NULL, &len); pb->imageRect(rect, out + offset_y * stride + offset_x * 4, (int)stride / 4); converted_buffer->Unlock(); } } } // "7.3.2.1.1 Sequence parameter set data syntax" on page 66 of https://www.itu.int/rec/T-REC-H.264-202108-I/en void H264WinDecoderContext::ParseSPS(const uint8_t* buffer, int length) { #define EXPECT(cond) if (!(cond)) return; #define GET_BIT(bit) do { \ if (available == 0) \ { \ if (length == 0) return; \ byte = *buffer++; \ length--; \ available = 8; \ } \ bit = (byte >> --available) & 1; \ } while (0) #define GET_BITS(n, var) do { \ var = 0; \ for (int i = n-1; i >= 0; i--) \ { \ unsigned bit; \ GET_BIT(bit); \ var |= bit << i; \ } \ } while (0) // "9.1 Parsing process for Exp-Golomb codes" on page 231 #define GET_UE(var) do { \ int zeroes = -1; \ for (unsigned bit = 0; !bit; zeroes++) \ GET_BIT(bit); \ GET_BITS(zeroes, var); \ var += (1U << zeroes) - 1; \ } while(0) #define SKIP_UE() do { \ unsigned var; \ GET_UE(var); \ } while (0) #define SKIP_BITS(bits) do { \ unsigned var; \ GET_BITS(bits, var); \ } while (0) // check for NAL header EXPECT((length >= 3 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 1) || (length >= 4 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 0 && buffer[3] == 1)); length -= 4 - buffer[2]; buffer += 4 - buffer[2]; // NAL unit type EXPECT(length > 1); uint8_t type = buffer[0]; EXPECT((type & 0x80) == 0); // forbidden zero bit EXPECT((type & 0x1f) == 7); // SPS NAL unit type buffer++; length--; int available = 0; uint8_t byte = 0; unsigned profile_idc; unsigned seq_parameter_set_id; GET_BITS(8, profile_idc); SKIP_BITS(6); // constraint_set0..5_flag SKIP_BITS(2); // reserved_zero_2bits SKIP_BITS(8); // level_idc GET_UE(seq_parameter_set_id); unsigned chroma_format_idc = 1; if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || profile_idc == 138 || profile_idc == 139 || profile_idc == 134 || profile_idc == 135) { GET_UE(chroma_format_idc); if (chroma_format_idc == 3) { SKIP_BITS(1); // separate_colour_plane_flag } SKIP_UE(); // bit_depth_luma_minus8 SKIP_UE(); // bit_depth_chroma_minus8; SKIP_BITS(1); // qpprime_y_zero_transform_bypass_flag unsigned seq_scaling_matrix_present_flag; GET_BITS(1, seq_scaling_matrix_present_flag); if (seq_scaling_matrix_present_flag) { for (int i = 0; i < (chroma_format_idc != 3 ? 8 : 12); i++) { int seq_scaling_list_present_flag; GET_BITS(1, seq_scaling_list_present_flag); for (int j = 0; j < (seq_scaling_list_present_flag ? 16 : 64); j++) { SKIP_UE(); // delta_scale; } } } } unsigned log2_max_frame_num_minus4; GET_UE(log2_max_frame_num_minus4); // log2_max_frame_num_minus4 unsigned pic_order_cnt_type; GET_UE(pic_order_cnt_type); if (pic_order_cnt_type == 0) { SKIP_UE(); // log2_max_pic_order_cnt_lsb_minus4 } else if (pic_order_cnt_type == 1) { SKIP_BITS(1); // delta_pic_order_always_zero_flag SKIP_UE(); // offset_for_non_ref_pic SKIP_UE(); // offset_for_top_to_bottom_field unsigned num_ref_frames_in_pic_order_cnt_cycle; GET_UE(num_ref_frames_in_pic_order_cnt_cycle); for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { SKIP_UE(); // offset_for_ref_frame } } SKIP_UE(); // max_num_ref_frames SKIP_BITS(1); // gaps_in_frame_num_value_allowed_flag unsigned pic_width_in_mbs_minus1; GET_UE(pic_width_in_mbs_minus1); unsigned pic_height_in_map_units_minus1; GET_UE(pic_height_in_map_units_minus1); unsigned frame_mbs_only_flag; GET_BITS(1, frame_mbs_only_flag); if (!frame_mbs_only_flag) { SKIP_BITS(1); // mb_adaptive_frame_field_flag } SKIP_BITS(1); // direct_8x8_inference_flag unsigned frame_cropping_flag; GET_BITS(1, frame_cropping_flag); unsigned frame_crop_left_offset = 0; unsigned frame_crop_right_offset = 0; unsigned frame_crop_top_offset = 0; unsigned frame_crop_bottom_offset = 0; if (frame_cropping_flag) { GET_UE(frame_crop_left_offset); GET_UE(frame_crop_right_offset); GET_UE(frame_crop_top_offset); GET_UE(frame_crop_bottom_offset); } // ignore rest of bits full_width = 16 * (pic_width_in_mbs_minus1 + 1); full_height = 16 * (pic_height_in_map_units_minus1 + 1) * (2 - frame_mbs_only_flag); // "6.2 Source, decoded, and output picture formats" on page 44 unsigned sub_width_c = (chroma_format_idc == 1 || chroma_format_idc == 2) ? 2 : 1; unsigned sub_height_c = (chroma_format_idc == 1) ? 2 : 1; // page 101 unsigned crop_unit_x = chroma_format_idc == 0 ? 1 : sub_width_c; unsigned crop_unit_y = chroma_format_idc == 0 ? 2 - frame_mbs_only_flag : sub_height_c * (2 - frame_mbs_only_flag); crop_width = full_width - crop_unit_x * (frame_crop_right_offset + frame_crop_left_offset); crop_height = full_height - crop_unit_y * (frame_crop_top_offset + frame_crop_bottom_offset); offset_x = frame_crop_left_offset; offset_y = frame_crop_bottom_offset; vlog.debug("SPS parsing - full=%dx%d, cropped=%dx%d, offset=%d,%d", full_width, full_height, crop_width, crop_height, offset_x, offset_y); #undef SKIP_BITS #undef SKIP_UE #undef GET_BITS #undef GET_BIT #undef GET_UE #undef EXPECT }