
H264WinDecoderContext.cxx

/* Copyright (C) 2021 Vladimir Sukhonosov <xornet@xornet.org>
 * Copyright (C) 2021 Martins Mozeiko <martins.mozeiko@gmail.com>
 * All Rights Reserved.
 *
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this software; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 * USA.
 */

#include <mfapi.h>
#include <mferror.h>
#include <wmcodecdsp.h>

#define SAFE_RELEASE(obj) if (obj) { obj->Release(); obj = NULL; }

#include <os/Mutex.h>
#include <rfb/LogWriter.h>
#include <rfb/PixelBuffer.h>
#include <rfb/H264WinDecoderContext.h>

using namespace rfb;

static LogWriter vlog("H264WinDecoderContext");
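
// Decoding pipeline set up below: the Media Foundation H.264 decoder MFT
// produces NV12 frames, and a second MFT (the Video Processor on Windows 8+,
// or the ColorConvert DMO as a fallback) converts NV12 to 32-bit RGB (BGRX)
// before the pixels are written into the ModifiablePixelBuffer.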
bool H264WinDecoderContext::initCodec() {
  os::AutoMutex lock(&mutex);

  if (FAILED(MFStartup(MF_VERSION, MFSTARTUP_LITE)))
  {
    vlog.error("Could not initialize MediaFoundation");
    return false;
  }

  if (FAILED(CoCreateInstance(CLSID_CMSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&decoder)))
  {
    vlog.error("MediaFoundation H264 codec not found");
    return false;
  }

  GUID CLSID_VideoProcessorMFT = { 0x88753b26, 0x5b24, 0x49bd, { 0xb2, 0xe7, 0xc, 0x44, 0x5c, 0x78, 0xc9, 0x82 } };
  if (FAILED(CoCreateInstance(CLSID_VideoProcessorMFT, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&converter)))
  {
    vlog.error("Cannot create MediaFoundation Video Processor (available only on Windows 8+). Trying ColorConvert DMO.");
    if (FAILED(CoCreateInstance(CLSID_CColorConvertDMO, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&converter)))
    {
      decoder->Release();
      vlog.error("ColorConvert DMO not found");
      return false;
    }
  }

  // if possible, enable low-latency decoding (Windows 8 and up)
  IMFAttributes* attributes;
  if (SUCCEEDED(decoder->GetAttributes(&attributes)))
  {
    GUID MF_LOW_LATENCY = { 0x9c27891a, 0xed7a, 0x40e1, { 0x88, 0xe8, 0xb2, 0x27, 0x27, 0xa0, 0x24, 0xee } };
    if (SUCCEEDED(attributes->SetUINT32(MF_LOW_LATENCY, TRUE)))
    {
      vlog.info("Enabled low latency mode");
    }
    attributes->Release();
  }

  // set decoder input type
  IMFMediaType* input_type;
  if (FAILED(MFCreateMediaType(&input_type)))
  {
    decoder->Release();
    converter->Release();
    vlog.error("Could not create MF MediaType");
    return false;
  }
  input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  input_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
  decoder->SetInputType(0, input_type, 0);
  input_type->Release();
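
  // NV12 (a full-resolution Y plane followed by an interleaved half-resolution
  // UV plane, i.e. 4:2:0) is picked as the decoder's output here, and is later
  // set as the converter's input type.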
  // set decoder output type (NV12)
  DWORD output_index = 0;
  IMFMediaType* output_type = NULL;
  while (SUCCEEDED(decoder->GetOutputAvailableType(0, output_index++, &output_type)))
  {
    GUID subtype;
    if (SUCCEEDED(output_type->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == MFVideoFormat_NV12)
    {
      decoder->SetOutputType(0, output_type, 0);
      output_type->Release();
      break;
    }
    output_type->Release();
  }

  if (FAILED(decoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0)))
  {
    decoder->Release();
    converter->Release();
    vlog.error("Could not start H264 decoder");
    return false;
  }

  MFT_OUTPUT_STREAM_INFO info;
  decoder->GetOutputStreamInfo(0, &info);

  if (FAILED(MFCreateSample(&input_sample)) ||
      FAILED(MFCreateSample(&decoded_sample)) ||
      FAILED(MFCreateSample(&converted_sample)) ||
      FAILED(MFCreateMemoryBuffer(4 * 1024 * 1024, &input_buffer)) ||
      FAILED(MFCreateMemoryBuffer(info.cbSize, &decoded_buffer)))
  {
    decoder->Release();
    converter->Release();
    SAFE_RELEASE(input_sample);
    SAFE_RELEASE(decoded_sample);
    SAFE_RELEASE(converted_sample);
    SAFE_RELEASE(input_buffer);
    SAFE_RELEASE(decoded_buffer);
    vlog.error("Could not allocate media samples/buffers");
    return false;
  }

  input_sample->AddBuffer(input_buffer);
  decoded_sample->AddBuffer(decoded_buffer);

  initialized = true;
  return true;
}

void H264WinDecoderContext::freeCodec() {
  os::AutoMutex lock(&mutex);
  if (!initialized)
    return;
  SAFE_RELEASE(decoder)
  SAFE_RELEASE(converter)
  SAFE_RELEASE(input_sample)
  SAFE_RELEASE(decoded_sample)
  SAFE_RELEASE(converted_sample)
  SAFE_RELEASE(input_buffer)
  SAFE_RELEASE(decoded_buffer)
  SAFE_RELEASE(converted_buffer)
  MFShutdown();
  initialized = false;
}
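
// decode() copies one or more H.264 packets into the input sample, parses the
// SPS to learn the true (cropped) picture size, pushes the sample through the
// decoder, drains every available output frame, and finally converts the last
// decoded NV12 frame to RGB and writes it into the pixel buffer.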
void H264WinDecoderContext::decode(const rdr::U8* h264_buffer, rdr::U32 len, rdr::U32 flags, ModifiablePixelBuffer* pb) {
  os::AutoMutex lock(&mutex);
  if (!initialized)
    return;
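
  // SetCurrentLength fails when the packet is larger than the buffer's maximum
  // size (initially 4 MiB), so in that case allocate a bigger buffer and
  // re-attach it to the input sample before copying the data.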
  if (FAILED(input_buffer->SetCurrentLength(len)))
  {
    input_buffer->Release();
    if (FAILED(MFCreateMemoryBuffer(len, &input_buffer)))
    {
      vlog.error("Could not allocate media buffer");
      return;
    }
    input_buffer->SetCurrentLength(len);
    input_sample->RemoveAllBuffers();
    input_sample->AddBuffer(input_buffer);
  }

  BYTE* locked;
  input_buffer->Lock(&locked, NULL, NULL);
  memcpy(locked, h264_buffer, len);
  input_buffer->Unlock();

  vlog.debug("Received %u bytes, decoding", len);

  // extract actual size, including possible cropping
  ParseSPS(h264_buffer, len);

  if (FAILED(decoder->ProcessInput(0, input_sample, 0)))
  {
    vlog.error("Error sending a packet to decoding");
    return;
  }

  bool decoded = false;

  // try to retrieve all decoded output, as input can submit multiple h264 packets in one buffer
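  // ProcessOutput reports MF_E_TRANSFORM_NEED_MORE_INPUT once the decoder is
  // drained, and MF_E_TRANSFORM_STREAM_CHANGE when the output media type has
  // to be renegotiated (e.g. after a resolution change).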
  for (;;)
  {
    DWORD curlen;
    decoded_buffer->GetCurrentLength(&curlen);
    decoded_buffer->SetCurrentLength(0);

    MFT_OUTPUT_DATA_BUFFER decoded_data;
    decoded_data.dwStreamID = 0;
    decoded_data.pSample = decoded_sample;
    decoded_data.dwStatus = 0;
    decoded_data.pEvents = NULL;

    DWORD status;
    HRESULT hr = decoder->ProcessOutput(0, 1, &decoded_data, &status);
    SAFE_RELEASE(decoded_data.pEvents)

    if (SUCCEEDED(hr))
    {
      vlog.debug("Frame decoded");
      // successfully decoded next frame
      // but do not exit loop, try again if there is next frame
      decoded = true;
    }
    else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
    {
      // no more frames to decode
      if (decoded)
      {
        // restore previous buffer length for converter
        decoded_buffer->SetCurrentLength(curlen);
      }
      break;
    }
    else if (hr == MF_E_TRANSFORM_STREAM_CHANGE)
    {
      // something changed (resolution, framerate, h264 properties...)
      // need to setup output type and try decoding again
      DWORD output_index = 0;
      IMFMediaType* output_type = NULL;
      while (SUCCEEDED(decoder->GetOutputAvailableType(0, output_index++, &output_type)))
      {
        GUID subtype;
        if (SUCCEEDED(output_type->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == MFVideoFormat_NV12)
        {
          decoder->SetOutputType(0, output_type, 0);
          break;
        }
        output_type->Release();
        output_type = NULL;
      }

      // reinitialize output type (NV12) that now has correct properties (width/height/framerate)
      decoder->SetOutputType(0, output_type, 0);

      UINT32 width, height;
      if (FAILED(MFGetAttributeSize(output_type, MF_MT_FRAME_SIZE, &width, &height)))
      {
        vlog.error("Error getting output type size");
        output_type->Release();
        break;
      }

      // if MFT reports different width or height than calculated cropped width/height
      if (crop_width != 0 && crop_height != 0 && (width != crop_width || height != crop_height))
      {
        // create NV12/RGB image with full size as we'll do manual cropping
        width = full_width;
        height = full_height;
      }
      else
      {
        // no manual cropping necessary
        offset_x = offset_y = 0;
        crop_width = width;
        crop_height = height;
      }
      vlog.debug("Setting up decoded output with %ux%u size", crop_width, crop_height);

      // input type to converter, BGRX pixel format
      IMFMediaType* converted_type;
      if (FAILED(MFCreateMediaType(&converted_type)))
      {
        vlog.error("Error creating media type");
      }
      else
      {
        converted_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
        converted_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
        converted_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
        MFSetAttributeSize(converted_type, MF_MT_FRAME_SIZE, full_width, full_height);
        MFGetStrideForBitmapInfoHeader(MFVideoFormat_RGB32.Data1, full_width, &stride);
        // bottom-up
        stride = -stride;
        converted_type->SetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32)stride);

        // setup NV12 -> BGRX converter
        converter->SetOutputType(0, converted_type, 0);
        converter->SetInputType(0, output_type, 0);
        converted_type->Release();

        // create converter output buffer
        MFT_OUTPUT_STREAM_INFO info;
        converter->GetOutputStreamInfo(0, &info);
        if (FAILED(MFCreateMemoryBuffer(info.cbSize, &converted_buffer)))
        {
          vlog.error("Error creating media buffer");
        }
        else
        {
          converted_sample->AddBuffer(converted_buffer);
        }
      }

      output_type->Release();
    }
  }

  // we care only about final image
  // we ignore previous images if decoded multiple in a row
  if (decoded)
  {
    if (FAILED(converter->ProcessInput(0, decoded_sample, 0)))
    {
      vlog.error("Error sending a packet to converter");
      return;
    }

    MFT_OUTPUT_DATA_BUFFER converted_data;
    converted_data.dwStreamID = 0;
    converted_data.pSample = converted_sample;
    converted_data.dwStatus = 0;
    converted_data.pEvents = NULL;

    DWORD status;
    HRESULT hr = converter->ProcessOutput(0, 1, &converted_data, &status);
    SAFE_RELEASE(converted_data.pEvents)

    if (FAILED(hr))
    {
      vlog.error("Error converting to RGB");
    }
    else
    {
      vlog.debug("Frame converted to RGB");
      BYTE* out;
      DWORD len;
      converted_buffer->Lock(&out, NULL, &len);
      pb->imageRect(rect, out + offset_y * stride + offset_x * 4, (int)stride / 4);
      converted_buffer->Unlock();
    }
  }
}
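
// ParseSPS fills in full_width/full_height (the macroblock-aligned coded size)
// and crop_width/crop_height plus offset_x/offset_y from the frame cropping
// fields of the sequence parameter set, so decode() knows the visible picture
// size even when the decoder reports the padded one.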
// "7.3.2.1.1 Sequence parameter set data syntax" on page 66 of https://www.itu.int/rec/T-REC-H.264-202108-I/en
void H264WinDecoderContext::ParseSPS(const rdr::U8* buffer, int length)
{
#define EXPECT(cond) if (!(cond)) return;

#define GET_BIT(bit) do {            \
    if (available == 0)              \
    {                                \
      if (length == 0) return;       \
      byte = *buffer++;              \
      length--;                      \
      available = 8;                 \
    }                                \
    bit = (byte >> --available) & 1; \
  } while (0)

#define GET_BITS(n, var) do {          \
    var = 0;                           \
    for (int i = n-1; i >= 0; i--)     \
    {                                  \
      unsigned bit;                    \
      GET_BIT(bit);                    \
      var |= bit << i;                 \
    }                                  \
  } while (0)

// "9.1 Parsing process for Exp-Golomb codes" on page 231
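// ue(v): count leading zero bits up to and including the first 1 bit, then read
// that many more bits; the value is 2^zeroes - 1 + suffix. For example, the bit
// string 00101 decodes as zeroes = 2, suffix = 01b = 1, value = 3 + 1 = 4.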
#define GET_UE(var) do {                    \
    int zeroes = -1;                        \
    for (unsigned bit = 0; !bit; zeroes++)  \
      GET_BIT(bit);                         \
    GET_BITS(zeroes, var);                  \
    var += (1U << zeroes) - 1;              \
  } while(0)

#define SKIP_UE() do { \
    unsigned var;      \
    GET_UE(var);       \
  } while (0)

#define SKIP_BITS(bits) do { \
    unsigned var;            \
    GET_BITS(bits, var);     \
  } while (0)

  // check for NAL header
  EXPECT((length >= 3 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 1) ||
         (length >= 4 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 0 && buffer[3] == 1));
  length -= 4 - buffer[2];
  buffer += 4 - buffer[2];

  // NAL unit type
  EXPECT(length > 1);
  rdr::U8 type = buffer[0];
  EXPECT((type & 0x80) == 0); // forbidden zero bit
  EXPECT((type & 0x1f) == 7); // SPS NAL unit type
  buffer++;
  length--;

  int available = 0;
  rdr::U8 byte = 0;

  unsigned profile_idc;
  unsigned seq_parameter_set_id;
  GET_BITS(8, profile_idc);
  SKIP_BITS(6); // constraint_set0..5_flag
  SKIP_BITS(2); // reserved_zero_2bits
  SKIP_BITS(8); // level_idc
  GET_UE(seq_parameter_set_id);

  unsigned chroma_format_idc = 1;
  if (profile_idc == 100 || profile_idc == 110 ||
      profile_idc == 122 || profile_idc == 244 ||
      profile_idc == 44 || profile_idc == 83 ||
      profile_idc == 86 || profile_idc == 118 ||
      profile_idc == 128 || profile_idc == 138 ||
      profile_idc == 139 || profile_idc == 134 ||
      profile_idc == 135)
  {
    GET_UE(chroma_format_idc);
    if (chroma_format_idc == 3)
    {
      SKIP_BITS(1); // separate_colour_plane_flag
    }
    SKIP_UE(); // bit_depth_luma_minus8
    SKIP_UE(); // bit_depth_chroma_minus8
    SKIP_BITS(1); // qpprime_y_zero_transform_bypass_flag
    unsigned seq_scaling_matrix_present_flag;
    GET_BITS(1, seq_scaling_matrix_present_flag);
    if (seq_scaling_matrix_present_flag)
    {
      for (int i = 0; i < (chroma_format_idc != 3 ? 8 : 12); i++)
      {
        int seq_scaling_list_present_flag;
        GET_BITS(1, seq_scaling_list_present_flag);
        for (int j = 0; j < (seq_scaling_list_present_flag ? 16 : 64); j++)
        {
          SKIP_UE(); // delta_scale
        }
      }
    }
  }

  unsigned log2_max_frame_num_minus4;
  GET_UE(log2_max_frame_num_minus4); // log2_max_frame_num_minus4

  unsigned pic_order_cnt_type;
  GET_UE(pic_order_cnt_type);
  if (pic_order_cnt_type == 0)
  {
    SKIP_UE(); // log2_max_pic_order_cnt_lsb_minus4
  }
  else if (pic_order_cnt_type == 1)
  {
    SKIP_BITS(1); // delta_pic_order_always_zero_flag
    SKIP_UE(); // offset_for_non_ref_pic
    SKIP_UE(); // offset_for_top_to_bottom_field
    unsigned num_ref_frames_in_pic_order_cnt_cycle;
    GET_UE(num_ref_frames_in_pic_order_cnt_cycle);
    for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++)
    {
      SKIP_UE(); // offset_for_ref_frame
    }
  }

  SKIP_UE(); // max_num_ref_frames
  SKIP_BITS(1); // gaps_in_frame_num_value_allowed_flag

  unsigned pic_width_in_mbs_minus1;
  GET_UE(pic_width_in_mbs_minus1);
  unsigned pic_height_in_map_units_minus1;
  GET_UE(pic_height_in_map_units_minus1);
  unsigned frame_mbs_only_flag;
  GET_BITS(1, frame_mbs_only_flag);
  if (!frame_mbs_only_flag)
  {
    SKIP_BITS(1); // mb_adaptive_frame_field_flag
  }
  SKIP_BITS(1); // direct_8x8_inference_flag

  unsigned frame_cropping_flag;
  GET_BITS(1, frame_cropping_flag);
  unsigned frame_crop_left_offset = 0;
  unsigned frame_crop_right_offset = 0;
  unsigned frame_crop_top_offset = 0;
  unsigned frame_crop_bottom_offset = 0;
  if (frame_cropping_flag)
  {
    GET_UE(frame_crop_left_offset);
    GET_UE(frame_crop_right_offset);
    GET_UE(frame_crop_top_offset);
    GET_UE(frame_crop_bottom_offset);
  }

  // ignore rest of bits

  full_width = 16 * (pic_width_in_mbs_minus1 + 1);
  full_height = 16 * (pic_height_in_map_units_minus1 + 1) * (2 - frame_mbs_only_flag);

  // "6.2 Source, decoded, and output picture formats" on page 44
  unsigned sub_width_c = (chroma_format_idc == 1 || chroma_format_idc == 2) ? 2 : 1;
  unsigned sub_height_c = (chroma_format_idc == 1) ? 2 : 1;
  // page 101
  unsigned crop_unit_x = chroma_format_idc == 0 ? 1 : sub_width_c;
  unsigned crop_unit_y = chroma_format_idc == 0 ? 2 - frame_mbs_only_flag : sub_height_c * (2 - frame_mbs_only_flag);

  crop_width = full_width - crop_unit_x * (frame_crop_right_offset + frame_crop_left_offset);
  crop_height = full_height - crop_unit_y * (frame_crop_top_offset + frame_crop_bottom_offset);
  offset_x = frame_crop_left_offset;
  offset_y = frame_crop_bottom_offset;

  vlog.debug("SPS parsing - full=%dx%d, cropped=%dx%d, offset=%d,%d", full_width, full_height, crop_width, crop_height, offset_x, offset_y);

#undef SKIP_BITS
#undef SKIP_UE
#undef GET_BITS
#undef GET_BIT
#undef GET_UE
#undef EXPECT
}