
H264WinDecoderContext.cxx

/* Copyright (C) 2021 Vladimir Sukhonosov <xornet@xornet.org>
 * Copyright (C) 2021 Martins Mozeiko <martins.mozeiko@gmail.com>
 * All Rights Reserved.
 *
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this software; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 * USA.
 */

#include <mfapi.h>
#include <mferror.h>
#include <wmcodecdsp.h>
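
// Release a COM interface pointer and reset it to NULL; a no-op when the
// pointer is already NULL.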
#define SAFE_RELEASE(obj) if (obj) { obj->Release(); obj = NULL; }

#include <os/Mutex.h>
#include <rfb/LogWriter.h>
#include <rfb/PixelBuffer.h>
#include <rfb/H264WinDecoderContext.h>

using namespace rfb;

static LogWriter vlog("H264WinDecoderContext");
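
// Decoding pipeline: the Microsoft H.264 decoder MFT produces NV12 frames,
// which a second transform (the Video Processor MFT on Windows 8+, or the
// ColorConvert DMO as a fallback) converts to BGRX for the framebuffer.
// ParseSPS() reads the true frame size and cropping directly from the SPS NAL
// unit so the output can be cropped manually when the decoder reports a
// different (e.g. macroblock-aligned) size.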

bool H264WinDecoderContext::initCodec() {
  os::AutoMutex lock(&mutex);

  if (FAILED(MFStartup(MF_VERSION, MFSTARTUP_LITE)))
  {
    vlog.error("Could not initialize MediaFoundation");
    return false;
  }

  if (FAILED(CoCreateInstance(CLSID_CMSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&decoder)))
  {
    vlog.error("MediaFoundation H264 codec not found");
    return false;
  }

  GUID CLSID_VideoProcessorMFT = { 0x88753b26, 0x5b24, 0x49bd, { 0xb2, 0xe7, 0xc, 0x44, 0x5c, 0x78, 0xc9, 0x82 } };
  if (FAILED(CoCreateInstance(CLSID_VideoProcessorMFT, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&converter)))
  {
    vlog.error("Cannot create MediaFoundation Video Processor (available only on Windows 8+). Trying ColorConvert DMO.");
    if (FAILED(CoCreateInstance(CLSID_CColorConvertDMO, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, (LPVOID*)&converter)))
    {
      decoder->Release();
      vlog.error("ColorConvert DMO not found");
      return false;
    }
  }

  // if possible, enable low-latency decoding (Windows 8 and up)
  IMFAttributes* attributes;
  if (SUCCEEDED(decoder->GetAttributes(&attributes)))
  {
    GUID MF_LOW_LATENCY = { 0x9c27891a, 0xed7a, 0x40e1, { 0x88, 0xe8, 0xb2, 0x27, 0x27, 0xa0, 0x24, 0xee } };
    if (SUCCEEDED(attributes->SetUINT32(MF_LOW_LATENCY, TRUE)))
    {
      vlog.info("Enabled low latency mode");
    }
    attributes->Release();
  }

  // set decoder input type
  IMFMediaType* input_type;
  if (FAILED(MFCreateMediaType(&input_type)))
  {
    decoder->Release();
    converter->Release();
    vlog.error("Could not create MF MediaType");
    return false;
  }
  input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  input_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
  decoder->SetInputType(0, input_type, 0);
  input_type->Release();

  // set decoder output type (NV12)
  DWORD output_index = 0;
  IMFMediaType* output_type = NULL;
  while (SUCCEEDED(decoder->GetOutputAvailableType(0, output_index++, &output_type)))
  {
    GUID subtype;
    if (SUCCEEDED(output_type->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == MFVideoFormat_NV12)
    {
      decoder->SetOutputType(0, output_type, 0);
      output_type->Release();
      break;
    }
    output_type->Release();
  }

  if (FAILED(decoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0)))
  {
    decoder->Release();
    converter->Release();
    input_type->Release();
    vlog.error("Could not start H264 decoder");
    return false;
  }

  MFT_OUTPUT_STREAM_INFO info;
  decoder->GetOutputStreamInfo(0, &info);

  if (FAILED(MFCreateSample(&input_sample)) ||
      FAILED(MFCreateSample(&decoded_sample)) ||
      FAILED(MFCreateSample(&converted_sample)) ||
      FAILED(MFCreateMemoryBuffer(4 * 1024 * 1024, &input_buffer)) ||
      FAILED(MFCreateMemoryBuffer(info.cbSize, &decoded_buffer)))
  {
    decoder->Release();
    converter->Release();
    input_type->Release();
    SAFE_RELEASE(input_sample);
    SAFE_RELEASE(decoded_sample);
    SAFE_RELEASE(converted_sample);
    SAFE_RELEASE(input_buffer);
    SAFE_RELEASE(decoded_buffer);
    vlog.error("Could not allocate media samples/buffers");
    return false;
  }

  input_sample->AddBuffer(input_buffer);
  decoded_sample->AddBuffer(decoded_buffer);

  initialized = true;
  return true;
}
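
// Rough call sequence (illustrative sketch only; the real driver is the
// platform-independent H.264 decoding code that owns this context, and the
// constructor arguments below are hypothetical):
//
//   H264WinDecoderContext ctx(/* ... */);
//   if (ctx.initCodec()) {
//     ctx.decode(h264_data, h264_len, pb);  // repeated as encoded updates arrive
//     ctx.freeCodec();
//   }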

void H264WinDecoderContext::freeCodec() {
  os::AutoMutex lock(&mutex);
  if (!initialized)
    return;
  SAFE_RELEASE(decoder)
  SAFE_RELEASE(converter)
  SAFE_RELEASE(input_sample)
  SAFE_RELEASE(decoded_sample)
  SAFE_RELEASE(converted_sample)
  SAFE_RELEASE(input_buffer)
  SAFE_RELEASE(decoded_buffer)
  SAFE_RELEASE(converted_buffer)
  MFShutdown();
  initialized = false;
}

void H264WinDecoderContext::decode(const uint8_t* h264_buffer,
                                   uint32_t len,
                                   ModifiablePixelBuffer* pb) {
  os::AutoMutex lock(&mutex);
  if (!initialized)
    return;
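
  // input_buffer is preallocated to 4 MiB in initCodec(); SetCurrentLength()
  // fails when the incoming H.264 data is larger than that, in which case the
  // buffer is replaced with one big enough for this update.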
  if (FAILED(input_buffer->SetCurrentLength(len)))
  {
    input_buffer->Release();
    if (FAILED(MFCreateMemoryBuffer(len, &input_buffer)))
    {
      vlog.error("Could not allocate media buffer");
      return;
    }
    input_buffer->SetCurrentLength(len);
    input_sample->RemoveAllBuffers();
    input_sample->AddBuffer(input_buffer);
  }

  BYTE* locked;
  input_buffer->Lock(&locked, NULL, NULL);
  memcpy(locked, h264_buffer, len);
  input_buffer->Unlock();

  vlog.debug("Received %u bytes, decoding", len);

  // extract actual size, including possible cropping
  ParseSPS(h264_buffer, len);

  if (FAILED(decoder->ProcessInput(0, input_sample, 0)))
  {
    vlog.error("Error sending a packet to decoding");
    return;
  }

  bool decoded = false;

  // try to retrieve all decoded output, as input can submit multiple h264 packets in one buffer
  for (;;)
  {
    DWORD curlen;
    decoded_buffer->GetCurrentLength(&curlen);
    decoded_buffer->SetCurrentLength(0);

    MFT_OUTPUT_DATA_BUFFER decoded_data;
    decoded_data.dwStreamID = 0;
    decoded_data.pSample = decoded_sample;
    decoded_data.dwStatus = 0;
    decoded_data.pEvents = NULL;

    DWORD status;
    HRESULT hr = decoder->ProcessOutput(0, 1, &decoded_data, &status);
    SAFE_RELEASE(decoded_data.pEvents)

    if (SUCCEEDED(hr))
    {
      vlog.debug("Frame decoded");
      // successfully decoded next frame
      // but do not exit loop, try again if there is next frame
      decoded = true;
    }
    else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
    {
      // no more frames to decode
      if (decoded)
      {
        // restore previous buffer length for converter
        decoded_buffer->SetCurrentLength(curlen);
      }
      break;
    }
    else if (hr == MF_E_TRANSFORM_STREAM_CHANGE)
    {
      // something changed (resolution, framerate, h264 properties...)
      // need to setup output type and try decoding again
      DWORD output_index = 0;
      IMFMediaType* output_type = NULL;
      while (SUCCEEDED(decoder->GetOutputAvailableType(0, output_index++, &output_type)))
      {
        GUID subtype;
        if (SUCCEEDED(output_type->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == MFVideoFormat_NV12)
        {
          decoder->SetOutputType(0, output_type, 0);
          break;
        }
        output_type->Release();
        output_type = NULL;
      }

      // reinitialize output type (NV12) that now has correct properties (width/height/framerate)
      decoder->SetOutputType(0, output_type, 0);

      UINT32 width, height;
      if (FAILED(MFGetAttributeSize(output_type, MF_MT_FRAME_SIZE, &width, &height)))
      {
        vlog.error("Error getting output type size");
        output_type->Release();
        break;
      }

      // if MFT reports different width or height than calculated cropped width/height
      if (crop_width != 0 && crop_height != 0 && (width != crop_width || height != crop_height))
      {
        // create NV12/RGB image with full size as we'll do manual cropping
        width = full_width;
        height = full_height;
      }
      else
      {
        // no manual cropping necessary
        offset_x = offset_y = 0;
        crop_width = width;
        crop_height = height;
      }

      vlog.debug("Setting up decoded output with %ux%u size", crop_width, crop_height);

      // input type to converter, BGRX pixel format
      IMFMediaType* converted_type;
      if (FAILED(MFCreateMediaType(&converted_type)))
      {
        vlog.error("Error creating media type");
      }
      else
      {
        converted_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
        converted_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
        converted_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
        MFSetAttributeSize(converted_type, MF_MT_FRAME_SIZE, full_width, full_height);
        MFGetStrideForBitmapInfoHeader(MFVideoFormat_RGB32.Data1, full_width, &stride);
        // bottom-up
        stride = -stride;
        converted_type->SetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32)stride);

        // setup NV12 -> BGRX converter
        converter->SetOutputType(0, converted_type, 0);
        converter->SetInputType(0, output_type, 0);
        converted_type->Release();

        // create converter output buffer
        MFT_OUTPUT_STREAM_INFO info;
        converter->GetOutputStreamInfo(0, &info);
        if (FAILED(MFCreateMemoryBuffer(info.cbSize, &converted_buffer)))
        {
          vlog.error("Error creating media buffer");
        }
        else
        {
          converted_sample->AddBuffer(converted_buffer);
        }
      }

      output_type->Release();
    }
  }

  // we care only about final image
  // we ignore previous images if decoded multiple in a row
  if (decoded)
  {
    if (FAILED(converter->ProcessInput(0, decoded_sample, 0)))
    {
      vlog.error("Error sending a packet to converter");
      return;
    }

    MFT_OUTPUT_DATA_BUFFER converted_data;
    converted_data.dwStreamID = 0;
    converted_data.pSample = converted_sample;
    converted_data.dwStatus = 0;
    converted_data.pEvents = NULL;

    DWORD status;
    HRESULT hr = converter->ProcessOutput(0, 1, &converted_data, &status);
    SAFE_RELEASE(converted_data.pEvents)

    if (FAILED(hr))
    {
      vlog.error("Error converting to RGB");
    }
    else
    {
      vlog.debug("Frame converted to RGB");

      BYTE* out;
      DWORD len;
      converted_buffer->Lock(&out, NULL, &len);
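      // Skip the cropped margins (offset_y rows and offset_x pixels of 4 bytes
      // each), and pass imageRect() the stride in pixels rather than bytes.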
      pb->imageRect(rect, out + offset_y * stride + offset_x * 4, (int)stride / 4);
      converted_buffer->Unlock();
    }
  }
}
  308. // "7.3.2.1.1 Sequence parameter set data syntax" on page 66 of https://www.itu.int/rec/T-REC-H.264-202108-I/en
  309. void H264WinDecoderContext::ParseSPS(const uint8_t* buffer, int length)
  310. {
  311. #define EXPECT(cond) if (!(cond)) return;
  312. #define GET_BIT(bit) do { \
  313. if (available == 0) \
  314. { \
  315. if (length == 0) return; \
  316. byte = *buffer++; \
  317. length--; \
  318. available = 8; \
  319. } \
  320. bit = (byte >> --available) & 1; \
  321. } while (0)
  322. #define GET_BITS(n, var) do { \
  323. var = 0; \
  324. for (int i = n-1; i >= 0; i--) \
  325. { \
  326. unsigned bit; \
  327. GET_BIT(bit); \
  328. var |= bit << i; \
  329. } \
  330. } while (0)
  331. // "9.1 Parsing process for Exp-Golomb codes" on page 231
  332. #define GET_UE(var) do { \
  333. int zeroes = -1; \
  334. for (unsigned bit = 0; !bit; zeroes++) \
  335. GET_BIT(bit); \
  336. GET_BITS(zeroes, var); \
  337. var += (1U << zeroes) - 1; \
  338. } while(0)
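
// Worked example (illustrative): the Exp-Golomb codeword "00100" has two
// leading zeros, so GET_UE reads the two bits after the leading '1' ("00" = 0)
// and adds 2^2 - 1, giving 3. Similarly "1" -> 0, "010" -> 1, "011" -> 2.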

#define SKIP_UE() do { \
    unsigned var;      \
    GET_UE(var);       \
  } while (0)

#define SKIP_BITS(bits) do { \
    unsigned var;            \
    GET_BITS(bits, var);     \
  } while (0)

  // check for NAL header
  EXPECT((length >= 3 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 1) ||
         (length >= 4 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 0 && buffer[3] == 1));
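
  // Skip the start code: 4 - buffer[2] is 3 for a three-byte start code
  // (00 00 01, so buffer[2] == 1) and 4 for a four-byte one (00 00 00 01,
  // so buffer[2] == 0).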
  length -= 4 - buffer[2];
  buffer += 4 - buffer[2];

  // NAL unit type
  EXPECT(length > 1);
  uint8_t type = buffer[0];
  EXPECT((type & 0x80) == 0); // forbidden zero bit
  EXPECT((type & 0x1f) == 7); // SPS NAL unit type
  buffer++;
  length--;

  int available = 0;
  uint8_t byte = 0;

  unsigned profile_idc;
  unsigned seq_parameter_set_id;
  GET_BITS(8, profile_idc);
  SKIP_BITS(6); // constraint_set0..5_flag
  SKIP_BITS(2); // reserved_zero_2bits
  SKIP_BITS(8); // level_idc
  GET_UE(seq_parameter_set_id);

  unsigned chroma_format_idc = 1;
  if (profile_idc == 100 || profile_idc == 110 ||
      profile_idc == 122 || profile_idc == 244 ||
      profile_idc == 44 || profile_idc == 83 ||
      profile_idc == 86 || profile_idc == 118 ||
      profile_idc == 128 || profile_idc == 138 ||
      profile_idc == 139 || profile_idc == 134 ||
      profile_idc == 135)
  {
    GET_UE(chroma_format_idc);
    if (chroma_format_idc == 3)
    {
      SKIP_BITS(1); // separate_colour_plane_flag
    }
    SKIP_UE(); // bit_depth_luma_minus8
    SKIP_UE(); // bit_depth_chroma_minus8;
    SKIP_BITS(1); // qpprime_y_zero_transform_bypass_flag
    unsigned seq_scaling_matrix_present_flag;
    GET_BITS(1, seq_scaling_matrix_present_flag);
    if (seq_scaling_matrix_present_flag)
    {
      for (int i = 0; i < (chroma_format_idc != 3 ? 8 : 12); i++)
      {
        int seq_scaling_list_present_flag;
        GET_BITS(1, seq_scaling_list_present_flag);
        for (int j = 0; j < (seq_scaling_list_present_flag ? 16 : 64); j++)
        {
          SKIP_UE(); // delta_scale;
        }
      }
    }
  }

  unsigned log2_max_frame_num_minus4;
  GET_UE(log2_max_frame_num_minus4); // log2_max_frame_num_minus4
  unsigned pic_order_cnt_type;
  GET_UE(pic_order_cnt_type);
  if (pic_order_cnt_type == 0)
  {
    SKIP_UE(); // log2_max_pic_order_cnt_lsb_minus4
  }
  else if (pic_order_cnt_type == 1)
  {
    SKIP_BITS(1); // delta_pic_order_always_zero_flag
    SKIP_UE(); // offset_for_non_ref_pic
    SKIP_UE(); // offset_for_top_to_bottom_field
    unsigned num_ref_frames_in_pic_order_cnt_cycle;
    GET_UE(num_ref_frames_in_pic_order_cnt_cycle);
    for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++)
    {
      SKIP_UE(); // offset_for_ref_frame
    }
  }
  SKIP_UE(); // max_num_ref_frames
  SKIP_BITS(1); // gaps_in_frame_num_value_allowed_flag

  unsigned pic_width_in_mbs_minus1;
  GET_UE(pic_width_in_mbs_minus1);
  unsigned pic_height_in_map_units_minus1;
  GET_UE(pic_height_in_map_units_minus1);
  unsigned frame_mbs_only_flag;
  GET_BITS(1, frame_mbs_only_flag);
  if (!frame_mbs_only_flag)
  {
    SKIP_BITS(1); // mb_adaptive_frame_field_flag
  }
  SKIP_BITS(1); // direct_8x8_inference_flag

  unsigned frame_cropping_flag;
  GET_BITS(1, frame_cropping_flag);
  unsigned frame_crop_left_offset = 0;
  unsigned frame_crop_right_offset = 0;
  unsigned frame_crop_top_offset = 0;
  unsigned frame_crop_bottom_offset = 0;
  if (frame_cropping_flag)
  {
    GET_UE(frame_crop_left_offset);
    GET_UE(frame_crop_right_offset);
    GET_UE(frame_crop_top_offset);
    GET_UE(frame_crop_bottom_offset);
  }
  // ignore rest of bits

  full_width = 16 * (pic_width_in_mbs_minus1 + 1);
  full_height = 16 * (pic_height_in_map_units_minus1 + 1) * (2 - frame_mbs_only_flag);

  // "6.2 Source, decoded, and output picture formats" on page 44
  unsigned sub_width_c = (chroma_format_idc == 1 || chroma_format_idc == 2) ? 2 : 1;
  unsigned sub_height_c = (chroma_format_idc == 1) ? 2 : 1;
  // page 101
  unsigned crop_unit_x = chroma_format_idc == 0 ? 1 : sub_width_c;
  unsigned crop_unit_y = chroma_format_idc == 0 ? 2 - frame_mbs_only_flag : sub_height_c * (2 - frame_mbs_only_flag);

  crop_width = full_width - crop_unit_x * (frame_crop_right_offset + frame_crop_left_offset);
  crop_height = full_height - crop_unit_y * (frame_crop_top_offset + frame_crop_bottom_offset);
  offset_x = frame_crop_left_offset;
  offset_y = frame_crop_bottom_offset;
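
  // Worked example (typical 1920x1080 stream): pic_width_in_mbs_minus1 = 119
  // and pic_height_in_map_units_minus1 = 67 give a coded size of 1920x1088
  // (multiples of 16); with chroma_format_idc = 1 and frame_mbs_only_flag = 1
  // the crop units are 2x2, so frame_crop_bottom_offset = 4 trims the height
  // to 1088 - 2*4 = 1080.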

  vlog.debug("SPS parsing - full=%dx%d, cropped=%dx%d, offset=%d,%d", full_width, full_height, crop_width, crop_height, offset_x, offset_y);

#undef SKIP_BITS
#undef SKIP_UE
#undef GET_BITS
#undef GET_BIT
#undef GET_UE
#undef EXPECT
}