media_clip_ffmpeg.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. /*
  2. This file is part of Telegram Desktop,
  3. the official desktop application for the Telegram messaging service.
  4. For license and copyright information please follow this link:
  5. https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
  6. */
  7. #include "media/clip/media_clip_ffmpeg.h"
  8. #include "core/file_location.h"
  9. #include "logs.h"
  10. namespace Media {
  11. namespace Clip {
  12. namespace internal {
  13. namespace {
  14. constexpr auto kSkipInvalidDataPackets = 10;
  15. constexpr auto kMaxInlineArea = 1280 * 720;
  16. constexpr auto kMaxSendingArea = 3840 * 2160; // usual 4K
  17. // See https://github.com/telegramdesktop/tdesktop/issues/7225
  18. constexpr auto kAlignImageBy = 64;
  19. void alignedImageBufferCleanupHandler(void *data) {
  20. auto buffer = static_cast<uchar*>(data);
  21. delete[] buffer;
  22. }
  23. // Create a QImage of desired size where all the data is aligned to 16 bytes.
  24. QImage createAlignedImage(QSize size) {
  25. auto width = size.width();
  26. auto height = size.height();
  27. auto widthalign = kAlignImageBy / 4;
  28. auto neededwidth = width + ((width % widthalign) ? (widthalign - (width % widthalign)) : 0);
  29. auto bytesperline = neededwidth * 4;
  30. auto buffer = new uchar[bytesperline * height + kAlignImageBy];
  31. auto cleanupdata = static_cast<void*>(buffer);
  32. auto bufferval = reinterpret_cast<uintptr_t>(buffer);
  33. auto alignedbuffer = buffer + ((bufferval % kAlignImageBy) ? (kAlignImageBy - (bufferval % kAlignImageBy)) : 0);
  34. return QImage(alignedbuffer, width, height, bytesperline, QImage::Format_ARGB32_Premultiplied, alignedImageBufferCleanupHandler, cleanupdata);
  35. }
  36. bool isAlignedImage(const QImage &image) {
  37. return !(reinterpret_cast<uintptr_t>(image.constBits()) % kAlignImageBy) && !(image.bytesPerLine() % kAlignImageBy);
  38. }
  39. } // namespace
// Wraps an FFmpeg-based clip decoder over either a file location or an
// in-memory byte array; allocates the reusable decode frame up front.
FFMpegReaderImplementation::FFMpegReaderImplementation(
	Core::FileLocation *location,
	QByteArray *data)
: ReaderImplementation(location, data)
, _frame(FFmpeg::MakeFramePointer()) {
}
// Decodes the next video frame into _frame, feeding packets to the codec as
// needed and looping back to the start of the stream on EOF (for repeated
// playback). Returns Success once a frame is decoded, Error otherwise.
ReaderImplementation::ReadResult FFMpegReaderImplementation::readNextFrame() {
	do {
		// First try to pull an already-decoded frame out of the codec.
		int res = avcodec_receive_frame(_codecContext, _frame.get());
		if (res >= 0) {
			// Reject frames larger than the current mode allows
			// (inspection tolerates up to 4K, inline playback less).
			const auto limit = (_mode == Mode::Inspecting)
				? kMaxSendingArea
				: kMaxInlineArea;
			if (_frame->width * _frame->height > limit) {
				return ReadResult::Error;
			}
			processReadFrame();
			return ReadResult::Success;
		}
		if (res == AVERROR_EOF) {
			// Decoder fully drained: loop the clip by seeking back to
			// the start and flushing the decoder state.
			_packetQueue.clear();
			if (!_hadFrame) {
				LOG(("Gif Error: Got EOF before a single frame was read!"));
				return ReadResult::Error;
			}
			// Try progressively cruder ways of seeking to the start.
			if ((res = avformat_seek_file(_fmtContext, _streamId, std::numeric_limits<int64_t>::min(), 0, std::numeric_limits<int64_t>::max(), 0)) < 0) {
				if ((res = av_seek_frame(_fmtContext, _streamId, 0, AVSEEK_FLAG_BYTE)) < 0) {
					if ((res = av_seek_frame(_fmtContext, _streamId, 0, AVSEEK_FLAG_FRAME)) < 0) {
						if ((res = av_seek_frame(_fmtContext, _streamId, 0, 0)) < 0) {
							char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
							LOG(("Gif Error: Unable to av_seek_frame() to the start %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
							return ReadResult::Error;
						}
					}
				}
			}
			// Reset per-loop playback state before decoding again.
			avcodec_flush_buffers(_codecContext);
			_hadFrame = false;
			_frameMs = 0;
			_lastReadVideoMs = _lastReadAudioMs = 0;
			_skippedInvalidDataPackets = 0;
			_frameIndex = -1;
			continue;
		} else if (res != AVERROR(EAGAIN)) {
			char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
			LOG(("Gif Error: Unable to avcodec_receive_frame() %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
			return ReadResult::Error;
		}
		// EAGAIN: the codec wants more input — refill the packet queue.
		while (_packetQueue.empty()) {
			auto packetResult = readAndProcessPacket();
			if (packetResult == PacketResult::Error) {
				return ReadResult::Error;
			} else if (packetResult == PacketResult::EndOfFile) {
				break;
			}
		}
		if (_packetQueue.empty()) {
			// No more packets in the file: switch the decoder to drain
			// mode so buffered frames can still be received above.
			avcodec_send_packet(_codecContext, nullptr); // drain
			continue;
		}
		auto packet = std::move(_packetQueue.front());
		_packetQueue.pop_front();
		const auto native = &packet.fields();
		// Restore the packet's original size/data pointers before it is
		// released, so the underlying allocation is freed correctly.
		const auto guard = gsl::finally([
			&,
			size = native->size,
			data = native->data
		] {
			native->size = size;
			native->data = data;
			packet = FFmpeg::Packet();
		});
		res = avcodec_send_packet(_codecContext, native);
		if (res < 0) {
			char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
			LOG(("Gif Error: Unable to avcodec_send_packet() %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
			// Tolerate up to kSkipInvalidDataPackets bad packets in a row.
			if (res == AVERROR_INVALIDDATA) {
				if (++_skippedInvalidDataPackets < kSkipInvalidDataPackets) {
					continue; // try to skip bad packet
				}
			}
			return ReadResult::Error;
		}
	} while (true);
	return ReadResult::Error;
}
// Updates the playback timing state (_frameMs, _frameTime, delays, index)
// from the freshly decoded _frame, keeping the timeline monotonic.
void FFMpegReaderImplementation::processReadFrame() {
#if DA_FFMPEG_HAVE_DURATION
	int64 duration = _frame->duration;
#else
	int64 duration = _frame->pkt_duration;
#endif
	int64 framePts = _frame->pts;
	// Rescale the pts from the stream time base to milliseconds.
	crl::time frameMs = (framePts * 1000LL * _fmtContext->streams[_streamId]->time_base.num) / _fmtContext->streams[_streamId]->time_base.den;
	_currentFrameDelay = _nextFrameDelay;
	if (_frameMs + _currentFrameDelay < frameMs) {
		// Timestamps jumped forward: stretch the delay to match.
		_currentFrameDelay = int32(frameMs - _frameMs);
	} else if (frameMs < _frameMs + _currentFrameDelay) {
		// Timestamps went backwards: clamp to keep time monotonic.
		frameMs = _frameMs + _currentFrameDelay;
	}
	// Precompute the delay until the following frame from this frame's
	// duration, if the container provided one.
	if (duration == AV_NOPTS_VALUE) {
		_nextFrameDelay = 0;
	} else {
		_nextFrameDelay = (duration * 1000LL * _fmtContext->streams[_streamId]->time_base.num) / _fmtContext->streams[_streamId]->time_base.den;
	}
	_frameMs = frameMs;
	_hadFrame = _frameRead = true;
	_frameTime += _currentFrameDelay;
	++_frameIndex;
}
  150. ReaderImplementation::ReadResult FFMpegReaderImplementation::readFramesTill(crl::time frameMs, crl::time systemMs) {
  151. if (_frameRead && _frameTime > frameMs) {
  152. return ReadResult::Success;
  153. }
  154. auto readResult = readNextFrame();
  155. if (readResult != ReadResult::Success || _frameTime > frameMs) {
  156. return readResult;
  157. }
  158. readResult = readNextFrame();
  159. if (_frameTime <= frameMs) {
  160. _frameTime = frameMs + 5; // keep up
  161. }
  162. return readResult;
  163. }
// Timestamp (ms, stream time) of the last decoded frame.
crl::time FFMpegReaderImplementation::frameRealTime() const {
	return _frameMs;
}
  167. crl::time FFMpegReaderImplementation::framePresentationTime() const {
  168. return qMax(_frameTime + _frameTimeCorrection, crl::time(0));
  169. }
  170. crl::time FFMpegReaderImplementation::durationMs() const {
  171. const auto rebase = [](int64_t duration, const AVRational &base) {
  172. return (duration * 1000LL * base.num) / base.den;
  173. };
  174. const auto stream = _fmtContext->streams[_streamId];
  175. if (stream->duration != AV_NOPTS_VALUE) {
  176. return rebase(stream->duration, stream->time_base);
  177. } else if (_fmtContext->duration != AV_NOPTS_VALUE) {
  178. return rebase(_fmtContext->duration, AVRational{ 1, AV_TIME_BASE });
  179. }
  180. return 0;
  181. }
// Converts the decoded _frame into the (aligned, possibly rotated) QImage
// `to`, reporting the frame index and whether the frame carries alpha.
// Returns false only when the source frame size is invalid.
bool FFMpegReaderImplementation::renderFrame(
	QImage &to,
	bool &hasAlpha,
	int &index,
	const QSize &size) {
	Expects(_frameRead);
	_frameRead = false;
	index = _frameIndex;
	// Lazily capture the source dimensions from the first decoded frame.
	if (!_width || !_height) {
		_width = _frame->width;
		_height = _frame->height;
		if (!_width || !_height) {
			LOG(("Gif Error: Bad frame size %1").arg(logData()));
			return false;
		}
	}
	QSize toSize(size.isEmpty() ? QSize(_width, _height) : size);
	// A 90/270-degree rotation swaps the requested width and height.
	if (!size.isEmpty() && rotationSwapWidthHeight()) {
		toSize.transpose();
	}
	// Reuse `to` only if it is the right size, unshared and aligned.
	if (to.isNull() || to.size() != toSize || !to.isDetached() || !isAlignedImage(to)) {
		to = createAlignedImage(toSize);
	}
	const auto format = (_frame->format == AV_PIX_FMT_NONE)
		? _codecContext->pix_fmt
		: _frame->format;
	const auto bgra = (format == AV_PIX_FMT_BGRA);
	hasAlpha = bgra || (format == AV_PIX_FMT_YUVA420P);
	if (_frame->width == toSize.width() && _frame->height == toSize.height() && bgra) {
		// Same size and already BGRA: plain row-by-row copy, no scaling.
		int32 sbpl = _frame->linesize[0], dbpl = to.bytesPerLine(), bpl = qMin(sbpl, dbpl);
		uchar *s = _frame->data[0], *d = to.bits();
		for (int32 i = 0, l = _frame->height; i < l; ++i) {
			memcpy(d + i * dbpl, s + i * sbpl, bpl);
		}
	} else {
		// Otherwise convert/scale through swscale into BGRA, reusing the
		// cached context while the target size and pixel format match.
		if ((_swsSize != toSize) || (_frame->format != -1 && _frame->format != _codecContext->pix_fmt) || !_swsContext) {
			_swsSize = toSize;
			_swsContext = sws_getCachedContext(_swsContext, _frame->width, _frame->height, AVPixelFormat(_frame->format), toSize.width(), toSize.height(), AV_PIX_FMT_BGRA, 0, nullptr, nullptr, nullptr);
		}
		// AV_NUM_DATA_POINTERS defined in AVFrame struct
		uint8_t *toData[AV_NUM_DATA_POINTERS] = { to.bits(), nullptr };
		int toLinesize[AV_NUM_DATA_POINTERS] = { int(to.bytesPerLine()), 0 };
		sws_scale(_swsContext, _frame->data, _frame->linesize, 0, _frame->height, toData, toLinesize);
	}
	// The target format is premultiplied ARGB32, so premultiply alpha.
	if (hasAlpha) {
		FFmpeg::PremultiplyInplace(to);
	}
	if (_rotation != Rotation::None) {
		QTransform rotationTransform;
		switch (_rotation) {
		case Rotation::Degrees90: rotationTransform.rotate(90); break;
		case Rotation::Degrees180: rotationTransform.rotate(180); break;
		case Rotation::Degrees270: rotationTransform.rotate(270); break;
		}
		to = to.transformed(rotationTransform);
	}
	FFmpeg::ClearFrameMemory(_frame.get());
	return true;
}
  241. FFMpegReaderImplementation::Rotation FFMpegReaderImplementation::rotationFromDegrees(int degrees) const {
  242. switch (degrees) {
  243. case 90: return Rotation::Degrees90;
  244. case 180: return Rotation::Degrees180;
  245. case 270: return Rotation::Degrees270;
  246. }
  247. return Rotation::None;
  248. }
// Opens the media, finds the best video stream, sets up the decoder and
// optionally seeks to positionMs (updated to the position of the first
// packet actually read). Returns false on any initialization failure;
// partial state is released by the destructor.
bool FFMpegReaderImplementation::start(Mode mode, crl::time &positionMs) {
	_mode = mode;
	initDevice();
	if (!_device->open(QIODevice::ReadOnly)) {
		LOG(("Gif Error: Unable to open device %1").arg(logData()));
		return false;
	}
	// Custom IO: FFmpeg reads/seeks through our QIODevice callbacks.
	_ioBuffer = (uchar*)av_malloc(FFmpeg::kAVBlockSize);
	_ioContext = avio_alloc_context(_ioBuffer, FFmpeg::kAVBlockSize, 0, static_cast<void*>(this), &FFMpegReaderImplementation::Read, nullptr, &FFMpegReaderImplementation::Seek);
	_fmtContext = avformat_alloc_context();
	if (!_fmtContext) {
		LOG(("Gif Error: Unable to avformat_alloc_context %1").arg(logData()));
		return false;
	}
	_fmtContext->pb = _ioContext;
	int res = 0;
	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
	if ((res = avformat_open_input(&_fmtContext, nullptr, nullptr, nullptr)) < 0) {
		// Drop the buffer pointer so the destructor won't free it again.
		_ioBuffer = nullptr;
		LOG(("Gif Error: Unable to avformat_open_input %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
		return false;
	}
	// From here on avformat_close_input() must be used for cleanup.
	_opened = true;
	if ((res = avformat_find_stream_info(_fmtContext, nullptr)) < 0) {
		LOG(("Gif Error: Unable to avformat_find_stream_info %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
		return false;
	}
	_streamId = av_find_best_stream(_fmtContext, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
	if (_streamId < 0) {
		LOG(("Gif Error: Unable to av_find_best_stream %1, error %2, %3").arg(logData()).arg(_streamId).arg(av_make_error_string(err, sizeof(err), _streamId)));
		return false;
	}
	// Honor the container-level "rotate" metadata tag if present.
	auto rotateTag = av_dict_get(_fmtContext->streams[_streamId]->metadata, "rotate", nullptr, 0);
	if (rotateTag && *rotateTag->value) {
		auto stringRotateTag = QString::fromUtf8(rotateTag->value);
		auto toIntSucceeded = false;
		auto rotateDegrees = stringRotateTag.toInt(&toIntSucceeded);
		if (toIntSucceeded) {
			_rotation = rotationFromDegrees(rotateDegrees);
		}
	}
	_codecContext = avcodec_alloc_context3(nullptr);
	if (!_codecContext) {
		LOG(("Gif Error: Unable to avcodec_alloc_context3 %1").arg(logData()));
		return false;
	}
	if ((res = avcodec_parameters_to_context(_codecContext, _fmtContext->streams[_streamId]->codecpar)) < 0) {
		LOG(("Gif Error: Unable to avcodec_parameters_to_context %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
		return false;
	}
	_codecContext->pkt_timebase = _fmtContext->streams[_streamId]->time_base;
	av_opt_set_int(_codecContext, "refcounted_frames", 1, 0);
	const auto codec = FFmpeg::FindDecoder(_codecContext);
	if (!codec) {
		LOG(("Gif Error: Unable to avcodec_find_decoder %1").arg(logData()));
		return false;
	}
	// Only the inspection pass cares whether an audio stream exists.
	if (_mode == Mode::Inspecting) {
		const auto audioStreamId = av_find_best_stream(_fmtContext, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
		_hasAudioStream = (audioStreamId >= 0);
	}
	if ((res = avcodec_open2(_codecContext, codec, nullptr)) < 0) {
		LOG(("Gif Error: Unable to avcodec_open2 %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
		return false;
	}
	if (positionMs > 0) {
		// Convert milliseconds to the stream time base and try a plain
		// seek first, falling back to a backward (keyframe) seek.
		const auto timeBase = _fmtContext->streams[_streamId]->time_base;
		const auto timeStamp = (positionMs * timeBase.den)
			/ (1000LL * timeBase.num);
		if (av_seek_frame(_fmtContext, _streamId, timeStamp, 0) < 0) {
			if (av_seek_frame(_fmtContext, _streamId, timeStamp, AVSEEK_FLAG_BACKWARD) < 0) {
				return false;
			}
		}
	}
	// Read the first packet to learn the exact position we landed on.
	FFmpeg::Packet packet;
	auto readResult = readPacket(packet);
	if (readResult == PacketResult::Ok && positionMs > 0) {
		positionMs = countPacketMs(packet);
	}
	if (readResult == PacketResult::Ok) {
		processPacket(std::move(packet));
	}
	return true;
}
  334. bool FFMpegReaderImplementation::inspectAt(crl::time &positionMs) {
  335. if (positionMs > 0) {
  336. const auto timeBase = _fmtContext->streams[_streamId]->time_base;
  337. const auto timeStamp = (positionMs * timeBase.den)
  338. / (1000LL * timeBase.num);
  339. if (av_seek_frame(_fmtContext, _streamId, timeStamp, 0) < 0) {
  340. if (av_seek_frame(_fmtContext, _streamId, timeStamp, AVSEEK_FLAG_BACKWARD) < 0) {
  341. return false;
  342. }
  343. }
  344. }
  345. _packetQueue.clear();
  346. FFmpeg::Packet packet;
  347. auto readResult = readPacket(packet);
  348. if (readResult == PacketResult::Ok && positionMs > 0) {
  349. positionMs = countPacketMs(packet);
  350. }
  351. if (readResult == PacketResult::Ok) {
  352. processPacket(std::move(packet));
  353. }
  354. return true;
  355. }
  356. bool FFMpegReaderImplementation::isGifv() const {
  357. if (_hasAudioStream) {
  358. return false;
  359. }
  360. if (dataSize() > kMaxInMemory) {
  361. return false;
  362. }
  363. if (_codecContext->codec_id != AV_CODEC_ID_H264) {
  364. return false;
  365. }
  366. return true;
  367. }
  368. bool FFMpegReaderImplementation::isWebmSticker() const {
  369. if (_hasAudioStream) {
  370. return false;
  371. }
  372. if (dataSize() > kMaxInMemory) {
  373. return false;
  374. }
  375. if (_codecContext->codec_id != AV_CODEC_ID_VP9) {
  376. return false;
  377. }
  378. return true;
  379. }
  380. QString FFMpegReaderImplementation::logData() const {
  381. return u"for file '%1', data size '%2'"_q.arg(_location ? _location->name() : QString()).arg(_data->size());
  382. }
// Releases FFmpeg resources in the reverse order of their creation in
// start(); handles every partially-initialized state start() can leave.
FFMpegReaderImplementation::~FFMpegReaderImplementation() {
	if (_codecContext) avcodec_free_context(&_codecContext);
	if (_swsContext) sws_freeContext(_swsContext);
	// Only use avformat_close_input() after a successful open; _fmtContext
	// is cleared by it, so the avformat_free_context() below is skipped.
	if (_opened) {
		avformat_close_input(&_fmtContext);
	}
	if (_ioContext) {
		// The AVIO context owns the block buffer once it was created.
		av_freep(&_ioContext->buffer);
		av_freep(&_ioContext);
	} else if (_ioBuffer) {
		// Buffer was allocated but never attached to an AVIO context.
		av_freep(&_ioBuffer);
	}
	if (_fmtContext) avformat_free_context(_fmtContext);
}
  397. FFMpegReaderImplementation::PacketResult FFMpegReaderImplementation::readPacket(FFmpeg::Packet &packet) {
  398. int res = 0;
  399. if ((res = av_read_frame(_fmtContext, &packet.fields())) < 0) {
  400. if (res == AVERROR_EOF) {
  401. return PacketResult::EndOfFile;
  402. }
  403. char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
  404. LOG(("Gif Error: Unable to av_read_frame() %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
  405. return PacketResult::Error;
  406. }
  407. return PacketResult::Ok;
  408. }
  409. void FFMpegReaderImplementation::processPacket(FFmpeg::Packet &&packet) {
  410. const auto &native = packet.fields();
  411. auto videoPacket = (native.stream_index == _streamId);
  412. if (videoPacket) {
  413. _lastReadVideoMs = countPacketMs(packet);
  414. _packetQueue.push_back(std::move(packet));
  415. }
  416. }
  417. crl::time FFMpegReaderImplementation::countPacketMs(
  418. const FFmpeg::Packet &packet) const {
  419. const auto &native = packet.fields();
  420. int64 packetPts = (native.pts == AV_NOPTS_VALUE) ? native.dts : native.pts;
  421. crl::time packetMs = (packetPts * 1000LL * _fmtContext->streams[native.stream_index]->time_base.num) / _fmtContext->streams[native.stream_index]->time_base.den;
  422. return packetMs;
  423. }
  424. FFMpegReaderImplementation::PacketResult FFMpegReaderImplementation::readAndProcessPacket() {
  425. FFmpeg::Packet packet;
  426. auto result = readPacket(packet);
  427. if (result == PacketResult::Ok) {
  428. processPacket(std::move(packet));
  429. }
  430. return result;
  431. }
  432. int FFMpegReaderImplementation::Read(void *opaque, uint8_t *buf, int buf_size) {
  433. FFMpegReaderImplementation *l = reinterpret_cast<FFMpegReaderImplementation*>(opaque);
  434. int ret = l->_device->read((char*)(buf), buf_size);
  435. switch (ret) {
  436. case -1: return AVERROR_EXTERNAL;
  437. case 0: return AVERROR_EOF;
  438. default: return ret;
  439. }
  440. }
  441. int64_t FFMpegReaderImplementation::Seek(void *opaque, int64_t offset, int whence) {
  442. FFMpegReaderImplementation *l = reinterpret_cast<FFMpegReaderImplementation*>(opaque);
  443. switch (whence) {
  444. case SEEK_SET: return l->_device->seek(offset) ? l->_device->pos() : -1;
  445. case SEEK_CUR: return l->_device->seek(l->_device->pos() + offset) ? l->_device->pos() : -1;
  446. case SEEK_END: return l->_device->seek(l->_device->size() + offset) ? l->_device->pos() : -1;
  447. case AVSEEK_SIZE: {
  448. // Special whence for determining filesize without any seek.
  449. return l->_dataSize;
  450. } break;
  451. }
  452. return -1;
  453. }
  454. } // namespace internal
  455. } // namespace Clip
  456. } // namespace Media