framereader: replace swscale with libyuv, reduce cpu usage by half (#22992)
* use libyuv
* cleanup

old-commit-hash: 5ae51745097e3245c541456029fbc0fcce63a3c3
This commit is contained in:
@@ -21,7 +21,7 @@ else:
|
||||
if USE_FRAME_STREAM:
|
||||
cameras = ['cameras/camera_frame_stream.cc']
|
||||
else:
|
||||
libs += ['avutil', 'avcodec', 'avformat', 'swscale', 'bz2', 'ssl', 'curl', 'crypto']
|
||||
libs += ['avutil', 'avcodec', 'avformat', 'bz2', 'ssl', 'curl', 'crypto']
|
||||
# TODO: import replay_lib from root SConstruct
|
||||
cameras = ['cameras/camera_replay.cc',
|
||||
env.Object('camera-util', '#/selfdrive/ui/replay/util.cc'),
|
||||
|
||||
@@ -116,7 +116,7 @@ if arch in ['x86_64', 'Darwin'] or GetOption('extras'):
|
||||
replay_lib_src = ["replay/replay.cc", "replay/camera.cc", "replay/filereader.cc", "replay/logreader.cc", "replay/framereader.cc", "replay/route.cc", "replay/util.cc"]
|
||||
|
||||
replay_lib = qt_env.Library("qt_replay", replay_lib_src, LIBS=base_libs)
|
||||
replay_libs = [replay_lib, 'avutil', 'avcodec', 'avformat', 'bz2', 'curl', 'swscale', 'yuv'] + qt_libs
|
||||
replay_libs = [replay_lib, 'avutil', 'avcodec', 'avformat', 'bz2', 'curl', 'yuv'] + qt_libs
|
||||
qt_env.Program("replay/replay", ["replay/main.cc"], LIBS=replay_libs)
|
||||
|
||||
qt_env.Program("watch3", ["watch3.cc"], LIBS=qt_libs + ['common', 'json11'])
|
||||
|
||||
@@ -34,8 +34,6 @@ enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *
|
||||
} // namespace
|
||||
|
||||
FrameReader::FrameReader(bool local_cache, int chunk_size, int retries) : FileReader(local_cache, chunk_size, retries) {
|
||||
input_ctx = avformat_alloc_context();
|
||||
sws_frame.reset(av_frame_alloc());
|
||||
}
|
||||
|
||||
FrameReader::~FrameReader() {
|
||||
@@ -47,9 +45,6 @@ FrameReader::~FrameReader() {
|
||||
if (input_ctx) avformat_close_input(&input_ctx);
|
||||
if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);
|
||||
|
||||
if (rgb_sws_ctx_) sws_freeContext(rgb_sws_ctx_);
|
||||
if (yuv_sws_ctx_) sws_freeContext(yuv_sws_ctx_);
|
||||
|
||||
if (avio_ctx_) {
|
||||
av_freep(&avio_ctx_->buffer);
|
||||
avio_context_free(&avio_ctx_);
|
||||
@@ -60,6 +55,9 @@ bool FrameReader::load(const std::string &url, bool no_cuda, std::atomic<bool> *
|
||||
std::string content = read(url, abort);
|
||||
if (content.empty()) return false;
|
||||
|
||||
input_ctx = avformat_alloc_context();
|
||||
if (!input_ctx) return false;
|
||||
|
||||
struct buffer_data bd = {
|
||||
.data = (uint8_t *)content.data(),
|
||||
.offset = 0,
|
||||
@@ -99,18 +97,11 @@ bool FrameReader::load(const std::string &url, bool no_cuda, std::atomic<bool> *
|
||||
if (!no_cuda) {
|
||||
if (!initHardwareDecoder(AV_HWDEVICE_TYPE_CUDA)) {
|
||||
printf("No CUDA capable device was found. fallback to CPU decoding.\n");
|
||||
} else {
|
||||
nv12toyuv_buffer.resize(getYUVSize());
|
||||
}
|
||||
}
|
||||
|
||||
rgb_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
|
||||
width, height, AV_PIX_FMT_BGR24,
|
||||
SWS_BILINEAR, NULL, NULL, NULL);
|
||||
if (!rgb_sws_ctx_) return false;
|
||||
yuv_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
|
||||
width, height, AV_PIX_FMT_YUV420P,
|
||||
SWS_BILINEAR, NULL, NULL, NULL);
|
||||
if (!yuv_sws_ctx_) return false;
|
||||
|
||||
ret = avcodec_open2(decoder_ctx, decoder, NULL);
|
||||
if (ret < 0) return false;
|
||||
|
||||
@@ -149,17 +140,6 @@ bool FrameReader::initHardwareDecoder(AVHWDeviceType hw_device_type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// get sws source format
|
||||
AVHWFramesConstraints *hw_frames_const = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
|
||||
assert(hw_frames_const != 0);
|
||||
for (AVPixelFormat *p = hw_frames_const->valid_sw_formats; *p != AV_PIX_FMT_NONE; p++) {
|
||||
if (sws_isSupportedInput(*p)) {
|
||||
sws_src_format = *p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
av_hwframe_constraints_free(&hw_frames_const);
|
||||
|
||||
decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
|
||||
decoder_ctx->opaque = &hw_pix_fmt;
|
||||
decoder_ctx->get_format = get_hw_format;
|
||||
@@ -228,27 +208,26 @@ AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
|
||||
}
|
||||
|
||||
bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) {
|
||||
if (yuv) {
|
||||
if (sws_src_format == AV_PIX_FMT_NV12) {
|
||||
// libswscale crash if height is not 16 bytes aligned for NV12->YUV420 conversion
|
||||
assert(sws_src_format == AV_PIX_FMT_NV12);
|
||||
if (hw_pix_fmt == AV_PIX_FMT_CUDA) {
|
||||
uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data();
|
||||
uint8_t *u = y + width * height;
|
||||
uint8_t *v = u + (width / 2) * (height / 2);
|
||||
libyuv::NV12ToI420(f->data[0], f->linesize[0], f->data[1], f->linesize[1],
|
||||
y, width, u, width / 2, v, width / 2, width, height);
|
||||
libyuv::I420ToRGB24(y, width, u, width / 2, v, width / 2,
|
||||
rgb, width * 3, width, height);
|
||||
} else {
|
||||
if (yuv) {
|
||||
uint8_t *u = yuv + width * height;
|
||||
uint8_t *v = u + (width / 2) * (height / 2);
|
||||
libyuv::NV12ToI420(f->data[0], f->linesize[0],
|
||||
f->data[1], f->linesize[1],
|
||||
yuv, width,
|
||||
u, width / 2,
|
||||
v, width / 2,
|
||||
width, height);
|
||||
} else {
|
||||
av_image_fill_arrays(sws_frame->data, sws_frame->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1);
|
||||
int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, sws_frame->data, sws_frame->linesize);
|
||||
if (ret < 0) return false;
|
||||
memcpy(yuv, f->data[0], width * height);
|
||||
memcpy(u, f->data[1], width / 2 * height / 2);
|
||||
memcpy(v, f->data[2], width / 2 * height / 2);
|
||||
}
|
||||
libyuv::I420ToRGB24(f->data[0], f->linesize[0],
|
||||
f->data[1], f->linesize[1],
|
||||
f->data[2], f->linesize[2],
|
||||
rgb, width * 3, width, height);
|
||||
}
|
||||
|
||||
// images is going to be written to output buffers, no alignment (align = 1)
|
||||
av_image_fill_arrays(sws_frame->data, sws_frame->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1);
|
||||
int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, sws_frame->data, sws_frame->linesize);
|
||||
return ret >= 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -9,8 +9,6 @@
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavformat/avformat.h>
|
||||
#include <libswscale/swscale.h>
|
||||
#include <libavutil/imgutils.h>
|
||||
}
|
||||
|
||||
struct AVFrameDeleter {
|
||||
@@ -42,9 +40,7 @@ private:
|
||||
bool failed = false;
|
||||
};
|
||||
std::vector<Frame> frames_;
|
||||
AVPixelFormat sws_src_format = AV_PIX_FMT_YUV420P;
|
||||
SwsContext *rgb_sws_ctx_ = nullptr, *yuv_sws_ctx_ = nullptr;
|
||||
std::unique_ptr<AVFrame, AVFrameDeleter>av_frame_, sws_frame, hw_frame;
|
||||
std::unique_ptr<AVFrame, AVFrameDeleter>av_frame_, hw_frame;
|
||||
AVFormatContext *input_ctx = nullptr;
|
||||
AVCodecContext *decoder_ctx = nullptr;
|
||||
int key_frames_count_ = 0;
|
||||
@@ -53,4 +49,5 @@ private:
|
||||
|
||||
AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
|
||||
AVBufferRef *hw_device_ctx = nullptr;
|
||||
std::vector<uint8_t> nv12toyuv_buffer;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user