mirror of https://github.com/commaai/openpilot.git
framereader: support hardware decoding (#22906)
* support hardware accelerated video decoding
* support cuda only
* print decode info
* add yuv flag back
* fix height
* default is cuda
* fallback to CPU decoding
fix memory leak & cleanup
add output
old-commit-hash: 20d3cf1fcb
This commit is contained in:
parent
db6a8aaf06
commit
0c26c21c9c
|
@ -11,35 +11,42 @@ int readFunction(void *opaque, uint8_t *buf, int buf_size) {
|
|||
return iss.gcount() ? iss.gcount() : AVERROR_EOF;
|
||||
}
|
||||
|
||||
// Pixel-format negotiation callback (assigned to AVCodecContext::get_format).
// `ctx->opaque` must point at the AVPixelFormat wanted for hardware decoding.
// Returns the first entry of `pix_fmts` that equals that format; if the list
// does not contain it, prints a hint, asserts, and returns AV_PIX_FMT_NONE.
enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) {
  const enum AVPixelFormat *wanted = static_cast<enum AVPixelFormat *>(ctx->opaque);

  // The candidate list is terminated by AV_PIX_FMT_NONE (-1).
  const enum AVPixelFormat *candidate = pix_fmts;
  while (*candidate != AV_PIX_FMT_NONE) {
    if (*candidate == *wanted) {
      return *candidate;
    }
    ++candidate;
  }

  // The decoder did not offer the requested hardware format.
  printf("Please run replay with the --no-cuda flag!\n");
  assert(0);
  return AV_PIX_FMT_NONE;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
FrameReader::FrameReader(bool local_cache, int chunk_size, int retries) : FileReader(local_cache, chunk_size, retries) {
|
||||
pFormatCtx_ = avformat_alloc_context();
|
||||
av_frame_ = av_frame_alloc();
|
||||
rgb_frame_ = av_frame_alloc();
|
||||
yuv_frame_ = av_frame_alloc();
|
||||
input_ctx = avformat_alloc_context();
|
||||
sws_frame.reset(av_frame_alloc());
|
||||
}
|
||||
|
||||
FrameReader::~FrameReader() {
|
||||
for (auto &f : frames_) {
|
||||
av_free_packet(&f.pkt);
|
||||
av_packet_unref(&f.pkt);
|
||||
}
|
||||
|
||||
if (pCodecCtx_) avcodec_free_context(&pCodecCtx_);
|
||||
if (pFormatCtx_) avformat_close_input(&pFormatCtx_);
|
||||
if (av_frame_) av_frame_free(&av_frame_);
|
||||
if (rgb_frame_) av_frame_free(&rgb_frame_);
|
||||
if (yuv_frame_) av_frame_free(&yuv_frame_);
|
||||
if (decoder_ctx) avcodec_free_context(&decoder_ctx);
|
||||
if (input_ctx) avformat_close_input(&input_ctx);
|
||||
if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);
|
||||
|
||||
if (rgb_sws_ctx_) sws_freeContext(rgb_sws_ctx_);
|
||||
if (yuv_sws_ctx_) sws_freeContext(yuv_sws_ctx_);
|
||||
|
||||
if (avio_ctx_) {
|
||||
av_freep(&avio_ctx_->buffer);
|
||||
av_freep(&avio_ctx_);
|
||||
avio_context_free(&avio_ctx_);
|
||||
}
|
||||
}
|
||||
|
||||
bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) {
|
||||
bool FrameReader::load(const std::string &url, bool no_cuda, std::atomic<bool> *abort) {
|
||||
std::string content = read(url, abort);
|
||||
if (content.empty()) return false;
|
||||
|
||||
|
@ -47,48 +54,56 @@ bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) {
|
|||
const int avio_ctx_buffer_size = 64 * 1024;
|
||||
unsigned char *avio_ctx_buffer = (unsigned char *)av_malloc(avio_ctx_buffer_size);
|
||||
avio_ctx_ = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size, 0, &iss, readFunction, nullptr, nullptr);
|
||||
pFormatCtx_->pb = avio_ctx_;
|
||||
input_ctx->pb = avio_ctx_;
|
||||
|
||||
pFormatCtx_->probesize = 10 * 1024 * 1024; // 10MB
|
||||
int ret = avformat_open_input(&pFormatCtx_, url.c_str(), NULL, NULL);
|
||||
input_ctx->probesize = 10 * 1024 * 1024; // 10MB
|
||||
int ret = avformat_open_input(&input_ctx, url.c_str(), NULL, NULL);
|
||||
if (ret != 0) {
|
||||
char err_str[1024] = {0};
|
||||
av_strerror(ret, err_str, std::size(err_str));
|
||||
printf("Error loading video - %s - %s\n", err_str, url.c_str());
|
||||
return false;
|
||||
}
|
||||
avformat_find_stream_info(pFormatCtx_, NULL);
|
||||
// av_dump_format(pFormatCtx_, 0, url.c_str(), 0);
|
||||
|
||||
AVStream *video = pFormatCtx_->streams[0];
|
||||
auto pCodec = avcodec_find_decoder(video->codec->codec_id);
|
||||
if (!pCodec) return false;
|
||||
ret = avformat_find_stream_info(input_ctx, nullptr);
|
||||
if (ret < 0) {
|
||||
printf("cannot find a video stream in the input file\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
pCodecCtx_ = avcodec_alloc_context3(pCodec);
|
||||
ret = avcodec_parameters_to_context(pCodecCtx_, video->codecpar);
|
||||
AVStream *video = input_ctx->streams[0];
|
||||
AVCodec *decoder = avcodec_find_decoder(video->codec->codec_id);
|
||||
if (!decoder) return false;
|
||||
|
||||
decoder_ctx = avcodec_alloc_context3(decoder);
|
||||
ret = avcodec_parameters_to_context(decoder_ctx, video->codecpar);
|
||||
if (ret != 0) return false;
|
||||
|
||||
// pCodecCtx_->thread_count = 0;
|
||||
// pCodecCtx_->thread_type = FF_THREAD_FRAME;
|
||||
ret = avcodec_open2(pCodecCtx_, pCodec, NULL);
|
||||
if (ret < 0) return false;
|
||||
width = (decoder_ctx->width + 3) & ~3;
|
||||
height = decoder_ctx->height;
|
||||
|
||||
width = (pCodecCtx_->width + 3) & ~3;
|
||||
height = pCodecCtx_->height;
|
||||
rgb_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P,
|
||||
width, height, AV_PIX_FMT_BGR24,
|
||||
SWS_FAST_BILINEAR, NULL, NULL, NULL);
|
||||
if (!no_cuda) {
|
||||
if (!initHardwareDecoder(AV_HWDEVICE_TYPE_CUDA)) {
|
||||
printf("No CUDA capable device was found. fallback to CPU decoding.\n");
|
||||
}
|
||||
}
|
||||
|
||||
rgb_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
|
||||
width, height, AV_PIX_FMT_BGR24,
|
||||
SWS_BILINEAR, NULL, NULL, NULL);
|
||||
if (!rgb_sws_ctx_) return false;
|
||||
|
||||
yuv_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P,
|
||||
width, height, AV_PIX_FMT_YUV420P,
|
||||
SWS_FAST_BILINEAR, NULL, NULL, NULL);
|
||||
yuv_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
|
||||
width, height, AV_PIX_FMT_YUV420P,
|
||||
SWS_BILINEAR, NULL, NULL, NULL);
|
||||
if (!yuv_sws_ctx_) return false;
|
||||
|
||||
ret = avcodec_open2(decoder_ctx, decoder, NULL);
|
||||
if (ret < 0) return false;
|
||||
|
||||
frames_.reserve(60 * 20); // 20fps, one minute
|
||||
while (!(abort && *abort)) {
|
||||
Frame &frame = frames_.emplace_back();
|
||||
ret = av_read_frame(pFormatCtx_, &frame.pkt);
|
||||
ret = av_read_frame(input_ctx, &frame.pkt);
|
||||
if (ret < 0) {
|
||||
frames_.pop_back();
|
||||
valid_ = (ret == AVERROR_EOF);
|
||||
|
@ -100,6 +115,43 @@ bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) {
|
|||
return valid_;
|
||||
}
|
||||
|
||||
// Tries to set up `decoder_ctx` for hardware decoding on a device of type
// `hw_device_type`.
//
// On success:
//   - hw_device_ctx owns the newly created device and decoder_ctx holds its
//     own reference to it,
//   - hw_pix_fmt is the hardware pixel format the codec reports for that
//     device type,
//   - sws_src_format is the first software format of the device that swscale
//     accepts as input,
//   - get_hw_format is installed as the decoder's get_format callback, with
//     decoder_ctx->opaque pointing at hw_pix_fmt.
//
// On failure a message is printed and false is returned. hw_pix_fmt,
// sws_src_format and decoder_ctx are not modified, and any device created
// here is released, so the caller can carry on with software decoding.
bool FrameReader::initHardwareDecoder(AVHWDeviceType hw_device_type) {
  // Find a codec configuration that decodes through a hw device context of
  // the requested type. The result is kept local until everything succeeded.
  AVPixelFormat device_pix_fmt = AV_PIX_FMT_NONE;
  for (int i = 0;; i++) {
    const AVCodecHWConfig *config = avcodec_get_hw_config(decoder_ctx->codec, i);
    if (!config) {
      printf("decoder %s does not support hw device type %s.\n",
             decoder_ctx->codec->name, av_hwdevice_get_type_name(hw_device_type));
      return false;
    }
    if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) && config->device_type == hw_device_type) {
      device_pix_fmt = config->pix_fmt;
      break;
    }
  }

  int ret = av_hwdevice_ctx_create(&hw_device_ctx, hw_device_type, nullptr, nullptr, 0);
  if (ret < 0) {
    printf("Failed to create specified HW device %d.\n", ret);
    return false;
  }

  // get sws source format
  // Checked at runtime rather than with assert(): assert is compiled out under
  // NDEBUG, and valid_sw_formats is allowed to be NULL.
  AVPixelFormat sw_format = AV_PIX_FMT_NONE;
  AVHWFramesConstraints *hw_frames_const = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
  if (hw_frames_const) {
    for (const AVPixelFormat *p = hw_frames_const->valid_sw_formats; p && *p != AV_PIX_FMT_NONE; p++) {
      if (sws_isSupportedInput(*p)) {
        sw_format = *p;
        break;
      }
    }
    av_hwframe_constraints_free(&hw_frames_const);
  }
  if (sw_format == AV_PIX_FMT_NONE) {
    // Without a known source format the sws contexts would be built for the
    // wrong layout, so give up on hardware decoding instead.
    printf("hw device type %s offers no software pixel format supported by swscale.\n",
           av_hwdevice_get_type_name(hw_device_type));
    av_buffer_unref(&hw_device_ctx);
    return false;
  }

  AVBufferRef *decoder_device_ref = av_buffer_ref(hw_device_ctx);
  if (!decoder_device_ref) {
    printf("Failed to reference the HW device context.\n");
    av_buffer_unref(&hw_device_ctx);
    return false;
  }

  // Everything succeeded: commit the state.
  hw_pix_fmt = device_pix_fmt;
  sws_src_format = sw_format;
  decoder_ctx->hw_device_ctx = decoder_device_ref;
  decoder_ctx->opaque = &hw_pix_fmt;
  decoder_ctx->get_format = get_hw_format;
  return true;
}
|
||||
|
||||
bool FrameReader::get(int idx, uint8_t *rgb, uint8_t *yuv) {
|
||||
assert(rgb || yuv);
|
||||
if (!valid_ || idx < 0 || idx >= frames_.size()) {
|
||||
|
@ -125,35 +177,51 @@ bool FrameReader::decode(int idx, uint8_t *rgb, uint8_t *yuv) {
|
|||
for (int i = from_idx; i <= idx; ++i) {
|
||||
Frame &frame = frames_[i];
|
||||
if ((!frame.decoded || i == idx) && !frame.failed) {
|
||||
frame.decoded = decodeFrame(&frame.pkt);
|
||||
AVFrame *f = decodeFrame(&frame.pkt);
|
||||
frame.decoded = f != nullptr;
|
||||
frame.failed = !frame.decoded;
|
||||
if (frame.decoded && i == idx) {
|
||||
return copyBuffers(av_frame_, rgb, yuv);
|
||||
return copyBuffers(f, rgb, yuv);
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool FrameReader::decodeFrame(AVPacket *pkt) {
|
||||
int ret = avcodec_send_packet(pCodecCtx_, pkt);
|
||||
AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
|
||||
int ret = avcodec_send_packet(decoder_ctx, pkt);
|
||||
if (ret < 0) {
|
||||
printf("Error sending a packet for decoding\n");
|
||||
return false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
av_frame_.reset(av_frame_alloc());
|
||||
ret = avcodec_receive_frame(decoder_ctx, av_frame_.get());
|
||||
if (ret != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (av_frame_->format == hw_pix_fmt) {
|
||||
hw_frame.reset(av_frame_alloc());
|
||||
if ((ret = av_hwframe_transfer_data(hw_frame.get(), av_frame_.get(), 0)) < 0) {
|
||||
printf("error transferring the data from GPU to CPU\n");
|
||||
return nullptr;
|
||||
}
|
||||
return hw_frame.get();
|
||||
} else {
|
||||
return av_frame_.get();
|
||||
}
|
||||
ret = avcodec_receive_frame(pCodecCtx_, av_frame_);
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) {
|
||||
// images are written directly to the output buffers, so no alignment is used (align = 1)
|
||||
if (yuv) {
|
||||
av_image_fill_arrays(yuv_frame_->data, yuv_frame_->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1);
|
||||
int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, yuv_frame_->data, yuv_frame_->linesize);
|
||||
av_image_fill_arrays(sws_frame->data, sws_frame->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1);
|
||||
int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, sws_frame->data, sws_frame->linesize);
|
||||
if (ret < 0) return false;
|
||||
}
|
||||
|
||||
av_image_fill_arrays(rgb_frame_->data, rgb_frame_->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1);
|
||||
int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, rgb_frame_->data, rgb_frame_->linesize);
|
||||
av_image_fill_arrays(sws_frame->data, sws_frame->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1);
|
||||
int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, sws_frame->data, sws_frame->linesize);
|
||||
return ret >= 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -12,11 +13,15 @@ extern "C" {
|
|||
#include <libavutil/imgutils.h>
|
||||
}
|
||||
|
||||
struct AVFrameDeleter {
|
||||
void operator()(AVFrame* frame) const { av_frame_free(&frame); }
|
||||
};
|
||||
|
||||
class FrameReader : protected FileReader {
|
||||
public:
|
||||
FrameReader(bool local_cache = false, int chunk_size = -1, int retries = 0);
|
||||
~FrameReader();
|
||||
bool load(const std::string &url, std::atomic<bool> *abort = nullptr);
|
||||
bool load(const std::string &url, bool no_cuda = false, std::atomic<bool> *abort = nullptr);
|
||||
bool get(int idx, uint8_t *rgb, uint8_t *yuv);
|
||||
int getRGBSize() const { return width * height * 3; }
|
||||
int getYUVSize() const { return width * height * 3 / 2; }
|
||||
|
@ -26,8 +31,9 @@ public:
|
|||
int width = 0, height = 0;
|
||||
|
||||
private:
|
||||
bool initHardwareDecoder(AVHWDeviceType hw_device_type);
|
||||
bool decode(int idx, uint8_t *rgb, uint8_t *yuv);
|
||||
bool decodeFrame(AVPacket *pkt);
|
||||
AVFrame * decodeFrame(AVPacket *pkt);
|
||||
bool copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv);
|
||||
|
||||
struct Frame {
|
||||
|
@ -36,11 +42,15 @@ private:
|
|||
bool failed = false;
|
||||
};
|
||||
std::vector<Frame> frames_;
|
||||
AVPixelFormat sws_src_format = AV_PIX_FMT_YUV420P;
|
||||
SwsContext *rgb_sws_ctx_ = nullptr, *yuv_sws_ctx_ = nullptr;
|
||||
AVFrame *av_frame_, *rgb_frame_, *yuv_frame_ = nullptr;
|
||||
AVFormatContext *pFormatCtx_ = nullptr;
|
||||
AVCodecContext *pCodecCtx_ = nullptr;
|
||||
std::unique_ptr<AVFrame, AVFrameDeleter>av_frame_, sws_frame, hw_frame;
|
||||
AVFormatContext *input_ctx = nullptr;
|
||||
AVCodecContext *decoder_ctx = nullptr;
|
||||
int key_frames_count_ = 0;
|
||||
bool valid_ = false;
|
||||
AVIOContext *avio_ctx_ = nullptr;
|
||||
|
||||
AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
|
||||
AVBufferRef *hw_device_ctx = nullptr;
|
||||
};
|
||||
|
|
|
@ -102,6 +102,7 @@ int main(int argc, char *argv[]) {
|
|||
{"no-cache", REPLAY_FLAG_NO_FILE_CACHE, "turn off local cache"},
|
||||
{"qcam", REPLAY_FLAG_QCAMERA, "load qcamera"},
|
||||
{"yuv", REPLAY_FLAG_SEND_YUV, "send yuv frame"},
|
||||
{"no-cuda", REPLAY_FLAG_NO_CUDA, "disable CUDA"},
|
||||
};
|
||||
|
||||
QCommandLineParser parser;
|
||||
|
|
|
@ -16,6 +16,7 @@ enum REPLAY_FLAGS {
|
|||
REPLAY_FLAG_NO_FILE_CACHE = 0x0020,
|
||||
REPLAY_FLAG_QCAMERA = 0x0040,
|
||||
REPLAY_FLAG_SEND_YUV = 0x0080,
|
||||
REPLAY_FLAG_NO_CUDA = 0x0100,
|
||||
};
|
||||
|
||||
class Replay : public QObject {
|
||||
|
|
|
@ -91,7 +91,7 @@ void Route::addFileToSegment(int n, const QString &file) {
|
|||
|
||||
// class Segment
|
||||
|
||||
Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) {
|
||||
Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n), flags(flags) {
|
||||
// [RoadCam, DriverCam, WideRoadCam, log]. fallback to qcamera/qlog
|
||||
const QString file_list[] = {
|
||||
(flags & REPLAY_FLAG_QCAMERA) || files.road_cam.isEmpty() ? files.qcamera : files.road_cam,
|
||||
|
@ -102,7 +102,7 @@ Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) {
|
|||
for (int i = 0; i < std::size(file_list); i++) {
|
||||
if (!file_list[i].isEmpty()) {
|
||||
loading_++;
|
||||
synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString(), !(flags & REPLAY_FLAG_NO_FILE_CACHE)); }));
|
||||
synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString()); }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -114,11 +114,12 @@ Segment::~Segment() {
|
|||
synchronizer_.waitForFinished();
|
||||
}
|
||||
|
||||
void Segment::loadFile(int id, const std::string file, bool local_cache) {
|
||||
void Segment::loadFile(int id, const std::string file) {
|
||||
const bool local_cache = !(flags & REPLAY_FLAG_NO_FILE_CACHE);
|
||||
bool success = false;
|
||||
if (id < MAX_CAMERAS) {
|
||||
frames[id] = std::make_unique<FrameReader>(local_cache, 20 * 1024 * 1024, 3);
|
||||
success = frames[id]->load(file, &abort_);
|
||||
success = frames[id]->load(file, flags & REPLAY_FLAG_NO_CUDA, &abort_);
|
||||
} else {
|
||||
log = std::make_unique<LogReader>(local_cache, -1, 3);
|
||||
success = log->load(file, &abort_);
|
||||
|
|
|
@ -58,9 +58,10 @@ signals:
|
|||
void loadFinished(bool success);
|
||||
|
||||
protected:
|
||||
void loadFile(int id, const std::string file, bool local_cache);
|
||||
void loadFile(int id, const std::string file);
|
||||
|
||||
std::atomic<bool> abort_ = false;
|
||||
std::atomic<int> loading_ = 0;
|
||||
QFutureSynchronizer<void> synchronizer_;
|
||||
uint32_t flags;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue