convert vidindex to python (#30176)

* convert vidindex to python

* fix whitespace

* corrupt file option

* fix up typings

* fix return type

* update framereader

* change length delimiter to uint32 value

* change length to uint32 value

* move url_file changes to separate PR

* cleanup caching

* revert whitespace change

* fix frame type param type
old-commit-hash: f8e488f881
This commit is contained in:
Greg Hogan 2023-10-05 18:40:24 -07:00 committed by GitHub
parent 4d732ad439
commit 29bb2cf867
8 changed files with 309 additions and 527 deletions

View File

@ -28,10 +28,8 @@ chffr/app2
chffr/backend/env chffr/backend/env
selfdrive/nav selfdrive/nav
selfdrive/baseui selfdrive/baseui
chffr/lib/vidindex/vidindex
selfdrive/test/simulator2 selfdrive/test/simulator2
**/cache_data **/cache_data
xx/chffr/lib/vidindex/vidindex
xx/plus xx/plus
xx/community xx/community
xx/projects xx/projects

View File

@ -14,6 +14,7 @@ from lru import LRU
import _io import _io
from openpilot.tools.lib.cache import cache_path_for_file_path, DEFAULT_CACHE_DIR from openpilot.tools.lib.cache import cache_path_for_file_path, DEFAULT_CACHE_DIR
from openpilot.tools.lib.exceptions import DataUnreadableError from openpilot.tools.lib.exceptions import DataUnreadableError
from openpilot.tools.lib.vidindex import hevc_index
from openpilot.common.file_helpers import atomic_write_in_dir from openpilot.common.file_helpers import atomic_write_in_dir
from openpilot.tools.lib.filereader import FileReader from openpilot.tools.lib.filereader import FileReader
@ -75,31 +76,6 @@ def ffprobe(fn, fmt=None):
return json.loads(ffprobe_output) return json.loads(ffprobe_output)
def vidindex(fn, typ):
  """Index a video file by running the compiled ``vidindex`` helper on it.

  ``make`` is run in the ``vidindex`` directory next to this module, then the
  resulting binary is invoked with ``typ``, ``fn`` and two temporary output
  files (prefix and index).

  Returns ``(index, prefix)``: ``index`` is the index file viewed as an
  ``(N, 2)`` uint32 array and ``prefix`` is the raw content of the prefix file.

  Raises DataUnreadableError when the helper exits with a non-zero status.
  """
  tool_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "vidindex")
  tool_path = os.path.join(tool_dir, "vidindex")

  # build the helper first; make's stdout is discarded
  subprocess.check_call(["make"], cwd=tool_dir, stdout=subprocess.DEVNULL)

  with tempfile.NamedTemporaryFile() as prefix_tmp, tempfile.NamedTemporaryFile() as index_tmp:
    command = [tool_path, typ, fn, prefix_tmp.name, index_tmp.name]
    try:
      subprocess.check_call(command)
    except subprocess.CalledProcessError as e:
      raise DataUnreadableError(f"vidindex failed on file {fn}") from e

    # read both outputs back while the temporary files still exist
    with open(index_tmp.name, "rb") as index_file:
      raw_index = index_file.read()
    with open(prefix_tmp.name, "rb") as prefix_file:
      prefix = prefix_file.read()

  index = np.frombuffer(raw_index, np.uint32).reshape(-1, 2)

  # the final row is a sentinel: type 0xFFFFFFFF, offset equal to the file size
  last_row = index[-1]
  assert last_row[0] == 0xFFFFFFFF
  assert last_row[1] == os.path.getsize(fn)

  return index, prefix
def cache_fn(func): def cache_fn(func):
@wraps(func) @wraps(func)
def cache_inner(fn, *args, **kwargs): def cache_inner(fn, *args, **kwargs):
@ -114,7 +90,6 @@ def cache_fn(func):
cache_value = pickle.load(cache_file) cache_value = pickle.load(cache_file)
else: else:
cache_value = func(fn, *args, **kwargs) cache_value = func(fn, *args, **kwargs)
if cache_path: if cache_path:
with atomic_write_in_dir(cache_path, mode="wb", overwrite=True) as cache_file: with atomic_write_in_dir(cache_path, mode="wb", overwrite=True) as cache_file:
pickle.dump(cache_value, cache_file, -1) pickle.dump(cache_value, cache_file, -1)
@ -125,13 +100,13 @@ def cache_fn(func):
@cache_fn @cache_fn
def index_stream(fn, typ): def index_stream(fn, ft):
assert typ in ("hevc", ) if ft != FrameType.h265_stream:
raise NotImplementedError("Only h265 supported")
with FileReader(fn) as f: frame_types, dat_len, prefix = hevc_index(fn)
assert os.path.exists(f.name), fn index = np.array(frame_types + [(0xFFFFFFFF, dat_len)], dtype=np.uint32)
index, prefix = vidindex(f.name, typ) probe = ffprobe(fn, "hevc")
probe = ffprobe(f.name, typ)
return { return {
'index': index, 'index': index,
@ -140,42 +115,8 @@ def index_stream(fn, typ):
} }
def index_videos(camera_paths, cache_dir=DEFAULT_CACHE_DIR):
  """Index every video in camera_paths, fingerprinting only the first one.

  Requires that paths in camera_paths are contiguous and of the same type.
  Raises ValueError when camera_paths is empty.
  """
  if not len(camera_paths):
    raise ValueError("must provide at least one video to index")

  # the first file's frame type is reused for every path
  shared_frame_type = fingerprint_video(camera_paths[0])
  for path in camera_paths:
    index_video(path, shared_frame_type, cache_dir)
def index_video(fn, frame_type=None, cache_dir=DEFAULT_CACHE_DIR):
  """Build the on-disk index cache for a single video file if it is missing.

  fn: path of the video to index.
  frame_type: frame type of the file; fingerprinted from fn when None.
  cache_dir: directory the cache path is derived from.

  Returns None. Does nothing when the cache file already exists.
  Raises NotImplementedError for any frame type other than h265_stream.
  """
  cache_path = cache_path_for_file_path(fn, cache_dir)
  if os.path.exists(cache_path):
    return

  if frame_type is None:
    # fingerprint the file itself: fn is one path, so fn[0] would be only its first character
    frame_type = fingerprint_video(fn)

  if frame_type != FrameType.h265_stream:
    raise NotImplementedError("Only h265 supported")

  index_stream(fn, "hevc", cache_dir=cache_dir)
def get_video_index(fn, frame_type, cache_dir=DEFAULT_CACHE_DIR): def get_video_index(fn, frame_type, cache_dir=DEFAULT_CACHE_DIR):
cache_path = cache_path_for_file_path(fn, cache_dir) return index_stream(fn, frame_type, cache_dir=cache_dir)
if not os.path.exists(cache_path):
index_video(fn, frame_type, cache_dir)
if not os.path.exists(cache_path):
return None
with open(cache_path, "rb") as cache_file:
return pickle.load(cache_file)
def read_file_check_size(f, sz, cookie): def read_file_check_size(f, sz, cookie):
buff = bytearray(sz) buff = bytearray(sz)

301
tools/lib/vidindex.py Executable file
View File

@ -0,0 +1,301 @@
#!/usr/bin/env python3
import argparse
import struct
from enum import IntEnum
from typing import Tuple
from openpilot.tools.lib.filereader import FileReader
# H.265 specification
# https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.265-201802-S!!PDF-E&type=items

# Three-byte sequence that every NAL unit in the byte stream must begin with
# (checked by require_nal_unit_start, searched for by get_hevc_nal_unit_length).
NAL_UNIT_START_CODE = b"\x00\x00\x01"
NAL_UNIT_START_CODE_SIZE = len(NAL_UNIT_START_CODE)
# Size in bytes of the NAL unit header that directly follows the start code.
NAL_UNIT_HEADER_SIZE = 2
class HevcNalUnitType(IntEnum):
  """Names for all 64 values (0-63) of the 6-bit nal_unit_type header field.

  get_hevc_nal_unit_type builds a member of this enum from the first header byte.
  """
  TRAIL_N = 0 # RBSP structure: slice_segment_layer_rbsp( )
  TRAIL_R = 1 # RBSP structure: slice_segment_layer_rbsp( )
  TSA_N = 2 # RBSP structure: slice_segment_layer_rbsp( )
  TSA_R = 3 # RBSP structure: slice_segment_layer_rbsp( )
  STSA_N = 4 # RBSP structure: slice_segment_layer_rbsp( )
  STSA_R = 5 # RBSP structure: slice_segment_layer_rbsp( )
  RADL_N = 6 # RBSP structure: slice_segment_layer_rbsp( )
  RADL_R = 7 # RBSP structure: slice_segment_layer_rbsp( )
  RASL_N = 8 # RBSP structure: slice_segment_layer_rbsp( )
  RASL_R = 9 # RBSP structure: slice_segment_layer_rbsp( )
  RSV_VCL_N10 = 10
  RSV_VCL_R11 = 11
  RSV_VCL_N12 = 12
  RSV_VCL_R13 = 13
  RSV_VCL_N14 = 14
  RSV_VCL_R15 = 15
  BLA_W_LP = 16 # RBSP structure: slice_segment_layer_rbsp( )
  BLA_W_RADL = 17 # RBSP structure: slice_segment_layer_rbsp( )
  BLA_N_LP = 18 # RBSP structure: slice_segment_layer_rbsp( )
  IDR_W_RADL = 19 # RBSP structure: slice_segment_layer_rbsp( )
  IDR_N_LP = 20 # RBSP structure: slice_segment_layer_rbsp( )
  CRA_NUT = 21 # RBSP structure: slice_segment_layer_rbsp( )
  RSV_IRAP_VCL22 = 22
  RSV_IRAP_VCL23 = 23
  RSV_VCL24 = 24
  RSV_VCL25 = 25
  RSV_VCL26 = 26
  RSV_VCL27 = 27
  RSV_VCL28 = 28
  RSV_VCL29 = 29
  RSV_VCL30 = 30
  RSV_VCL31 = 31
  VPS_NUT = 32 # RBSP structure: video_parameter_set_rbsp( )
  SPS_NUT = 33 # RBSP structure: seq_parameter_set_rbsp( )
  PPS_NUT = 34 # RBSP structure: pic_parameter_set_rbsp( )
  AUD_NUT = 35
  EOS_NUT = 36
  EOB_NUT = 37
  FD_NUT = 38
  PREFIX_SEI_NUT = 39
  SUFFIX_SEI_NUT = 40
  RSV_NVCL41 = 41
  RSV_NVCL42 = 42
  RSV_NVCL43 = 43
  RSV_NVCL44 = 44
  RSV_NVCL45 = 45
  RSV_NVCL46 = 46
  RSV_NVCL47 = 47
  UNSPEC48 = 48
  UNSPEC49 = 49
  UNSPEC50 = 50
  UNSPEC51 = 51
  UNSPEC52 = 52
  UNSPEC53 = 53
  UNSPEC54 = 54
  UNSPEC55 = 55
  UNSPEC56 = 56
  UNSPEC57 = 57
  UNSPEC58 = 58
  UNSPEC59 = 59
  UNSPEC60 = 60
  UNSPEC61 = 61
  UNSPEC62 = 62
  UNSPEC63 = 63
# B.2.2 Byte stream NAL unit semantics
# - The nal_unit_type within the nal_unit( ) syntax structure is equal to VPS_NUT, SPS_NUT or PPS_NUT.
# - The byte stream NAL unit syntax structure contains the first NAL unit of an access unit in decoding
# order, as specified in clause 7.4.2.4.4.
# NAL units of these types are copied verbatim into the prefix data returned by hevc_index.
HEVC_PARAMETER_SET_NAL_UNITS = (
  HevcNalUnitType.VPS_NUT,
  HevcNalUnitType.SPS_NUT,
  HevcNalUnitType.PPS_NUT,
)

# 3.29 coded slice segment NAL unit: A NAL unit that has nal_unit_type in the range of TRAIL_N to RASL_R,
# inclusive, or in the range of BLA_W_LP to RSV_IRAP_VCL23, inclusive, which indicates that the NAL unit
# contains a coded slice segment
# Note: RSV_IRAP_VCL22 and RSV_IRAP_VCL23 lie inside the quoted range but are not listed here,
# so hevc_index does not parse a slice header for them.
HEVC_CODED_SLICE_SEGMENT_NAL_UNITS = (
  HevcNalUnitType.TRAIL_N,
  HevcNalUnitType.TRAIL_R,
  HevcNalUnitType.TSA_N,
  HevcNalUnitType.TSA_R,
  HevcNalUnitType.STSA_N,
  HevcNalUnitType.STSA_R,
  HevcNalUnitType.RADL_N,
  HevcNalUnitType.RADL_R,
  HevcNalUnitType.RASL_N,
  HevcNalUnitType.RASL_R,
  HevcNalUnitType.BLA_W_LP,
  HevcNalUnitType.BLA_W_RADL,
  HevcNalUnitType.BLA_N_LP,
  HevcNalUnitType.IDR_W_RADL,
  HevcNalUnitType.IDR_N_LP,
  HevcNalUnitType.CRA_NUT,
)
class VideoFileInvalid(Exception):
  """Raised by the indexing helpers when the byte stream is not well-formed."""
def get_ue(dat: bytes, start_idx: int, skip_bits: int) -> Tuple[int, int]:
  """Decode one unsigned Exp-Golomb code, reading bits most-significant first.

  dat: buffer to read from.
  start_idx: index of the byte where bit counting begins.
  skip_bits: number of bits to ignore, counted from the top bit of dat[start_idx].

  Returns (value, size) where size is the length of the code in bits; the
  skipped bits are not included in size.
  Raises VideoFileInvalid when the data ends before the code is complete.
  """
  to_skip = skip_bits
  marker_found = False  # True once the terminating 1 of the zero prefix was read
  prefix_bits = 0       # leading zeros plus the terminating 1
  suffix = 0
  suffix_bits = 0

  for byte_idx in range(start_idx, len(dat)):
    byte = dat[byte_idx]
    for shift in range(7, -1, -1):
      if to_skip > 0:
        to_skip -= 1
        continue

      bit = (byte >> shift) & 1
      if marker_found:
        suffix = (suffix << 1) | bit
        suffix_bits += 1
      else:
        marker_found = bit == 1
        prefix_bits += 1

      # the code is complete once the suffix is as long as the run of leading zeros
      if marker_found and suffix_bits == prefix_bits - 1:
        return (1 << (prefix_bits - 1)) - 1 + suffix, prefix_bits + suffix_bits

  raise VideoFileInvalid("invalid exponential-golomb code")
def require_nal_unit_start(dat: bytes, nal_unit_start: int) -> None:
  """Check that a NAL unit start code, preceded by a zero byte, sits at nal_unit_start.

  Raises ValueError when nal_unit_start is not inside dat or is smaller than 1.
  Raises VideoFileInvalid when the preceding byte is not 0x00 or the bytes at
  nal_unit_start are not the start code.
  """
  if not nal_unit_start < len(dat):
    raise ValueError("start index must be less than data length")
  if not nal_unit_start >= 1:
    raise ValueError("start index must be greater than zero")

  preceding_byte = dat[nal_unit_start - 1]
  candidate = dat[nal_unit_start:nal_unit_start + NAL_UNIT_START_CODE_SIZE]

  if preceding_byte != 0x00:
    raise VideoFileInvalid("start code must be preceded by 0x00")
  if candidate != NAL_UNIT_START_CODE:
    raise VideoFileInvalid("data must begin with start code")
def get_hevc_nal_unit_length(dat: bytes, nal_unit_start: int) -> int:
  """Return the byte count from nal_unit_start up to the next start code.

  The search begins right after the start code at nal_unit_start. When no
  further start code exists, the unit extends to the end of dat.
  """
  search_from = nal_unit_start + NAL_UNIT_START_CODE_SIZE
  next_start = dat.find(NAL_UNIT_START_CODE, search_from)
  if next_start == -1:
    # last NAL unit in the buffer
    next_start = len(dat)
  return next_start - nal_unit_start
def get_hevc_nal_unit_type(dat: bytes, nal_unit_start: int) -> HevcNalUnitType:
  """Return the nal_unit_type of the NAL unit whose start code is at nal_unit_start.

  Raises VideoFileInvalid when fewer than two header bytes follow the start code.
  """
  # 7.3.1.2 NAL unit header syntax
  # nal_unit_header( ) { // descriptor
  #   forbidden_zero_bit     f(1)
  #   nal_unit_type          u(6)
  #   nuh_layer_id           u(6)
  #   nuh_temporal_id_plus1  u(3)
  # }
  header_begin = nal_unit_start + NAL_UNIT_START_CODE_SIZE
  header_end = header_begin + NAL_UNIT_HEADER_SIZE
  header = dat[header_begin:header_end]
  if len(header) != 2:
    raise VideoFileInvalid("data to short to contain nal unit header")

  # nal_unit_type is the six bits below the top bit of the first header byte
  type_bits = (header[0] & 0x7E) >> 1
  return HevcNalUnitType(type_bits)
def get_hevc_slice_type(dat: bytes, nal_unit_start: int, nal_unit_type: HevcNalUnitType) -> Tuple[int, bool]:
  """Parse the start of a slice segment header and return its slice type.

  dat: buffer holding the byte stream.
  nal_unit_start: index of the start code of a coded slice segment NAL unit.
  nal_unit_type: type of that NAL unit, used to decide whether
    no_output_of_prior_pics_flag is present.

  Returns (slice_type, is_first_slice). slice_type is 0 (B), 1 (P) or 2 (I)
  for the first slice segment of a picture, and -1 for any other slice
  segment, whose header is not parsed further.

  Raises VideoFileInvalid when the data ends before the slice segment header,
  when an Exp-Golomb code is incomplete, or when slice_type is greater than 2.
  """
  # 7.3.2.9 Slice segment layer RBSP syntax
  # slice_segment_layer_rbsp( ) {
  #   slice_segment_header( )
  #   slice_segment_data( )
  #   rbsp_slice_segment_trailing_bits( )
  # }
  # ...
  # 7.3.6.1 General slice segment header syntax
  # slice_segment_header( ) { // descriptor
  #   first_slice_segment_in_pic_flag  u(1)
  #   if( nal_unit_type >= BLA_W_LP && nal_unit_type <= RSV_IRAP_VCL23 )
  #     no_output_of_prior_pics_flag   u(1)
  #   slice_pic_parameter_set_id       ue(v)
  #   if( !first_slice_segment_in_pic_flag ) {
  #     if( dependent_slice_segments_enabled_flag )
  #       dependent_slice_segment_flag u(1)
  #     slice_segment_address          u(v)
  #   }
  #   if( !dependent_slice_segment_flag ) {
  #     for( i = 0; i < num_extra_slice_header_bits; i++ )
  #       slice_reserved_flag[ i ]     u(1)
  #     slice_type                     ue(v)
  # ...

  rbsp_start = nal_unit_start + NAL_UNIT_START_CODE_SIZE + NAL_UNIT_HEADER_SIZE
  if rbsp_start >= len(dat):
    # a unit cut off right after its header would otherwise surface as a bare IndexError
    raise VideoFileInvalid("data too short to contain slice segment header")
  skip_bits = 0

  # 7.4.7.1 General slice segment header semantics
  # first_slice_segment_in_pic_flag equal to 1 specifies that the slice segment is the first slice segment of the picture in
  # decoding order. first_slice_segment_in_pic_flag equal to 0 specifies that the slice segment is not the first slice segment
  # of the picture in decoding order.
  is_first_slice = ((dat[rbsp_start] >> 7) & 1) == 1
  if not is_first_slice:
    # TODO: parse dependent_slice_segment_flag and slice_segment_address and get real slice_type
    # for now since we don't use it return -1 for slice_type
    return (-1, is_first_slice)
  skip_bits += 1 # skip past first_slice_segment_in_pic_flag

  if HevcNalUnitType.BLA_W_LP <= nal_unit_type <= HevcNalUnitType.RSV_IRAP_VCL23:
    # 7.4.7.1 General slice segment header semantics
    # no_output_of_prior_pics_flag affects the output of previously-decoded pictures in the decoded picture buffer after the
    # decoding of an IDR or a BLA picture that is not the first picture in the bitstream as specified in Annex C.
    skip_bits += 1 # skip past no_output_of_prior_pics_flag

  # 7.4.7.1 General slice segment header semantics
  # slice_pic_parameter_set_id specifies the value of pps_pic_parameter_set_id for the PPS in use.
  # The value of slice_pic_parameter_set_id shall be in the range of 0 to 63, inclusive.
  _, size = get_ue(dat, rbsp_start, skip_bits)
  skip_bits += size # skip past slice_pic_parameter_set_id

  # 7.4.3.3.1 General picture parameter set RBSP semantics
  # num_extra_slice_header_bits specifies the number of extra slice header bits that are present in the slice header RBSP
  # for coded pictures referring to the PPS. The value of num_extra_slice_header_bits shall be in the range of 0 to 2, inclusive,
  # in bitstreams conforming to this version of this Specification. Other values for num_extra_slice_header_bits are reserved
  # for future use by ITU-T | ISO/IEC. However, decoders shall allow num_extra_slice_header_bits to have any value.
  # TODO: get from PPS_NUT pic_parameter_set_rbsp( ) for corresponding slice_pic_parameter_set_id
  num_extra_slice_header_bits = 0
  skip_bits += num_extra_slice_header_bits

  # 7.4.7.1 General slice segment header semantics
  # slice_type specifies the coding type of the slice according to Table 7-7.
  # Table 7-7 - Name association to slice_type
  # slice_type | Name of slice_type
  #     0      |   B (B slice)
  #     1      |   P (P slice)
  #     2      |   I (I slice)
  # unsigned integer 0-th order Exp-Golomb-coded syntax element with the left bit first
  slice_type, _ = get_ue(dat, rbsp_start, skip_bits)
  if slice_type > 2:
    raise VideoFileInvalid("slice_type must be 0, 1, or 2")
  return slice_type, is_first_slice
def hevc_index(hevc_file_name: str, allow_corrupt: bool=False) -> Tuple[list, int, bytes]:
  """Walk the NAL units of an HEVC byte stream and index its pictures.

  hevc_file_name: file to read through FileReader.
  allow_corrupt: when True, any error raised while walking the stream stops
    the walk and whatever was collected so far is returned; when False the
    error propagates.

  Returns (frame_types, data_length, prefix):
    frame_types - list of (slice_type, offset) for each first slice segment
      of a picture, offset being the index of its start code;
    data_length - total number of bytes read;
    prefix - concatenation of all VPS/SPS/PPS NAL units in stream order.

  Raises VideoFileInvalid when the file is too short to hold a start code.
  """
  with FileReader(hevc_file_name) as f:
    dat = f.read()

  if len(dat) < NAL_UNIT_START_CODE_SIZE + 1:
    raise VideoFileInvalid("data is too short")

  prefix_parts = []
  frame_types = []

  try:
    # byte 0 is the zero byte in front of the first start code; require_nal_unit_start verifies it
    pos = 1
    while pos < len(dat):
      require_nal_unit_start(dat, pos)
      unit_len = get_hevc_nal_unit_length(dat, pos)
      unit_type = get_hevc_nal_unit_type(dat, pos)

      if unit_type in HEVC_PARAMETER_SET_NAL_UNITS:
        prefix_parts.append(dat[pos:pos + unit_len])
      elif unit_type in HEVC_CODED_SLICE_SEGMENT_NAL_UNITS:
        slice_type, is_first_slice = get_hevc_slice_type(dat, pos, unit_type)
        if is_first_slice:
          frame_types.append((slice_type, pos))

      pos += unit_len
  except Exception:
    if not allow_corrupt:
      raise

  return frame_types, len(dat), b"".join(prefix_parts)
def main() -> None:
  """Command-line entry point: index input_file and write the two output files.

  The prefix file receives the parameter-set bytes returned by hevc_index.
  The index file receives one little-endian (uint32, uint32) record per
  frame, followed by a final (0xFFFFFFFF, data length) record.
  """
  parser = argparse.ArgumentParser()
  for positional in ("input_file", "output_prefix_file", "output_index_file"):
    parser.add_argument(positional, type=str)
  args = parser.parse_args()

  frame_types, dat_len, prefix_dat = hevc_index(args.input_file)

  with open(args.output_prefix_file, "wb") as prefix_out:
    prefix_out.write(prefix_dat)

  with open(args.output_index_file, "wb") as index_out:
    for slice_type, offset in frame_types:
      index_out.write(struct.pack("<II", slice_type, offset))
    # terminating record
    index_out.write(struct.pack("<II", 0xFFFFFFFF, dat_len))
if __name__ == "__main__":
main()

View File

@ -1 +0,0 @@
vidindex

View File

@ -1,6 +0,0 @@
CC := gcc

# Compile to a path obtained from mktemp, then mv the result onto the target,
# so the target file is only replaced after the compile step has succeeded.
vidindex: bitstream.c bitstream.h vidindex.c
	$(eval $@_TMP := $(shell mktemp))
	$(CC) -std=c99 bitstream.c vidindex.c -o $($@_TMP)
	mv $($@_TMP) $@

View File

@ -1,118 +0,0 @@
#include "./bitstream.h"
#include <stdbool.h>
#include <assert.h>
// BS_MASKS[n] has the low n bits set, for n = 0..32; bs_get uses it to keep
// only the n bits that were requested.
static const uint32_t BS_MASKS[33] = {
    0,           0x1L,        0x3L,       0x7L,       0xFL,       0x1FL,
    0x3FL,       0x7FL,       0xFFL,      0x1FFL,     0x3FFL,     0x7FFL,
    0xFFFL,      0x1FFFL,     0x3FFFL,    0x7FFFL,    0xFFFFL,    0x1FFFFL,
    0x3FFFFL,    0x7FFFFL,    0xFFFFFL,   0x1FFFFFL,  0x3FFFFFL,  0x7FFFFFL,
    0xFFFFFFL,   0x1FFFFFFL,  0x3FFFFFFL, 0x7FFFFFFL, 0xFFFFFFFL, 0x1FFFFFFFL,
    0x3FFFFFFFL, 0x7FFFFFFFL, 0xFFFFFFFFL};
// Point the reader at `buffer` (input_size bytes) and reset all bit-level
// state so that reading starts at the first bit of the buffer.
void bs_init(struct bitstream* bs, const uint8_t* buffer, size_t input_size) {
  const uint8_t* const first_byte = buffer;

  // input window
  bs->buffer_ptr = first_byte;
  bs->buffer_end = first_byte + input_size;
  bs->size = input_size * 8;  // total length in bits

  // nothing consumed, nothing loaded yet
  bs->pos = 0;
  bs->value = 0;
  bs->shift = 8;
}
// Consume n bits and return them in the low n bits of the result.
// Returns 0 without touching the stream when n > 32. When the input runs out
// while loading bytes, bs_seek is called with the position held before this
// call and 0 is returned.
uint32_t bs_get(struct bitstream* bs, int n) {
  if (n > 32)
    return 0;

  bs->pos += n;
  bs->shift += n;
  // load whole bytes into value until the requested bits are available
  while (bs->shift > 8) {
    if (bs->buffer_ptr < bs->buffer_end) {
      bs->value <<= 8;
      bs->value |= *bs->buffer_ptr++;
      bs->shift -= 8;
    } else {
      // out of input: hand the pre-call position to bs_seek and report 0
      bs_seek(bs, bs->pos - n);
      return 0;
      // bs->value <<= 8;
      // bs->shift -= 8;
    }
  }
  // the requested bits sit (8 - shift) bits above the bottom of value
  return (bs->value >> (8 - bs->shift)) & BS_MASKS[n];
}
// Reset the bit state for position new_pos: pos is set to new_pos rounded
// down to a multiple of 32, shift/value are cleared, and the remaining
// new_pos % 32 bits are consumed through bs_get.
// NOTE(review): buffer_ptr is not changed here, so the bytes consumed by the
// bs_get call come from wherever the read pointer currently is - confirm this
// is the intended behaviour before relying on bs_seek for real repositioning.
void bs_seek(struct bitstream* bs, size_t new_pos) {
  bs->pos = (new_pos / 32) * 32;
  bs->shift = 8;
  bs->value = 0;
  bs_get(bs, new_pos % 32);
}
uint32_t bs_peek(struct bitstream* bs, int n) {
struct bitstream bak = *bs;
return bs_get(&bak, n);
}
// Number of bits between the current position and the end of the input.
size_t bs_remain(struct bitstream* bs) {
  const size_t total_bits = bs->size;
  return total_bits - bs->pos;
}
// 1 when no bits remain to be read, 0 otherwise.
int bs_eof(struct bitstream* bs) {
  return !bs_remain(bs);
}
// Read one unsigned Exp-Golomb coded value: count the leading zero bits, then
// read that many bits plus one and subtract 1.
uint32_t bs_ue(struct bitstream* bs) {
  // exp_golomb_bits[b] is the number of leading zero bits in the byte b
  // (8 for b == 0, 0 for b >= 128).
  static const uint8_t exp_golomb_bits[256] = {
      8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  uint32_t bits, read = 0;
  int bits_left;
  uint8_t coded;
  int done = 0;
  bits = 0;
  // we want to read 8 bits at a time - if we don't have 8 bits,
  // read what's left, and shift. The exp_golomb_bits calc remains the
  // same.
  while (!done) {
    bits_left = bs_remain(bs);
    if (bits_left < 8) {
      // fewer than 8 bits left: left-align them so the table lookup still works
      read = bs_peek(bs, bits_left) << (8 - bits_left);
      done = 1;
    } else {
      read = bs_peek(bs, 8);
      if (read == 0) {
        // a whole byte of zeros: consume it and keep counting
        bs_get(bs, 8);
        bits += 8;
      } else {
        done = 1;
      }
    }
  }
  // consume the zeros at the top of the last peeked byte
  coded = exp_golomb_bits[read];
  bs_get(bs, coded);
  bits += coded;

  // printf("ue - bits %d\n", bits);
  // `bits` leading zeros are followed by a (bits + 1)-bit field holding value + 1
  return bs_get(bs, bits + 1) - 1;
}
// Read one signed Exp-Golomb value: the unsigned code k read by bs_ue maps to
// (k + 1) / 2 when k is odd and to -(k / 2) when k is even.
int32_t bs_se(struct bitstream* bs) {
  const uint32_t code = bs_ue(bs);

  if (code & 0x1) {
    return (code + 1) >> 1;
  }

  const int32_t negated = 0 - (code >> 1);
  return negated;
}

View File

@ -1,26 +0,0 @@
#ifndef bitstream_H
#define bitstream_H

#include <stddef.h>
#include <stdint.h>

// State of a forward bit reader over a byte buffer (set up by bs_init).
struct bitstream {
  const uint8_t *buffer_ptr;  // next input byte to load into `value`
  const uint8_t *buffer_end;  // one past the last input byte
  uint64_t value;             // accumulator of the bytes loaded so far
  uint32_t pos;               // number of bits consumed so far
  uint32_t shift;             // bit bookkeeping within `value`; bs_init sets it to 8
  size_t size;                // input length in bits
};

// Attach the reader to `buffer` of `input_size` bytes and reset its state.
void bs_init(struct bitstream *bs, const uint8_t *buffer, size_t input_size);
// Reset the bit state for bit position `new_pos`.
void bs_seek(struct bitstream *bs, size_t new_pos);
// Consume and return the next n bits (0 when n > 32 or the input runs out).
uint32_t bs_get(struct bitstream *bs, int n);
// Same result as bs_get, computed on a copy so `bs` does not advance.
uint32_t bs_peek(struct bitstream *bs, int n);
// Bits left between the current position and the end of the input.
size_t bs_remain(struct bitstream *bs);
// Non-zero when bs_remain() is 0.
int bs_eof(struct bitstream *bs);
// Read an unsigned Exp-Golomb coded value.
uint32_t bs_ue(struct bitstream *bs);
// Read a signed Exp-Golomb coded value.
int32_t bs_se(struct bitstream *bs);

#endif

View File

@ -1,307 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "./bitstream.h"
#define START_CODE 0x000001
// Assemble the three bytes at ptr into a 24-bit big-endian value.
static uint32_t read24be(const uint8_t* ptr) {
  uint32_t value = ptr[0];
  value = (value << 8) | ptr[1];
  value = (value << 8) | ptr[2];
  return value;
}
// Write v to `of` as four bytes, least significant byte first.
static void write32le(FILE *of, uint32_t v) {
  uint8_t bytes[4];
  for (int i = 0; i < 4; i++) {
    bytes[i] = (uint8_t)((v >> (8 * i)) & 0xff);
  }
  fwrite(bytes, 1, sizeof(bytes), of);
}
// Table 7-1
// nal_unit_type values used by hevc_index: it reads this 6-bit field from the
// NAL unit header and switches on it.
enum hevc_nal_type {
  HEVC_NAL_TYPE_TRAIL_N = 0,
  HEVC_NAL_TYPE_TRAIL_R = 1,
  HEVC_NAL_TYPE_TSA_N = 2,
  HEVC_NAL_TYPE_TSA_R = 3,
  HEVC_NAL_TYPE_STSA_N = 4,
  HEVC_NAL_TYPE_STSA_R = 5,
  HEVC_NAL_TYPE_RADL_N = 6,
  HEVC_NAL_TYPE_RADL_R = 7,
  HEVC_NAL_TYPE_RASL_N = 8,
  HEVC_NAL_TYPE_RASL_R = 9,
  HEVC_NAL_TYPE_BLA_W_LP = 16,
  HEVC_NAL_TYPE_BLA_W_RADL = 17,
  HEVC_NAL_TYPE_BLA_N_LP = 18,
  HEVC_NAL_TYPE_IDR_W_RADL = 19,
  HEVC_NAL_TYPE_IDR_N_LP = 20,
  HEVC_NAL_TYPE_CRA_NUT = 21,
  HEVC_NAL_TYPE_RSV_IRAP_VCL23 = 23,
  // parameter sets: hevc_index copies these NAL units to the prefix output
  HEVC_NAL_TYPE_VPS_NUT = 32,
  HEVC_NAL_TYPE_SPS_NUT = 33,
  HEVC_NAL_TYPE_PPS_NUT = 34,
  HEVC_NAL_TYPE_AUD_NUT = 35,
  HEVC_NAL_TYPE_EOS_NUT = 36,
  HEVC_NAL_TYPE_EOB_NUT = 37,
  HEVC_NAL_TYPE_FD_NUT = 38,
  HEVC_NAL_TYPE_PREFIX_SEI_NUT = 39,
  HEVC_NAL_TYPE_SUFFIX_SEI_NUT = 40,
};
// Table 7-7
// Values of the slice_type field that hevc_index writes to the index file.
enum hevc_slice_type {
  HEVC_SLICE_B = 0,
  HEVC_SLICE_P = 1,
  HEVC_SLICE_I = 2,
};
// Walk the NAL units of an HEVC byte stream held in data[0..file_size).
//
// Every VPS/SPS/PPS NAL unit is copied to of_prefix. For each coded slice
// segment that is the first of its picture, a (slice_type, byte offset of the
// start code) pair of little-endian uint32 values is appended to of_index.
// A final (0xFFFFFFFF, file_size) pair terminates the index.
//
// The data must begin with a zero byte followed by the start code; this is
// enforced with assert().
static void hevc_index(const uint8_t *data, size_t file_size, FILE *of_prefix, FILE *of_index) {
  const uint8_t* ptr = data;
  const uint8_t* ptr_end = data + file_size;

  assert(ptr[0] == 0);
  ptr++;
  assert(read24be(ptr) == START_CODE);

  // pps. ignore for now
  uint32_t num_extra_slice_header_bits = 0;
  uint32_t dependent_slice_segments_enabled_flag = 0;

  while (ptr < ptr_end) {
    // find the start code of the following NAL unit
    // NOTE(review): the scan stops 4 bytes short of the end, so the size
    // computed for the last NAL unit leaves out the final bytes of the file.
    const uint8_t* next = ptr+1;
    for (; next < ptr_end-4; next++) {
      if (read24be(next) == START_CODE) break;
    }
    size_t nal_size = next - ptr;
    if (nal_size < 6) {
      break;
    }

    {
      struct bitstream bs = {0};
      bs_init(&bs, ptr, nal_size);

      uint32_t start_code = bs_get(&bs, 24);
      assert(start_code == 0x000001);

      // nal_unit_header
      uint32_t forbidden_zero_bit = bs_get(&bs, 1);
      uint32_t nal_unit_type = bs_get(&bs, 6);
      uint32_t nuh_layer_id = bs_get(&bs, 6);
      uint32_t nuh_temporal_id_plus1 = bs_get(&bs, 3);

      // if (nal_unit_type != 1) printf("%3d -- %3d %10d %lu\n", nal_unit_type, frame_num, (uint32_t)(ptr-data), nal_size);

      switch (nal_unit_type) {
      case HEVC_NAL_TYPE_VPS_NUT:
      case HEVC_NAL_TYPE_SPS_NUT:
      case HEVC_NAL_TYPE_PPS_NUT:
        fwrite(ptr, 1, nal_size, of_prefix);
        break;
      case HEVC_NAL_TYPE_TRAIL_N:
      case HEVC_NAL_TYPE_TRAIL_R:
      case HEVC_NAL_TYPE_TSA_N:
      case HEVC_NAL_TYPE_TSA_R:
      case HEVC_NAL_TYPE_STSA_N:
      case HEVC_NAL_TYPE_STSA_R:
      case HEVC_NAL_TYPE_RADL_N:
      case HEVC_NAL_TYPE_RADL_R:
      case HEVC_NAL_TYPE_RASL_N:
      case HEVC_NAL_TYPE_RASL_R:
      case HEVC_NAL_TYPE_BLA_W_LP:
      case HEVC_NAL_TYPE_BLA_W_RADL:
      case HEVC_NAL_TYPE_BLA_N_LP:
      case HEVC_NAL_TYPE_IDR_W_RADL:
      case HEVC_NAL_TYPE_IDR_N_LP:
      case HEVC_NAL_TYPE_CRA_NUT: {
        // slice_segment_header
        uint32_t first_slice_segment_in_pic_flag = bs_get(&bs, 1);
        if (nal_unit_type >= HEVC_NAL_TYPE_BLA_W_LP && nal_unit_type <= HEVC_NAL_TYPE_RSV_IRAP_VCL23) {
          uint32_t no_output_of_prior_pics_flag = bs_get(&bs, 1);
        }
        // slice_pic_parameter_set_id is Exp-Golomb coded (ue(v)), not a single
        // bit: reading just one bit misaligns slice_type for any id other than 0
        uint32_t slice_pic_parameter_set_id = bs_ue(&bs);
        if (!first_slice_segment_in_pic_flag) {
          // ...
          break;
        }

        if (!dependent_slice_segments_enabled_flag) {
          for (int i=0; i<num_extra_slice_header_bits; i++) {
            bs_get(&bs, 1);
          }
          uint32_t slice_type = bs_ue(&bs);

          // write the index
          write32le(of_index, slice_type);
          write32le(of_index, ptr - data);

          // ...
        }
        break;
      }
      }

      //...
      // emulation_prevention_three_byte
    }

    ptr = next;
  }

  // terminating record: type 0xFFFFFFFF, offset = total file size
  write32le(of_index, -1);
  write32le(of_index, file_size);
}
// Table 7-1
// nal_unit_type values that h264_index reads from the 5-bit header field and
// switches on.
enum h264_nal_type {
  H264_NAL_SLICE = 1,
  H264_NAL_DPA = 2,
  H264_NAL_DPB = 3,
  H264_NAL_DPC = 4,
  H264_NAL_IDR_SLICE = 5,
  H264_NAL_SEI = 6,
  H264_NAL_SPS = 7,
  H264_NAL_PPS = 8,
  H264_NAL_AUD = 9,
  H264_NAL_END_SEQUENCE = 10,
  H264_NAL_END_STREAM = 11,
  H264_NAL_FILLER_DATA = 12,
  H264_NAL_SPS_EXT = 13,
  H264_NAL_AUXILIARY_SLICE = 19,
};
// Names for the first values of the slice_type field that h264_index writes
// to the index file; further values are elided in the original.
enum h264_slice_type {
  H264_SLICE_P = 0,
  H264_SLICE_B = 1,
  H264_SLICE_I = 2,
  // ...
};
// Walk the NAL units of an H.264 byte stream held in data[0..file_size).
//
// Every SPS/PPS NAL unit is copied to of_prefix. For each slice whose
// first_mb_in_slice is 0, a (slice_type, byte offset of the start code) pair
// of little-endian uint32 values is appended to of_index. A final
// (0xFFFFFFFF, file_size) pair terminates the index.
//
// The data must begin with a zero byte followed by the start code; this is
// enforced with assert().
static void h264_index(const uint8_t *data, size_t file_size, FILE *of_prefix, FILE *of_index) {
  const uint8_t* ptr = data;
  const uint8_t* ptr_end = data + file_size;

  assert(ptr[0] == 0);
  ptr++;
  assert(read24be(ptr) == START_CODE);

  // initialised so that a slice appearing before any SPS does not read an
  // indeterminate value when sizing the frame_num field below
  uint32_t sps_log2_max_frame_num_minus4 = 0;

  int last_frame_num = -1;

  while (ptr < ptr_end) {
    // find the start code of the following NAL unit
    const uint8_t* next = ptr+1;
    for (; next < ptr_end-4; next++) {
      if (read24be(next) == START_CODE) break;
    }
    size_t nal_size = next - ptr;
    if (nal_size < 5) {
      break;
    }

    {
      struct bitstream bs = {0};
      bs_init(&bs, ptr, nal_size);

      uint32_t start_code = bs_get(&bs, 24);
      assert(start_code == 0x000001);

      // nal_unit_header
      uint32_t forbidden_zero_bit = bs_get(&bs, 1);
      uint32_t nal_ref_idx = bs_get(&bs, 2);
      uint32_t nal_unit_type = bs_get(&bs, 5);

      switch (nal_unit_type) {
      case H264_NAL_SPS:
        {
          // NOTE(review): the three reads after profile_idx consume 14 bits in
          // total; confirm these widths against the SPS syntax in the H.264
          // specification. Only frame_num (unused) depends on the result.
          uint32_t profile_idx = bs_get(&bs, 8);
          uint32_t constraint_sets = bs_get(&bs, 4);
          uint32_t reserved = bs_get(&bs, 5);
          uint32_t level_idc = bs_get(&bs, 5);
          uint32_t seq_parameter_set_id = bs_ue(&bs);
          sps_log2_max_frame_num_minus4 = bs_ue(&bs);
        }
        // fallthrough
      case H264_NAL_PPS:
        fwrite(ptr, 1, nal_size, of_prefix);
        break;

      case H264_NAL_SLICE:
      case H264_NAL_IDR_SLICE: {
        // slice header
        uint32_t first_mb_in_slice = bs_ue(&bs);
        uint32_t slice_type = bs_ue(&bs);
        uint32_t pic_parameter_set_id = bs_ue(&bs);

        uint32_t frame_num = bs_get(&bs, sps_log2_max_frame_num_minus4+4);

        // only the first slice of a picture produces an index entry
        if (first_mb_in_slice == 0) {
          write32le(of_index, slice_type);
          write32le(of_index, ptr - data);
        }

        break;
      }
      }
    }

    ptr = next;
  }

  // terminating record: type 0xFFFFFFFF, offset = total file size
  write32le(of_index, -1);
  write32le(of_index, file_size);
}
// usage: vidindex h264|hevc file_path out_prefix out_index
//
// Maps file_path into memory, runs the indexer selected by the first argument
// and writes the prefix and index output files.
// Returns 0 on success; exits with status 1 on a usage error or when the
// input cannot be opened, and returns 1 when writing either output failed.
int main(int argc, char** argv) {
  if (argc != 5) {
    fprintf(stderr, "usage: %s h264|hevc file_path out_prefix out_index\n", argv[0]);
    exit(1);
  }

  const char* file_type = argv[1];
  const char* file_path = argv[2];

  int fd = open(file_path, O_RDONLY, 0);
  if (fd < 0) {
    fprintf(stderr, "error: couldn't open %s\n", file_path);
    exit(1);
  }

  FILE *of_prefix = fopen(argv[3], "wb");
  assert(of_prefix);
  FILE *of_index = fopen(argv[4], "wb");
  assert(of_index);

  // determine the input size, then rewind
  off_t file_size = lseek(fd, 0, SEEK_END);
  lseek(fd, 0, SEEK_SET);

  assert(file_size > 4);

  const uint8_t* data = (const uint8_t*)mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
  assert(data != MAP_FAILED);

  if (strcmp(file_type, "hevc") == 0) {
    hevc_index(data, file_size, of_prefix, of_index);
  } else if (strcmp(file_type, "h264") == 0) {
    h264_index(data, file_size, of_prefix, of_index);
  } else {
    assert(false);
  }

  munmap((void*)data, file_size);
  close(fd);

  // Flush and close both outputs explicitly so that a failed write is
  // reported through the exit status instead of passing silently.
  int write_failed = ferror(of_prefix) || ferror(of_index);
  if (fclose(of_prefix) != 0) {
    write_failed = 1;
  }
  if (fclose(of_index) != 0) {
    write_failed = 1;
  }
  if (write_failed) {
    fprintf(stderr, "error: failed writing output files\n");
    return 1;
  }

  return 0;
}