Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/advanced-topics/fuzz_introspector.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ seconds.
$ python3 infra/helper.py introspector libdwarf --seconds=30
```

If the above command was succesful, you should see output along the lines of:
If the above command was successful, you should see output along the lines of:

```bash
INFO:root:To browse the report, run: python3 -m http.server 8008 --directory /home/my_user/oss-fuzz/build/out/libdwarf/introspector-report/inspector and navigate to localhost:8008/fuzz_report.html in your browser
Expand All @@ -91,7 +91,7 @@ and will extract code coverage based on the publicly available corpora.
$ python3 infra/helper.py introspector libdwarf --public-corpora
```

Assuming the above command is succesful you can view the report using `python3 -m http.server`
Assuming the above command is successful you can view the report using `python3 -m http.server`
following the example described above.


Expand Down
235 changes: 235 additions & 0 deletions projects/vlc/generate_seeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
# generate_seeds.py <fuzz-corpus-root>

import os
import re
import struct
import sys

Expand Down Expand Up @@ -2968,6 +2969,239 @@ def gen_mkv(root):
_build_mkv_dvd_chapters_seed())


# ──────────────────────────────────────────────────
# MP4 extras (modules/demux/mp4/libmp4.c)
# ──────────────────────────────────────────────────
#
# The upstream vlc-fuzz-corpus seeds in seeds/mp4/ (aac_audio.mp4,
# avc_video.mp4, fragmented.mp4, with_sidx.mp4, …) exercise the common
# ftyp/moov/trak/mdia/stbl tree, leaving several specialized libmp4.c
# parsers at 0% coverage in the production OSS-Fuzz report:
#
# * MP4_ReadBox_st3d / prhd / equi / cbmp — spherical/VR metadata
# (sv3d > proj > {prhd,equi,cbmp}; st3d at any depth)
# * MP4_ReadBox_tfrf / tfxd / XML360 — Smooth Streaming /
# Google360 uuid-typed boxes routed through MP4_ReadBox_uuid
# * MP4_ReadBox_urn — DataReference 'urn '
# variant; the upstream corpus only uses 'url '
#
# The seeds below place those boxes directly under a minimal moov so
# MP4_BoxGetRoot walks them during demux_New, even though mp4.c::Open
# subsequently fails (no trak with ES). Box parsing completes before
# that failure, which is the only requirement for hitting the parsers.
# The mp4 dictionary is also enlarged from 3 tokens to ~200 by
# harvesting every ATOM_xxxx 4CC define from libmp4.h so libfuzzer
# mutation has a chance of synthesizing the dispatch keys.

# 16-byte extended-type identifiers placed at the start of a 'uuid'
# box body (see mp4_uuid_box). Written in the usual 8-4-4-4-12 UUID
# grouping; bytes.fromhex() ignores the separating spaces.
UUID_TFRF = bytes.fromhex('d4807ef2 ca39 4695 8e54 26cb9e46a79f')
UUID_TFXD = bytes.fromhex('6d1d9b05 42d5 44e6 80e2 141daff757b2')
UUID_XML360 = bytes.fromhex('ffcc8263 f855 4a93 8814 587a02521fdd')


def mp4_uuid_box(uuid: bytes, payload: bytes) -> bytes:
    """Build a 'uuid' box whose body is *uuid* followed by *payload*.

    Args:
        uuid: the 16-byte extended type that identifies the box.
        payload: the bytes that follow the extended type in the body.

    Returns:
        The result of ``box(b'uuid', uuid + payload)``.

    Raises:
        ValueError: if *uuid* is not exactly 16 bytes long.
    """
    # An explicit check rather than `assert`: assertions are removed
    # when Python runs with -O, which would let a malformed extended
    # type slip silently into the seed corpus.
    if len(uuid) != 16:
        raise ValueError(
            'uuid box extended type must be 16 bytes, got %d' % len(uuid))
    return box(b'uuid', uuid + payload)


def _mp4_ftyp_mp42() -> bytes:
    """Return the 'ftyp' box that opens every extra MP4 seed."""
    # mp42/isom brands fall through the default branch in mp4.c::Open;
    # heic/heix/mif1/jpeg/avci/avif/f4v are explicitly diverted to the
    # heif submodule and would cause our seed to be rejected outright.
    major_brand = b'mp42'
    minor_version = struct.pack('>I', 0)
    compatible_brands = b'mp42' + b'isom'
    body = b''.join((major_brand, minor_version, compatible_brands))
    return box(b'ftyp', body)


def _mp4_mvhd_minimal() -> bytes:
    """Return a version-0 'mvhd' full box with a fixed 96-byte body.

    Field names in the comments below follow the ISO BMFF 'mvhd'
    layout; every value is a constant, so the output never varies.
    """
    matrix = (
        0x00010000, 0, 0,
        0, 0x00010000, 0,
        0, 0, 0x40000000,
    )
    fields = (
        # creation_time, modification_time, timescale, duration
        struct.pack('>4I', 0, 0, 1000, 0),
        # rate (16.16 fixed point) and volume (8.8 fixed point)
        struct.pack('>IH', 0x00010000, 0x0100),
        # reserved
        bytes(10),
        struct.pack('>9I', *matrix),
        # pre_defined
        bytes(24),
        # next_track_ID
        struct.pack('>I', 2),
    )
    return fullbox(b'mvhd', 0, 0, b''.join(fields))


def seed_mp4_spherical() -> bytes:
    """Build the spherical/VR seed: ftyp + moov{mvhd, sv3d, st3d}.

    Targets MP4_ReadBox_st3d / prhd / equi / cbmp. The sv3d box wraps
    proj, which in turn holds prhd, equi and cbmp; st3d sits next to
    sv3d. Both sv3d and st3d have i_parent=0 in the dispatch table,
    so they parse at any depth, and hanging them directly off moov
    keeps the seed small.
    """
    projection_children = b''.join((
        fullbox(b'prhd', 0, 0, struct.pack('>3i', 0, 0, 0)),
        fullbox(b'equi', 0, 0, struct.pack('>4I', 0, 0, 0, 0)),
        fullbox(b'cbmp', 0, 0, struct.pack('>2I', 0, 0)),
    ))
    spherical = box(b'sv3d', box(b'proj', projection_children))
    stereo = fullbox(b'st3d', 0, 0, b'\x00')
    moov_body = _mp4_mvhd_minimal() + spherical + stereo
    return _mp4_ftyp_mp42() + box(b'moov', moov_body)


def seed_mp4_uuid_boxes() -> bytes:
    """Build the uuid-box seed: ftyp + moov{mvhd, tfrf, tfxd, XML360}.

    Exercises the UUID-dispatch ladder in MP4_ReadBox_uuid for its
    three handled extended types (TfrfBoxUUID, TfxdBoxUUID,
    XML360BoxUUID). MP4_ReadBox_tfrf / tfxd / XML360 are all 0%
    covered in the production report.
    """
    # tfrf body: 4 zero bytes, one byte 0x01, then the 32-bit pair
    # (0, 100).
    tfrf_body = struct.pack('>IBII', 0, 1, 0, 100)
    # tfxd body: 4 zero bytes, then the 32-bit pair (0, 100).
    tfxd_body = struct.pack('>III', 0, 0, 100)
    # XML360 body: a NUL-terminated RDF description.
    xml360_body = (
        b'<rdf:Description'
        b' xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"'
        b' xmlns:GSpherical="http://ns.google.com/videos/1.0/spherical/"'
        b' GSpherical:Spherical="true"'
        b' GSpherical:Stitched="true"'
        b' GSpherical:ProjectionType="equirectangular"/>\x00')

    uuid_boxes = b''.join(
        mp4_uuid_box(extended_type, body)
        for extended_type, body in (
            (UUID_TFRF, tfrf_body),
            (UUID_TFXD, tfxd_body),
            (UUID_XML360, xml360_body),
        ))
    return _mp4_ftyp_mp42() + box(b'moov', _mp4_mvhd_minimal() + uuid_boxes)


def seed_mp4_dref_urn() -> bytes:
    """Build the 'urn ' seed: ftyp + moov{mvhd, dref{urn}}.

    Reaches MP4_ReadBox_urn through a single urn entry inside a dref
    placed at moov-root. Both dref and urn carry i_parent=0 in the
    dispatch table, so they parse outside the usual
    trak/mdia/minf/dinf chain.
    """
    # Two NUL-terminated strings back to back.
    urn_strings = b'urn:example:fuzz\x00' + b'urn:example:loc\x00'
    urn_entry = fullbox(b'urn ', 0, 0, urn_strings)
    # dref body: a 32-bit count of 1 followed by the one entry.
    data_reference = fullbox(b'dref', 0, 0,
                             struct.pack('>I', 1) + urn_entry)
    moov_body = _mp4_mvhd_minimal() + data_reference
    return _mp4_ftyp_mp42() + box(b'moov', moov_body)


# Output filename -> zero-argument builder returning the seed bytes.
# gen_mp4_extras() writes one file per entry under seeds/mp4/.
MP4_EXTRA_SEEDS = {
    'spherical.mp4': seed_mp4_spherical,
    'uuid_boxes.mp4': seed_mp4_uuid_boxes,
    'dref_urn.mp4': seed_mp4_dref_urn,
}


# Curated fallback used when libmp4.h isn't readable (e.g. running
# generate_seeds.py outside the container). Kept in sync with the
# dispatch table in libmp4.c (5044+) but trimmed to parser-relevant
# atoms; the 4CCs are printable ASCII apart from 'ms\x00\x55'.
#
# Invariants: every entry is exactly four characters long (pad short
# codes with trailing spaces, e.g. 'qt  ') and appears only once, so
# the fallback dictionary carries no malformed or repeated tokens. A
# 4CC that fits several categories is listed under just one of them.
_MP4_FALLBACK_ATOMS = [
    # Structural / brand
    'ftyp', 'moov', 'foov', 'moof', 'mdat', 'free', 'skip', 'wide',
    'udta', 'pnot', 'pict', 'uuid', 'styp', 'cmov', 'dcom', 'cmvd',
    'sidx',
    # Track / media
    'trak', 'tkhd', 'tref', 'load', 'mdia', 'mdhd', 'hdlr', 'minf',
    'vmhd', 'smhd', 'hmhd', 'nmhd', 'dinf', 'dref', 'url ', 'urn ',
    'stbl', 'elst', 'edts', 'mvhd', 'iods',
    # Sample table
    'stsd', 'stts', 'stsc', 'stsz', 'stz2', 'stco', 'co64', 'ctts',
    'cslg', 'stss', 'stsh', 'sdtp', 'padb', 'stps',
    # Sample groups / aux
    'sbgp', 'sgpd', 'saio', 'saiz',
    # Movie extension / fragments
    'mvex', 'mehd', 'trex', 'leva', 'mfhd', 'traf', 'tfhd', 'trun',
    'tfdt', 'tfra', 'mfra', 'mfro', 'prft', 'emsg', 'subs',
    # User data / metadata
    'name', 'kind', 'chap', 'sync', 'hint', 'cont', 'alis', 'rsrc',
    'gnre', 'covr', 'tags', 'ilst', 'data', 'mean', 'keys', 'chpl',
    'ID32', 'hdr3', 'mvcg', 'mvci',
    # Visual sample entries / codec config
    'avc1', 'avc3', 'avc4', 'hvc1', 'hev1', 'hvt1', 'lhv1', 'av01',
    'vp08', 'vp09', 'mp4v', 'mp4a', 'jpeg', 'jpgC', 'jp2 ', 'mjp2',
    'btrt', 'avcC', 'hvcC', 'av1C', 'dvcC', 'dvvC', 'lhvC', 'vpcC',
    'fiel', 'pasp', 'colr', 'clap', 'esds', 'jpeC', 'dac3', 'dec3',
    'enda', 'chnl', 'chan', 'mhaC', 'mhap', 'iso3',
    # HDR / display
    'clli', 'mdcv', 'smdm', 'coll',
    # Spherical / VR
    'sv3d', 'st3d', 'proj', 'prhd', 'equi', 'cbmp', 'svhd',
    # Encryption
    'sinf', 'frma', 'schm', 'schi', 'tenc', 'pssh', 'senc', 'cbcs',
    'cbc1', 'cenc', 'cens',
    # HEIF / item-based
    'meta', 'pitm', 'iinf', 'infe', 'iloc', 'iref', 'dimg', 'thmb',
    'cdsc', 'auxl', 'iprp', 'ipco', 'ipma', 'ispe', 'pixi', 'irot',
    'imir', 'idat', 'grid', 'iovl', 'iden',
    # Apple / QuickTime / metadata atom IDs
    'wave', 'alac', 'in24', 'in32', 'lpcm', 'sowt', 'twos', 'ulaw',
    'alaw', 'samr', 'sawb', 'sawp', '.mp3', '.MP3', 'ms\x00\x55',
    # Branding strings frequently checked in mp4.c
    'mp42', 'mp41', 'isom', 'iso2', 'iso6', 'iso8', 'qt  ', '3gp4',
    '3gp5', 'M4A ', 'M4V ', 'mp71', 'avif', 'avis', 'heic', 'heix',
    'mif1', 'msf1', 'dash', 'cmfc', 'piff', 'CAEP', 'caaa', 'caqv',
    'crsm', 'cvmp', 'sams', 'msnv', 'm4a ',
    # Misc / 3GPP / Nero / Smooth streaming
    'tfrf', 'tfxd', 'rmra', 'rmcs', 'rmdr', 'rmla', 'rmvc', 'rmqu',
    'rmcd', 'rdrf', 'WLOC', 'WCOL', 'WTRK', 'WSEL',
]


def _harvest_libmp4_atoms() -> list:
    """Return the sorted, de-duplicated 4CC tokens for the mp4 dictionary.

    Scans libmp4.h line by line for VLC_FOURCC('a', 'b', 'c', 'd')
    occurrences written with four single-character literals, so the
    dictionary stays in sync with the source. The curated
    _MP4_FALLBACK_ATOMS are always merged in, so spherical/uuid/etc.
    tokens survive even if libmp4.h evolves and renames atoms. When no
    candidate header can be opened (e.g. running outside the container
    during development) the result is the curated set alone.

    Returns:
        A sorted list of unique token strings, whichever path is taken.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        os.path.join(here, 'vlc', 'modules', 'demux', 'mp4', 'libmp4.h'),
        '/src/vlc/modules/demux/mp4/libmp4.h',
    ]
    pat = re.compile(
        r"VLC_FOURCC\(\s*'(.)'\s*,\s*'(.)'\s*,\s*'(.)'\s*,\s*'(.)'\s*\)")
    # Seeding the set with the curated atoms makes the fallback path
    # return the same shape (unique, sorted) as the harvest path.
    tokens = set(_MP4_FALLBACK_ATOMS)
    for path in candidates:
        try:
            # latin-1 maps every byte to one code point, so a stray
            # non-ASCII byte in the header cannot raise
            # UnicodeDecodeError under any locale, and harvested
            # characters stay encodable as latin-1 by mp4_dictionary().
            with open(path, encoding='latin-1') as f:
                for line in f:
                    for m in pat.finditer(line):
                        tokens.add(''.join(m.groups()))
        except OSError:
            # Missing or unreadable candidate: try the next one.
            continue
        # Only the first header that opens is harvested.
        break
    return sorted(tokens)


def mp4_dictionary() -> str:
    """Render the harvested tokens as the text of a libFuzzer dictionary.

    The output is one header comment line followed by one quoted entry
    per token, each line newline-terminated.
    """
    header = '# MP4 / ISOBMFF box / brand tokens harvested from libmp4.h'

    def quoted(token):
        # libFuzzer rejects unprintable bytes outside \x escapes, and
        # rejects unbalanced quotes; encode every byte as \xHH.
        escaped = ''.join(
            '\\x{:02x}'.format(byte) for byte in token.encode('latin-1'))
        return '"{}"'.format(escaped)

    entries = [quoted(token) for token in _harvest_libmp4_atoms()]
    return '\n'.join([header] + entries) + '\n'


def gen_mp4_extras(root):
    """Write the extra MP4 seeds and the mp4 dictionary under *root*.

    Creates <root>/seeds/mp4 and <root>/dictionaries if needed, writes
    one seed file per MP4_EXTRA_SEEDS entry, then writes
    <root>/dictionaries/mp4.dict and prints a confirmation line.
    """
    seed_dir = os.path.join(root, 'seeds', 'mp4')
    dict_dir = os.path.join(root, 'dictionaries')
    for directory in (seed_dir, dict_dir):
        os.makedirs(directory, exist_ok=True)

    for filename, build_seed in MP4_EXTRA_SEEDS.items():
        _write(os.path.join(seed_dir, filename), build_seed())

    dict_path = os.path.join(dict_dir, 'mp4.dict')
    with open(dict_path, 'w') as out:
        out.write(mp4_dictionary())
    print(' dictionaries/mp4.dict written')


# ──────────────────────────────────────────────────
# main
# ──────────────────────────────────────────────────
Expand All @@ -2991,6 +3225,7 @@ def main():
gen_heif_extra(root)
gen_ogg(root)
gen_mkv(root)
gen_mp4_extras(root)


if __name__ == '__main__':
Expand Down
Loading