Hi! Today I decided to fuzz html_tokenizer, which you can find here: https://github.com/Shopify/html_tokenizer
One option is to use afl-ruby. That works, but it gives us no coverage information for the C extension code itself. afl-ruby also adds a ton of overhead when fuzzing, even with its own forkserver.
Another idea is that, instead of compiling the extension as a shared library, we compile it as a standalone program. For that we need to modify the Makefile and add an entry point.
Yeah, that is definitely the way to go.
I modified the Makefile generated by mkmf and currently my makefile looks like this:
SHELL = /bin/sh
# V=0 quiet, V=1 verbose. other values don't work.
V = 0
Q1 = $(V:1=)
Q = $(Q1:0=@)
ECHO1 = $(V:1=@ :)
ECHO = $(ECHO1:0=@ echo)
NULLCMD = :
#### Start of system configuration section. ####
srcdir = .
topdir = /usr/include/ruby-3.0.0
hdrdir = $(topdir)
arch_hdrdir = /usr/include/x86_64-linux-gnu/ruby-3.0.0
PATH_SEPARATOR = :
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
prefix = $(DESTDIR)/usr
rubysitearchprefix = $(sitearchlibdir)/$(RUBY_BASE_NAME)
rubyarchprefix = $(archlibdir)/$(RUBY_BASE_NAME)
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
exec_prefix = $(prefix)
vendorarchhdrdir = $(sitearchincludedir)/$(RUBY_VERSION_NAME)/vendor_ruby
sitearchhdrdir = $(sitearchincludedir)/$(RUBY_VERSION_NAME)/site_ruby
rubyarchhdrdir = $(archincludedir)/$(RUBY_VERSION_NAME)
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
sitehdrdir = $(rubyhdrdir)/site_ruby
rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
vendorarchdir = $(rubysitearchprefix)/vendor_ruby/$(ruby_version)
vendorlibdir = $(vendordir)/$(ruby_version)
vendordir = $(rubylibprefix)/vendor_ruby
sitearchdir = $(DESTDIR)/usr/local/lib/x86_64-linux-gnu/site_ruby
sitelibdir = $(sitedir)/$(ruby_version)
sitedir = $(DESTDIR)/usr/local/lib/site_ruby
rubyarchdir = $(rubyarchprefix)/$(ruby_version)
rubylibdir = $(rubylibprefix)/$(ruby_version)
sitearchincludedir = $(includedir)/$(sitearch)
archincludedir = $(includedir)/$(arch)
sitearchlibdir = $(libdir)/$(sitearch)
archlibdir = $(libdir)/$(arch)
ridir = $(datarootdir)/$(RI_BASE_NAME)
mandir = $(datarootdir)/man
localedir = $(datarootdir)/locale
libdir = $(exec_prefix)/lib
psdir = $(docdir)
pdfdir = $(docdir)
dvidir = $(docdir)
htmldir = $(docdir)
infodir = $(datarootdir)/info
docdir = $(datarootdir)/doc/$(PACKAGE)
oldincludedir = $(DESTDIR)/usr/include
includedir = $(prefix)/include
runstatedir = $(DESTDIR)/var/run
localstatedir = $(DESTDIR)/var
sharedstatedir = $(prefix)/com
sysconfdir = $(DESTDIR)/etc
datadir = $(datarootdir)
datarootdir = $(prefix)/share
libexecdir = $(exec_prefix)/libexec
sbindir = $(exec_prefix)/sbin
bindir = $(exec_prefix)/bin
archdir = $(rubyarchdir)
CC_WRAPPER =
CC = x86_64-linux-gnu-gcc
CXX = x86_64-linux-gnu-g++
LIBRUBY = $(LIBRUBY_SO)
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static $(MAINLIBS)
empty =
OUTFLAG = -o $(empty)
COUTFLAG = -o $(empty)
CSRCFLAG = $(empty)
RUBY_EXTCONF_H =
cflags = $(optflags) $(debugflags) $(warnflags)
cxxflags =
optflags = -O3
debugflags = -ggdb3
warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable
cppflags =
CCDLFLAGS = -fPIC
CFLAGS = $(CCDLFLAGS) -g -O2 -ffile-prefix-map=/build/ruby3.0-ohOwi0/ruby3.0-3.0.2=. -fstack-protector-strong -Wformat -Werror=format-security -fPIC -g -O1 -ggdb $(ARCH_FLAG)
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
DEFS =
CPPFLAGS = -Wdate-time -D_FORTIFY_SOURCE=2 $(DEFS) $(cppflags)
CXXFLAGS = $(CCDLFLAGS) -g -O2 -ffile-prefix-map=/build/ruby3.0-ohOwi0/ruby3.0-3.0.2=. -fstack-protector-strong -Wformat -Werror=format-security -std=c++11 -g -O1 -ggdb $(ARCH_FLAG)
ldflags = -L. -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic
dldflags = -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-z,now
ARCH_FLAG =
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
LDSHARED = $(CC) -shared
LDSHAREDXX = $(CXX) -shared
AR = x86_64-linux-gnu-gcc-ar
EXEEXT =
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)3.0
RUBY_SO_NAME = ruby-3.0
RUBYW_INSTALL_NAME =
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
RUBYW_BASE_NAME = rubyw
RUBY_BASE_NAME = ruby
arch = x86_64-linux-gnu
sitearch = $(arch)
ruby_version = 3.0.0
ruby = $(bindir)/$(RUBY_BASE_NAME)3.0
RUBY = $(ruby)
ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
RM = rm -f
RM_RF = $(RUBY) -run -e rm -- -rf
RMDIRS = rmdir --ignore-fail-on-non-empty -p
MAKEDIRS = /usr/bin/mkdir -p
INSTALL = /usr/bin/install -c
INSTALL_PROG = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -m 644
COPY = cp
TOUCH = exit >
#### End of system configuration section. ####
preload =
libpath = . $(archlibdir)
LIBPATH = -L. -L$(archlibdir)
DEFFILE =
CLEANFILES = mkmf.log
DISTCLEANFILES =
DISTCLEANDIRS =
extout =
extout_prefix =
target_prefix =
LOCAL_LIBS =
LIBS = $(LIBRUBYARG_SHARED) -lm -lc
ORIG_SRCS = html_tokenizer.c parser.c tokenizer.c
SRCS = $(ORIG_SRCS)
OBJS = html_tokenizer.o parser.o tokenizer.o
HDRS = $(srcdir)/html_tokenizer.h $(srcdir)/parser.h $(srcdir)/tokenizer.h
LOCAL_HDRS =
TARGET = html_tokenizer_ext
TARGET_NAME = html_tokenizer_ext
TARGET_ENTRY = Init_$(TARGET_NAME)
# mkmf originally generated "DLLIB = $(TARGET).so" (a shared library).
# The build product is now a standalone executable named "fuzzer".
DLLIB=fuzzer
EXTSTATIC =
STATIC_LIB =
TIMESTAMP_DIR = .
BINDIR = $(bindir)
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
HDRDIR = $(sitehdrdir)$(target_prefix)
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
TARGET_SO_DIR =
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
CLEANLIBS = $(TARGET_SO)
CLEANOBJS = *.o *.bak
all: $(DLLIB)
static: $(STATIC_LIB)
.PHONY: all install static install-so install-rb
.PHONY: clean clean-so clean-static clean-rb
clean-static::
clean-rb-default::
clean-rb::
clean-so::
clean: clean-so clean-static clean-rb-default clean-rb
-$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
distclean-rb-default::
distclean-rb::
distclean-so::
distclean-static::
distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
-$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
-$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
-$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
realclean: distclean
install: install-so install-rb
install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
clean-static::
-$(Q)$(RM) $(STATIC_LIB)
install-rb: pre-install-rb do-install-rb install-rb-default
install-rb-default: pre-install-rb-default do-install-rb-default
pre-install-rb: Makefile
pre-install-rb-default: Makefile
do-install-rb:
do-install-rb-default:
pre-install-rb-default:
@$(NULLCMD)
$(TIMESTAMP_DIR)/.sitearchdir.time:
$(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
$(Q) $(TOUCH) $@
site-install: site-install-so site-install-rb
site-install-so: install-so
site-install-rb: install-rb
.SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
.cc.o:
$(ECHO) compiling $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
.cc.S:
$(ECHO) translating $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
.mm.o:
$(ECHO) compiling $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
.mm.S:
$(ECHO) translating $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
.cxx.o:
$(ECHO) compiling $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
.cxx.S:
$(ECHO) translating $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
.cpp.o:
$(ECHO) compiling $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
.cpp.S:
$(ECHO) translating $(<)
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
.c.o:
$(ECHO) compiling $(<)
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
.c.S:
$(ECHO) translating $(<)
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
.m.o:
$(ECHO) compiling $(<)
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
.m.S:
$(ECHO) translating $(<)
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
# Link the extension objects into the standalone fuzzing executable.
# mkmf's original recipe used $(LDSHARED) to produce a shared object; plain
# $(CC) is used instead so the result is a runnable program with a main().
$(TARGET_SO): $(OBJS) Makefile
	$(ECHO) linking executable $(DLLIB)
	-$(Q)$(RM) $(@)
	$(Q) $(CC) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
$(OBJS): $(HDRS) $(ruby_headers)
This builds a neat little executable called “fuzzer”. Of course, I had to modify the parser.c file to include a “main” function.
I actually had a bit of trouble just figuring out how to call a method of an object, however thankfully I discovered this: https://blog.peterzhu.ca/ruby-c-ext-part-6/ . That and the entire series was quite helpful in this quest.
Here is the current harness code:
// Main fuzzer
/*
 * Minimal smoke-test harness: starts the Ruby VM, registers a throwaway
 * class "Foo" that uses the extension's parser allocator and initializer,
 * creates one instance of it and prints the resulting VALUE.
 *
 * Returns the status produced by ruby_cleanup().
 */
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  ruby_init();

  VALUE cFoo = rb_define_class("Foo", rb_cObject);
  rb_define_alloc_func(cFoo, parser_allocate);
  rb_define_method(cFoo, "initialize", parser_initialize_method, 0);

  /* Allocates the object and runs its "initialize" method (no arguments). */
  VALUE obj = rb_class_new_instance(0, NULL, cFoo);

  /* Cast so the argument type matches the %lx conversion regardless of how
   * VALUE is typedef'd. */
  printf("Object address: %lx\n", (unsigned long)obj);

  return ruby_cleanup(0);
}
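Now we just need to call a method on that object. Should be easy, right?
Sooo, rb_funcall(obj, parser_initialize_method, 0);
results in a coredump, but rb_funcall(obj, rb_intern("initialize"), 0);
seems to run fine. That makes sense: the second argument of rb_funcall is a method ID (the interned method name), not a pointer to the C function that implements the method.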
OK, so now the very last step is to try to call the parse function.
3 hours later…
Ok, so I think that I finally have a working thing. Look at this:
// Main fuzzer
#define FUZZ_LOOP_COUNT 100000
__AFL_FUZZ_INIT();
static VALUE mHtmlTokenizer = Qnil;
int main(int argc, char** argv) {
ruby_init();
__AFL_INIT();
unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
while (__AFL_LOOP(FUZZ_LOOP_COUNT)) {
int len = __AFL_FUZZ_TESTCASE_LEN;
VALUE cFoo = rb_define_class("Foo", rb_cObject);
rb_define_alloc_func(cFoo, parser_allocate);
rb_define_method(cFoo, "initialize", parser_initialize_method, 0); // One argument
// Now define the other method:
// rb_define_method(cParser, "parse", parser_parse_method, 1);
rb_define_method(cFoo, "parse", parser_parse_method, 1);
VALUE x;
//x = rb_str_new_cstr("<div>"); // Example html string.
x = rb_str_new_cstr(buf); // Create string from fuzz buffer
VALUE obj = rb_class_new_instance(0, NULL, cFoo);
rb_funcall(obj, rb_intern("initialize"), 0);
// Now try to parse.
//printf("Now trying to call parse!!!!\n");
//printf("Here is the buffer %s\n", buf);
rb_funcall(obj, rb_intern("parse"), 1, x);
//printf("Done!\n");
}
return ruby_cleanup(0);
}
This seems to work decently, so I am going to just use this. (While debugging, I actually made a quick Stack Overflow post here: https://stackoverflow.com/questions/78208606/how-to-properly-call-an-object-method-in-ruby-c-api and then I answered my own question.)
Any crashes??? Weeeellll… no. After fuzzing overnight my fuzzer found no crashes. I guess that is a good thing, because that means that this library is secure and well tested. Great!!
Well thank you for reading!