use strict; use warnings; package App::HTTP_Proxy_IMP; our $VERSION = '0.958'; use fields ( 'addr', # \@addr to listen on 'impns', # \@namespace for IMP plugins 'filter', # \@plugins to load 'logrx', # regexp for filtering log messages 'pcapdir', # dir to store pcap files of requests 'mitm_ca', # file containing cert and key of proxy cert 'capath', # path to CA to verify server cert 'no_check_certificate', # don't check server certificates 'childs', # use this number of childs ( 0 = don't fork) 'max_connect_per_child', # max number of connections before child exits ); use App::HTTP_Proxy_IMP::IMP; use App::HTTP_Proxy_IMP::Conn; use App::HTTP_Proxy_IMP::Request; use App::HTTP_Proxy_IMP::Relay; use AnyEvent; use Getopt::Long qw(:config posix_default bundling); use App::HTTP_Proxy_IMP::Debug qw(debug $DEBUG $DEBUG_RX); use Net::Inspect::Debug qw(%TRACE); use IO::Socket::SSL::Intercept; use IO::Socket::SSL::Utils; use Carp 'croak'; use POSIX '_exit'; # try IPv6 using IO::Socket::IP or IO::Socket::INET6 # fallback to IPv4 only my $sockclass; BEGIN { for(qw( IO::Socket::IP IO::Socket::INET6 IO::Socket::INET )) { if ( eval "require $_" ) { $sockclass = $_; last; } } $sockclass or die "cannot find usable socket class"; } sub new { my ($class,@args) = @_; my $self = fields::new($class); $self->{impns} = [qw(App::HTTP_Proxy_IMP::IMP Net::IMP::HTTP Net::IMP)]; %$self = ( %$self, %{ shift(@args) }) if @args && ref($args[0]); $self->getoptions(@args) if @args; return $self; } sub start { my $self = shift; $self = $self->new(@_) or return if ! ref($self); # package->start my $pcapdir = $self->{pcapdir}; if ( $pcapdir ) { croak("pcap directory not writeable") unless -d $pcapdir && -w _; eval { require Net::PcapWriter } or croak( "cannot load Net::PcapWriter, which is needed with --pcapdir option"); } my $mitm; if ( my $f = $self->{mitm_ca} ) { my $serial = 1; my $cache = {}; my $cachedir = "$f.cache"; if ( -d $cachedir || mkdir($cachedir,0700)) { for my $f (glob("$cachedir/*.pem")) { -f $f && -r _ && -s _ or next; my $time = (stat(_))[9]; my $key = PEM_file2key($f) or next; my $cert = PEM_file2cert($f) or next; my $sn = CERT_asHash($cert)->{serial}; $serial = $sn+1 if $sn>=$serial; my ($id) = $f=~m{/([^/]+)\.pem$}; $cache->{$id} = { cert => $cert, key => $key, atime => $time, }; debug("loaded certificate id=$id from cache"); } my $cache_hash = $cache; $cache = sub { my $id = shift; my $e; if ( ! @_ ){ # get $e = $cache_hash->{$id} or return; } else { my ($cert,$key) = @_; $e = $cache_hash->{$id} = { cert => $cert, key => $key, }; } my $f = "$cachedir/$id.pem"; if ( @_ || ! -f $f and open( my $fh,">",$f )) { debug("save mitm certificate and key to $cachedir/$id.pem"); print $fh PEM_cert2string($e->{cert}), PEM_key2string($e->{key}) } else { utime(undef,undef,$f); } $e->{atime} = time(); return ($e->{cert},$e->{key}); }; } $mitm = IO::Socket::SSL::Intercept->new( proxy_cert_file => $f, proxy_key_file => $f, cache => $cache, serial => $serial, ); } my $imp_factory; my $filter = $self->{filter}; if ($filter && @$filter ) { my $ns = $self->{impns}; my @mod; my $ev = App::HTTP_Proxy_IMP::EventLoop->new; for my $f (@$filter) { if ( ref($f) ) { # already factory object push @mod,$f; next; } my $f = $f; # copy my $args = $f =~s{=(.*)}{} && $1; my $found; for my $prefix ('', map { "${_}::" } @$ns) { my $mod = $prefix.$f; if ( eval "require $mod" ) { $found = $mod; last; } } croak("IMP module $f could not be loaded: $@") if ! $found; my %args = $args ? $found->str2cfg($args) :(); my @err = $found->validate_cfg(%args); die "bad config for $found: @err" if @err; push @mod, $found->new_factory(%args, eventlib => $ev ) } my $logsub = $self->{logrx} && do { my $rx = $self->{logrx}; sub { my ($level,$msg,$dir,$off,$len) = @_; $level =~ $rx or return; print STDERR "[$level]($dir:$off,$len) $msg\n"; }; }; $imp_factory = App::HTTP_Proxy_IMP::IMP->new_factory( mod => \@mod, logsub => $logsub, ); } if ( $self->{childs} ) { $self->{childs} = [ map { undef } (1..$self->{childs}) ]; } my $capath; if ( ! $mitm ) { # no interception = no certificate checking } elsif ( $self->{no_check_certificate} ) { # no certificate checking } elsif ( $capath = $self->{capath} ) { # use this capath } else { # try to guess capath if ( eval { require Mozilla::CA } ) { $capath = Mozilla::CA::SSL_ca_file(); } elsif ( glob("/etc/ssl/certs/*.pem") ) { $capath = "/etc/ssl/certs"; } elsif ( -f "/etc/ssl/certs.pem" && -r _ && -s _ ) { $capath = "/etc/ssl/certs.pem"; } else { croak "cannot determine CA path, needed for SSL interception" } } # create connection fabric, attach request handling my $req = App::HTTP_Proxy_IMP::Request->new; my $conn = App::HTTP_Proxy_IMP::Conn->new($req, pcapdir => $pcapdir, mitm => $mitm, capath => $capath, imp_factory => $imp_factory ); # create listeners my @listen; $self->{addr} = [ $self->{addr} ] if $self->{addr} && ref($self->{addr}) ne 'ARRAY'; for my $spec (@{$self->{addr}}) { my ($addr,$upstream) = ref($spec) eq 'ARRAY' ? @$spec: ref($spec) ? ( $spec,undef ): split('=',$spec,2); my $srv; if ( ref($addr)) { # listing socket already $srv = $addr; (my $port,$addr) = AnyEvent::Socket::unpack_sockaddr( getsockname($srv)); $addr = AnyEvent::Socket::format_address($addr); $addr = $addr =~m{:} ? "[$addr]:$port" : "$addr:$port"; } else { $srv = $sockclass->new( LocalAddr => $addr, Listen => 10, ReuseAddr => 1, ) or croak("cannot listen to $addr: $!"); } $spec = [ $addr,$upstream,$srv ]; push @listen, AnyEvent->io( fh => $srv, poll => 'r', cb => sub { my $cl = $srv->accept or return; debug("new request from %s:%s on %s",$cl->peerhost,$cl->peerport,$addr); if ( $self->{max_connect_per_child}>0 and 0 == --$self->{max_connect_per_child} ) { # last connection for child # fork-away and handle outstanding connections, parent will # in the meantime fork a replacement child defined( my $pid = fork()) or die "failed to fork: $!"; if ( $pid ) { $DEBUG && debug("forked away child $$ as $pid"); _exit(0); } else { $0 =~s{\Q[worker]}{[death-trip]}; undef @listen; # only handle outstanding connections App::HTTP_Proxy_IMP::Relay->exit_if_no_relays(1); $DEBUG && debug( "forked away child $$ to handle last connections"); } } App::HTTP_Proxy_IMP::Relay->new($cl,$upstream,$conn); } ); debug("listening on $addr"); } $self->{max_connect_per_child} = 0 if ! $self->{childs}; return 1 if defined wantarray; $self->loop; } sub DESTROY { my $self = shift; ref(my $ch = delete $self->{childs}) or return; kill 9, grep { $_ } @$ch; } { my $loop; my @once; sub once { shift; push @once, shift; $loop->send if $loop; } sub loop { my $self = shift; return $self->parent_loop if $self->{childs}; my $usr2 = AnyEvent->signal( signal => 'USR2', cb => sub { my $was_debug = $DEBUG; $DEBUG = 1; debug("($$) ".( $was_debug ? 'disable':'enable' ) ." debugging"); $DEBUG = ! $was_debug; }); # on SIGUSR1 dump state of all relays my $usr1 = AnyEvent->signal( signal => 'USR1', cb => sub { # temporaly enable debugging, even if off my $msg = "-------- active relays ------------------\n"; my @relays = App::HTTP_Proxy_IMP::Relay->relays; if ( ! @relays ) { $msg .= " * NO RELAYS\n" } else { $msg .= $_->dump_state."\n" for(@relays); } $msg .= "-------- active relays ------------------\n"; my $od = $DEBUG; $DEBUG = 1; debug($msg); $DEBUG = $od; }); while (1) { shift(@once)->() while (@once); $loop = AnyEvent->condvar; $loop->recv; } } # parent mainloop: keep children running sub parent_loop { my $self = shift; $DEBUG && debug("parent $$"); $SIG{USR1} = sub { my @pid = grep { $_ } @{$self->{childs}} or return; debug("propagating USR1 to @pid"); kill 'USR1', @pid; }; $SIG{USR2} = sub { my @pid = grep { $_ } @{$self->{childs}} or return; my $was_debug = $DEBUG; $DEBUG = 1; debug("propagating USR2 to @pid"); kill 'USR2', @pid; $DEBUG = ! $was_debug; }; while ( my $ch = $self->{childs} ) { # check if anything needs to be started for(@$ch) { $_ and next; # child is up # start new child defined( my $pid = fork()) or do { warn "fork failed: $!"; sleep(1); next; }; if ( $pid == 0 ) { # child $0 = "[worker] $0"; $self->{childs} = undef; return $self->loop; } $_ = $pid; $DEBUG && debug("(re)starting child, pid=$pid"); } # wait for child exit my $pid = waitpid(-1,0) or next; $DEBUG && debug("child $pid exit with code ".($?>>8)); my $ch = $self->{childs} or return; for(@$ch) { $_ = undef,last if $_ == $pid } } } } sub getoptions { my $self = shift; local @ARGV = @_; GetOptions( 'h|help' => sub { usage() }, 'P|pcapdir=s' => \$self->{pcapdir}, 'mitm-ca=s' => \$self->{mitm_ca}, 'capath=s' => \$self->{capath}, 'no-check-certificate=s' => \$self->{no_check_certificate}, 'C|childs=i' => \$self->{childs}, 'M|maxconn=i' => \$self->{max_connect_per_child}, 'F|filter=s' => sub { if ($_[1] eq '-') { # discard all previously defined @{$self->{filter}} = (); } else { push @{$self->{filter}}, $_[1] } }, 'imp-ns=s' => sub { if ($_[1] eq '-') { # discard all previously defined @{$self->{impns}} = (); } else { push @{$self->{impns}}, $_[1] } }, 'l|log:s' => sub { $self->{logrx} = $_[1] ? eval { qr/$_[1]/ } || "bad rx $_[1]" : qr/./; }, 'd|debug:s' => sub { $DEBUG = 1; if ($_[1]) { my $rx = eval { qr{$_[1]} }; croak("invalid regex '$_[1]' for debugging: $@") if ! $rx; $DEBUG_RX = $rx; } }, 'T|trace=s' => sub { $TRACE{$_} = 1 for split(m/,/,$_[1]) }, ); my @addr = @ARGV; $self->{logrx} //= qr/./; $self->{addr} or @addr or usage("no listener given"); $self->{addr} = \@addr; 1; } sub usage { my ($msg,$cmd) = @_; $cmd ||= $0; print STDERR "ERROR: $msg\n" if $msg; print STDERR <1) or just make sure, that child gets restarted on errors (N=1) -M|--maxconn N child will exit (and gets restarted) after N connections -F|--filter F add named IMP plugin as filter, can be used multiple times with --filter mod=args arguments can be given to the filter --imp-ns N perl module namespace, were it will look for IMP plugins. Can be given multiple times. Plugins outside these namespace need to be given with full name. Defaults to App::HTTP_Proxy_IMP, Net::IMP -l|--log [rx] print log messages where category matches rx (default all) # options intended for development and debugging: -P|--pcapdir D save connections as pcap files into D, needs Net::PcapWriter -d|--debug [RX] debug mode, if RX is given restricts debugging to packages matching RX -T|--trace T enable Net::Inspect traces Examples: start proxy at 127.0.0.1:8888 and log all requests to /tmp as pcap files $cmd --filter Net::IMP::SessionLog=dir=/tmp/&format=pcap 127.0.0.1:8888 start proxy at 127.0.0.1:8888 and log all form fields $cmd --filter LogFormData 127.0.0.1:8888 start proxy at 127.0.0.1:8888 with CSRF protection plugin $cmd --filter CSRFprotect 127.0.0.1:8888 start proxy at 127.0.0.1:8888 with CSRF protection plugin, using upstream proxy proxy:8888 $cmd --filter CSRFprotect 127.0.0.1:8888=proxy:8888 USAGE exit(2); } ############################################################################ # AnyEvent wrapper to privide Net::IMP::Remote etc with acccess to # IO events ############################################################################ package App::HTTP_Proxy_IMP::EventLoop; sub new { bless {},shift } { my %watchr; sub onread { my ($self,$fh,$cb) = @_; defined( my $fn = fileno($fh)) or die "invalid filehandle"; if ( $cb ) { $watchr{$fn} = AnyEvent->io( fh => $fh, cb => $cb, poll => 'r' ); } else { undef $watchr{$fn}; } } } { my %watchw; sub onwrite { my ($self,$fh,$cb) = @_; defined( my $fn = fileno($fh)) or die "invalid filehandle"; if ( $cb ) { $watchw{$fn} = AnyEvent->io( fh => $fh, cb => $cb, poll => 'w' ); } else { undef $watchw{$fn}; } } } sub now { return AnyEvent->now } sub timer { my ($self,$after,$cb,$interval) = @_; return AnyEvent->timer( after => $after, cb => $cb, $interval ? ( interval => $interval ):() ); } 1; __END__ =head1 NAME App::HTTP_Proxy_IMP - HTTP proxy with the ability to inspect and modify content =head1 SYNOPSIS # only use cmdline args App::HTTP_Proxy_IMP->new(@ARGV)->start; # only use given args App::HTTP_Proxy_IMP->new(\%options)->start; # combine cmdline args with given defaults App::HTTP_Proxy_IMP->new(\%options,@ARGV)->start; # short for App::HTTP_Proxy_IMP->new(...)->start; App::HTTP_Proxy_IMP->start(...); # show cmdline usage App::HTTP_Proxy_IMP->usage(); =head1 DESCRIPTION App::HTTP_Proxy_IMP implements an HTTP proxy, which can inspect and modify the HTTP header or content before forwarding. Inspection and modification is done with plugins implementing the L interface. The proxy is single-threaded and non-forking, but due to the event-driven model it can still process multiple connections in parallel. It is mainly intended to be used as a platform for easy prototyping of interesting ideas using IMP plugins, but should be also fast enough to be used to enhance, secure, restrict or protocol the browsing experience for small groups. =head2 Public Methods =over 4 =item * new([\%OPTIONS],[@ARGV]) Creates a new object. The first argument might be an hash reference with options. All other arguments will be used as ARGV for cmdline parsing and might result in overwriting the defaults from OPTIONS. The following options and its matching cmdline arguments are defined: =over 8 =item filter ARRAY | -F|--filter mod List of IMP filters, which should be used for inspection and modification. These can be a fully qualified name, or a short name, which need to be combined with one of the given namespace prefixes to get the full name. It can also be already an IMP factory object. The cmdline option can be given multiple times. If '-' is given as name on the cmdline all previously defined filters are discarded. =item impns ARRAY | --imp-ns prefix Namespace prefixes to make adding filters from cmdline shorter. Defaults to L, L. The cmdline option can be given multiple times. If '-' is given at cmdline all previously defined prefixes (including defaults) are discarded. =item addr [spec+] Array of listener/upstream specifications for proxy. Each specification can be =over 12 =item ip:port - address where the proxy should listen =item ip:port=target_ip:port - listener address and upstream proxy address =item socket - precreated listener socket =item [ socket, target_ip:port ] - precreated listener socket and address of upstream proxy =back On the cmdline these are given as the remaining arguments, e.g. after all other options. =item mitm_ca proxy_ca.pem When this parameter is given it will intercept SSL connections by ending the connection from the server at the proxy and creating a new connection with a new certificate signed by the given ca.pem (e.g. man in the middle). Thus it will be able to analyse and manipulate encrypted connections. ca.pem needs to include the CA cert and key in PEM format. Also you better import the CA certificate into your browser, or you will get warnings on access to SSL sites, because of the correctly detected man in the middle attack. To generate the proxy certificate you might use openssl: openssl genrsa -out key.pem 1024 openssl req -new -x509 -extensions v3_ca -key key.pem -out proxy_ca.pem cat key.pem >> proxy_ca.pem # export to PKCS12 for import into browser openssl pkcs12 -export -in proxy_ca.pem -inkey proxy_ca.pem -out proxy_ca.p12 It will try to create the directory proxy_ca.pem.cache and use it as a cache for generated cloned certificates. If this is not possible the cloned certificates will persist over restarts of the proxy. =item capath certs.pem | cert-dir The path (file with certificates or directory) with the CAs, which are used to verify SSL certificates when doing SSL interception. If not given it will check initially for various path, starting with using Mozilla::CA, trying /etc/ssl/certs and /etc/ssl/certs.pem before giving up and exiting. =item no_check_certificate If true disables checking of SSL certificates when doing SSL interception. =item childs N If N>0 it will fork N children to handle the requests. Whenever a child exits it will be restarted immediatly. This can be used to spread the load over multiple processors or to keep the proxy running, even if a child crashed. =item max_connect_per_child N | --maxconn N option If N>0 the child will fork itself after N connections to handle the unfinished connections. The parent will immediatly restart the child. If options childs is not greater than 0 this option will be ignored. This option is useful if the childs are leaking memory due to bad IMP plugins. =back The following options are only for the cmdline =over 8 =item -d|--debug [RX] Enable debugging. If RX is given it will be used as a regular expression to restrict debugging to given packages. Outside the cmdline these settings can be done by setting C<$DEBUG> and C<$DEBUG_RX> exported by L. =item -T|--trace T Enable tracing for L modules. Outside the cmdline these settings can be done by setting C<%TRACE> from the L package. =back =item * start Start the proxy, e.g. start listeners and process incoming connections. No arguments are expected if called on an object, but one can use the form C<< App::HTTP_Proxy_IMP->start(@args) >> as a shorter alternative to C<< App::HTTP_Proxy_IMP->new(@args)->start >>. If no return value is expected from this method it will enter into an endless loop by calling C. If a value is expected it will return 1, and the caller hast to call C itself. =item * loop Class method, which calls AnyEvent mainloop using C<< AnyEvent->condvar->recv >> and handles all callback send by C. =item * once Sometimes it is necessary to let the current event handler finish, before calling some specific action. The class method C schedules a subroutine to be called outside any other event handlers. =back =head2 Reaction to Signals The installs some signal handlers: =over 4 =item SIGUSR1 Dump current state to STDERR, e.g. active connections and their state. =item SIGUSR2 Toggles debugging (e.g. enable|disable). =back =head1 AUTHOR Steffen Ullrich =head1 COPYRIGHT Copyright 2012,2013 Steffen Ullrich. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.