#-*- perl -*- vim:set ft=text:
# Last modified 070414
#
# Filters start with a line "FILTER: name description". They can then
# be referenced from the actions file with +filter{name}.
#
# Inside the filters, write one Perl-Style substitution (job) per line.
# Jobs that precede the first FILTER: line are ignored. Matching is
# by default case-insensitive.
#
# For details see the 'pcrs' manpage contained in this distribution,
# as well as the perlre(1), perlop(1) and pcre(3) manpages.
#
# Note that you are free to choose the delimiter as you see fit.
#
# Note: In addition to the Perl options gimsx, the following nonstandard
# options are supported:
#
# 'U' turns the default to ungreedy matching. Add ? to quantifiers to
# switch back to greedy.
# 'T' (trivial) prevents parsing for backreferences in the substitute.
# Use if you want to include text like '$&' in your substitute without
# quoting.
# For readability most filters have option 'x' (extended, multiline
# patterns) enabled. This means that in the first operand all regular
# white space just indicates formatting and is ignored. But white space
# in the second operand is actually meaningful.
FILTER: decode-urls convert encoded URL bits back to normal characters
# Each job rewrites one percent-escape found inside a quoted href value;
# $1 is the href text that precedes the escape.
# %25 (the percent sign itself) is decoded last so that an escaped
# escape such as %2526 becomes the literal text %26 instead of being
# decoded a second time by one of the other jobs.
s¬(href=['"][^'"]*)%20¬$1 ¬gU
s¬(href=['"][^'"]*)%26¬$1&¬gU
s¬(href=['"][^'"]*)%2F¬$1/¬gU
s¬(href=['"][^'"]*)%3A¬$1:¬gU
s¬(href=['"][^'"]*)%3D¬$1=¬gU
s¬(href=['"][^'"]*)%3F¬$1?¬gU
s¬(href=['"][^'"]*)%25¬$1%¬gU
FILTER: frameset-borders Give frames a border and make them resizable
# Each job captures the text leading up to one attribute in $1 and
# re-emits it with the attribute dropped (noresize) or with its no/0
# value replaced (scrolling -> auto; frameborder, framespacing -> yes).
# $2 is the optional quote character around the value.
# NOTE(review): every pattern opens its first group with a bare "]*)",
# i.e. a "]" that has no matching "[". The leading tag match appears to
# have been lost -- verify against the original file before relying on
# this filter.
s¬( ]*)noresize \
¬$1¬xgU
s¬( ]*)\bscrolling=(['"]?)(no|0)\2 \
¬$1scrolling=$2auto$2¬xgU
s¬( ]*)\bframeborder=(['"]?)(no|0)\2 \
¬$1frameborder=$2yes$2¬xgU
# NOTE(review): the "\2" in the next substitute sits between two $2
# references; presumably it is an escaped literal digit 2 (a border
# width) rather than a backreference -- confirm.
s¬(
]*)\bborder=(['"]?)(no|0)\2 \
¬$1border=$2\2$2¬xgU
s¬(]*)\bframespacing=(['"]?)(no|0)\2 \
¬$1framespacing=$2yes$2¬xgU
s¬(]*)\bframeborder=(['"]?)(no|0)\2 \
¬$1frameborder=$2yes$2¬xgU
FILTER: html-annoyances Get rid of particularly annoying HTML abuse
# NOTE(review): the next two patterns contain no tag name at all (the
# first is an empty alternative or a blank, the second only optional
# white space); the tag text appears to be missing -- compare with the
# original file.
# The tag was a crime!
s¬| ¬¬ig
# Word break confuses some browsers
s¬\s*\s*¬¬ig
# Widths/heights should not be absolute
# Drops one numeric width or height attribute (optionally quoted,
# optionally followed by "%") from a table, tr or td tag; $1 is the tag
# name and $2 the attributes that precede the removed one.
s¬<\s*(table|tr|td)\b([^>]*?)\s+(?:width|height)\s*=\s*"*[0-9]+%*"* \
¬<$1$2¬xg
# New browser windows (if allowed -- see no-popups filter below) should be
# resizeable and have a location and status bar
# Each job turns a no/0 value of the named window feature into auto
# (scrolling) or yes (all others), keeping the optional quote in $2.
# NOTE(review): the first group of each pattern starts with a bare "]+",
# a "]" without a matching "["; the tag prefix appears to be missing.
s¬(]+scrolling=)(['"]?)(?:no|0)\2 ¬$1$2auto$2¬xgU
s¬( ]+status=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬( ]+resizable=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬( ]+menubar=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬( ]+toolbar=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬( ]+titlebar=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬( ]+location=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
FILTER: js-annoyances Get rid of particularly annoying JavaScript abuse
# NOTE(review): the note below breaks off mid-sentence and is followed
# by a stray ") \" and a substitute that has no visible pattern. Text
# appears to be missing here -- compare with the original file.
# Note: Most of these jobs would be safer if restricted to a
# ) \
¬$1never¬xsigU
# If we allow window.open, we want normal window features:
# Test: http://www.htmlgoodies.com/beyond/notitle.html
# Each job looks inside the argument list of an open( call, up to the
# named feature, and rewrites that feature's value. $1 is everything up
# to and including "feature=", $2 the optional quote around the value.
#
# Features that are switched off when a page turns them on:
s¬(open\s*\([^)\\>]+alwayslowered=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+alwaysraised=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+fullscreen=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+hotkeys=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+zlock=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
# Features that are switched back on when a page turns them off:
s¬(open\s*\([^)\\>]+scrollbars=)(["']?)(?:no|0)\2 ¬$1$2auto$2¬xsigU
s¬(open\s*\([^)\\>]+scrolling=)(["']?)(?:no|0)\2 ¬$1$2auto$2¬xsigU
s¬(open\s*\([^)\\>]+directories=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+resizable=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+status=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+location=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+menubar=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
# titlebar used to match yes|1 and write "yes" back, which changed
# nothing; it now matches no|0 like the other re-enabled features.
s¬(open\s*\([^)\\>]+titlebar=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+toolbar=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
FILTER: content-cookies Kill cookies that come in the HTML or JS content
# JS cookies, like found on privacy.net:
# Replaces "document.cookie" with a dummy text whenever it is followed
# by optional white space and "="; the lookahead leaves the "=" itself
# in place.
s¬document\.cookie(?=[ \t\r\n]*=) \
¬Privoxy cookie zapped¬xg
# HTML cookies:
# NOTE(review): with 'x' ignoring the blanks, the pattern below is just
# "]*>", which matches any ">" character. The tag-matching part appears
# to be missing -- verify against the original file.
s¬ ]*> \
¬¬xgU
FILTER: webbugs Squish WebBugs (1x1 invisible GIFs used for user tracking)
# Deletes a match that contains two width/height attributes whose value
# is 1 (the lookahead requires a non-digit after the 1), followed by the
# rest of the tag up to ">".
# NOTE(review): the pattern starts with a bare "]*"; the opening of the
# tag match appears to be missing -- verify against the original file.
s¬ ]* \
(?:width|height)\s*=\s*['"]?1(?=\D) [^>]* \
(?:width|height)\s*=\s*['"]?1(?=\D) [^>]*? \
> \
¬¬xsiUg
FILTER: popups Kill all popups in JS and HTML
# JS
# Rewrites "open(" -- when preceded by a newline, blank, "=", ";", "{",
# "}" or "window." -- into "concat(". An optional backslash before the
# parenthesis is also consumed; the preceding text is kept via $1.
s¬([\n =;{}]|window\.)open\s*\\?\( \
¬$1concat(¬xg
# HTML
# Replaces target attributes with the value _blank or _new (optionally
# quoted) by the bare word "notarget".
s¬\btarget\s*=\s*(['"]?)(_blank|_new)\1? \
¬notarget¬xg
FILTER: refresh-tags Kill automatic refresh tags (for dial-on-demand setups)
# Note: Only deactivates refreshes with more than 9 seconds delay to
# preserve monster-stupid but common redirections via meta tags.
#
# NOTE(review): the pattern below starts in the middle of a character
# class ("\2]*))?") and closes groups that are never opened, and the
# substitute is a single blank. The beginning of the pattern and
# possibly the substitute appear to be missing -- verify against the
# original file.
s¬ \2]*))?\2\s* \
> \
¬ ¬xU
FILTER: nimda Remove Nimda (virus) code
# Intended to replace the Nimda code with a visible warning text.
# NOTE(review): the pattern operand is empty apart from white space,
# and the warning text may have lost markup around it -- verify against
# the original file.
s¬ \
¬ \
WARNING: This Server is infected with \
Nimda !\
¬xg
FILTER: notrack-google Remove the tracking from Google.com results
# The first three jobs replace a Google redirect link with the target
# taken from its q= parameter. The dots in the host name are escaped so
# that they match only a literal "." and not any character.
#
# url=... redirects: keep only the q= value ($2).
s¬url=http://www\.google\.(com|ca|co\.uk)/[^ ]*[&]q=([^&]+)[&][^ ]* \
¬$2¬xg
# Absolute href redirects: point the link at the q= value ($2).
s¬href="?http://www\.google\.(com|ca|co\.uk)/[^ ]*[&]q=([^&">]+)"? \
¬href="$2"¬xg
# Relative /url?sa=... redirects: point the link at the q= value ($1).
s¬href="?/url\?sa=[^>]+[&]q=([^&>]+)[&]e=[^>"]+"? \
¬href="$1"¬xg
# Undo the percent-encoding of "?" and "%" (applied to the whole
# document, with "%" decoded last).
s¬%3F¬?¬g
s¬%25¬%¬g
FILTER: notrack-googlei Remove the tracking from images.Google.com results
# Pointless because the whole page is JavaScript now.
# (The link-rewriting job is therefore left commented out.)
#s¬href="?/imgres\?imgurl=[^>&]+[&]imgrefurl=([^>&]+)[&][hw]=[^>"]+"? \
# ¬href="$1"¬xg
# The only active jobs decode %3F to "?" and %25 to "%" everywhere in
# the document.
s¬%3F¬?¬g
s¬%25¬%¬g
FILTER: unsuck-groups Try to fix the awful horror of Google Groups
# Google Groups in particular also uses extraordinarily annoying
# 1-pixel clear GIFs with very long heights to ''format'' the results.
# Removes the src attribute pointing at dot_clear.gif (with or without
# a leading /groups).
s¬src="(/groups)?/img/dot_clear\.gif" \
¬¬xg
# Removes every run of width/height/border/alt/align/id attributes
# whose value consists only of digits, lowercase letters and "_".
s¬(\s+(width|height|border|alt|align|id)="?[0-9a-z_]*"?)+ \
¬¬xg
# NOTE(review): the remaining jobs look truncated. Two have patterns
# that are empty apart from white space, two are only "]*>" (which
# matches any ">"), and the last one starts with a "?" that has nothing
# to quantify. Verify against the original file before enabling this
# filter.
s¬ \
¬¬xg
s¬ ¬¬xg
s¬ ]*> ¬¬xg
s¬]*> ¬ ¬xg
s¬?iframe \s*[^>]*> ¬¬xg
# And a few tons of resizing JavaScript. Keep it simple...
# Every parameterless "function name() {" is turned into an empty
# function of the same name, and the original body is attached to a new
# function called nameNoop. The function name is inserted with $1: in a
# pcrs substitute a backslash only quotes the next character, so "\1"
# would emit a literal "1" instead of the captured name.
s¬function \s+([a-zA-Z_]+)\(\) \s+{ \
¬function $1() { }\n \
function $1Noop () {¬xg
FILTER: notrack-ask Remove the tracking bits from Ask/Teoma results
# Strip the status-bar mouse handlers. The parentheses of ss('...') and
# cs() are escaped so that they match the literal characters; unescaped
# they would be regex groups and the pattern could never match the
# attribute text. This job has no 'x' option, so blanks are literal.
s¬onmouseover="return ss\('Go to [^']*'\)" onmouseout="cs\(\)"¬¬g
# Replace the redirect link by its real target, taken from the u=
# parameter ($2).
s¬href="http://[a-z.]*\.(ask|teoma)\.com/r\?[^"]*u=([^"]*)" \
¬href="$2"¬xg
FILTER: notrack-yahoo Remove the tracking bits from search.Yahoo.com results
# Both jobs cut the rds.yahoo.com redirect prefix off a link, keeping
# the optional opening quote ($1) and the target's scheme ($2). The
# separator after the scheme may be a plain ":" or its encoded form
# "%3A".
# Yahoo search
s¬href=(['"]?)http://rds\.yahoo\.com/S=[0-9]+/K=[^*]+ \
/\*-(http|https|ftp)(:|%3A)// \
¬href=$1$2://¬xg
# Yahoo directory
s¬href=(['"]?)http://rds\.yahoo\.com/S=[^*]+ \
/\*(http|https|ftp)(:|%3A)// \
¬href=$1$2://¬xg
FILTER: notrack-av Remove the tracking bits from Altavista results
# Cuts the av.rds.yahoo.com redirect prefix (everything up to "**") off
# a link, keeping the optional opening quote ($1) and the target's
# scheme ($2). The separator after the scheme may be ":" or "%3A"/"%3a".
s¬href=(['"])?http://av\.rds\.yahoo\.com/[^*]*\*\*(http|https|ftp)(:|%3[Aa])// \
¬href=$1$2://¬xg
FILTER: notrack-a9 remove the tracking bits from A9.com results
# Drops the dragSource attribute and replaces the a9-redirect.jsp link
# by the value of its url= parameter ($1).
s¬dragSource="dragRedirLinkCB"\s+ \
href="/-/search/a9-redirect\.jsp\?url=([^&"]+)[^"]*" \
¬href="$1"¬xg
# Decodes the "http%3A%2F%2Fhost%2F" start of such a link back to
# "http://host/"; $1 is the host name.
s¬href="http%3A%2F%2F([-.a-zA-Z0-9]+)%2F \
¬href="http://$1/¬xg
FILTER: nostitials Remove interstitials from FastClick and others
# Deletes onClick handlers of the form
# onClick="this.href=FCxxx(this.href);" including leading white space.
# No 'x' option here, so blanks in the pattern are literal.
s¬\s*onClick\s*=\s*"this\.href\s*=\s*FC[a-zA-Z0-9]*\(this\.href\);*"¬¬g
FILTER: hon-nonew Defeat annoying HotOrNot force rating
# Strips the pickNew/state/votee/vt/kwMode parameters from meetme
# links, leaving only the base URL followed by "?".
s¬http://meetme\.hotornot\.com/\?pickNew=1\&state=vote& \
votee=[0-9]*&vt=[0-9]*\&kwMode=[01]\& \
¬http://meetme.hotornot.com/?¬xgU
# Remove a single blank between two adjacent tags. The blank is written
# as "[ ]" because option 'x' ignores plain white space in the pattern;
# a bare "> <" would be read as "><" and replaced by itself.
s¬>[ ]< \
¬><¬xgU
FILTER: shockwave-flash Kill embedded Shockwave Flash objects
# Deletes a match that contains "application/x-shockwave-flash".
# NOTE(review): the pattern starts with a bare "]*" and ends in an
# open-ended ".*"; the opening tag match and probably the closing part
# appear to be missing -- verify against the original file.
s¬]*application/x-shockwave-flash.* \
¬¬xsigU
FILTER: quicktime-kioskmode Make Quicktime movies saveable
# Drops a kioskmode=true attribute (optionally quoted) and keeps the
# preceding text captured in $1.
# NOTE(review): the first group consists only of "]*", a "]" without a
# matching "["; the tag match appears to be missing -- verify against
# the original file.
s¬(]*)kioskmode\s*=\s*(["']?)true\2 \
¬$1¬xg
FILTER: js-events Kill all JS event bindings [really extreme]
# Replaces "on" or "event." followed by one of the listed event names
# (mouseover/out/down/up/move, load, unload, contextmenu, selectstart)
# with the word "never".
s¬(on|event\.) \
((mouse(over|out|down|up|move))|(un)?load|contextmenu|selectstart) \
¬never¬xg
# Not events, but abused on the same type of sites:
# Rewrites "alert(" and "confirm(" into "concat(".
s¬(alert|confirm)\s*\( \
¬concat(¬xg
FILTER: img-reorder Reorder attributes in tags to make the banners-by-* filters more effective
# In the first step src is moved to the start, then width is moved to the
# second place to guarantee an order of src, width, height.
# This makes banners-by-size more effective and allows both banners-by-size
# and banners-by-link to preserve the original image URL in the alt attribute.
# NOTE(review): each pattern begins with "]*)", which closes a group
# that is never opened, and each substitute is only a blank. The start
# of every pattern and the substitutes appear to be missing -- verify
# against the original file.
s¬ ]*)src\s*=\s*([^'">\\\s]+?)([^\\>]*)> \
¬ ¬xsiUg
s¬ ]*)src\s*=\s*(['"])([^ >\\\2]+)\2(.*)> \
¬ ¬xsiUg
s¬ ]*)width\s*=\s*?(["']?)(\d+?)\3(.*)> \
¬ ¬xsiUg
FILTER: banners-by-size Kill banners by size
# 234*60, 468*60 (most banners!)
# Matches a width of 234 or 468 followed by a height of 60 within one
# tag and replaces the match with the substitute (a single blank as
# written).
# NOTE(review): the backreferences \3 and \5 imply two capture groups
# ahead of the width group, but the pattern starts with a bare "]*".
# The beginning of the pattern, and possibly the substitute, appear to
# be missing -- verify against the original file.
s¬ ]* \
(width=(['"]?)(?:234|468)\3) [^>]* \
(height=(['"]?)60\5)[^>]* \
> \
¬ ¬xsigU
FILTER: banners-by-size-many Kill banners by size, many sizes
# 88*31, 120*60, 120*90, 120*240, 120*600, 125*125, 160*600,
# 180*150, 240*400, 250*250, 300*250, 336*280
# One job per width: each matches the given width followed by one of
# the listed heights within one tag and replaces the match with the
# substitute (a single blank as written).
# NOTE(review): the backreferences \3 and \5 imply two capture groups
# ahead of the width group, but every pattern starts with a bare "]*".
# The beginning of each pattern, and possibly the substitutes, appear
# to be missing -- verify against the original file.
#
# 88*31
s¬ ]* \
(width=(['"]?)88\3) [^>]* \
(height=(['"]?)31\5) [^>]* \
> \
¬ ¬xsigU
# 120*60, 120*600, 120*90, 120*240
s¬ ]* \
(width=(['"]?)120\3) [^>]* \
(height=(['"]?)(?:600?|90|240)\5) [^>]* \
> \
¬ ¬xsigU
# 125*125
s¬ ]* \
(width=(['"]?)125\3) [^>]* \
(height=(['"]?)125\5) [^>]* \
> \
¬ ¬xsigU
# 160*600
s¬ ]* \
(width=(['"]?)160\3) [^>]* \
(height=(['"]?)600\5)[^>]* \
> \
¬ ¬xsigU
# 180*150
s¬ ]* \
(width=(['"]?)180\3) [^>]* \
(height=(['"]?)150\5)[^>]* \
> \
¬ ¬xsigU
# 240*400
s¬ ]* \
(width=(['"]?)240\3) [^>]* \
(height=(['"]?)400\5) [^>]* \
> \
¬ ¬xsigU
# 250*250, 300*250
s¬ ]* \
(width=(['"]?)(?:250|300)\3) [^>]* \
(height=(['"]?)250\5) [^>]* \
> \
¬ ¬xsigU
# 336*280
s¬ ]* \
(width=(['"]?)336\3) [^>]* \
(height=(['"]?)280\5) [^>]* \
> \
¬ ¬xsigU