#-*- perl -*- vim:set ft=text:
# Last modified 070414
#
# Filters start with a line "FILTER: name description". They are then
# referable from the actionsfile with +filter{name}
#
# Inside the filters, write one Perl-Style substitution (job) per line.
# Jobs that precede the first FILTER: line are ignored. Matching is
# by default case-insensitive.
#
# For details see the 'pcrs' manpage contained in this distribution,
# and the perlre(1), perlop(1) and pcre(3) manpages.
#
# Note that you are free to choose the delimiter as you see fit.
#
# Note: In addition to the Perl options gimsx, the following nonstandard
# options are supported:
#
# 'U' turns the default to ungreedy matching. Add ? to quantifiers to
#     switch back to greedy.
# 'T' (trivial) prevents parsing for backreferences in the substitute.
#     Use if you want to include text like '$&' in your substitute without
#     quoting.
#
# For readability most filters have option 'x' (multiline patterns)
# enabled; this means that in the first operand all regular white space
# just indicates formatting and is ignored. But white space in the
# second operand is actually meaningful.
#
# Layout used below: one job per line; a trailing backslash continues the
# job on the next line.

FILTER: decode-urls convert encoded URL bits back to normal characters

# Each job decodes one %XX escape inside a quoted href value. These jobs do
# not use 'x', so the blank after $1 in the first substitute is the decoded
# space character.
s¬(href=['"][^'"]*)%20¬$1 ¬gU
s¬(href=['"][^'"]*)%25¬$1%¬gU
s¬(href=['"][^'"]*)%26¬$1&¬gU
s¬(href=['"][^'"]*)%2F¬$1/¬gU
s¬(href=['"][^'"]*)%3A¬$1:¬gU
s¬(href=['"][^'"]*)%3D¬$1=¬gU
s¬(href=['"][^'"]*)%3F¬$1?¬gU

FILTER: frameset-borders Give frames a border and make them resizable

# NOTE(review): every pattern in this filter opens with "(]*)", which looks
# like the remains of a longer tag pattern (the text that selected the tag
# appears to be missing). Restore from a pristine copy before relying on
# these jobs -- TODO confirm.
s¬(]*)noresize \
  ¬$1¬xgU
s¬(]*)\bscrolling=(['"]?)(no|0)\2 \
  ¬$1scrolling=$2auto$2¬xgU
s¬(]*)\bframeborder=(['"]?)(no|0)\2 \
  ¬$1frameborder=$2yes$2¬xgU
# NOTE(review): the "\2" in the substitute below is presumably an escaped
# literal 2 (border=2), written that way because "$22" would read as group
# 22 -- verify against the pcrs manpage.
s¬(]*)\bborder=(['"]?)(no|0)\2 \
  ¬$1border=$2\2$2¬xgU
s¬(]*)\bframespacing=(['"]?)(no|0)\2 \
  ¬$1framespacing=$2yes$2¬xgU
# This job is textually identical to the frameborder job above; presumably
# the two originally targeted different tags -- TODO confirm.
s¬(]*)\bframeborder=(['"]?)(no|0)\2 \
  ¬$1frameborder=$2yes$2¬xgU

FILTER: html-annoyances Get rid of particularly annoying HTML abuse

# The tag was a crime!
# NOTE(review): the next two patterns look truncated (the markup they were
# meant to match appears to be missing). As written, the first matches only
# the empty string and the second deletes every run of white space.
# Restore both from a pristine copy before enabling this filter.
s¬|¬¬ig

# Word break confuses some browsers
s¬\s*\s*¬¬ig

# Widths/heights should not be absolute
s¬<\s*(table|tr|td)\b([^>]*?)\s+(?:width|height)\s*=\s*"*[0-9]+%*"* \
  ¬<$1$2¬xg

# New browser windows (if allowed -- see no-popups filter below) should be
# resizeable and have a location and status bar
# NOTE(review): the leading "(]+" in these jobs looks truncated as well.
s¬(]+scrolling=)(['"]?)(?:no|0)\2 ¬$1$2auto$2¬xgU
s¬(]+status=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬(]+resizable=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬(]+menubar=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬(]+toolbar=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬(]+titlebar=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU
s¬(]+location=)(['"]?)(?:no|0)\2 ¬$1$2yes$2¬xgU

FILTER: js-annoyances Get rid of particularly annoying JavaScript abuse

# Note: Most of these jobs would be safer if restricted to a
# NOTE(review): the rest of the note above and the head of at least one job
# appear to be missing; only the fragment on the next comment line is left.
# It is kept verbatim and inert until the job can be restored.
# ) \ ¬$1never¬xsigU

# If we allow window.open, we want normal window features:
# Test: http://www.htmlgoodies.com/beyond/notitle.html
#
# Each job rewrites one feature inside the argument list of an open( call.
# Features that hide or pin the window are switched off:
s¬(open\s*\([^)\\>]+alwayslowered=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+alwaysraised=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+fullscreen=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+hotkeys=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
s¬(open\s*\([^)\\>]+zlock=)(["']?)(?:yes|1)\2 ¬$1$2no$2¬xsigU
# Suppressed scrolling becomes automatic:
s¬(open\s*\([^)\\>]+scrollbars=)(["']?)(?:no|0)\2 ¬$1$2auto$2¬xsigU
s¬(open\s*\([^)\\>]+scrolling=)(["']?)(?:no|0)\2 ¬$1$2auto$2¬xsigU
# Suppressed window chrome is switched back on:
s¬(open\s*\([^)\\>]+directories=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+resizable=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+status=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+location=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+menubar=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
# titlebar now matches (?:no|0) like its neighbours; the earlier pattern
# matched (?:yes|1) and therefore never re-enabled a suppressed title bar.
s¬(open\s*\([^)\\>]+titlebar=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU
s¬(open\s*\([^)\\>]+toolbar=)(["']?)(?:no|0)\2 ¬$1$2yes$2¬xsigU

FILTER: content-cookies Kill cookies that come in the HTML or JS content

# JS cookies, like found on privacy.net:
s¬document\.cookie(?=[ \t\r\n]*=) \
  ¬Privoxy cookie 
zapped¬xg

# HTML cookies:
# NOTE(review): the pattern below starts with "]*>", which looks like the
# tail of a longer tag pattern -- restore from a pristine copy.
s¬]*> \
  ¬¬xgU

FILTER: webbugs Squish WebBugs (1x1 invisible GIFs used for user tracking)

# Matches two width/height attributes whose value is 1 followed by a
# non-digit, up to the closing ">", and removes the match.
# NOTE(review): the leading "]*" looks truncated (tag selector missing).
s¬]* \
  (?:width|height)\s*=\s*['"]?1(?=\D) [^>]* \
  (?:width|height)\s*=\s*['"]?1(?=\D) [^>]*? \
  > \
  ¬¬xsiUg

FILTER: popups Kill all popups in JS and HTML

# JS
# Replaces open( with concat( when it follows one of the listed characters
# or "window.".
s¬([\n =;{}]|window\.)open\s*\\?\( \
  ¬$1concat(¬xg

# HTML
# Replaces target=_blank / target=_new with the inert word "notarget".
s¬\btarget\s*=\s*(['"]?)(_blank|_new)\1? \
  ¬notarget¬xg

FILTER: refresh-tags Kill automatic refresh tags (for dial-on-demand setups)

# Note: Only deactivates refreshes with more than 9 seconds delay to
# preserve monster-stupid but common redirections via meta tags.
#
# NOTE(review): the pattern below starts in the middle of a group ("\2]*))?"),
# so its head appears to be missing -- restore from a pristine copy.
s¬\2]*))?\2\s* \
  > \
  ¬¬xU

FILTER: nimda Remove Nimda (virus) code

# NOTE(review): the pattern of this job is empty and the substitute is split
# by a raw line break, so the job is not usable as it stands. The text is
# kept as found; restore from a pristine copy.
s¬ \
  ¬
\ WARNING: This Server is infected with \
 Nimda!\
 ¬xg

FILTER: notrack-google Remove the tracking from Google.com results

# The dots in the host name are escaped so that they match only a literal
# "." (an unescaped "." matches any character).
s¬url=http://www\.google\.(com|ca|co\.uk)/[^ ]*[&]q=([^&]+)[&][^ ]* \
  ¬$2¬xg
s¬href="?http://www\.google\.(com|ca|co\.uk)/[^ ]*[&]q=([^&">]+)"? \
  ¬href="$2"¬xg
s¬href="?/url\?sa=[^>]+[&]q=([^&>]+)[&]e=[^>"]+"? \
  ¬href="$1"¬xg
# Decode the escapes left in the extracted URLs. Note that these two jobs
# are not anchored to a link and apply to every %3F / %25 in the content.
s¬%3F¬?¬g
s¬%25¬%¬g

FILTER: notrack-googlei Remove the tracking from images.Google.com results

# Pointless because the whole page is JavaScript now.
#s¬href="?/imgres\?imgurl=[^>&]+[&]imgrefurl=([^>&]+)[&][hw]=[^>"]+"? \
# ¬href="$1"¬xg
s¬%3F¬?¬g
s¬%25¬%¬g

FILTER: unsuck-groups Try to fix the awful horror of Google Groups

# Google Groups in particular also uses extraordinarily annoying
# 1-pixel clear GIFs with very long heights to ''format'' the results.
s¬src="(/groups)?/img/dot_clear\.gif" \
  ¬¬xg
s¬(\s+(width|height|border|alt|align|id)="?[0-9a-z_]*"?)+ \
  ¬¬xg
# NOTE(review): the remaining jobs of this filter have lost their patterns
# and/or substitutes and are broken across raw line breaks. They are kept
# exactly as found; restore from a pristine copy.
s¬ \
  ¬¬xg
s¬

¬

¬xg
s¬]*> ¬
¬xg
s¬]*> ¬¬xg
s¬]*> ¬¬xg

# And a few tons of resizing JavaScript. Keep it simple...
# Every zero-argument function gets an empty body; its original body is
# re-attached to a new function named <name>Noop.
# The substitute uses $1, as every other job in this file does; the earlier
# "\1" would presumably have been taken as an escaped literal 1 -- confirm
# against the pcrs manpage.
s¬function \s+([a-zA-Z_]+)\(\) \s+{ \
  ¬function $1() { }\n \
 function $1Noop () {¬xg

FILTER: notrack-ask Remove the tracking bits from Ask/Teoma results

# No 'x' option on the first job: the blanks in its pattern are literal.
s¬onmouseover="return ss('Go to [^']*')" onmouseout="cs()"¬¬g
s¬href="http://[a-z.]*\.(ask|teoma)\.com/r\?[^"]*u=([^"]*)" \
  ¬href="$2"¬xg

FILTER: notrack-yahoo Remove the tracking bits from search.Yahoo.com results

# Yahoo search
# The separator group is (:|%3A), matching the Yahoo directory job below;
# the earlier (:%3A) only matched the literal text ":%3A".
s¬href=(['"]?)http://rds\.yahoo\.com/S=[0-9]+/K=[^*]+ \
  /\*-(http|https|ftp)(:|%3A)// \
  ¬href=$1$2://¬xg

# Yahoo directory
s¬href=(['"]?)http://rds\.yahoo\.com/S=[^*]+ \
  /\*(http|https|ftp)(:|%3A)// \
  ¬href=$1$2://¬xg

FILTER: notrack-av Remove the tracking bits from Altavista results

# The optional quote is written (['"]?) as in the Yahoo jobs, so group 1 is
# always set (possibly empty) when $1 is used in the substitute.
s¬href=(['"]?)http://av\.rds\.yahoo\.com/[^*]*\*\*(http|https|ftp)(:|%3[Aa])// \
  ¬href=$1$2://¬xg

FILTER: notrack-a9 remove the tracking bits from A9.com results

s¬dragSource="dragRedirLinkCB"\s+ \
  href="/-/search/a9-redirect\.jsp\?url=([^&"]+)[^"]*" \
  ¬href="$1"¬xg
s¬href="http%3A%2F%2F([-.a-zA-Z0-9]+)%2F \
  ¬href="http://$1/¬xg

FILTER: nostitials Remove interstitials from FastClick and others

s¬\s*onClick\s*=\s*"this\.href\s*=\s*FC[a-zA-Z0-9]*\(this\.href\);*"¬¬g

FILTER: hon-nonew Defeat annoying HotOrNot force rating

s¬http://meetme\.hotornot\.com/\?pickNew=1\&state=vote& \
  votee=[0-9]*&vt=[0-9]*\&kwMode=[01]\& \
  ¬http://meetme.hotornot.com/?¬xgU
# NOTE(review): this job replaces "><" with "><" and so changes nothing as
# written; pattern and substitute look truncated.
s¬>< \
  ¬><¬xgU

FILTER: shockwave-flash Kill embedded Shockwave Flash objects

# NOTE(review): the leading "]*" looks like the tail of a longer tag
# pattern -- restore from a pristine copy.
s¬]*application/x-shockwave-flash.* \
  ¬¬xsigU

FILTER: quicktime-kioskmode Make Quicktime movies saveable

# NOTE(review): the leading "(]*)" looks truncated (tag selector missing).
s¬(]*)kioskmode\s*=\s*(["']?)true\2 \
  ¬$1¬xg

FILTER: js-events Kill all JS event bindings [really extreme]

s¬(on|event\.) 
\ ((mouse(over|out|down|up|move))|(un)?load|contextmenu|selectstart) \
  ¬never¬xg

# Not events, but abused on the same type of sites:
s¬(alert|confirm)\s*\( \
  ¬concat(¬xg

FILTER: img-reorder Reorder attributes in tags to make the banners-by-* filters more effective

# In the first step src is moved to the start, then width is moved to the
# second place to guarantee an order of src, width, height.
# This makes banners-by-size more effective and allows both banners-by-size
# and banners-by-link to preserve the original image URL in the alt attribute.
#
# NOTE(review): the three jobs below have empty substitutes and patterns
# that start with "]*)", so as written they delete what they match instead
# of reordering it. Both operands look truncated; restore from a pristine
# copy before enabling this filter.
s¬]*)src\s*=\s*([^'">\\\s]+?)([^\\>]*)> \
  ¬¬xsiUg
s¬]*)src\s*=\s*(['"])([^ >\\\2]+)\2(.*)> \
  ¬¬xsiUg
s¬]*)width\s*=\s*?(["']?)(\d+?)\3(.*)> \
  ¬¬xsiUg

FILTER: banners-by-size Kill banners by size

# NOTE(review): in this filter and in banners-by-size-many every pattern
# starts with "]*", which looks like the tail of a longer tag pattern.
# The group numbers used (\3, \5, $2, $4) suggest capture groups that are no
# longer visible -- TODO confirm against a pristine copy.

# 234*60, 468*60 (most banners!)
s¬]* \
  (width=(['"]?)(?:234|468)\3) [^>]* \
  (height=(['"]?)60\5)[^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU

FILTER: banners-by-size-many Kill banners by size, many sizes

# 88*31, 120*60, 120*90, 120*240, 120*600, 125*125, 160*600,
# 180*150, 240*400, 250*250, 300*250, 336*280

# 88*31
s¬]* \
  (width=(['"]?)88\3) [^>]* \
  (height=(['"]?)31\5) [^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 120*60, 120*90, 120*240, 120*600
s¬]* \
  (width=(['"]?)120\3) [^>]* \
  (height=(['"]?)(?:600?|90|240)\5) [^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 125*125
s¬]* \
  (width=(['"]?)125\3) [^>]* \
  (height=(['"]?)125\5) [^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 160*600
s¬]* \
  (width=(['"]?)160\3) [^>]* \
  (height=(['"]?)600\5)[^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 180*150
s¬]* \
  (width=(['"]?)180\3) [^>]* \
  (height=(['"]?)150\5)[^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 240*400
s¬]* \
  (width=(['"]?)240\3) [^>]* \
  (height=(['"]?)400\5) [^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 250*250, 300*250
s¬]* \
  (width=(['"]?)(?:250|300)\3) [^>]* \
  (height=(['"]?)250\5) [^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU
# 336*280
s¬]* \
  (width=(['"]?)336\3) [^>]* \
  (height=(['"]?)280\5) [^>]* \
  > \
  ¬Privoxy ad zapped by size $2 $4¬xsigU