# SARE "General Subject" Ruleset for SpamAssassin - File ENG
# Version: 01.03.12
# Created: 2004-09-13
# Modified: 2005-12-27
# Usage instructions and documentation are found in 70_sare_genlsubj0.cf

#@@# Revision History: Full Revision History stored in 70_sare_genlsubj.log
#@@# 01.03.12: Dec 27 2005
#@@# Minor score updates based on additional mass-check
#@@# Archived from file E: SARE_SUB_ACCENT_CHAR
#@@# Archived from file E: SARE_SUB_OBFU_V
#@@# Archived from file E: SARE_SUB_ODDWORD_G
#@@# Archived from file E: SARE_SUB_ODDWORD_I
#@@# Archived from file E: SARE_SUB_ODDWORD_P
#@@# Archived from file E: SARE_SUB_ODDWORD_Q
#@@# Archived from file E: SARE_SUB_ODDWORD_U
#@@# Archived from file E: SARE_SUB_RAND_LETTRS5

# This rule set file contains those SARE_SUB_* rules that seem to be dependent upon language/word structure,
# and which could generate significant ham hits in non-English languages.

######## ###################### ##################################################
# Rule definitions to avoid --lint errors on archived/moved rules.
######## ###################### ##################################################

# __SARE_SUB_FALSE can never be true (X && !X). The archived/moved rule names
# below are kept defined as always-false metas so that references to them
# elsewhere do not produce --lint errors.
meta __SARE_SUB_FALSE __FROM_AOL_COM && !__FROM_AOL_COM
meta SARE_SUB_WEBMASTER2 __SARE_SUB_FALSE
meta SARE_SUB_ACCENT_CHAR __SARE_SUB_FALSE
meta SARE_SUB_OBFU_V __SARE_SUB_FALSE
meta SARE_SUB_ODDWORD_G __SARE_SUB_FALSE
meta SARE_SUB_ODDWORD_I __SARE_SUB_FALSE
meta SARE_SUB_ODDWORD_P __SARE_SUB_FALSE
meta SARE_SUB_ODDWORD_Q __SARE_SUB_FALSE
meta SARE_SUB_ODDWORD_U __SARE_SUB_FALSE
meta SARE_SUB_RAND_LETTRS5 __SARE_SUB_FALSE

######## ###################### ##################################################
# Category: __rules used by primary rules below
######## ###################### ##################################################

# Attempt to identify simple subject obfuscation by character insertion
# Each helper matches a letter (or the digit 0), one specific punctuation
# character (two periods for _2PER), then a letter. _HTTP instead matches a
# literal "http://" and uses m*...* delimiters so the slashes need no escaping.
header __SARE_SUB_OBFU_ASTER Subject =~ /[a-zA-Z0]\*[a-zA-Z]/
header __SARE_SUB_OBFU_CARAT Subject =~ /[a-zA-Z0]\^[a-zA-Z]/
header __SARE_SUB_OBFU_COLON Subject =~ /[a-zA-Z0]:[a-zA-Z]/
header __SARE_SUB_OBFU_COMMA Subject =~ /[a-zA-Z0],[a-zA-Z]/
header __SARE_SUB_OBFU_SLASH Subject =~ /[a-zA-Z0]\/[a-zA-Z]/
header __SARE_SUB_OBFU_LQUOT Subject =~ /[a-zA-Z0]`[a-zA-Z]/
header __SARE_SUB_OBFU_PERIOD Subject =~ /[a-zA-Z0]\.[a-zA-Z]/
header __SARE_SUB_OBFU_2PER Subject =~ /[a-zA-Z0]\.\.[a-zA-Z]/
header __SARE_SUB_OBFU_PIPE Subject =~ /[a-zA-Z0]\|[a-zA-Z]/
header __SARE_SUB_OBFU_PLUS Subject =~ /[a-zA-Z0]\+[a-zA-Z]/
header __SARE_SUB_OBFU_QUOTE Subject =~ /[a-zA-Z0]"[a-zA-Z]/
header __SARE_SUB_OBFU_SCOLON Subject =~ /[a-zA-Z0];[a-zA-Z]/
header __SARE_SUB_OBFU_USCORE Subject =~ /[a-zA-Z0]_[a-zA-Z]/
header __SARE_SUB_OBFU_HTTP Subject =~ m*http://*i

######## ###################### ##################################################
# Category: Credit, debt, lending, mortgage, borrowing, investment, financing
######## ###################### ##################################################

# Obfuscated spellings of "debt": optional filler characters between letters and
# look-alike byte sequences substituted for d/e/b/t. The leading negative
# lookahead (?!deb[aiu]?t) skips the plain spellings debt/debat/debit/debut.
# The pattern must stay on a single line: a rule definition cannot span lines.
header SARE_SUB_DEBT_OB Subject =~ /(?!deb[aiu]?t)\b(?:d.?e.?b.?t|(?:d|(?:[\xD0]|\xC4[\x8E-\x91]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)))\b/i
describe SARE_SUB_DEBT_OB subject has obfuscated spammer topic
score SARE_SUB_DEBT_OB 2.500
#stype SARE_SUB_DEBT_OB obfu
#counts SARE_SUB_DEBT_OB 4s/0h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_DEBT_OB 37s/0h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_DEBT_OB 4s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_DEBT_OB 2s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#max SARE_SUB_DEBT_OB 3s/0h of 36108 corpus (12627s/23481h JH) 08/14/04 TM2 SA3.0-pre2
#counts SARE_SUB_DEBT_OB 3s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_DEBT_OB 20s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#counts SARE_SUB_DEBT_OB 2s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_DEBT_OB 2s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

######## ###################### ##################################################
# Category: Technical spamsign
######## ###################### ##################################################

# Subject word of 6 to 20 letters drawn only from the listed lowercase
# consonants (this rule is case-sensitive, and its class has no 'd' or 'f').
# The lookahead rejects candidates whose second character is H, M, s or x.
header SARE_SUB_6CONS_WORD Subject =~ /(?!.[HMsx]+)\b[bcghjklmnpqrstvwxz]{6,20}\b/
describe SARE_SUB_6CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_6CONS_WORD 0.356
#stype SARE_SUB_6CONS_WORD max:1.000
#note SARE_SUB_6CONS_WORD Score all on spam-only nCONS rules changed to max 1.000 after removing exclusions for technical words/acronyms.
#note SARE_SUB_6CONS_WORD Scores reduced to lessen chance of spam FP on highly technical emails.
#hist SARE_SUB_6CONS_WORD 04/20/2004 -- Added exclusion for hmmmmm
#hist SARE_SUB_6CONS_WORD Jan 2005: Removed exclusion for technical words/acronyms
#counts SARE_SUB_6CONS_WORD 163s/23h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_6CONS_WORD 863s/0h of 97268 corpus (79437s/17831h RM) 01/24/04
#counts SARE_SUB_6CONS_WORD 45s/9h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_6CONS_WORD 6s/1h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_6CONS_WORD 3s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_6CONS_WORD 42s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_6CONS_WORD 11s/1h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_6CONS_WORD 13s/1h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_6CONS_WORD 3s/0h of 11269 corpus (6578s/4691h CT) 06/11/05
#max SARE_SUB_6CONS_WORD 10s/0h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_6CONS_WORD 0s/1h of 5906 corpus (1036s/4870h ft) 06/11/05

# The nCONS rules below each match a Subject word (bounded by \b on both sides)
# consisting of exactly n consonants, case-insensitively. The leading negative
# lookahead rejects a candidate whose second character is one of the bracketed
# letters. Each rule must stay on a single line to be parsed as a directive.
header SARE_SUB_8CONS_WORD Subject =~ /(?!.[hmsx]+)\b[bcdfghjklmnpqrstvwxz]{8}\b/i
describe SARE_SUB_8CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_8CONS_WORD 0.360
#stype SARE_SUB_8CONS_WORD max:1.000
#counts SARE_SUB_8CONS_WORD 42s/2h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_8CONS_WORD 120s/5h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_8CONS_WORD 1s/0h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_8CONS_WORD 15s/7h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_8CONS_WORD 5s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_8CONS_WORD 19s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_8CONS_WORD 3s/1h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_8CONS_WORD 0s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_8CONS_WORD 4s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_9CONS_WORD Subject =~ /(?!.[hmz]+)\b[bcdfghjklmnpqrstvwxz]{9}\b/i
describe SARE_SUB_9CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_9CONS_WORD 0.147
#stype SARE_SUB_9CONS_WORD max:1.000
#counts SARE_SUB_9CONS_WORD 28s/3h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_9CONS_WORD 157s/0h of 115449 corpus (94274s/21175h RM) 05/01/04
#counts SARE_SUB_9CONS_WORD 2s/0h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_9CONS_WORD 2s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#counts SARE_SUB_9CONS_WORD 4s/10h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_9CONS_WORD 1s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_9CONS_WORD 0s/0h of 49034 corpus (44877s/4157h MY) 06/11/05
#max SARE_SUB_9CONS_WORD 19s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_9CONS_WORD 0s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_9CONS_WORD 3s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05

header SARE_SUB_10CONS_WORD Subject =~ /(?!.[MX]+)\b[bcdfghjklmnpqrstvwxz]{10}\b/i
describe SARE_SUB_10CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_10CONS_WORD 0.639
#stype SARE_SUB_10CONS_WORD max:1.000
#counts SARE_SUB_10CONS_WORD 48s/2h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_10CONS_WORD 117s/0h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_10CONS_WORD 1s/1h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_10CONS_WORD 4s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_10CONS_WORD 2s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_10CONS_WORD 0s/0h of 43961 corpus (40110s/3851h MY) 05/04/05
#max SARE_SUB_10CONS_WORD 7s/0h of 16895 corpus (14482s/2413h MY) 07/26/04
#counts SARE_SUB_10CONS_WORD 0s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_10CONS_WORD 2s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_10CONS_WORD 5s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max SARE_SUB_10CONS_WORD 6s/0h of 10824 corpus (6376s/4448h CT) 05/04/05

# The nCONS rules below each match a Subject word (bounded by \b on both sides)
# consisting of n consonants (15 or more for the last one), case-insensitively.
# The leading negative lookahead rejects a candidate whose second character is
# one of the listed letters. Every directive must sit on its own line.
header SARE_SUB_11CONS_WORD Subject =~ /(?!.m+)\b[bcdfghjklmnpqrstvwxz]{11}\b/i
describe SARE_SUB_11CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_11CONS_WORD 0.352
#stype SARE_SUB_11CONS_WORD max:1.000
#counts SARE_SUB_11CONS_WORD 9s/2h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_11CONS_WORD 66s/0h of 115449 corpus (94274s/21175h RM) 05/01/04
#counts SARE_SUB_11CONS_WORD 4s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#counts SARE_SUB_11CONS_WORD 3s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_11CONS_WORD 0s/0h of 43961 corpus (40110s/3851h MY) 05/04/05
#max SARE_SUB_11CONS_WORD 2s/0h of 16895 corpus (14482s/2413h MY) 07/26/04
#counts SARE_SUB_11CONS_WORD 3s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05

header SARE_SUB_12CONS_WORD Subject =~ /(?!.[mrx]+)\b[bcdfghjklmnpqrstvwxz]{12}\b/i
describe SARE_SUB_12CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_12CONS_WORD 1.000
#stype SARE_SUB_12CONS_WORD max:1.000
#counts SARE_SUB_12CONS_WORD 4s/0h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_12CONS_WORD 60s/0h of 92315 corpus (67942s/24373h RM) 07/24/04
#counts SARE_SUB_12CONS_WORD 5s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_12CONS_WORD 0s/0h of 43961 corpus (40110s/3851h MY) 05/04/05
#max SARE_SUB_12CONS_WORD 9s/0h of 16895 corpus (14482s/2413h MY) 07/26/04
#counts SARE_SUB_12CONS_WORD 0s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_12CONS_WORD 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_13CONS_WORD Subject =~ /(?!.[mr]+)\b[bcdfghjklmnpqrstvwxz]{13}\b/i
describe SARE_SUB_13CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_13CONS_WORD 0.628
#stype SARE_SUB_13CONS_WORD max:1.000
#counts SARE_SUB_13CONS_WORD 1s/0h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_13CONS_WORD 20s/0h of 96858 corpus (75462s/21396h RM) 05/01/04
#counts SARE_SUB_13CONS_WORD 1s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_13CONS_WORD 0s/0h of 16895 corpus (14482s/2413h MY) 07/26/04
#max SARE_SUB_13CONS_WORD 1s/0h of 18153 corpus (15872s/2281h) 05/03/04
#counts SARE_SUB_13CONS_WORD 1s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_13CONS_WORD 2s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_13CONS_WORD 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_14CONS_WORD Subject =~ /(?!.[mrs]+)\b[bcdfghjklmnpqrstvwxz]{14}\b/i
describe SARE_SUB_14CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_14CONS_WORD 0.667
#counts SARE_SUB_14CONS_WORD 5s/1h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_14CONS_WORD 17s/0h of 92315 corpus (67942s/24373h RM) 07/24/04
#counts SARE_SUB_14CONS_WORD 0s/0h of 43961 corpus (40110s/3851h MY) 05/04/05
#max SARE_SUB_14CONS_WORD 3s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_14CONS_WORD 0s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_14CONS_WORD 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_15CONS_WORD Subject =~ /(?!.[mr]+)\b[bcdfghjklmnpqrstvwxz]{15,}\b/i
describe SARE_SUB_15CONS_WORD subject word consists of consecutive consonants
score SARE_SUB_15CONS_WORD 0.222
#counts SARE_SUB_15CONS_WORD 2s/0h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_15CONS_WORD 10s/4h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_15CONS_WORD 2s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_15CONS_WORD 0s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_15CONS_WORD 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_15CONS_WORD 0s/0h of 43961 corpus (40110s/3851h MY) 05/04/05

# The SARE_SUB_ENC_* rules test the undecoded Subject header (Subject:raw).
# The first five look for a "=?charset?" marker; the utf-8 and windows-125x
# rules match the bare charset name anywhere in the raw header.
header SARE_SUB_ENC_GB2312 Subject:raw =~ /\=\?gb2312\?/i
describe SARE_SUB_ENC_GB2312 Subject specifies display in non-English lang
score SARE_SUB_ENC_GB2312 1.345
#hist SARE_SUB_ENC_GB2312 Created by Bob Menschel Aug 5 2004
#counts SARE_SUB_ENC_GB2312 1850s/3h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_GB2312 5156s/3h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_GB2312 76s/2h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_GB2312 3s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#counts SARE_SUB_ENC_GB2312 4s/5h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_GB2312 47s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_ENC_GB2312 1s/3h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_ENC_GB2312 5s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_ENC_GB2312 9s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_ENC_GB2312 21s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05

header SARE_SUB_ENC_ISO2022JP Subject:raw =~ /\=\?iso-2022-jp\?/i
describe SARE_SUB_ENC_ISO2022JP Subject specifies display in non-English lang
score SARE_SUB_ENC_ISO2022JP 0.413
#ham SARE_SUB_ENC_ISO2022JP Newsletter from Japan Apple.com, possibly in error to English subscriber
#hist SARE_SUB_ENC_ISO2022JP Created by Bob Menschel Oct 07 2004
#counts SARE_SUB_ENC_ISO2022JP 906s/13h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_ISO2022JP 1566s/2h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_ISO2022JP 19s/0h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_ISO2022JP 88s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#counts SARE_SUB_ENC_ISO2022JP 10s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_ISO2022JP 2s/0h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_ENC_ISO2022JP 0s/0h of 43961 corpus (40110s/3851h MY) 05/04/05
#max SARE_SUB_ENC_ISO2022JP 2s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_ENC_ISO2022JP 15s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_ENC_ISO2022JP 17s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05

header SARE_SUB_ENC_ISO2022KR Subject:raw =~ /\=\?iso-2022-kr\?/i
describe SARE_SUB_ENC_ISO2022KR Subject specifies display in non-English lang
score SARE_SUB_ENC_ISO2022KR 1.666
#stype SARE_SUB_ENC_ISO2022KR spamg
#hist SARE_SUB_ENC_ISO2022KR Created by Bob Menschel Sep 4 2004
#counts SARE_SUB_ENC_ISO2022KR 0s/0h of 238420 corpus (112480s/125940h RM) 02/28/05
#max SARE_SUB_ENC_ISO2022KR 6s/0h of 102867 corpus (66500s/36367h RM) 12/07/04
#counts SARE_SUB_ENC_ISO2022KR 0s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_ENC_ISO2022KR 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_ENC_ISO2022KR 0s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05

header SARE_SUB_ENC_KOI8R Subject:raw =~ /\=\?koi8-r\?/i
describe SARE_SUB_ENC_KOI8R Subject specifies display in non-English lang
score SARE_SUB_ENC_KOI8R 0.670
#hist SARE_SUB_ENC_KOI8R Created by Bob Menschel Aug 5 2004
#counts SARE_SUB_ENC_KOI8R 286s/56h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_KOI8R 941s/77h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_KOI8R 12s/2h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_KOI8R 211s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_KOI8R 3s/0h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_ENC_KOI8R 2s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_ENC_KOI8R 0s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_ENC_KOI8R 700s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#counts SARE_SUB_ENC_KOI8R 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_ENC_KS5601 Subject:raw =~ /\=\?ks_c_5601\-1987\?/i
describe SARE_SUB_ENC_KS5601 Subject specifies display in non-English lang
score SARE_SUB_ENC_KS5601 0.222
#counts SARE_SUB_ENC_KS5601 6s/2h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_KS5601 94s/0h of 114203 corpus (81067s/33136h RM) 01/15/05
#counts SARE_SUB_ENC_KS5601 1s/0h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_KS5601 5s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_KS5601 0s/0h of 36108 corpus (12627s/23481h JH) 08/14/04 TM2 SA3.0-pre2
#counts SARE_SUB_ENC_KS5601 0s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_ENC_KS5601 1s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#counts SARE_SUB_ENC_KS5601 0s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_ENC_KS5601 1s/0h of 43961 corpus (40110s/3851h MY) 05/04/05

# UTF-8 occurrence counting: the header tests match "utf-8" at least 1, 2, 3 or
# 4 times in the raw Subject. Each scored meta excludes the higher-count tests,
# so at most one of SARE_SUB_ENC_UTF8 / x2 / x3 / x4 fires for a given message.
header __SARE_SUB_ENC_UTF8 Subject:raw =~ /utf-8/i
meta SARE_SUB_ENC_UTF8 __SARE_SUB_ENC_UTF8 && !__SARE_SUB_ENC_UTF8x2 && !__SARE_SUB_ENC_UTF8x3 && !SARE_SUB_ENC_UTF8x4
describe SARE_SUB_ENC_UTF8 Message uses character set often used in spam
score SARE_SUB_ENC_UTF8 0.152
#stype SARE_SUB_ENC_UTF8 max:0.555 # due to excessive ham
#hist SARE_SUB_ENC_UTF8 Split by Bob Menschel Dec 2005
#ham SARE_SUB_ENC_UTF8 Reply to email on Yahoo Groups list
#counts SARE_SUB_ENC_UTF8 99s/260h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_UTF8 2152s/160h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_UTF8 19s/1h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_UTF8 528s/20h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_UTF8 2s/8h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_ENC_UTF8 6s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_ENC_UTF8 0s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_ENC_UTF8 166s/2h of 54902 corpus (17729s/37173h JH-3.01) 03/13/05
#counts SARE_SUB_ENC_UTF8 1s/1h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_ENC_UTF8 23s/1h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_ENC_UTF8 3s/8h of 7500 corpus (1767s/5733h ft) 09/18/05

header __SARE_SUB_ENC_UTF8x2 Subject:raw =~ /utf-8.*utf-8/i
meta SARE_SUB_ENC_UTF8x2 __SARE_SUB_ENC_UTF8x2 && !__SARE_SUB_ENC_UTF8x3 && !SARE_SUB_ENC_UTF8x4
describe SARE_SUB_ENC_UTF8x2 Message uses character set often used in spam
score SARE_SUB_ENC_UTF8x2 0.246
#counts SARE_SUB_ENC_UTF8x2 13s/25h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_UTF8x2 658s/18h of 238420 corpus (112480s/125940h RM) 02/28/05
#counts SARE_SUB_ENC_UTF8x2 0s/5h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_UTF8x2 1s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_ENC_UTF8x2 9s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#counts SARE_SUB_ENC_UTF8x2 123s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_ENC_UTF8x2 129s/0h of 54902 corpus (17729s/37173h JH-3.01) 03/13/05
#counts SARE_SUB_ENC_UTF8x2 0s/2h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_ENC_UTF8x2 93s/1h of 11030 corpus (6598s/4432h CT) 03/10/05

header __SARE_SUB_ENC_UTF8x3 Subject:raw =~ /utf-8.*utf-8.*utf-8/i
meta SARE_SUB_ENC_UTF8x3 __SARE_SUB_ENC_UTF8x3 && !SARE_SUB_ENC_UTF8x4
describe SARE_SUB_ENC_UTF8x3 Message uses character set often used in spam
score SARE_SUB_ENC_UTF8x3 0.641
#counts SARE_SUB_ENC_UTF8x3 1s/9h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_UTF8x3 844s/2h of 238420 corpus (112480s/125940h RM) 02/28/05
#counts SARE_SUB_ENC_UTF8x3 9s/2h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_UTF8x3 2s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#counts SARE_SUB_ENC_UTF8x3 172s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_ENC_UTF8x3 178s/0h of 54902 corpus (17729s/37173h JH-3.01) 03/13/05
#counts SARE_SUB_ENC_UTF8x3 0s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_ENC_UTF8x3 106s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_ENC_UTF8x4 Subject:raw =~ /utf-8.*utf-8.*utf-8.*utf-8/i
describe SARE_SUB_ENC_UTF8x4 Message uses character set often used in spam
score SARE_SUB_ENC_UTF8x4 0.681
#counts SARE_SUB_ENC_UTF8x4 6s/3h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_UTF8x4 10s/11h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_UTF8x4 12s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_UTF8x4 1s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#counts SARE_SUB_ENC_UTF8x4 71s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#counts SARE_SUB_ENC_UTF8x4 0s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_ENC_UTF8x4 22s/0h of 11269 corpus (6578s/4691h CT) 06/11/05

header SARE_SUB_ENC_WIN1251 Subject:raw =~ /windows-1251/i
describe SARE_SUB_ENC_WIN1251 Subject specifies unnecessary display type
score SARE_SUB_ENC_WIN1251 1.666
#counts SARE_SUB_ENC_WIN1251 638s/6h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_WIN1251 1037s/21h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_WIN1251 37s/3h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_WIN1251 147s/0h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_ENC_WIN1251 7s/0h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_ENC_WIN1251 42s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_ENC_WIN1251 0s/0h of 16895 corpus (14482s/2413h MY) 07/26/04
#counts SARE_SUB_ENC_WIN1251 1946s/0h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#counts SARE_SUB_ENC_WIN1251 0s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_ENC_WIN1251 4s/0h of 11269 corpus (6578s/4691h CT) 06/11/05

header SARE_SUB_ENC_WIN1255 Subject:raw =~ /windows-1255/i
describe SARE_SUB_ENC_WIN1255 Subject specifies unnecessary display type
score SARE_SUB_ENC_WIN1255 0.647
#counts SARE_SUB_ENC_WIN1255 29s/1h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_ENC_WIN1255 95s/1h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_ENC_WIN1255 4s/0h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_ENC_WIN1255 0s/0h of 16895 corpus (14482s/2413h MY) 07/26/04
#counts SARE_SUB_ENC_WIN1255 0s/0h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#max SARE_SUB_ENC_WIN1255 2s/0h of 36108 corpus (12627s/23481h JH) 08/14/04 TM2 SA3.0-pre2
#counts SARE_SUB_ENC_WIN1255 0s/0h of 11030 corpus (6598s/4432h CT) 03/10/05

# The SARE_SUB_OBFU_* rules below flag specific two-letter sequences anywhere in
# the decoded Subject, case-insensitively (no word-boundary anchoring).
header SARE_SUB_OBFU_OTHER Subject =~ /(vj|vk|xj|xk|yy|zf|zj)/i
describe SARE_SUB_OBFU_OTHER FVGT - subject contains odd letter combinations
score SARE_SUB_OBFU_OTHER 0.135
#hist SARE_SUB_OBFU_OTHER Frederic Tarasevicius
#counts SARE_SUB_OBFU_OTHER 444s/233h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_OBFU_OTHER 1334s/326h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_OBFU_OTHER 8s/2h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_OBFU_OTHER 121s/12h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_OBFU_OTHER 5s/3h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_OBFU_OTHER 57s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_OBFU_OTHER 13s/18h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_OBFU_OTHER 127s/2h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_OBFU_OTHER 35s/14h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_OBFU_OTHER 42s/14h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05
#counts SARE_SUB_OBFU_OTHER 17s/2h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_OBFU_OTHER 30s/1h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_OBFU_OTHER 0s/3h of 5906 corpus (1036s/4870h ft) 06/11/05

header SARE_SUB_OBFU_Q0 Subject =~ /[jkpqtvwz]q/i
describe SARE_SUB_OBFU_Q0 FVGT - subject contains odd letter combination
score SARE_SUB_OBFU_Q0 0.303
#hist SARE_SUB_OBFU_Q0 Frederic Tarasevicius
#counts SARE_SUB_OBFU_Q0 473s/98h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_OBFU_Q0 1238s/160h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_OBFU_Q0 3s/3h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_OBFU_Q0 107s/5h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_OBFU_Q0 39s/1h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_OBFU_Q0 5s/1h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_OBFU_Q0 137s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_OBFU_Q0 24s/7h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_OBFU_Q0 41s/7h of 54902 corpus (17729s/37173h JH-3.01) 03/13/05
#counts SARE_SUB_OBFU_Q0 14s/5h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_OBFU_Q0 21s/1h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_OBFU_Q0 1s/1h of 7500 corpus (1767s/5733h ft) 09/18/05

header SARE_SUB_OBFU_Q1 Subject =~ /q[fhjkmsy]/i
describe SARE_SUB_OBFU_Q1 FVGT - subject contains odd letter combination
score SARE_SUB_OBFU_Q1 0.227
#hist SARE_SUB_OBFU_Q1 Frederic Tarasevicius
#hist SARE_SUB_OBFU_Q1 Removed QA and QN from list of matches
#counts SARE_SUB_OBFU_Q1 800s/138h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_OBFU_Q1 1126s/170h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_OBFU_Q1 7s/1h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_OBFU_Q1 104s/79h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_OBFU_Q1 2s/0h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_OBFU_Q1 90s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_OBFU_Q1 10s/1h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_OBFU_Q1 168s/4h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_OBFU_Q1 33s/34h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_OBFU_Q1 41s/34h of 54902 corpus (17729s/37173h JH-3.01) 03/13/05
#counts SARE_SUB_OBFU_Q1 13s/5h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_OBFU_Q1 23s/1h of 11030 corpus (6598s/4432h CT) 03/10/05

header SARE_SUB_OBFU_Z Subject =~ /[fjkpqx]z/i
describe SARE_SUB_OBFU_Z FVGT - subject contains odd letter combination
score SARE_SUB_OBFU_Z 0.259
#hist SARE_SUB_OBFU_Z Frederic Tarasevicius
#counts SARE_SUB_OBFU_Z 362s/67h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_OBFU_Z 895s/127h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_OBFU_Z 7s/3h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_OBFU_Z 13s/2h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_OBFU_Z 15s/1h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_OBFU_Z 84s/3h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_OBFU_Z 1s/0h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_OBFU_Z 32s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_OBFU_Z 8s/0h of 40676 corpus (35385s/5291h MY) 12/25/05
#max SARE_SUB_OBFU_Z 103s/0h of 32844 corpus (32843s/3308h MY) 01/16/05
#counts SARE_SUB_OBFU_Z 7s/25h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_OBFU_Z 15s/25h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05

# Four-letter Subject word: one of e/i/o/u followed by exactly three letters
# from the listed consonants. The lookahead skips candidates starting "uh".
header SARE_SUB_RAND_LETTRS4 Subject =~ /(?!uh+)\b[eiou][bfghjklnpqrtwz]{3}\b/i
describe SARE_SUB_RAND_LETTRS4 subject has random-text spamsign
score SARE_SUB_RAND_LETTRS4 0.799
#ham SARE_SUB_RAND_LETTRS4 lots of ham with leading A
#counts SARE_SUB_RAND_LETTRS4 435s/61h of 428457 corpus (182181s/246276h RM) 12/24/05
#max SARE_SUB_RAND_LETTRS4 1889s/77h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts SARE_SUB_RAND_LETTRS4 25s/4h of 7659 corpus (6205s/1454h AxB) 12/25/05
#counts SARE_SUB_RAND_LETTRS4 8s/0h of 11553 corpus (6185s/5368h CT) 12/25/05
#max SARE_SUB_RAND_LETTRS4 8s/0h of 11030 corpus (6598s/4432h CT) 03/10/05
#counts SARE_SUB_RAND_LETTRS4 27s/11h of 74216 corpus (34905s/39311h DOC) 12/25/05
#counts SARE_SUB_RAND_LETTRS4 11s/1h of 9833 corpus (4917s/4916h FT) 12/25/05
#counts SARE_SUB_RAND_LETTRS4 142s/0h of 40312 corpus (30637s/9675h ML) 12/25/05
#counts SARE_SUB_RAND_LETTRS4 67s/2h of 40676 corpus (35385s/5291h MY) 12/25/05
#counts SARE_SUB_RAND_LETTRS4 9s/1h of 54018 corpus (16845s/37173h JH-3.01) 06/11/05
#max SARE_SUB_RAND_LETTRS4 17s/1h of 54154 corpus (16979s/37175h JH-3.01) 02/01/05

#EOF