Status.net Word Filter Plugin (Wordfilter)

The TWiT Network has pretty strict rules about profanity across all channels, including the netcasts, chatrooms and the TWiT Army Canteen. There are usually moderators lurking in the IRC and microblog, but once in a while some profanity slips through the cracks. Therefore I wrote a little word filter to preempt the profanity. I'm not sure if they'll use it, but it was fun to write. Sorry about the profanity in this post, but it's sort of necessary.
Installation
Add one of the following to config.php and save the plugin to 'local/WordfilterPlugin.php'.
// Option 1: filter notices through the cdyne profanity web service.
addPlugin('Wordfilter', array('useWebService' => TRUE,
'levelToClean' => 10, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
'useNumberFilter' => TRUE, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
'vaynerchuk' => 'kitty' // Optional replacement text. Default is "[Explicit]"
));
// Option 2: filter notices with the local search / replace list defined in config.php.
addPlugin('Wordfilter', array('useWebService' => FALSE));
# === List search / replace terms here ===
/*
* Each 'search' entry is paired with the 'replace' entry added at the same position,
* so always add the two together and in the same order.
* Matching is case insensitive.
* Replacements should be no longer than the term they replace, since size matters.
* A search term is replaced wherever it occurs, even in the middle of a word.
* For instance "fuck" will sanitize "motherfucker", but this can cause false positives:
* 'twat' will falsely sanitize "wristwatch".
* Use spaces in search terms as delimiters to tweak this
* (and keep the same spaces in the replacement so neighbouring words stay separated).
'twat' - will be matched anywhere, even within words
' twat' - matches words beginning with "twat"
'twat ' - matches words ending with "twat"
' twat ' - only matches the word "twat"
*/
$config['wordfilter']['search'][] = 'blatherskite'; // harmless test term, so you don't have to swear on your site.
$config['wordfilter']['replace'][] = 'blatherin';
$config['wordfilter']['search'][] = ' twat ';
$config['wordfilter']['replace'][] = ' tool ';
$config['wordfilter']['search'][] = ' cock ';
$config['wordfilter']['replace'][] = ' hen ';
$config['wordfilter']['search'][] = 'fuck';
$config['wordfilter']['replace'][] = 'frak';
$config['wordfilter']['search'][] = 'shit';
$config['wordfilter']['replace'][] = 'poop';
$config['wordfilter']['search'][] = 'bitch';
$config['wordfilter']['replace'][] = 'dog';
/* alternate list syntax: assign both lists in one go, keeping the positions aligned
$config['wordfilter']['search'] = array('fuck', 'shit', 'bitch');
$config['wordfilter']['replace'] = array('frak', 'poop', 'dog');
*/
Plugin source code
/**
* Wordfilter Plugin
*
* @category Plugin
* @package Statusnet
* @author Kyle Hasegawa @kylehase
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
* @version Wordfilter.php,v 0.5 2010/01/07 23:20:54 +0900
*
*
* To use this plugin add one of the following to your config.php
*
* // To use the cdyne profanity webservice
* addPlugin('Wordfilter', array('useWebService' => TRUE,
* 'levelToClean' => 10, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
* 'useNumberFilter' => TRUE, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
* 'vaynerchuk' => 'kitty' // Optional. Default is "[Explicit]"
* ));
*
*
* // To use a local search / replace list
* addPlugin('Wordfilter', array('useWebService' => FALSE));
*
* You'll also need to create the word list. See http://kylehasegawa.com/statusnet-word-filter-plugin-wordfilter
*
*/
class WordfilterPlugin extends Plugin {
    // Selects the filtering mode: a truthy value uses the cdyne profanity
    // filter web service, anything else uses the local search / replace
    // lists read from the 'wordfilter' section of the config.
    public $useWebService;

    // cdyne profanity filter options, passed through unchanged to the service.
    // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
    public $levelToClean;
    public $useNumberFilter;

    // Replacement for profanity in web service mode. When empty, the
    // "[Explicit]" marker returned by the service is left in place.
    // ("vaynerchuk" is the profanity replacement string on TWiT.tv's IRC)
    public $vaynerchuk;

    /**
     * Store the filter options, then run the parent Plugin constructor.
     *
     * @param mixed $webSvc    initial value for $useWebService
     * @param mixed $webSvcLev initial value for $levelToClean
     * @param mixed $numFil    initial value for $useNumberFilter
     * @param mixed $vayner    initial value for $vaynerchuk
     */
    function __construct($webSvc=NULL, $webSvcLev=NULL, $numFil=NULL, $vayner=NULL) {
        $this->useWebService = $webSvc;
        // This value was previously written to an undeclared $webServiceLevel
        // property, so it never reached the 'LevelToClean' option that the
        // web service call reads from $levelToClean.
        $this->levelToClean = $webSvcLev;
        $this->useNumberFilter = $numFil;
        $this->vaynerchuk = $vayner;
        parent::__construct();
    }

    /**
     * Hook StartNoticeSave: filter the notice with the configured mode.
     *
     * @param object $notice notice being saved; its content and rendered
     *                       properties are rewritten in place
     * @return bool always true
     * @throws Exception in web service mode, when SOAP is unavailable or the
     *                   remote call fails
     */
    function onStartNoticeSave($notice) {
        if ($this->useWebService) {
            $this->_webServiceReplace($notice);
        } else {
            $this->_localReplace($notice);
        }
        // NOTE(review): made the result explicit; StatusNet handlers are
        // expected to return true to let processing continue - confirm
        // against the event dispatcher of the installed version.
        return true;
    }

    /*
     * Replace using the local config file replacements
     * TODO Move local search/replace lists to the database when Status.net admin interface is ready
     */
    private function _localReplace($notice) {
        // Get search and replace arrays from the config file
        $search = common_config('wordfilter', 'search');
        $replace = common_config('wordfilter', 'replace');

        // No word list configured: there is nothing to filter, so leave the
        // notice untouched instead of handing a non-list to str_ireplace().
        if (empty($search)) {
            return;
        }
        // A missing replacement list means "replace with nothing"; make that
        // explicit rather than relying on implicit conversion.
        if (!is_array($replace) && !is_string($replace)) {
            $replace = '';
        }

        // Wrap the notice in spaces since search terms are space delimited
        // (faster and easier than regex); this lets ' word ' terms match at
        // the very start and end of the text.
        $padded = ' '.$notice->content.' ';

        // Replace any strings found (case insensitive), then trim the
        // padding (and any other surrounding whitespace) off again.
        $notice->content = trim(str_ireplace($search, $replace, $padded));

        // Re-render the filtered content and update the rendered notice content
        $notice->rendered = common_render_content($notice->content, $notice);
    }

    /*
     * Replace using the cdyne profanity webservice
     * cdyne also offers a more flexible filter but it requires registration
     *
     * Throws an Exception when the SOAP extension is missing; any exception
     * raised by the SOAP client itself is left to propagate to the caller.
     */
    private function _webServiceReplace($notice)
    {
        // Test for SOAP. The class name must be passed as a string: a bare
        // SoapClient is read as a constant, which is an Error on PHP 8.
        if (!class_exists('SoapClient')) {
            throw new Exception("WordFilterPlugin webservice mode requires SOAP");
        }

        // Setup the SOAP client
        $cdyne = new SoapClient("http://ws.cdyne.com/ProfanityWS/Profanity.asmx?WSDL");

        // Run the remote SOAP call
        $result = $cdyne->ProfanityFilter(array(
            'Text' => $notice->content,
            'LevelToClean' => $this->levelToClean,
            'UseNumberFilter' => $this->useNumberFilter));

        $filtered = $result->ProfanityFilterResult;

        // Nothing found: keep the notice exactly as it was submitted
        if (1 != $filtered->FoundProfanity) {
            return;
        }

        // Replace the default replacement string "[Explicit]" with the one
        // supplied in config, if any. The cast keeps strlen() from being
        // handed null when no replacement was configured.
        if (strlen((string) $this->vaynerchuk) > 0) {
            $notice->content = str_replace('[Explicit]', $this->vaynerchuk, $filtered->CleanText);
        } else {
            $notice->content = $filtered->CleanText;
        }

        // Re-render the filtered content and update the rendered notice content
        $notice->rendered = common_render_content($notice->content, $notice);
    }
}
Update
Regarding longer replacements, according to thefrogman
longer words show up in their entirety on army [web interface] but get cutoff in twhirl [clients]. Didn't seem to break anything. So it's not a major problem if the replacement string is longer than the original.
Update v0.3
Version 0.3 changes things around a bit to prevent false positives.
Update v0.4 2010/01/06
Updated comments and documentation for Status.net v0.9
Update v0.5 2010/01/07
Added ability to use Cdyne's profanity filter webservice instead of a word list
Improved performance by rendering filtered content rather than filtering rendered content
Click the "Revisions" tab above to see the old versions
- Code /
- Laconica /
- Linux FLOSS /
- microblogging /
- php /
- plugin /
- profanity /
- status.net /


