Status.net Word Filter Plugin (Wordfilter)

The TWiT Network has pretty strict rules about profanity across all channels, including the netcasts, chatrooms and the TWiT Army Canteen. There are usually moderators watching the IRC channel and the microblog, but once in a while some profanity slips through the cracks. So I wrote a little word filter to catch profanity before it is posted. I'm not sure if they'll use it, but it was fun to write. Sorry about the profanity in this post, but it's sort of necessary.

Installation

Add one of the following to config.php and save the plugin to 'local/WordfilterPlugin.php'.

// Option 1: use the cdyne profanity webservice
 addPlugin('Wordfilter', array('useWebService'   => TRUE,
        'levelToClean'    => 10, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
        'useNumberFilter'  => TRUE, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
        'vaynerchuk'      => 'kitty' // Optional replacement string. Default is "[Explicit]"
        ));

// Option 2: use a local search / replace list (defined in config.php, see below)
addPlugin('Wordfilter', array('useWebService' => FALSE));

# === List search / replace terms here ===

/*
 * Each 'search' entry is paired, by position, with the 'replace' entry added
 * right after it, so always add the two together.
 *
 * Replacements should be no longer than the term they replace; a longer
 * replacement lengthens the notice and may get cut off in some clients.
 * A search term is replaced even when it occurs in the middle of a word.
 * For instance "fuck" will sanitize "motherfucker", but this can cause false positives.
 * For instance, 'twat' will falsely sanitize "wristwatch".
 * Use spaces in search terms as delimiters to tweak this.

     'twat'    - will be matched anywhere, even within words
     ' twat'   - matches words beginning with "twat"
     'twat '   - matches words ending with "twat"
     ' twat '  - only matches the word "twat"

 */


$config['wordfilter']['search'][] = 'blatherskite'; // harmless test term, so you don't have to swear on your own site
$config['wordfilter']['replace'][] = 'blatherin';

$config['wordfilter']['search'][] = ' twat ';
$config['wordfilter']['replace'][] = ' tool ';

$config['wordfilter']['search'][] = ' cock ';
$config['wordfilter']['replace'][] = ' hen ';

$config['wordfilter']['search'][] = 'fuck';
$config['wordfilter']['replace'][] = 'frak';

$config['wordfilter']['search'][] = 'shit';
$config['wordfilter']['replace'][] = 'poop';

$config['wordfilter']['search'][] = 'bitch';
$config['wordfilter']['replace'][] = 'dog';

/* Alternate list syntax: assigns both lists in one go (this overwrites any
   entries appended earlier, so use one syntax or the other, not both).
$config['wordfilter']['search'] = array('fuck', 'shit', 'bitch');
$config['wordfilter']['replace'] = array('frak', 'poop', 'dog');
*/

Plugin source code

<?php if (!defined('STATUSNET')) exit(1);

/**
 * Wordfilter Plugin
 *
 * Filters profanity out of notices before they are saved, using either the
 * cdyne profanity web service or a local search / replace list from config.php.
 *
 * @category Plugin
 * @package  Statusnet
 * @author   Kyle Hasegawa  @kylehase
 * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @version  Wordfilter.php,v 0.5 2010/01/07 23:20:54 +0900
 *
 *
 * To use this plugin add one of the following to your config.php
 *
 *  // To use the cdyne profanity webservice
 *  addPlugin('Wordfilter', array('useWebService'   => TRUE,
 *         'levelToClean'    => 10, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
 *         'useNumberFilter'  => TRUE, // See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
 *         'vaynerchuk'      => 'kitty' // Optional. Default is "[Explicit]"
 *         ));
 *
 *
 *  // To use a local search / replace list
 *  addPlugin('Wordfilter', array('useWebService' => FALSE));
 *
 *  You'll also need to create the word list. See http://kylehasegawa.com/statusnet-word-filter-plugin-wordfilter
 *
 */


/**
 * Sanitizes notice text before it is saved.
 *
 * Depending on $useWebService, profanity is replaced either by the cdyne
 * profanity web service (SOAP) or by a local search / replace list read from
 * the 'wordfilter' section of the site config.
 */
class WordfilterPlugin extends Plugin {

    // Option to use the cdyne profanity filter web service
    public $useWebService;

    // cdyne profanity filter options. See http://ws.cdyne.com/ProfanityWS/Profanity.asmx
    public $levelToClean;
    public $useNumberFilter;

    // Replacement for profanity. ("vaynerchuk" is the profanity replacement string on TWiT.tv's IRC)
    public $vaynerchuk;

    /**
     * @param mixed $webSvc    truthy to use the cdyne web service, falsy to use the local list
     * @param mixed $webSvcLev value sent to the web service as 'LevelToClean'
     * @param mixed $numFil    value sent to the web service as 'UseNumberFilter'
     * @param mixed $vayner    optional string that replaces the service's "[Explicit]" marker
     */
    function __construct($webSvc=NULL, $webSvcLev=NULL, $numFil=NULL, $vayner=NULL) {
        $this->useWebService = $webSvc;
        // Store the level in the property that _webServiceReplace() actually reads.
        $this->levelToClean = $webSvcLev;
        $this->useNumberFilter = $numFil;
        $this->vaynerchuk = $vayner;
        parent::__construct();
    }

    /**
     * Hook StartNoticeSave: filter the notice content before it is stored.
     *
     * @param object $notice notice being saved; its content and rendered
     *                       properties may be rewritten in place
     *
     * @return bool always true, so the filter never blocks the save itself
     *              (StatusNet event-handler convention: only false stops processing)
     */
    function onStartNoticeSave($notice) {
        if ($this->useWebService) {
            $this->_webServiceReplace($notice);
        } else {
            $this->_localReplace($notice);
        }
        return true;
    }

    /*
     * Replace using the local config file replacements
     * TODO Move local search/replace lists to the database when Status.net admin interface is ready
     */
    private function _localReplace($notice) {
        // Get search and replace arrays from the config file
        $search = common_config('wordfilter','search');
        $replace = common_config('wordfilter','replace');

        // No word list configured: nothing to filter, leave the notice untouched
        if (empty($search)) {
            return;
        }

        // Wrap notice in spaces since search terms are space delimited (faster and easier than regex)
        $padded = ' '.$notice->content.' ';

        // Replace any strings found (case insensitive), then trim the padding again
        $notice->content = trim(str_ireplace($search, $replace, $padded));

        // Re-render the filtered content and update the rendered notice content
        $notice->rendered = common_render_content($notice->content, $notice);
    }

    /*
     * Replace using the cdyne profanity webservice
     * cdyne also offers a more flexible filter but it requires registration
     *
     * Throws Exception when the SOAP extension is missing; any exception raised
     * by the SOAP client or the remote call propagates to the caller unchanged.
     */
    private function _webServiceReplace($notice)
    {
        // Test for SOAP (class name must be passed as a string)
        if (!class_exists('SoapClient')) {
            throw new Exception("WordFilterPlugin webservice mode requires SOAP");
        }

        // Setup the SOAP client
        $cdyne = new SoapClient("http://ws.cdyne.com/ProfanityWS/Profanity.asmx?WSDL");

        // Run the remote SOAP call
        $result = $cdyne->ProfanityFilter(array(
            'Text'            => $notice->content,
            'LevelToClean'    => $this->levelToClean,
            'UseNumberFilter' => $this->useNumberFilter));

        $filtered = $result->ProfanityFilterResult;

        // Nothing found: keep the notice exactly as submitted
        if (1 != $filtered->FoundProfanity) {
            return;
        }

        $clean = $filtered->CleanText;

        // Replace the default replacement string "[Explicit]" with one supplied in config, if any
        // (cast so an unset/NULL option is treated as "no custom replacement")
        if (strlen((string) $this->vaynerchuk) > 0) {
            $clean = str_replace('[Explicit]', $this->vaynerchuk, $clean);
        }

        $notice->content = $clean;
        $notice->rendered = common_render_content($notice->content, $notice);
    }
}

Update

Regarding longer replacements, thefrogman reports:

"longer words show up in their entirety on army [web interface] but get cutoff in twhirl [clients]. Didn't seem to break anything"
So it's not a major problem if the replacement string is longer than the original.

Update v0.3

Version 0.3 changes things around a bit to prevent false positives.

Update v0.4 2010/01/06

Updated comments and documentation for Status.net v0.9

Update v0.5 2010/01/07

Added ability to use Cdyne's profanity filter webservice instead of a word list
Improved performance by rendering filtered content rather than filtering rendered content

Click the "Revisions" tab above to see the old versions

All code on this site is free for use at your own risk and provided as-is under the WTFPL license unless otherwise stated. Attribution is appreciated but not required.
Blog content, with the exception of externally quoted material, is licensed under the Creative Commons Attribution 3.0 license