/*
 * Adaptation of Teragram dictionaries to Ruby 
 */
#include <ruby.h>
#include <vulgarityfilter.h>		// Class: Dictionaries
#include "teragram.h"

/* Uncomment the next line to compile in the debug tracing used in this file. */
//#define _VERBOSE_DEBUG
#ifdef _VERBOSE_DEBUG
#	include <stdio.h>
/* Debug build: trace calls are forwarded to printf (stdout). */
#	define teragram_printf(...) printf(__VA_ARGS__)
#else
/* Normal build: trace calls expand to nothing, so their arguments are never evaluated. */
#	define teragram_printf(...)
#endif

/*
 * Dictionary state shared by every Teragram object in the process: it is
 * filled by teragram_initialize() through load_vulgarity_dictionaries() and
 * read by teragram_filter() and teragram_is_vulgar().
 */
Dictionaries gstDicts;

/* Handles for the Ruby "Teragram" class and its exception class; both are assigned in Init_teragram(). */
VALUE rb_cTeragram, rb_eTeragramException;

/*
 * Teragram#filter(string) -> String
 *
 * Runs the text through the full-word vulgarity mask and then through the
 * substring mask, and returns the masked text as a NEW Ruby String (the
 * caller's string is not modified).
 *
 * Raises Teragram::Exception when a working buffer cannot be allocated or
 * when either masking pass returns something other than FILTER_SUCCESS.
 * Nothing is returned in those cases: if the text cannot be proven clean,
 * none of it may be handed back.
 *
 * rb_raise() never returns to its caller, so every buffer is released
 * BEFORE raising; all exits funnel through the single cleanup label.
 */
VALUE teragram_filter(VALUE self, VALUE string) 
{
	VALUE RetVal = Qnil;
	size_t len = 0;
	int iVulgarCounter = 0;		/* running match count written by both passes; not reported */
	char* acTmpMsg = NULL;		/* output of the full-word pass */
	char* acOutput = NULL;		/* output of the substring pass */
	const char* errMsg = NULL;	/* set on failure, raised after cleanup */
	char* input_text = NULL;

	teragram_printf( "%s\n", __FUNCTION__ );

	/* NOTE(review): StringValuePtr raises TypeError for non-strings; text after
	 * an embedded NUL byte is ignored because the filter works on C strings. */
	input_text = StringValuePtr(string);
	if( !input_text ) 
		return Qnil;	/* Give me nothing, and I'll give you nothing in return. */

#if defined _VERBOSE_DEBUG
	fprintf(stderr, "DEBUG: word = '%s'\n", input_text);
#endif

	len = strlen(input_text);

	/* Both passes write into a buffer the size of the input plus terminator. */
	acTmpMsg = malloc(len + 1);
	acOutput = malloc(len + 1);
	if( !acTmpMsg || !acOutput ) 
	{
		errMsg = "Vulgarity filter is out of memory";
		goto cleanup;
	}

	/* Pass 1: SCE-RT full-word vulgarity filter, input_text -> acTmpMsg. */
	if( iMaskVulgarWords(&gstDicts, input_text, acTmpMsg, &iVulgarCounter) != FILTER_SUCCESS ) 
	{
		errMsg = "Failed to perform full-word filtering on input text";
		goto cleanup;
	}

	/* Pass 2: substring filter, acTmpMsg -> acOutput.
	 * NOTE(review): the meaning of the literal 1 is defined by the filter
	 * library and is not visible from this file. */
	if( iMaskSubstringMatches(gstDicts.fpat, 1, acTmpMsg, acOutput, &iVulgarCounter) != FILTER_SUCCESS ) 
	{
		errMsg = "Failed to perform substring filtering on input text";
		goto cleanup;
	}

	/* Copy the C string into a Ruby String before the buffers go away. */
	RetVal = rb_str_new2(acOutput);

cleanup:
	free(acOutput);		/* free(NULL) is a no-op, so partial allocation is fine */
	free(acTmpMsg);

	if( errMsg )
		rb_raise(rb_eTeragramException, "%s", errMsg);

	return RetVal;
}


/*
 * Teragram#dictdir -> the value stored by initialize
 *
 * This function is registered with rb_define_method(), so at run time the
 * argument is really the receiver VALUE. The teragram_t* parameter type is
 * kept so that any existing prototype (e.g. in teragram.h, not visible
 * here) still matches; the value is cast back to VALUE before it is handed
 * to the Ruby API.
 *
 * Returns nil when called with a null receiver.
 */
VALUE teragram_dictdir(teragram_t* self) 
{
	teragram_t* teragram = NULL;

	teragram_printf( "%s(self = %p)\n", __FUNCTION__, (void*)self );

	/* Check before use: unwrapping must not be attempted on a null receiver. */
	if( !self )
		return Qnil;

	Data_Get_Struct((VALUE)self, teragram_t, teragram);
	teragram_printf( "teragram->dictdir = '%s'\n", StringValuePtr(teragram->dictdir) );

	return teragram->dictdir;
}


/*
 * Mark callback handed to Data_Wrap_Struct() in teragram_allocate():
 * reports the dictdir VALUE held inside the struct to the garbage collector.
 */
void teragram_mark(teragram_t* self) 
{
	VALUE held = self->dictdir;

	rb_gc_mark(held);
}


/*
 * Free callback handed to Data_Wrap_Struct() in teragram_allocate():
 * releases the struct that teragram_allocate() obtained from malloc().
 */
void teragram_free(teragram_t* self) 
{
	void* block = self;

	free(block);
}


/*
 * Allocator registered for the Teragram class in Init_teragram().
 *
 * Creates a teragram_t with dictdir preset to nil and wraps it in a Ruby
 * object of the given class, with teragram_mark / teragram_free as its GC
 * callbacks.
 *
 * Raises NoMemoryError (via rb_memerror) if the struct cannot be allocated.
 */
VALUE teragram_allocate(VALUE klass) 
{
	teragram_t *t = malloc(sizeof *t);

	if( !t )
		rb_memerror();	/* does not return */

	/* Must be a valid VALUE before the object is wrapped: the GC may call teragram_mark at any time. */
	t->dictdir = Qnil;
	return Data_Wrap_Struct(klass, teragram_mark, teragram_free, t);
}


/*
 * Teragram#is_vulgar?(string) -> true or false
 *
 * Returns true when either the SCE-RT full-word check or the substring
 * check reports FILTER_VULGAR or FILTER_FAILED for the text (a failed check
 * is treated the same as a vulgar one). The substring check only runs when
 * the full-word check reported neither. Empty text is never vulgar.
 */
VALUE teragram_is_vulgar(VALUE klass, VALUE string) 
{
	char* text = StringValuePtr(string);
	int verdict;

	/* Nothing to inspect. */
	if( !text || text[0] == '\0' )
		return Qfalse;

	/* Full-word detection first. */
	verdict = iDetermineVulgarity(&gstDicts, text);
	if( verdict == FILTER_VULGAR || verdict == FILTER_FAILED )
		return Qtrue;

	/* Then the substring patterns. */
	verdict = iDetermineSubStringMatch(gstDicts.fpat, text);

	return ( verdict == FILTER_VULGAR || verdict == FILTER_FAILED ) ? Qtrue : Qfalse;
}


/*
 * Teragram#initialize(dictdir)
 *
 * Remembers dictdir on the object and loads the vulgarity dictionaries
 * found under that directory into the process-wide gstDicts.
 *
 * Raises ArgumentError if dictdir does not respond to to_s, TypeError if it
 * cannot be converted to a String, and ArgumentError if the String contains
 * a NUL byte (StringValueCStr guarantees a proper NUL-terminated C path).
 *
 * NOTE(review): the result of load_vulgarity_dictionaries() is only traced,
 * never checked -- its success value is not visible from this file. A
 * failed load therefore goes unreported; confirm the return convention and
 * raise Teragram::Exception on failure.
 */
VALUE teragram_initialize(VALUE self, VALUE dictdir) 
{
	teragram_t* teragram;
	char* path;
	int ret;

	if( !rb_respond_to(dictdir, rb_intern("to_s")) )
		rb_raise(rb_eArgError, "dictdir must be a string that responds to to_s");

	Data_Get_Struct(self, teragram_t, teragram);
	teragram->dictdir = dictdir;

	/* Convert only after the argument check so the debug trace below cannot raise first. */
	path = StringValueCStr(dictdir);
	teragram_printf( "%s( 0x%lx, '%s' )\n", __FUNCTION__, (unsigned long)self, path );

	ret = load_vulgarity_dictionaries(path, &gstDicts);
	teragram_printf( "load_vulgarity_dictionaries returned %d\n", ret );
	(void)ret;	/* only consumed by the trace above when _VERBOSE_DEBUG is defined */

	return self;
}


/*
 * Extension entry point, called by Ruby when the "teragram" library is
 * loaded. Defines the Teragram class, its Teragram::Exception error class
 * (a StandardError), the allocator, and the instance methods initialize,
 * dictdir, filter and is_vulgar?.
 */
void Init_teragram(void) 
{
	teragram_printf( "%s\n", __FUNCTION__ );
	rb_cTeragram			= rb_define_class( "Teragram", rb_cObject );
	rb_eTeragramException	= rb_define_class_under( rb_cTeragram, "Exception", rb_eStandardError );

	rb_define_alloc_func(rb_cTeragram, teragram_allocate);

	rb_define_method(rb_cTeragram, "initialize", teragram_initialize, 1);
	/* teragram_dictdir is declared with a teragram_t* parameter rather than
	 * VALUE, so it needs the generic method-pointer cast to be accepted as an
	 * arity-0 method. */
	rb_define_method(rb_cTeragram, "dictdir", RUBY_METHOD_FUNC(teragram_dictdir), 0);
	rb_define_method(rb_cTeragram, "filter", teragram_filter, 1);
	rb_define_method(rb_cTeragram, "is_vulgar?", teragram_is_vulgar, 1);
}

