Skip to content

Commit

Permalink
ADDED: support for case sensitive search.
Browse files Browse the repository at this point in the history
    For this, the database should be created using dictfmt --case-sensitive.

   In this case a 00-database-case-sensitive headword is created, which is
   checked by dictd.
  • Loading branch information
cheusov committed May 12, 2007
1 parent 1688b6e commit 5470376
Show file tree
Hide file tree
Showing 14 changed files with 298 additions and 65 deletions.
4 changes: 3 additions & 1 deletion defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: defs.h,v 1.11 2005/03/29 17:55:50 cheusov Exp $
* $Id: defs.h,v 1.12 2007/05/12 13:53:32 cheusov Exp $
*/

#ifndef _DEFS_H_
Expand Down Expand Up @@ -48,6 +48,7 @@
#define DICT_FLAG_8BIT_NEW DICT_ENTRY_PREFIX"-8bit-new"
#define DICT_FLAG_8BIT_OLD DICT_ENTRY_PREFIX"-8bit"
#define DICT_FLAG_ALLCHARS DICT_ENTRY_PREFIX"-allchars"
#define DICT_FLAG_CASESENSITIVE DICT_ENTRY_PREFIX"-case-sensitive"
#define DICT_FLAG_VIRTUAL DICT_ENTRY_PREFIX"-virtual"
#define DICT_FLAG_ALPHABET DICT_ENTRY_PREFIX"-alphabet"
#define DICT_FLAG_DEFAULT_STRAT DICT_ENTRY_PREFIX"-default-strategy"
Expand Down Expand Up @@ -175,6 +176,7 @@ typedef struct dictIndex {
int flag_utf8; /* not zero if it has 00-database-utf8 entry*/
int flag_8bit; /* not zero if it has 00-database-8bit-new entry*/
int flag_allchars; /* not zero if it has 00-database-allchars entry*/
int flag_casesensitive;/* not zero if it has 00-database-case-sensitive entry*/

const int *isspacealnum;
} dictIndex;
Expand Down
10 changes: 5 additions & 5 deletions dictd.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: dictd.c,v 1.136 2006/12/12 21:20:22 cheusov Exp $
* $Id: dictd.c,v 1.137 2007/05/12 13:53:32 cheusov Exp $
*
*/

Expand Down Expand Up @@ -839,7 +839,7 @@ static int init_database( const void *datum )
PRINTF (DBG_INIT, (":I: Opening indices\n"));
}

db->index = dict_index_open( db->indexFilename, 1, 0, 0 );
db->index = dict_index_open( db->indexFilename, 1, NULL );

if (db->indexFilename){
PRINTF (DBG_INIT, (":I: .index <ok>\n"));
Expand All @@ -848,10 +848,10 @@ static int init_database( const void *datum )
if (db->index){
db->index_suffix = dict_index_open(
db->indexsuffixFilename,
0, db->index->flag_utf8, db->index->flag_allchars);
0, db->index);
db->index_word = dict_index_open(
db->indexwordFilename,
0, db->index->flag_utf8, db->index->flag_allchars);
0, db->index);
}

if (db->index_suffix){
Expand Down Expand Up @@ -1123,7 +1123,7 @@ const char *dict_get_banner( int shortFlag )
{
static char *shortBuffer = NULL;
static char *longBuffer = NULL;
const char *id = "$Id: dictd.c,v 1.136 2006/12/12 21:20:22 cheusov Exp $";
const char *id = "$Id: dictd.c,v 1.137 2007/05/12 13:53:32 cheusov Exp $";
struct utsname uts;

if (shortFlag && shortBuffer) return shortBuffer;
Expand Down
57 changes: 43 additions & 14 deletions dictfmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: dictfmt.c,v 1.67 2006/05/27 14:25:58 cheusov Exp $
* $Id: dictfmt.c,v 1.68 2007/05/12 13:53:32 cheusov Exp $
*
* Sun Jul 5 18:48:33 1998: added patches for Gutenberg's '1995 CIA World
* Factbook' from David Frey <[email protected]>.
Expand Down Expand Up @@ -61,6 +61,8 @@

#define BSIZE 10240

/* "\034" is an octal escape: a one-character string holding byte 0x1C
   (decimal 28, the ASCII FS "file separator" control character).
   NOTE(review): presumably used to separate fields in index data --
   confirm at the use sites. */
#define IDXDATSEP "\034"

static int Debug;
static FILE *str;

Expand All @@ -71,6 +73,7 @@ static int bit8_mode = 0;
static int index_keep_orig_mode = 0;

static int allchars_mode = 0;
static int cs_mode = 0;

static int quiet_mode = 0;

Expand All @@ -96,6 +99,7 @@ static int ignore_hw_shortname = 0;
static int ignore_hw_info = 0;
static int ignore_hw_def_strat = 0;

static const char *idxdatsep = IDXDATSEP;
static const char *locale = NULL;
static const char *default_strategy = NULL;
static const char *mime_header = NULL;
Expand Down Expand Up @@ -446,7 +450,9 @@ static void write_hw_to_index (const char *word, int start, int end)
destroy_and_exit (1);
}

if (tolower_alnumspace (word, new_word, allchars_mode, utf8_mode)){
if (tolower_alnumspace (
word, new_word, allchars_mode, cs_mode, utf8_mode))
{
fprintf (stderr, "'%s' is not a UTF-8 string", word);

destroy_and_exit (1);
Expand Down Expand Up @@ -496,7 +502,7 @@ static void update_alphabet (const char *word)

len = strlen (word);
p = (char *) alloca (len + 1);
tolower_alnumspace (word, p, allchars_mode, utf8_mode);
tolower_alnumspace (word, p, allchars_mode, cs_mode, utf8_mode);

memset (&ps, 0, sizeof (ps));

Expand All @@ -513,22 +519,17 @@ static void update_alphabet (const char *word)
}
}

static void fmt_newheadword( const char *word )
/* return 1 if word should be skipped */
static int fmt_newheadword_special (const char *word)
{
static char prev[1024] = "";
static int start = 0;
static int end;
char * sep = NULL;
char * p;

if (
word &&
(!strcmp (word, "00-database-default-strategy") ||
!strcmp (word, "00databasedefaultstrategy")))
{
if (ignore_hw_def_strat){
fmt_ignore_headword = 1;
return;
return 1;
}

/* we will ignore all the following occurrences of 00-database-default-strategy */
Expand All @@ -542,7 +543,7 @@ static void fmt_newheadword( const char *word )
{
if (ignore_hw_url){
fmt_ignore_headword = 1;
return;
return 1;
}

/* we will ignore all the following occurrences of 00-database-url */
Expand All @@ -556,7 +557,7 @@ static void fmt_newheadword( const char *word )
{
if (ignore_hw_shortname){
fmt_ignore_headword = 1;
return;
return 1;
}

/* we will ignore all the following occurrences of 00-database-short */
Expand All @@ -570,13 +571,27 @@ static void fmt_newheadword( const char *word )
{
if (ignore_hw_info){
fmt_ignore_headword = 1;
return;
return 1;
}

/* we will ignore all the following occurrences of 00-database-info */
ignore_hw_info = 1;
}

return 0;
}

static void fmt_newheadword( const char *word )
{
static char prev[1024] = "";
static int start = 0;
static int end;
char * sep = NULL;
char * p;

if (fmt_newheadword_special (word))
return;

update_alphabet (word);

fmt_ignore_headword = 0;
Expand Down Expand Up @@ -731,6 +746,7 @@ static void help( FILE *out_stream )
in the .dict file. For use with '--headword-separator.",
"--index-keep-orig fourth column in .index file stores original headword\n\
which is returned by MATCH command",
"--case-sensitive Create .index/.dict files for case sensitive search",
"--without-headword headwords will not be copied to .dict file",
"--without-header header will not be copied to DB info entry",
"--without-url URL will not be copied to DB info entry",
Expand Down Expand Up @@ -943,6 +959,14 @@ static void fmt_headword_for_allchars (void)
}
}

/*
 * When case-sensitive mode (cs_mode) is enabled, start the
 * "00-database-case-sensitive" headword and then call fmt_newline().
 * Does nothing when cs_mode is zero.
 */
static void fmt_headword_for_casesensitive (void)
{
   if (!cs_mode)
      return;

   fmt_newheadword ("00-database-case-sensitive");
   fmt_newline ();
}

/* ...before reading the input */
static void fmt_predefined_headwords_before ()
{
Expand All @@ -952,6 +976,7 @@ static void fmt_predefined_headwords_before ()
fmt_headword_for_utf8 ();
fmt_headword_for_8bit ();
fmt_headword_for_allchars ();
fmt_headword_for_casesensitive ();
fmt_headword_for_def_strat ();
fmt_headword_for_MIME_header ();

Expand Down Expand Up @@ -1018,6 +1043,7 @@ int main( int argc, char **argv )
{ "mime-header", 1, 0, 513 },
{ "utf8", 0, 0, 514 },
{ "index-keep-orig", 0, 0, 515 },
{ "case-sensitive", 0, 0, 516 },
};

init (argv[0]);
Expand Down Expand Up @@ -1091,6 +1117,9 @@ int main( int argc, char **argv )
case 515:
index_keep_orig_mode = 1;
break;
case 516:
cs_mode = 1;
break;
case 't':
without_info = 1;
without_hw = 1;
Expand Down
Loading

0 comments on commit 5470376

Please sign in to comment.