• Skip to content
  • Skip to link menu
Trinity API Reference
  • Trinity API Reference
  • tdespell2
 

tdespell2

  • tdespell2
  • plugins
  • ispell
ispell_checker.cpp
1 /* tdespell2 - adopted from Enchant
2  * Copyright (C) 2003 Dom Lachowicz
3  * Copyright (C) 2004 Zack Rusin <zack@kde.org>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  * In addition, as a special exception, Dom Lachowicz
21  * gives permission to link the code of this program with
22  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
23  * spell checker backend) and distribute linked combinations including
24  * the two. You must obey the GNU Lesser General Public License in all
25  * respects for all of the code used other than said providers. If you modify
26  * this file, you may extend this exception to your version of the
27  * file, but you are not obligated to do so. If you do not wish to
28  * do so, delete this exception statement from your version.
29  */
30 
31 #include <config.h>
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include <string>
38 #include <vector>
39 
40 #include "sp_spell.h"
41 #include "ispell_checker.h"
42 
43 #include <tqmap.h>
44 #include <tqdir.h>
45 #include <tqfileinfo.h>
46 
47 /***************************************************************************/
48 
/**
 * One row of the language lookup table: a locale tag, the ispell hash
 * file that serves it, and the name of the encoding used with that file.
 */
typedef struct str_ispell_map
{
	const char *lang;	/* locale tag, e.g. "en_US" or "en" */
	const char *dict;	/* hash file name, e.g. "american.hash" */
	const char *enc;	/* encoding name, e.g. "iso-8859-1" */
} IspellMap;
55 
56 static const char *ispell_dirs [] = {
57 #ifdef ISPELL_LIBDIR
58  ISPELL_LIBDIR,
59 #else
60  "/usr/" SYSTEM_LIBDIR "/ispell",
61  "/usr/lib/ispell",
62  "/usr/local/" SYSTEM_LIBDIR "/ispell",
63  "/usr/local/lib/ispell",
64  "/usr/local/share/ispell",
65  "/usr/share/ispell",
66  "/usr/pkg/lib",
67 #endif
68  0
69 };
70 static const IspellMap ispell_map [] = {
71  {"ca" ,"catala.hash" ,"iso-8859-1" },
72  {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
73  {"cs" ,"czech.hash" ,"iso-8859-2" },
74  {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
75  {"da" ,"dansk.hash" ,"iso-8859-1" },
76  {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
77  {"de" ,"deutsch.hash" ,"iso-8859-1" },
78  {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
79  {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
80  {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
81  {"el" ,"ellhnika.hash" ,"iso-8859-7" },
82  {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
83  {"en" ,"british.hash" ,"iso-8859-1" },
84  {"en_AU" ,"british.hash" ,"iso-8859-1" },
85  {"en_BZ" ,"british.hash" ,"iso-8859-1" },
86  {"en_CA" ,"british.hash" ,"iso-8859-1" },
87  {"en_GB" ,"british.hash" ,"iso-8859-1" },
88  {"en_IE" ,"british.hash" ,"iso-8859-1" },
89  {"en_JM" ,"british.hash" ,"iso-8859-1" },
90  {"en_NZ" ,"british.hash" ,"iso-8859-1" },
91  {"en_TT" ,"british.hash" ,"iso-8859-1" },
92  {"en_ZA" ,"british.hash" ,"iso-8859-1" },
93  {"en_ZW" ,"british.hash" ,"iso-8859-1" },
94  {"en_PH" ,"american.hash" ,"iso-8859-1" },
95  {"en_US" ,"american.hash" ,"iso-8859-1" },
96  {"eo" ,"esperanto.hash" ,"iso-8859-3" },
97  {"es" ,"espanol.hash" ,"iso-8859-1" },
98  {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
99  {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
100  {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
101  {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
102  {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
103  {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
104  {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
105  {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
106  {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
107  {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
108  {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
109  {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
110  {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
111  {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
112  {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
113  {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
114  {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
115  {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
116  {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
117  {"fi" ,"finnish.hash" ,"iso-8859-1" },
118  {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
119  {"fr" ,"francais.hash" ,"iso-8859-1" },
120  {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
121  {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
122  {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
123  {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
124  {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
125  {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
126  {"hu" ,"hungarian.hash" ,"iso-8859-2" },
127  {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
128  {"ga" ,"irish.hash" ,"iso-8859-1" },
129  {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
130  {"gl" ,"galician.hash" ,"iso-8859-1" },
131  {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
132  {"ia" ,"interlingua.hash" ,"iso-8859-1" },
133  {"it" ,"italian.hash" ,"iso-8859-1" },
134  {"it_IT" ,"italian.hash" ,"iso-8859-1" },
135  {"it_CH" ,"italian.hash" ,"iso-8859-1" },
136  {"la" ,"mlatin.hash" ,"iso-8859-1" },
137  {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
138  {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
139  {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
140  {"nl" ,"nederlands.hash" ,"iso-8859-1" },
141  {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
142  {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
143  {"nb" ,"norsk.hash" ,"iso-8859-1" },
144  {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
145  {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
146  {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
147  {"no" ,"norsk.hash" ,"iso-8859-1" },
148  {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
149  {"pl" ,"polish.hash" ,"iso-8859-2" },
150  {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
151  {"pt" ,"brazilian.hash" ,"iso-8859-1" },
152  {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
153  {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
154  {"ru" ,"russian.hash" ,"koi8-r" },
155  {"ru_MD" ,"russian.hash" ,"koi8-r" },
156  {"ru_RU" ,"russian.hash" ,"koi8-r" },
157  {"sc" ,"sardinian.hash" ,"iso-8859-1" },
158  {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
159  {"sk" ,"slovak.hash" ,"iso-8859-2" },
160  {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
161  {"sl" ,"slovensko.hash" ,"iso-8859-2" },
162  {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
163  {"sv" ,"svenska.hash" ,"iso-8859-1" },
164  {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
165  {"uk" ,"ukrainian.hash" ,"koi8-u" },
166  {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
167  {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
168 };
169 
170 static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
171 static TQMap<TQString, TQString> ispell_dict_map;
172 
173 
174 void
175 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
176 {
177  if (inEncoding && strlen(inEncoding))
178  {
179  m_translate_in = TQTextCodec::codecForName(inEncoding);
180  }
181 }
182 
183 /***************************************************************************/
184 /***************************************************************************/
185 
186 ISpellChecker::ISpellChecker()
187  : deftflag(-1),
188  prefstringchar(-1),
189  m_bSuccessfulInit(false),
190  m_BC(NULL),
191  m_cd(NULL),
192  m_cl(NULL),
193  m_cm(NULL),
194  m_ho(NULL),
195  m_nd(NULL),
196  m_so(NULL),
197  m_se(NULL),
198  m_ti(NULL),
199  m_te(NULL),
200  m_hashstrings(NULL),
201  m_hashtbl(NULL),
202  m_pflaglist(NULL),
203  m_sflaglist(NULL),
204  m_chartypes(NULL),
205  m_infile(NULL),
206  m_outfile(NULL),
207  m_askfilename(NULL),
208  m_Trynum(0),
209  m_translate_in(0)
210 {
211  memset(m_sflagindex,0,sizeof(m_sflagindex));
212  memset(m_pflagindex,0,sizeof(m_pflagindex));
213 }
214 
/* Release a malloc()ed pointer if it is non-null; safe as a single statement. */
#ifndef FREEP
#define FREEP(p) do { if ((p) != NULL) free(p); } while (0)
#endif
218 
219 ISpellChecker::~ISpellChecker()
220 {
221  if (m_bSuccessfulInit) {
222  // only cleanup our mess if we were successfully initialized
223 
224  clearindex (m_pflagindex);
225  clearindex (m_sflagindex);
226  }
227 
228  FREEP(m_hashtbl);
229  FREEP(m_hashstrings);
230  FREEP(m_sflaglist);
231  FREEP(m_chartypes);
232 
233  delete m_translate_in;
234  m_translate_in = 0;
235 }
236 
237 bool
238 ISpellChecker::checkWord( const TQString& utf8Word )
239 {
240  ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
241  if (!m_bSuccessfulInit)
242  return false;
243 
244  if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
245  return false;
246 
247  bool retVal = false;
248  TQCString out;
249  if (!m_translate_in)
250  return false;
251  else {
252  /* convert to 8bit string and null terminate */
253  int len_out = utf8Word.length();
254 
255  out = m_translate_in->fromUnicode( utf8Word, len_out );
256  }
257 
258  if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
259  {
260  if (good(iWord, 0, 0, 1, 0) == 1 ||
261  compoundgood(iWord, 1) == 1)
262  {
263  retVal = true;
264  }
265  }
266 
267  return retVal;
268 }
269 
270 TQStringList
271 ISpellChecker::suggestWord(const TQString& utf8Word)
272 {
273  ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
274  int c;
275 
276  if (!m_bSuccessfulInit)
277  return TQStringList();
278 
279  if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
280  utf8Word.length() == 0)
281  return TQStringList();
282 
283  TQCString out;
284  if (!m_translate_in)
285  return TQStringList();
286  else
287  {
288  /* convert to 8bit string and null terminate */
289 
290  int len_out = utf8Word.length();
291  out = m_translate_in->fromUnicode( utf8Word, len_out );
292  }
293 
294  if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
295  makepossibilities(iWord);
296  else
297  return TQStringList();
298 
299  TQStringList sugg_arr;
300  for (c = 0; c < m_pcount; c++)
301  {
302  TQString utf8Word;
303 
304  if (!m_translate_in)
305  {
306  /* copy to 8bit string and null terminate */
307  utf8Word = TQString::fromUtf8( m_possibilities[c] );
308  }
309  else
310  {
311  /* convert to 32bit string and null terminate */
312  utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
313  }
314 
315  sugg_arr.append( utf8Word );
316  }
317 
318  return sugg_arr;
319 }
320 
321 static void
322 s_buildHashNames (std::vector<std::string> & names, const char * dict)
323 {
324  const char * tmp = 0;
325  int i = 0;
326 
327  names.clear ();
328 
329  while ( (tmp = ispell_dirs[i++]) ) {
330  TQCString maybeFile = TQCString( tmp ) + '/';
331  maybeFile += dict;
332  names.push_back( maybeFile.data() );
333  }
334 }
335 
336 static void
337 s_allDics()
338 {
339  const char * tmp = 0;
340  int i = 0;
341 
342  while ( (tmp = ispell_dirs[i++]) ) {
343  TQDir dir( tmp );
344  TQStringList lst = dir.entryList( "*.hash" );
345  for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
346  TQFileInfo info( *it );
347  for (size_t i = 0; i < size_ispell_map; i++)
348  {
349  const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
350  if (!strcmp (info.fileName().latin1(), mapping->dict))
351  {
352  ispell_dict_map.insert( mapping->lang, *it );
353  }
354  }
355  }
356  }
357 }
358 
359 TQValueList<TQString>
360 ISpellChecker::allDics()
361 {
362  if ( ispell_dict_map.empty() )
363  s_allDics();
364 
365  return ispell_dict_map.keys();
366 }
367 
368 TQString
369 ISpellChecker::loadDictionary (const char * szdict)
370 {
371  std::vector<std::string> dict_names;
372 
373  s_buildHashNames (dict_names, szdict);
374 
375  for (size_t i = 0; i < dict_names.size(); i++)
376  {
377  if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
378  return dict_names[i].c_str();
379  }
380 
381  return TQString::null;
382 }
383 
390 bool
391 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
392 {
393  TQString hashname;
394 
395  const char * encoding = NULL;
396  const char * szFile = NULL;
397 
398  for (size_t i = 0; i < size_ispell_map; i++)
399  {
400  const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
401  if (!strcmp (szLang, mapping->lang))
402  {
403  szFile = mapping->dict;
404  encoding = mapping->enc;
405  break;
406  }
407  }
408 
409  if (!szFile || !strlen(szFile))
410  return false;
411 
412  alloc_ispell_struct();
413 
414  hashname = loadDictionary(szFile);
415  if (hashname.isEmpty())
416  return false;
417 
418  // one of the two above calls succeeded
419  setDictionaryEncoding (hashname, encoding);
420 
421  return true;
422 }
423 
424 void
425 ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
426 {
427  /* Get Hash encoding from XML file. This should always work! */
428  try_autodetect_charset(encoding);
429 
430  if (m_translate_in)
431  {
432  /* We still have to setup prefstringchar*/
433  prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
434  : static_cast<int *>(NULL));
435 
436  if (prefstringchar < 0)
437  {
438  std::string teststring;
439  for(int n1 = 1; n1 <= 15; n1++)
440  {
441  teststring = "latin" + n1;
442  prefstringchar = findfiletype(teststring.c_str(), 1,
443  deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
444  if (prefstringchar >= 0)
445  break;
446  }
447  }
448 
449  return; /* success */
450  }
451 
452  /* Test for UTF-8 first */
453  prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
454  if (prefstringchar >= 0)
455  {
456  m_translate_in = TQTextCodec::codecForName("utf8");
457  }
458 
459  if (m_translate_in)
460  return; /* success */
461 
462  /* Test for "latinN" */
463  if (!m_translate_in)
464  {
465  /* Look for "altstringtype" names from latin1 to latin15 */
466  for(int n1 = 1; n1 <= 15; n1++)
467  {
468  TQString teststring = TQString("latin%1").arg(n1);
469  prefstringchar = findfiletype(teststring.latin1(), 1,
470  deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
471  if (prefstringchar >= 0)
472  {
473  //FIXME: latin1 might be wrong
474  m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
475  break;
476  }
477  }
478  }
479 
480  /* If nothing found, use latin1 */
481  if (!m_translate_in)
482  {
483  m_translate_in = TQTextCodec::codecForName("latin1");
484  }
485 }
486 
487 bool
488 ISpellChecker::requestDictionary(const char *szLang)
489 {
490  if (!loadDictionaryForLanguage (szLang))
491  {
492  // handle a shortened version of the language tag: en_US => en
493  std::string shortened_dict (szLang);
494  size_t uscore_pos;
495 
496  if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
497  shortened_dict = shortened_dict.substr(0, uscore_pos);
498  if (!loadDictionaryForLanguage (shortened_dict.c_str()))
499  return false;
500  } else
501  return false;
502  }
503 
504  m_bSuccessfulInit = true;
505 
506  if (prefstringchar < 0)
507  m_defdupchar = 0;
508  else
509  m_defdupchar = prefstringchar;
510 
511  return true;
512 }

tdespell2

Skip menu "tdespell2"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

tdespell2

Skip menu "tdespell2"
  • arts
  • dcop
  • dnssd
  • interfaces
  •   kspeech
  •     interface
  •     library
  •   tdetexteditor
  • kate
  • kded
  • kdoctools
  • kimgio
  • kjs
  • libtdemid
  • libtdescreensaver
  • tdeabc
  • tdecmshell
  • tdecore
  • tdefx
  • tdehtml
  • tdeinit
  • tdeio
  •   bookmarks
  •   httpfilter
  •   kpasswdserver
  •   kssl
  •   tdefile
  •   tdeio
  •   tdeioexec
  • tdeioslave
  •   http
  • tdemdi
  •   tdemdi
  • tdenewstuff
  • tdeparts
  • tdeprint
  • tderandr
  • tderesources
  • tdespell2
  • tdesu
  • tdeui
  • tdeunittest
  • tdeutils
  • tdewallet
Generated for tdespell2 by doxygen 1.9.1
This website is maintained by Timothy Pearson.