The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hebrewmcim.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * hebrewmcim.cpp - HebrewMCIM: Keyboard mapping for Michigan-Claremont
4  * Hebrew input
5  *
6  * $Id: hebrewmcim.cpp 2833 2013-06-29 06:40:28Z chrislit $
7  *
8  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
9  * CrossWire Bible Society
10  * P. O. Box 2528
11  * Tempe, AZ 85280-2528
12  *
13  * This program is free software; you can redistribute it and/or modify it
14  * under the terms of the GNU General Public License as published by the
15  * Free Software Foundation version 2.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * General Public License for more details.
21  *
22  */
23 
24 #include <hebrewmcim.h>
25 
27  :SWInputMethod() {
28 
29  init();
30 }
31 
32 
33 int *HebrewMCIM::translate(char in) {
34  int retVal = 0;
35  static int retString[5];
36  int retStringIndex = 0;
37 
38  memset(retString, 0, 5);
39 
40  if (getState() > 1) {
41  if (getState() >= 12) { // serious issue with internal structure
42  setState(0);
43  retString[retStringIndex++] = in;
44  return retString;
45  }
46  map<int, int>::iterator find = subst2[getState()].find(in);
47  if (find != subst2[getState()].end())
48  retVal = find->second;
49  else retVal = in;
50 
51  setState(0);
52  retString[retStringIndex++] = retVal;
53  return retString;
54  }
55  else {
56  retVal = subst[in];
57 
58  if (retVal == 0) {
59  setState(0);
60  retString[retStringIndex++] = in;
61  return retString;
62  }
63  if (retVal > 100) {
64  setState(1);
65  retString[retStringIndex++] = retVal;
66  return retString;
67  }
68  if (retVal == 50) { // multiChar
69  setState(1);
70  int *chars = multiChars[in];
71  if (chars != 0) {
72  retString[retStringIndex++] = chars[0];
73  retString[retStringIndex++] = chars[1];
74  return retString;
75  }
76  }
77  }
78  setState(retVal);
79  return 0;
80 }
81 
82 
84  memset(subst, 0, 255);
85 
86  subst[')'] = 1488;
87  subst['B'] = 1489;
88  subst['G'] = 1490;
89  subst['D'] = 1491;
90  subst['H'] = 1492;
91  subst['W'] = 1493;
92  subst['Z'] = 1494;
93  subst['X'] = 1495;
94  subst['+'] = 1496;
95  subst['Y'] = 1497;
96 
97  subst['k'] = 1498; // finals
98  subst['m'] = 1501;
99  subst['n'] = 1503;
100  subst['c'] = 1509;
101 
102  subst['P'] = 1508;
103  subst['K'] = 1499;
104  subst['L'] = 1500;
105  subst['M'] = 1502;
106  subst['N'] = 1504;
107  subst['S'] = 1505;
108  subst['('] = 1506;
109  subst['p'] = 1507;
110  subst['C'] = 1510;
111  subst['Q'] = 1511;
112  subst['R'] = 1512;
113  subst['#'] = 1513;
114 
115  // special multiChars
116  subst['&'] = 50;
117  subst['$'] = 50;
118 
119  static int x[] = {1513, 1474};
120  multiChars['&'] = x;
121  static int y[] = {1513, 1473};
122  multiChars['$'] = y;
123 
124  subst['T'] = 1514;
125 
126  // VOWELS
127  subst['A'] = 1463;
128  subst['F'] = 1464;
129  subst['E'] = 1462;
130  subst['"'] = 1461;
131  subst['I'] = 1460;
132  subst['O'] = 1465;
133  subst['U'] = 1467;
134 
135 
136 
137  // OTHER DIACRITICS
138  subst['.'] = 1468;
139  subst['-'] = 1470;
140  subst[','] = 1471;
141 
142  // Compound input
143 
144  // CANTILLATION
145 
146  subst[':'] = 2;
147  subst2[2]['A'] = 1458;
148  subst2[2]['E'] = 1457;
149  subst2[2]['F'] = 1459;
150 
151 
152  /* Telisha qetana is postpositive as in '04' above. However, Michigan
153 # code '24' is for a medial telisha. Graphically, there is no
154 # difference.
155  */
156  subst['2'] = 5;
157  subst2[5]['4'] = 1449;
158 
159 
160  /* Note Michigan encoding distinguishes between medial metheg '35' (occuring
161 # on the left of the vowel), and the ordinary meteg '95' (occuring on the
162 # right of the vowel). It is also used for silluq.
163  */
164  subst['3'] = 6;
165  subst2[6]['3'] = 1433;
166  subst2[6]['5'] = 1469;
167 
168 
169  /* The Michigan code of telisha gedola in medial position. Graphically,
170 # there is no difference.
171  */
172  subst['4'] = 7;
173  subst2[7]['4'] = 1440;
174 
175  subst['6'] = 8;
176  subst2[8]['0'] = 1451;
177  subst2[8]['1'] = 1436;
178 
179  subst['1'] = 4;
180  subst2[4]['0'] = 1434;
181 
182  /* In the poetic books, prepositive dehi occurs; it's unclear whether
183 # tipeha also occurs in the poetic books. Otherwise, we could simply
184 # check for what book in the Tanach we are in. Michigan uses the same
185 # code for each.
186  */
187 
188  subst2[4]['3'] = 1430;
189 
190  /* This is the poetic accent mugrash, which also includes rebia, but is
191 # encoded separately as '81' in the Michigan text.
192  */
193  subst2[4]['1'] = 1437;
194  subst2[4]['4'] = 1440;
195 
196 
197  subst['0'] = 3;
198  subst2[3]['0'] = 1475;
199  subst2[3]['1'] = 1426;
200 
201  /* According to BHS, zarqa and sinnor are both postpositive. However,
202 # the Michigan encoding uses one code for both. The Unicode zarqa
203 # (0x0598) is definitely NOT postpositive. And further, the shape of
204 # the symbol is different in BHS and Uniocde. This needs further
205 # research to determine what's going on here. For now, we follow BHS
206 # and use the postpositive Unicode zinor or both accents.
207  */
208 
209  subst2[3]['2'] = 1454;
210 
211  /* Pashta is postpositive, and the Unicode equivalent reflects
212 # this. However, there is a poetic equivalent -- azla legarmeh --
213 # which is not postpositive, but no equivalent code point exists in
214 # Unicode. The Michigan encoding does not distinguish between the two,
215 # although it could be algorithmically determined.
216  */
217 
218  subst2[3]['3'] = 1433;
219  subst2[3]['4'] = 1449;
220  subst2[3]['5'] = 1472;
221 
222 
223  /* This is the Unicode Hebrew *accent*; there is also another Hebrew
224 # *punctuation* called GERSHAYIM 0x05F4. I'm using the more
225 # traditional rounded marks, rather than the alternate straight
226 # marks.
227  */
228 
229  subst2[8]['2'] = 1438;
230 
231  // Also known as azla
232  subst2[8]['3'] = 1448;
233  subst2[8]['4'] = 1452;
234  subst2[8]['5'] = 1427;
235 
236 
237  subst['8'] = 9;
238  subst2[9]['0'] = 1428;
239  subst2[9]['1'] = 1431;
240 
241  /* Note, this accent is actually sinnorit, but it does not exist as a
242 # separate glyph in the Unicode standard. The 'ZINOR' Unicode accent
243 # is postpositive, while sinnorit is not. ZARQA is as close as I can
244 # get to this.
245  */
246  subst2[9]['2'] = 1432;
247 
248  /* The Unicode form does not match the form used by BHS, but the names
249 # are the same.
250  */
251  subst2[9]['3'] = 1441;
252  subst2[9]['4'] = 1439;
253  subst2[9]['5'] = 1429;
254 
255  subst['7'] = 10;
256  subst2[10]['0'] = 1444;
257  subst2[10]['1'] = 1445;
258  subst2[10]['2'] = 1446;
259  subst2[10]['3'] = 1430; // also '13', '73' also is used for majela
260  subst2[10]['4'] = 1443;
261  subst2[10]['5'] = 1469; // this is silluq; should appear to the left of the vowel
262 
263  subst['9'] = 11;
264  subst2[11]['1'] = 1435;
265  subst2[11]['2'] = 1425;
266  subst2[11]['3'] = 1450;
267  subst2[11]['4'] = 1447;
268  subst2[11]['5'] = 1469; // should appear to the right of the vowel
269 
270 }
271 
272  /*
273 
274 
275 # CANTILLATION MARKS
276 
277  my $ETNAHTA = '&#1425;';
278 # officially the Unicode name for this symbol was "SEGOL." However, that is
279 # not a unique name, conflicting with the vowel of the same name. Further,
280 # the position of the symbol is different. I have changed the name of the
281 # accent to "SEGOLTA," the traditional name for this accent.
282  my $SEGOLTA = '&#1426;';
283  my $SHALSHELET = '&#1427;';
284  my $ZAQEF_QATAN = '&#1428;';
285  my $ZAQEF_GADOL = '&#1429;';
286  my $TIPEHA = '&#1430;';
287  my $REVIA = '&#1431;';
288  my $ZARQA = '&#1432;';
289  my $PASHTA = '&#1433;';
290  my $YETIV = '&#1434;';
291  my $TEVIR = '&#1435;';
292  my $GERESH = '&#1436;';
293  my $GERESH_MUQDAM = '&#1437;';
294  my $GERSHAYIM = '&#1438;';
295  my $QARNEY_PARA = '&#1439;';
296  my $TELISHA_GEDOLA = '&#1440;';
297  my $PAZER = '&#1441;';
298  my $MUNAH = '&#1443;';
299  my $MAHAPAKH = '&#1444;';
300  my $MERKHA = '&#1445;';
301  my $MERKHA_KEFULA = '&#1446;';
302  my $DARGA = '&#1447;';
303  my $QADMA = '&#1448;';
304  my $TELISHA_QETANA = '&#1449;';
305  my $YERAH_BEN_YOMO = '&#1450;';
306  my $OLE = '&#1451;';
307  my $ILUY = '&#1452;';
308  my $DEHI = '&#1453;';
309  my $ZINOR = '&#1454;';
310 # HEBREW MARK
311  my $MASORA_CIRCLE = '&#1455;';
312 # HEBREW EXTENDED-A points and punctuation
313  my $SHEVA = '&#1456;';
314  my $HATAF_SEGOL = '&#1457;';
315  my $HATAF_PATAH = '&#1458;';
316  my $HATAF_QAMATS = '&#1459;';
317  my $HIRIQ = '&#1460;';
318  my $TSERE = '&#1461;';
319  my $SEGOL = '&#1462;';
320 # furtive Patah is not a distinct character
321  my $PATAH = '&#1463;';
322  my $QAMATS = '&#1464;';
323  my $HOLAM = '&#1465;';
324  my $QUBUTS = '&#1467;';
325 # also used as shuruq
326 # falls within the base letter
327  my $DAGESH_OR_MAPIQ = '&#1468;';
328 # also used as siluq
329  my $METAG = '&#1469;';
330  my $MAQAF = '&#1470;';
331  my $RAFE = '&#1471;';
332 # Also used for legarmeh
333 # may be treated as spacing punctuation, not as a point
334  my $PASEQ = '&#1472;';
335  my $SHIN_DOT = '&#1473;';
336  my $SIN_DOT = '&#1474;';
337  my $SOF_PASUQ = '&#1475;';
338 # HEBREW MARK
339  my $UPPER_DOT = '&#1476;';
340 # HEBREW LETTERS based on ISO 8859-8
341 # aleph
342 # x (alef symbol - 2135)
343  my $ALEF = '&#1488;';
344 # x (bet symbol - 2136)
345  my $BET = '&#1489;';
346 # x (gimel symbol - 2137)
347  my $GIMEL = '&#1490;';
348 # x (dalet symbol - 2138)
349  my $DALET = '&#1491;';
350  my $HE = '&#1492;';
351  my $VAV = '&#1493;';
352  my $ZAYIN = '&#1494;';
353  my $HET = '&#1495;';
354  my $TET = '&#1496;';
355  my $YOD = '&#1497;';
356  my $FINAL_KAF = '&#1498;';
357  my $KAF = '&#1499;';
358  my $LAMED = '&#1500;';
359  my $FINAL_MEM = '&#1501;';
360  my $MEM = '&#1502;';
361  my $FINAL_NUN = '&#1503;';
362  my $NUN = '&#1504;';
363  my $SAMEKH = '&#1505;';
364  my $AYIN = '&#1506;';
365  my $FINAL_PE = '&#1507;';
366  my $PE = '&#1508;';
367  my $FINAL_TSADI = '&#1509;';
368 # also known as zade
369  my $TSADI = '&#1510;';
370  my $QOF = '&#1511;';
371  my $RESH = '&#1512;';
372  my $SHIN = '&#1513;';
373  my $TAV = '&#1514;';
374 # Yiddish digraphs
375 # Hebrew Ligature
376 # tsvey vovn
377  my $DOUBLE_VAV = '&#1520;';
378  my $VAV_YOD = '&#1521;';
379 # tsvey yudn
380  my $DOUBLE_YOD = '&#1522;';
381 
382 # Additional punctuation
383  my $PUNCT_GERESH = '&#1523;';
384  my $PUNCT_GERSHAYIM = '&#1524;';
385 # Reserved: 0x05F5"
386 # x (hebrew point judeo-spanish varika - FB1E)
387 #my $JUDEO_SPANISH_VARIKA = pack("U",0xFB1E); # UTF-8 OxFB1E
388 
389 #############################
390 # End of Unicode 2.0 Hebrew #
391 #############################
392 
393 # A hash whose key is a Michigan code, and whose value is a Unicode
394 # equivalent
395 
396  char subst[] = new char [255];
397  subst[')'] = 1488;
398  'B' => $BET,
399  'G' => $GIMEL,
400  'D' => $DALET,
401  'H' => $HE,
402  'W' => $VAV,
403  'Z' => $ZAYIN,
404  'X' => $HET,
405  '+' => $TET,
406  'Y' => $YOD,
407  'K' => $KAF,
408  'L' => $LAMED,
409  'M' => $MEM,
410  'N' => $NUN,
411  'S' => $SAMEKH,
412  '(' => $AYIN,
413  'P' => $PE,
414  'C' => $TSADI,
415  'Q' => $QOF,
416  'R' => $RESH,
417  '#' => $SHIN, # the letter shin without a point
418  '&' => ($SHIN . $SIN_DOT),
419  '$' => ($SHIN . $SHIN_DOT), # '
420  'T' => $TAV,
421 # VOWELS
422  'A' => $PATAH,
423  'F' => $QAMATS,
424  'E' => $SEGOL,
425  '"' => $TSERE,
426  'I' => $HIRIQ,
427  'O' => $HOLAM,
428  'U' => $QUBUTS,
429  ':' => $SHEVA,
430  ':A' => $HATAF_PATAH,
431  ':E' => $HATAF_SEGOL,
432  ':F' => $HATAF_QAMATS,
433 # OTHER DIACRITICS
434  '.' => $DAGESH_OR_MAPIQ,
435  '-' => $MAQAF,
436  ',' => $RAFE,
437 # CANTILLATION
438  '00' => $SOF_PASUQ,
439  '01' => $SEGOLTA,
440 # According to BHS, zarqa and sinnor are both postpositive. However,
441 # the Michigan encoding uses one code for both. The Unicode zarqa
442 # (0x0598) is definitely NOT postpositive. And further, the shape of
443 # the symbol is different in BHS and Unicode. This needs further
444 # research to determine what's going on here. For now, we follow BHS
445 # and use the postpositive Unicode zinor for both accents.
446  '02' => $ZINOR,
447 # Pashta is postpositive, and the Unicode equivalent reflects
448 # this. However, there is a poetic equivalent -- azla legarmeh --
449 # which is not postpositive, but no equivalent code point exists in
450 # Unicode. The Michigan encoding does not distinguish between the two,
451 # although it could be algorithmically determined.
452  '03' => $PASHTA,
453  '04' => $TELISHA_QETANA,
454  '05' => $PASEQ,
455  '10' => $YETIV,
456 # In the poetic books, prepositive dehi occurs; it's unclear whether
457 # tipeha also occurs in the poetic books. Otherwise, we could simply
458 # check for what book in the Tanach we are in. Michigan uses the same
459 # code for each.
460  '13' => $TIPEHA, # also $DEHI
461 # This is the poetic accent mugrash, which also includes rebia, but is
462 # encoded separately as '81' in the Michigan text.
463  '11' => $GERESH_MUQDAM,
464  '14' => $TELISHA_GEDOLA,
465 # Telisha qetana is postpositive as in '04' above. However, Michigan
466 # code '24' is for a medial telisha. Graphically, there is no
467 # difference.
468  '24' => $TELISHA_QETANA,
469  '33' => $PASHTA,
470 # The Michigan code of telisha gedola in medial position. Graphically,
471 # there is no difference.
472  '44' => $TELISHA_GEDOLA,
473  '60' => $OLE,
474  '61' => $GERESH,
475 # This is the Unicode Hebrew *accent*; there is also another Hebrew
476 # *punctuation* called GERSHAYIM 0x05F4. I'm using the more
477 # traditional rounded marks, rather than the alternate straight
478 # marks.
479  '62' => $GERSHAYIM,
480 # Also known as azla
481  '63' => $QADMA,
482  '64' => $ILUY,
483  '65' => $SHALSHELET,
484  '80' => $ZAQEF_QATAN,
485  '81' => $REVIA,
486 # Note, this accent is actually sinnorit, but it does not exist as a
487 # separate glyph in the Unicode standard. The 'ZINOR' Unicode accent
488 # is postpositive, while sinnorit is not. ZARQA is as close as I can
489 # get to this.
490  '82' => $ZARQA,
491 # The Unicode form does not match the form used by BHS, but the names
492 # are the same.
493  '83' => $PAZER,
494  '84' => $QARNEY_PARA,
495  '85' => $ZAQEF_GADOL,
496 # Note Michigan encoding distinguishes between medial metheg '35' (occurring
497 # on the left of the vowel), and the ordinary meteg '95' (occurring on the
498 # right of the vowel). It is also used for silluq.
499  '35' => $METAG,
500  '70' => $MAHAPAKH,
501  '71' => $MERKHA,
502  '72' => $MERKHA_KEFULA,
503  '73' => $TIPEHA, # also '13', '73' also is used for majela
504  '74' => $MUNAH,
505  '75' => $METAG, # this is silluq; should appear to the left of the vowel
506  '91' => $TEVIR,
507  '92' => $ETNAHTA,
508  '93' => $YERAH_BEN_YOMO,
509  '94' => $DARGA,
510  '95' => $METAG, # should appear to the right of the vowel
511 
512 # Not used by the Michigan Encoding
513 # $UPPER_DOT = '05C4';
514  );
515 
516 # declare other variables
517  my (@bhsLines,
518  @bhsVerse,
519  @entity_line) = ();
520 
521  my ($i,
522  $verse,
523  $word,
524  $character) = 0;
525 
526  my ($element,
527  $saveGuttural) = "";
528 
529 # read in a line
530  while (<>) {
531 # Process one verse
532 # iterate over every character and change to XML decimal entity
533  CHAR: for ( $i = 0; ($i < scalar(@bhsVerse)); $i++) {
534  # find and convert final kaf, mem, nun, pe, tsade
535  ( # if final form
536  $bhsVerse[$i] =~ /[KMNPC]/
537  )
538  &&
539  (
540  ( # whitespace or
541  $bhsVerse[$i+1] =~ /[ \-?]/
542  )
543  ||
544  ( # EOL or
545  $i == ( scalar(@bhsVerse) - 1 )
546  )
547  ||
548  ( # sof pasuq or
549  ( $bhsVerse[$i+1] =~ /0/ ) &&
550  ( $bhsVerse[$i+2] =~ /0/ )
551  )
552  ||
553  ( # one accent followed by white, eol or
554  (
555  ( $bhsVerse[$i+1] =~ /\d/ ) &&
556  ( $bhsVerse[$i+2] =~ /\d/ )
557  ) &&
558  (
559  ( $bhsVerse[$i+3] =~ /[ \-?]/ ) ||
560  ( $i == ( scalar(@bhsVerse) - 1 ) )
561  )
562  )
563  ||
564  ( # two accents followed by white, eol
565  (
566  ( $bhsVerse[$i+1] =~ /\d/ ) &&
567  ( $bhsVerse[$i+2] =~ /\d/ ) &&
568  ( $bhsVerse[$i+3] =~ /\d/ ) &&
569  ( $bhsVerse[$i+4] =~ /\d/ )
570  ) &&
571  (
572  ( $bhsVerse[$i+5] =~ /[ \-?]/ ) ||
573  ( $i == ( scalar(@bhsVerse) - 1 ) )
574  )
575  )
576  ||
577  ( # followed by a vowel and white, eol, sof pasuq
578  ( $bhsVerse[$i+1] =~ /[:F]/ ) &&
579  ( # followed by
580  ( $bhsVerse[$i+2] =~ /[ \-?]/ ) || # whitespace or
581  ( $i == ( scalar(@bhsVerse) - 1 ) ) || # eol or
582  ( # sof pasuq
583  ( $bhsVerse[$i+2] =~ /0/ ) &&
584  ( $bhsVerse[$i+3] =~ /0/ )
585  )
586  )
587  )
588  ) # end of what follows after final letter
589  &&
590  do {
591  $bhsVerse[$i] =~ /K/ && eval { push @entity_line,$FINAL_KAF; }
592  && next CHAR;
593  $bhsVerse[$i] =~ /M/ && eval { push @entity_line,$FINAL_MEM; }
594  && next CHAR;
595  $bhsVerse[$i] =~ /N/ && eval { push @entity_line,$FINAL_NUN; }
596  && next CHAR;
597  $bhsVerse[$i] =~ /P/ && eval { push @entity_line,$FINAL_PE; }
598  && next CHAR;
599  $bhsVerse[$i] =~ /C/ && eval { push @entity_line,$FINAL_TSADI; }
600  && next CHAR;
601  };
602  # find and convert "furtive patach"
603  ( $bhsVerse[$i] =~ /A/ ) && # If the letter is a patach
604  ( $bhsVerse[$i-1] =~ /[)HX(]/ ) && # and is preceeded by a guttural
605  ( ( $bhsVerse[$i-2] =~ /[AEFOU]/ ) || # and is preceeded by a vowel
606  ( ( $bhsVerse[$i-2] =~ /\./ ) && # or by suruq
607  ( $bhsVerse[$i-3] =~ /W/ ) ) || #
608  ( ( $bhsVerse[$i-2] =~ /W/ ) && # or by holem (written plene)
609  ( $bhsVerse[$i-3] =~ /O/ ) ) || #
610  ( ( $bhsVerse[$i-2] =~ /Y/ ) && # or by hiriq-yod
611  ( $bhsVerse[$i-3] =~ /I/ ) ) ) &&
612  do {
613  $saveGuttural = pop @entity_line; # snip off the gutteral
614  push @entity_line,$PATAH; # push on the patach
615  push @entity_line,$saveGuttural; # push back on the gutteral
616  next CHAR;
617  };
618  # convert cantillation
619  # since we have previously dealt with all other cases of
620  # numbers, two digit patterns are all we have to search for
621  $bhsVerse[$i] =~ /\d/ && $bhsVerse[$i+1] =~ /\d/ && do {
622  push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]$bhsVerse[$i+1]"};
623  $i++; # accents are two digits long, so advance past the 2nd digit
624  next CHAR;
625  };
626  # convert katef vowels, which are two characters long
627  $bhsVerse[$i] =~ /:/ && $bhsVerse[$i+1] =~ /[AEF]/ && do {
628  push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]$bhsVerse[$i+1]"};
629  $i++;
630  next CHAR;
631  };
632  # convert everything else
633  push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]"};
634  } # end CHAR
635 # print the line to standard output with XML character-level encoding
636 # each character has the following format:
637 # <c id="1kg1.verse#.word#.character#">&#1234;</c>
638 
639 # set up the verse element
640  $word = 1;
641  $character = 1;
642  print "<verse>\n<word>\n";
643 # print each character element
644 # if there is a space, then close the word entity, open a new word
645 # entity, increment the word number, reset the character number to
646 # zero.
647  foreach $element (@entity_line) {
648  if ( $element =~ " " ) {
649  $word++;
650  $character = 1;
651  print "</word>\n<word>\n";
652  next;
653  }
654  print "<c id=\"1kg1.$verse.$word.$character\">$element</c>\n";
655  $character++;
656  }
657 # close the verse element
658  print "</word></verse>\n";
659 # reinitialize variables
660  @bhsVerse = ();
661  @entity_line = ();
662  @bhsLines = ();
663  } # end while
664 # close the XML document
665  print "</body>\n";
666  */
virtual int getState()
Definition: swinputmeth.cpp:33
void init()
Definition: hebrewmcim.cpp:83
int * translate(char in)
Definition: hebrewmcim.cpp:33
map< int, int > subst2[12]
Definition: hebrewmcim.h:47
map< int, int * > multiChars
Definition: hebrewmcim.h:48
int subst[255]
Definition: hebrewmcim.h:46
virtual void setState(int state)
Definition: swinputmeth.cpp:29