[sword-svn] r262 - trunk/source/data/translit/crosswire

chrislit at crosswire.org chrislit at crosswire.org
Fri Feb 7 00:52:56 MST 2014


Author: chrislit
Date: 2014-02-07 00:52:55 -0700 (Fri, 07 Feb 2014)
New Revision: 262

Modified:
   trunk/source/data/translit/crosswire/Coptic_Latin_Beta.txt
   trunk/source/data/translit/crosswire/Greek_Latin_Beta.txt
Log:
updated, checked, tested Coptic/Beta transliterator
minor tweaks to Greek/Beta transliterator


Modified: trunk/source/data/translit/crosswire/Coptic_Latin_Beta.txt
===================================================================
--- trunk/source/data/translit/crosswire/Coptic_Latin_Beta.txt	2014-02-07 07:06:43 UTC (rev 261)
+++ trunk/source/data/translit/crosswire/Coptic_Latin_Beta.txt	2014-02-07 07:52:55 UTC (rev 262)
@@ -1,6 +1,6 @@
 # ***************************************************************************
 # *
-# *  Copyright (C) 2002-2013, CrossWire Bible Society
+# *  Copyright (C) 2002-2014, CrossWire Bible Society
 # *  All Rights Reserved.
 # *
 # ***************************************************************************
@@ -9,52 +9,60 @@
 
 # Based on Beta code for CCAT/CATSS/TLG:
 # http://ccat.sas.upenn.edu/gopher/text/religion/biblical/0-betacode.txt
+# and:
+# http://www.tlg.uci.edu/encoding/BCM2013.pdf
+
 # Coptic-Latin/Beta
+:: [[:Coptic:][̓ʼ᾽᾿̔ʽ῞´ˊ΄´͂̂\^ˆ῀̀\`ˋ`̈¨ͅιͺ̣··;’‐—]];
 :: NFD (NFC);
-# Uppercase
-Ⲁ<>'*A';
-Ⲃ<>'*B';
-Ⲅ<>'*G';
-Ⲇ<>'*D';
-Ⲉ<>'*E';
-Ⲋ>''; # sou is non-alphabetic, and excluded from Beta
-Ⲍ<>'*Z';
-Ⲏ<>'*H';
-Ⲑ<>'*Q';
-Ⲓ<>'*I';
-Ⲕ<>'*K';
-Ⲗ<>'*L';
-Ⲙ<>'*M';
-Ⲛ<>'*N';
-Ⲝ<>'*C';
-Ⲟ<>'*O';
-Ⲫ<>'*F';
-Ⲯ<>'*Y';
-Ⲡ<>'*P';
-Ⲣ<>'*R';
-Ⲥ<'*J';
-Ⲥ<>'*S';
-Ⲧ<>'*T';
-Ⲩ<>'*U';
-Ⲭ<>'*X';
-Ⲱ<>'*W';
-Ϣ<>'*s';
-Ϥ<>'*f';
-Ϩ<>'*h';
-Ϫ<>'*j';
-Ϭ<>'*g';
-Ϯ<>'*t';
-# Archaic Greek (should not appear in Coptic)
-'Ϝ'<>'*V';
-'Ϟ'<>'*#3';
-'Ϡ'<>'*#5';
-# Lowercase
+
+# Letters
+#  Map archaic letters to close approximations
+|K<\#3;
+|S<\#5;
+
+#  Uppercase
+Ⲁ<>\*A;
+Ⲃ<>\*B;
+Ⲅ<>\*G;
+Ⲇ<>\*D;
+Ⲉ<>\*E;
+Ⲋ<>\*V;
+Ⲍ<>\*Z;
+Ⲏ<>\*H;
+Ⲑ<>\*Q;
+Ⲓ<>\*I;
+Ⲕ<>\*K;
+Ⲗ<>\*L;
+Ⲙ<>\*M;
+Ⲛ<>\*N;
+Ⲝ<>\*C;
+Ⲟ<>\*O;
+Ⲫ<>\*F;
+Ⲯ<>\*Y;
+Ⲡ<>\*P;
+Ⲣ<>\*R;
+Ⲥ<\*J;
+Ⲥ<\*S[123];
+Ⲥ<>\*S;
+Ⲧ<>\*T;
+Ⲩ<>\*U;
+Ⲭ<>\*X;
+Ⲱ<>\*W;
+Ϣ<>\*s;
+Ϥ<>\*f;
+Ϩ<>\*h;
+Ϫ<>\*j;
+Ϭ<>\*g;
+Ϯ<>\*t;
+
+#  Lowercase
 ⲁ<>A;
 ⲃ<>B;
 ⲅ<>G;
 ⲇ<>D;
 ⲉ<>E;
-ⲋ>''; # sou is non-alphabetic, and excluded from Beta
+ⲋ<>V;
 ⲍ<>Z;
 ⲏ<>H;
 ⲑ<>Q;
@@ -70,6 +78,7 @@
 ⲡ<>P;
 ⲣ<>R;
 ⲥ<J;
+ⲥ<S[123];
 ⲥ<>S;
 ⲧ<>T;
 ⲩ<>U;
@@ -81,32 +90,28 @@
 ϫ<>j;
 ϭ<>g;
 ϯ<>t;
-# Archaic Greek (should not appear in Coptic)
-ϝ<>V;
-ϟ<>'#3';
-ϡ<>'#5';
-# Non-letter characters, copied from Greek transform
-ͅ<>'|';
-ͺ>'|';
-̔<>'(';
-ʽ>'(';
-·<>':';
-̓<>')';
-ʼ>')';
-́<>'/';
-´>'/';
-ˊ>'/';
-΄>'/';
-͂<>'=';
-̂>'=';
-'^'>'=';
-ˆ>'=';
-̀<>'\\';
-'`'>'\\';
-ˋ>'\\';
-̈<>'+';
-¨>'+';
-̄<>'_';   # Perseus LSJ uses _ for long vowels
-# Coptic encoded in Beta (as opposed to Greek) is cased. Uppercase S maps to sigma, whereas lowercase s maps to shai. However, the 20 letters below do not have lower case values in Coptic Beta, so we uppercase them here.
+
+# Non-letter characters
+#  Delete Greek accents
+<[\)\(\/\=\\\+\|\?];
+
+#  ignore period & comma
+
+#  middle dot
+·<>\:;
+·>\:;
+#  semicolon (question mark) -- use a semicolon in both scripts
+;>\;;
+#  apostrophe
+’<>\';
+#  hyphen
+‐<>\-;
+—<>\_;
+
+# Coptic encoded in Beta (as opposed to Greek) is cased.
+# E.g., uppercase S maps to sigma, whereas lowercase s maps to shai.
+# However, the 20 letters below have no lowercase values in Coptic Beta, so we uppercase them here.
 :: ([abcdeiklmnopqruvwxyz] upper);
+
 :: NFC (NFD);
+:: ([\*A-Za-z\#1235\)\(\/\=\\\+\|\?\:\;\'\-\_]);

Modified: trunk/source/data/translit/crosswire/Greek_Latin_Beta.txt
===================================================================
--- trunk/source/data/translit/crosswire/Greek_Latin_Beta.txt	2014-02-07 07:06:43 UTC (rev 261)
+++ trunk/source/data/translit/crosswire/Greek_Latin_Beta.txt	2014-02-07 07:52:55 UTC (rev 262)
@@ -129,7 +129,7 @@
 ι>\|;
 ͺ>\|;
 #  dot below
-̣<\?;
+̣<>\?;
 
 #  ignore period & comma
 
@@ -148,4 +148,4 @@
 
 :: ([a-z] upper);
 :: NFC (NFD);
-:: ([\*A-Za-z\#%0-9\)\(\/\=\\\+\|\?\:\;\'\-\_]);
+:: ([\*A-Za-z\#1235\)\(\/\=\\\+\|\?\:\;\'\-\_]);




More information about the sword-cvs mailing list