[ruby-oci8-commit] [309] trunk/ruby-oci8: * lib/oci8.rb.in, lib/oci8/encoding-init.rb: (ruby 1.9) set

nobody at rubyforge.org nobody at rubyforge.org
Tue Jan 13 09:36:44 EST 2009


Revision: 309
Author:   kubo
Date:     2009-01-13 09:36:43 -0500 (Tue, 13 Jan 2009)

Log Message:
-----------
* lib/oci8.rb.in, lib/oci8/encoding-init.rb: (ruby 1.9) set
    OCI8.encoding by querying NLS_LANG. If DEFAULT_OCI8_ENCODING
    is defined, use it by default.
* lib/oci8/encoding.yml: (ruby 1.9) add a new file containing
    mapping table from Oracle characterset name to Ruby encoding
    name.

Modified Paths:
--------------
    trunk/ruby-oci8/ChangeLog
    trunk/ruby-oci8/lib/oci8.rb.in

Added Paths:
-----------
    trunk/ruby-oci8/lib/oci8/encoding-init.rb
    trunk/ruby-oci8/lib/oci8/encoding.yml

Modified: trunk/ruby-oci8/ChangeLog
===================================================================
--- trunk/ruby-oci8/ChangeLog	2009-01-11 15:11:09 UTC (rev 308)
+++ trunk/ruby-oci8/ChangeLog	2009-01-13 14:36:43 UTC (rev 309)
@@ -1,3 +1,11 @@
+2009-01-13  KUBO Takehiro  <kubo at jiubao.org>
+	* lib/oci8.rb.in, lib/oci8/encoding-init.rb: (ruby 1.9) set
+	    OCI8.encoding by querying NLS_LANG. If DEFAULT_OCI8_ENCODING
+	    is defined, use it by default.
+	* lib/oci8/encoding.yml: (ruby 1.9) add a new file containing
+	    mapping table from Oracle characterset name to Ruby encoding
+	    name.
+
 2009-01-11  KUBO Takehiro  <kubo at jiubao.org>
 	* ext/oci8/encoding.c: (ruby 1.9) add OCI8.encoding and
 	    OCI8.encoding= to get and set the encoding of the Oracle

Added: trunk/ruby-oci8/lib/oci8/encoding-init.rb
===================================================================
--- trunk/ruby-oci8/lib/oci8/encoding-init.rb	                        (rev 0)
+++ trunk/ruby-oci8/lib/oci8/encoding-init.rb	2009-01-13 14:36:43 UTC (rev 309)
@@ -0,0 +1,31 @@
+#
+# setup default OCI encoding from NLS_LANG.
+#
+
+# try to get NLS_LANG.
+nls_lang = ENV['NLS_LANG']
+
+if nls_lang.nil? and RUBY_PLATFORM =~ /mswin32|cygwin|mingw32|bccwin32/
+  # TODO
+end
+
+if nls_lang
+  # Extract character set name from NLS_LANG.
+  if nls_lang =~ /\.([[:alnum:]]+)$/
+    charset = $1.upcase
+  else
+    raise "Invalid NLS_LANG format: #{nls_lang}"
+  end
+
+  # Convert the Oracle character set name to Ruby encoding name by
+  # querying the yaml data.
+  require 'yaml'
+  enc = YAML::load_file(File.dirname(__FILE__) + '/encoding.yml')[charset]
+  if enc.nil?
+    raise "Ruby encoding name is not found in encoding.yml for NLS_LANG #{nls_lang}."
+  end
+else
+  warn "Warning: NLS_LANG is not set. fallback to US-ASCII."
+  enc = 'US-ASCII'
+end
+OCI8.encoding = enc

Added: trunk/ruby-oci8/lib/oci8/encoding.yml
===================================================================
--- trunk/ruby-oci8/lib/oci8/encoding.yml	                        (rev 0)
+++ trunk/ruby-oci8/lib/oci8/encoding.yml	2009-01-13 14:36:43 UTC (rev 309)
@@ -0,0 +1,537 @@
+#
+# Mapping table from Oracle charset name to Ruby encoding name
+#
+
+############################################################
+# Oracle Database Globalization Support Guide 11g Release 1 (11.1)
+#
+# Table A-4 Recommended ASCII Database Character Sets
+
+#
+# Asian
+#
+
+# EUC 24-bit Japanese
+JA16EUC:           EUC-JP
+
+# The same as JA16EUC except for the way that the wave dash and the
+# tilde are mapped to and from Unicode.
+JA16EUCTILDE:      CP51932      # or eucJP-ms
+
+# Shift-JIS 16-bit Japanese
+JA16SJIS:          Shift_JIS
+
+# The same as JA16SJIS except for the way that the wave dash and the
+# tilde are mapped to and from Unicode.
+JA16SJISTILDE:     Windows-31J
+
+# MS Windows Code Page 949 Korean
+KO16MSWIN949:      CP949
+
+# Thai Industrial Standard 620-2533 - ASCII 8-bit
+TH8TISASCII:       Windows-874  # or TIS-620
+
+# MS Windows Code Page 1258 8-bit Vietnamese
+VN8MSWIN1258:      Windows-1258
+
+# GBK 16-bit Simplified Chinese
+ZHS16GBK:          GBK
+
+# MS Windows Code Page 950 with Hong Kong Supplementary Character
+# Set HKSCS-2001 (character set conversion to and from Unicode is
+# based on Unicode 3.0)
+ZHT16HKSCS:        Big5         # Does the Big5 include HKSCS?
+
+# MS Windows Code Page 950 Traditional Chinese
+ZHT16MSWIN950:     Big5
+
+# EUC 32-bit Traditional Chinese
+ZHT32EUC:          EUC-TW       # Who uses this?
+
+#
+# European
+#
+
+# ISO 8859-13 Baltic
+BLT8ISO8859P13:    ISO-8859-13
+
+# MS Windows Code Page 1257 8-bit Baltic
+BLT8MSWIN1257:     Windows-1257
+
+# ISO 8859-5 Latin/Cyrillic
+CL8ISO8859P5:      ISO-8859-5
+
+# MS Windows Code Page 1251 8-bit Latin/Cyrillic
+CL8MSWIN1251:      Windows-1251
+
+# ISO 8859-2 East European
+EE8ISO8859P2:      ISO-8859-2
+
+# ISO 8859-7 Latin/Greek
+EL8ISO8859P7:      ISO-8859-7
+
+# MS Windows Code Page 1253 8-bit Latin/Greek
+EL8MSWIN1253:      Windows-1253
+
+# MS Windows Code Page 1250 8-bit East European
+EE8MSWIN1250:      Windows-1250
+
+# ISO 8859-10 North European
+NE8ISO8859P10:     ISO-8859-10
+
+# ISO 8859-4 North and North-East European
+NEE8ISO8859P4:     ISO-8859-4
+
+# ISO 8859-15 West European
+WE8ISO8859P15:     ISO-8859-15
+
+# MS Windows Code Page 1252 8-bit West European
+WE8MSWIN1252:      Windows-1252
+
+#
+# Middle Eastern
+#
+
+# ISO 8859-6 Latin/Arabic
+AR8ISO8859P6:      ISO-8859-6
+
+# MS Windows Code Page 1256 8-Bit Latin/Arabic
+AR8MSWIN1256:      Windows-1256
+
+# ISO 8859-8 Latin/Hebrew
+IW8ISO8859P8:      ISO-8859-8
+
+# MS Windows Code Page 1255 8-bit Latin/Hebrew
+IW8MSWIN1255:      Windows-1255
+
+# MS Windows Code Page 1254 8-bit Turkish
+TR8MSWIN1254:      Windows-1254
+
+# ISO 8859-9 West European & Turkish
+WE8ISO8859P9:      ISO-8859-9
+
+#
+# Universal
+#
+
+# Unicode 5.0 UTF-8 Universal character set
+AL32UTF8:          UTF-8
+
+#
+############################################################
+
+
+############################################################
+# Oracle Database Globalization Support Guide 11g Release 1 (11.1)
+#
+# Table A-6 Other ASCII-based Database Character Sets
+
+#
+# Asian
+#
+
+# Bangladesh National Code 8-bit BSCII
+BN8BSCII:          nil          # FIXME
+
+# Multiple-Script Indian Standard 8-bit Latin/Indian Languages
+IN8ISCII:          nil          # FIXME
+
+# JVMS 16-bit Japanese
+JA16VMS:           nil          # FIXME
+
+# KSC5601 16-bit Korean
+KO16KSC5601:       EUC-KR # This should be 'CP949'?
+
+# KSCCS 16-bit Korean
+KO16KSCCS:         nil          # FIXME
+
+# Mac Server 8-bit Latin/Thai
+TH8MACTHAIS:       macThai
+
+# VN3 8-bit Vietnamese
+VN8VN3:            nil          # FIXME
+
+# CGB2312-80 16-bit Simplified Chinese
+ZHS16CGB231280:    GB2312
+
+# BIG5 16-bit Traditional Chinese
+ZHT16BIG5:         Big5
+
+# HP CCDC 16-bit Traditional Chinese
+ZHT16CCDC:         nil          # FIXME
+
+# Taiwan Taxation 16-bit Traditional Chinese
+ZHT16DBT:          nil          # FIXME
+
+# MS Windows Code Page 950 with Hong Kong Supplementary Character
+# Set HKSCS-2001 (character set conversion to and from Unicode is
+# based on Unicode 3.1)
+ZHT16HKSCS31:      Big5         # Does the Big5 include HKSCS?
+
+# SOPS 32-bit Traditional Chinese
+ZHT32SOPS:         nil          # FIXME
+
+# TRIS 32-bit Traditional Chinese
+ZHT32TRIS:         nil          # FIXME
+
+#
+# Middle Eastern
+#
+
+# Arabic MS-DOS 710 Server 8-bit Latin/Arabic
+AR8ADOS710:        nil          # FIXME
+
+# Arabic MS-DOS 720 Server 8-bit Latin/Arabic
+AR8ADOS720:        nil          # FIXME
+
+# APTEC 715 Server 8-bit Latin/Arabic
+AR8APTEC715:       nil          # FIXME
+
+# ASMO Extended 708 8-bit Latin/Arabic
+AR8ASMO8X:         nil          # FIXME
+
+# Mussa'd Alarabi/2 768 Server 8-bit Latin/Arabic
+AR8MUSSAD768:      nil          # FIXME
+
+# Nafitha Enhanced 711 Server 8-bit Latin/Arabic
+AR8NAFITHA711:     nil          # FIXME
+
+# Nafitha International 721 Server 8-bit Latin/Arabic
+AR8NAFITHA721:     nil          # FIXME
+
+# SAKHR 706 Server 8-bit Latin/Arabic
+AR8SAKHR706:       nil          # FIXME
+
+# SAKHR 707 Server 8-bit Latin/Arabic
+AR8SAKHR707:       nil          # FIXME
+
+# ISO 8859-9 Latin Azerbaijani
+AZ8ISO8859PE:      nil          # How is this different from WE8ISO8859P9?
+
+# Multiple-Script Indian Standard 8-bit Latin/Indian Languages
+IN8ISCII:          nil          # FIXME
+
+# Mac Client 8-bit Hebrew
+IW8MACHEBREW:      nil          # FIXME
+
+# IBM-PC Code Page 1507/862 8-bit Latin/Hebrew
+IW8PC1507:         IBM862
+
+# ISO 6937 8-bit Coded Character Set for Text Communication
+LA8ISO6937:        nil          # FIXME
+
+# DEC 8-bit Turkish
+TR8DEC:            nil          # FIXME
+
+# IBM-PC Code Page 857 8-bit Turkish
+TR8PC857:          IBM857
+
+#
+# European
+#
+
+# Mac Client 8-bit Latin/Arabic
+AR8ARABICMAC:      nil          # FIXME
+
+# Mac Server 8-bit Latin/Arabic
+AR8ARABICMACS:     nil          # FIXME
+
+# MS Windows 8-bit Bulgarian Cyrillic
+BG8MSWIN:          nil          # FIXME
+
+# IBM-PC Code Page 437 8-bit (Bulgarian Modification)
+BG8PC437S:         nil          # How is this different from US8PC437?
+
+# Latvian Standard LVS8-92(1) Windows/Unix 8-bit Baltic
+BLT8CP921:         nil          # FIXME
+
+# IBM-PC Code Page 775 8-bit Baltic
+BLT8PC775:         IBM775
+
+# IBM-PC Code Page 863 8-bit Canadian French
+CDN8PC863:         IBM863
+
+# ISO 8859-14 Celtic
+CEL8ISO8859P14:    ISO-8859-14
+
+# ISOIR111 Cyrillic
+CL8ISOIR111:       nil          # FIXME
+
+# RELCOM Internet Standard 8-bit Latin/Cyrillic
+CL8KOI8R:          KOI8-R
+
+# KOI8 Ukrainian Cyrillic
+CL8KOI8U:          KOI8-U
+
+# Mac Server 8-bit Latin/Cyrillic
+CL8MACCYRILLICS:   macCyrillic
+
+# Mac Server 8-bit Central European
+EE8MACCES:         macCentEuro
+
+# Mac Server 8-bit Croatian
+EE8MACCROATIANS:   macCroatian
+
+# IBM-PC Code Page 852 8-bit East European
+EE8PC852:          IBM852       # or 'CP852'
+
+# DEC 8-bit Latin/Greek
+EL8DEC:            nil          # FIXME
+
+# Mac Server 8-bit Greek
+EL8MACGREEKS:      macGreek
+
+# IBM-PC Code Page 437 8-bit (Greek modification)
+EL8PC437S:         nil          # How is this different from US8PC437?
+
+# IBM-PC Code Page 851 8-bit Greek/Latin
+EL8PC851:          nil          # FIXME
+
+# IBM-PC Code Page 869 8-bit Greek/Latin
+EL8PC869:          IBM869
+
+# MS Windows Code Page 923 8-bit Estonian
+ET8MSWIN923:       nil          # FIXME
+
+# Hungarian 8-bit Special AB Mod
+HU8ABMOD:          nil          # FIXME
+
+# Hungarian 8-bit CWI-2
+HU8CWI2:           nil          # FIXME
+
+# IBM-PC Code Page 861 8-bit Icelandic
+IS8PC861:          IBM861
+
+# German Government Printer 8-bit All-European Latin
+LA8PASSPORT:       nil          # FIXME
+
+# MS Windows Code Page 921 8-bit Lithuanian
+LT8MSWIN921:       nil          # FIXME
+
+# IBM-PC Code Page 772 8-bit Lithuanian (Latin/Cyrillic)
+LT8PC772:          nil          # FIXME
+
+# IBM-PC Code Page 774 8-bit Lithuanian (Latin)
+LT8PC774:          nil          # FIXME
+
+# Latvian Version IBM-PC Code Page 866 8-bit Latin/Cyrillic
+LV8PC8LR:          nil          # How is this different from RU8PC866?
+
+# IBM-PC Code Page 1117 8-bit Latvian
+LV8PC1117:         nil          # FIXME
+
+# IBM-PC Alternative Code Page 8-bit Latvian (Latin/Cyrillic)
+LV8RST104090:      nil          # FIXME
+
+# IBM-PC Code Page 865 8-bit Norwegian
+N8PC865:           IBM865
+
+# BESTA 8-bit Latin/Cyrillic
+RU8BESTA:          nil          # FIXME
+
+# IBM-PC Code Page 855 8-bit Latin/Cyrillic
+RU8PC855:          IBM855 # or 'CP855'
+
+# IBM-PC Code Page 866 8-bit Latin/Cyrillic
+RU8PC866:          IBM866
+
+# ISO 8859-3 South European
+SE8ISO8859P3:      ISO-8859-3
+
+# Mac Client 8-bit Turkish
+TR8MACTURKISH:     macTurkish
+
+# Mac Server 8-bit Turkish
+TR8MACTURKISHS:    macTurkish
+
+# ASCII 7-bit American
+US7ASCII:          US-ASCII
+
+# IBM-PC Code Page 437 8-bit American
+US8PC437:          IBM437
+
+# DEC 8-bit West European
+WE8DEC:            nil          # FIXME
+
+# DG 8-bit West European
+WE8DG:             nil          # FIXME
+
+# ISO 8859-1 West European
+WE8ISO8859P1:      ISO-8859-1
+
+# Mac Server 8-bit Extended Roman8 West European
+WE8MACROMAN8S:     macRoman
+
+# NCR 4970 8-bit West European
+WE8NCR4970:        nil          # FIXME
+
+# NeXTSTEP PostScript 8-bit West European
+WE8NEXTSTEP:       nil          # FIXME
+
+# IBM-PC Code Page 850 8-bit West European
+WE8PC850:          CP850
+
+# IBM-PC Code Page 858 8-bit West European
+WE8PC858:          nil          # FIXME
+
+# IBM-PC Code Page 860 8-bit West European
+WE8PC860:          IBM860
+
+# HP Roman8 8-bit West European
+WE8ROMAN8:         nil          # FIXME
+
+#
+# Universal
+#
+
+# Unicode 5.0 UTF-8 Universal character set, CESU-8 compliant
+UTF8:              UTF-8
+
+#
+############################################################
+
+############################################################
+# Oracle Database Globalization Support Guide 11g Release 1 (11.1)
+#
+# Table A-9 Client-Only Character Sets
+
+#
+# Asian
+#
+
+# EUC 24-bit Japanese with '\' mapped to the Japanese yen character
+JA16EUCYEN:        nil          # FIXME
+
+# Mac client Shift-JIS 16-bit Japanese
+JA16MACSJIS:       MacJapanese
+
+# Shift-JIS 16-bit Japanese with '\' mapped to the Japanese yen character
+JA16SJISYEN:       nil          # FIXME
+
+# Mac Client 8-bit Latin/Thai
+TH8MACTHAI:        macThai
+
+# GB18030-2000
+ZHS32GB18030:      GB18030
+
+# Mac client CGB2312-80 16-bit Simplified Chinese
+ZHS16MACCGB231280: nil          # FIXME
+
+#
+# European
+#
+
+# DEC VT100 7-bit Swiss (German/French)
+CH7DEC:            nil          # FIXME
+
+# Mac Client 8-bit Latin/Cyrillic
+CL8MACCYRILLIC:    macCyrillic
+
+# Siemens 97801/97808 7-bit German
+D7SIEMENS9780X:    nil          # FIXME
+
+# DEC VT100 7-bit German
+D7DEC:             nil          # FIXME
+
+# EEC Targon 35 ASCI West European/Greek
+EEC8EUROASCI:      nil          # FIXME
+
+# EEC EUROPA3 8-bit West European/Greek
+EEC8EUROPA3:       nil          # FIXME
+
+# Mac Client 8-bit Croatian
+EE8MACCROATIAN:    macCroatian
+
+# Mac Client 8-bit Central European
+EE8MACCE:          macCentEuro
+
+# IBM-PC Code Page 737 8-bit Greek/Latin
+EL8PC737:          IBM737
+
+# Mac Client 8-bit Greek
+EL8MACGREEK:       macGreek
+
+# DEC VT100 7-bit Spanish
+E7DEC:             nil          # FIXME
+
+# Siemens 97801/97808 7-bit Spanish
+E7SIEMENS9780X:    nil          # FIXME
+
+# DEC VT100 7-bit French
+F7DEC:             nil          # FIXME
+
+# Siemens 97801/97808 7-bit French
+F7SIEMENS9780X:    nil          # FIXME
+
+# DEC VT100 7-bit Italian
+I7DEC:             nil          # FIXME
+
+# Siemens 97801/97808 7-bit Italian
+I7SIEMENS9780X:    nil          # FIXME
+
+# Mac Server 8-bit Icelandic
+IS8MACICELANDICS:  macIceland
+
+# Mac Client 8-bit Icelandic
+IS8MACICELANDIC:   macIceland
+
+# DEC VT100 7-bit Dutch
+NL7DEC:            nil          # FIXME
+
+# DEC VT100 7-bit Norwegian/Danish
+NDK7DEC:           nil          # FIXME
+
+# Siemens 97801/97808 7-bit Norwegian
+N7SIEMENS9780X:    nil          # FIXME
+
+# DEC VT100 7-bit Finnish
+SF7DEC:            nil          # FIXME
+
+# Siemens 97801/97808 7-bit Swedish
+S7SIEMENS9780X:    nil          # FIXME
+
+# DEC VT100 7-bit Swedish
+S7DEC:             nil          # FIXME
+
+# ASCII 7-bit Finnish
+SF7ASCII:          nil          # FIXME
+
+# DEC VT100 7-bit Turkish
+TR7DEC:            nil          # FIXME
+
+# ICL special version ISO8859-1
+WE8ISOICLUK:       nil          # How is this different from ISO-8859-1?
+
+# Mac Client 8-bit Extended Roman8 West European
+WE8MACROMAN8:      macRoman
+
+# HP LaserJet 8-bit West European
+WE8HP:             nil          # FIXME
+
+# ASCII 7-bit Yugoslavian
+YUG7ASCII:         nil          # FIXME
+
+#
+# Middle Eastern
+#
+
+# Israeli Standard 960 7-bit Latin/Hebrew
+IW7IS960:          nil          # FIXME
+
+#
+############################################################
+
+
+############################################################
+# Oracle Database Globalization Support Guide 11g Release 1 (11.1)
+#
+# Table A-10 Universal Character Sets
+
+# Unicode 5.0 UTF-16 Universal character set
+AL16UTF16:         UTF-16BE
+
+#
+############################################################
+
+# Other
+AL16UTF16LE:       UTF-16LE

Modified: trunk/ruby-oci8/lib/oci8.rb.in
===================================================================
--- trunk/ruby-oci8/lib/oci8.rb.in	2009-01-11 15:11:09 UTC (rev 308)
+++ trunk/ruby-oci8/lib/oci8.rb.in	2009-01-13 14:36:43 UTC (rev 309)
@@ -19,6 +19,15 @@
 end
 
 require 'oci8lib'
+
+if OCI8.respond_to? :encoding  # skip when OCI8.encoding is not available
+  if defined? DEFAULT_OCI8_ENCODING  # an explicitly defined default takes precedence
+    OCI8.encoding = DEFAULT_OCI8_ENCODING
+  else
+    load 'oci8/encoding-init.rb'  # otherwise derive the encoding from NLS_LANG
+  end
+end
+
 require 'oci8/oracle_version.rb'
 
 class OCI8




More information about the ruby-oci8-commit mailing list