Смекни!
smekni.com

Перекодировка текстовых файлов (стр. 3 из 3)

Sub FindCP(stroky() As String, msg1 As String, msg2 As String, index As Integer)

Dim s As Integer, z As Integer

Dim symb As String * 1

Dim kod As Byte

Dim scp(7) As codepage

Dim ks As String, ks1 As String

Dim ks2 As String, ne As String

ks = "Ваш текст предположительно имеет кодировку "

ne = "не "

ks1 = "Требуется "

ks2 = "Перекодировка "

For s = 0 To UBound(stroky)

For z = 1 To Len(stroky(s))

symb = Mid(stroky(s), z, 1)

kod = Asc(symb)

If cp1(kod) Then scp(0).vol = scp(0).vol + 1: scp(0).name = "КОИ-8R"

If cp2(kod) Then scp(1).vol = scp(1).vol + 1: scp(1).name = "Cp1251"

If cp3(kod) Then scp(2).vol = scp(2).vol + 1: scp(2).name = "OEM"

If cp4(kod) Then scp(3).vol = scp(3).vol + 1: scp(3).name = "Cp866"

If cp5(kod) Then scp(4).vol = scp(4).vol + 1: scp(4).name = "Mac"

If cp6(kod) Then scp(5).vol = scp(5).vol + 1: scp(5).name = "ISO"

If cp71(symb) Then scp(6).vol = scp(6).vol + 1: scp(6).name = "Unicode"

Next z

Next s

z = 0

For s = 0 To 6

If scp(s).vol >= z Then

z = scp(s).vol: index = s

EndIf

Nexts

'При совпадении счетчиков "КОИ-8R" и "cp1251" кодировка текста определяется как "cp1251"

If ((scp(0).vol = scp(1).vol) And index <= 1) Then index = 1

If index = 1 Then

msg1 = ks & scp(index).name

msg2 = ks2 & ne & LCase(ks1)

Else:

msg1 = ks & scp(index).name

msg2 = ks1 & LCase(ks2)

End If

EndSub

Модуль 4

Процедура выбора варианта перекодировки (КОИ-8R, 1251, OEM, 866, MAC, Unicode)

Sub Decoder(Fmas() As String, IndxCP As Integer, r As Integer, Smas() As String)

Dim i As Integer

Dim n As Integer

Dim Stroka As String

Dim OutStr As String

Dim smb As String

Dim code As Byte

If IndxCP = 1 Then Exit Sub 'если кодировка cp1251, то выход из процедуры без перекодирования

If IndxCP = 6 Then

Call DecUnicodeTo1251(Fmas, Smas)

Exit Sub

End If

ReDim Smas(r - 1)

For i = 0 To r - 1

Stroka = Fmas(i)

OutStr = ""

For n = 1 To Len(Stroka)

smb = Mid(Stroka, n, 1)

code = Asc(smb)

Select Case IndxCP

Case 0

OutStr = OutStr & Chr(cpKoiTo1251(code))

Case 2

OutStr = OutStr & Chr(cpOEMTo1251(code))

Case 3

OutStr = OutStr & Chr(cp866To1251(code))

Case 4

OutStr = OutStr & Chr(cpMACTo1251(code))

Case 5

OutStr = OutStr & Chr(cpISOTo1251(code))

End Select

Next n

Smas(i) = OutStr

Next i

End Sub

Модуль5

Проверка необходимости преобразования строк в записи пользовательского типа

Sub ConvertToRecord(sk() As String, k As Integer, str As shapka, mas() As param, hp As Integer)

Dim i As Integer

Dim str1 As String

Dim str2 As param

For i = 1 To k - 1

str1 = sk(i)

If i = 1 Then

Call sep(str1, str, hp)

Else:

If k > 1 Then

Call seps(str1, str2, hp)

ReDim Preserve mas(i - 2)

mas(i - 2) = str2

End If

End If

Next i

End Sub

Модуль 6

Первый этап сортировки строк (создание вспомогательного массива)

Sub sort(volVector() As param, intMesto() As Integer, h As Integer)

Dim i As Integer, j As Integer, kl As Integer

Dim highIndex As Integer, lj As Integer

Dim voltemp As Single

Dim flag() As Boolean

h = UBound(volVector)

ReDim intMesto(h)

highIndex = UBound(volVector)

ReDim flag(highIndex)

For i = 0 To highIndex

flag(i) = True

Next i

For i = 0 To highIndex

voltemp = 99999

For j = 0 To highIndex

If flag(j) Then

If volVector(j).vol(1) <= voltemp Then 'если volvector(j) будет меньше или равно voltemp,

'то значение текущего минимума voltemp, будет

'заменено на элемент volvector(j)

voltemp = volVector(j).vol(1)

kl = j

End If

End If

Next j

intMesto(i) = kl

flag(kl) = False

Next i

End Sub

Модуль7

Вывод результата на рабочий лист Excel и сохранение в файл

Sub OutputData(name As String, sk() As String, mm() As Integer, h As Integer, hp As Integer, nf2 As Integer, str As String, mas() As param)

Dim i As Integer, q As Integer

Open name For Output As nf2

Print #nf2, sk(0)

Print #nf2, sk(1)

Cells(1, 1) = sk(0)

For i = 0 To hp

Cells(2, i + 1) = str(i)

Next i

For q = 0 To h

Cells(q + 3, 1) = mas(mm(q)).prop

For i = 0 To hp - 1

Cells(q + 3, i + 2) = mas(mm(q)).vol(i)

Next i

Print #nf2, sk(mm(q) + 2)

Next q

Close #nf2

End Sub

Модуль 8

Процедура обработки текста кодированного в cpUnicode для перекодировки в cp1251

Sub DecUnicodeTo1251(TextUnicode() As String, Text1251() As String)

Dim i As Integer

Dim n As Integer

Dim fstr As String

Dim smb1 As String * 1

Dim smb2 As String * 1

Dim code1 As Byte

Dim code2 As Byte

Dim OutStr As String

'В тексте кодированном в cpUnicode в начале добавляется два символа "ю" и "я"

'Поэтому их надо удалить

fstr = Right(TextUnicode(0), Len(TextUnicode(0)) - 2) 'удаление символов "ю" и "я"

TextUnicode(0) = fstr

For i = 0 To UBound(TextUnicode)

OutStr = ""

For n = 1 To Len(TextUnicode(i))

smb1 = Mid(TextUnicode(i), n, 1)

code1 = Asc(smb1)

smb2 = Mid(TextUnicode(i), n + 1, 1)

code2 = Asc(smb2)

'Проверка по двум байтам:

'Если второй байт равен 4, то первый байт Unicode перекодируется в cp1251

If (code1 <> 4 And code2 = 4) Then OutStr = OutStr & Chr(cpUnicodeTo1251(code1))

'Если первый байт не равен 4, то символ ASCII, и не требует перекодировки

If (code1 <> 4 And code2 <> 4) Then OutStr = OutStr & Chr(code1)

Next n

ReDim Preserve Text1251(i)

Text1251(i) = OutStr

Next i

End Sub

Модуль9

Диапазоныкодовкодировок(КОИ-8R, 1251, OEM, 866, MAC, Unicode)

'Кодовая таблица КОИ-8R

Function cp1(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim e As Boolean, d As Boolean

Const x1 = 163, X2 = 179

Const x4 = 195, X5 = 255

a = x1 = kod: b = X2 = kod

d = x4 <= kod: e = kod <= X5

cp1 = (a) Or (b) Or (d And e)

End Function

'Кодовая таблица Cp1251

Function cp2(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim c As Boolean, d As Boolean

Const x1 = 168, X2 = 184

Const x3 = 195, x4 = 255

a = x1 = kod: b = kod = X2

c = x3 <= kod: d = kod <= x4

cp2 = (a) Or (b) Or (c And d)

End Function

'Кодовая таблица OEM

Function cp3(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim c As Boolean, d As Boolean

Dim a1 As Boolean, b1 As Boolean

Dim c1 As Boolean, d1 As Boolean

Dim a2 As Boolean, b2 As Boolean

Dim c2 As Boolean, d2 As Boolean

Dim a3 As Boolean, b3 As Boolean

Dim c3 As Boolean, d3 As Boolean

Dim a4 As Boolean, b4 As Boolean

Dim c4 As Boolean, d4 As Boolean

Const x1 = 132, X2 = 133

Const x3 = 156, x4 = 159

Const X5 = 160, X6 = 173

Const X7 = 181, X8 = 184

Const X9 = 189, X10 = 190

Const X11 = 198, X12 = 199

Const X13 = 208, X14 = 216

Const X15 = 221, X16 = 222

Const X17 = 224, X18 = 238

Const X19 = 225, X20 = 252

a = x1 <= kod: b = kod <= X2: c = x3 <= kod: d = kod <= x4

a1 = X5 <= kod: b1 = kod <= X6: c1 = X7 <= kod: d1 = kod <= X8

a2 = X9 <= kod: b2 = kod <= X10: c2 = X11 <= kod: d2 = kod <= X12

a3 = X13 <= kod: b3 = kod <= X14: c3 = X15 <= kod: d3 = kod <= X16

a4 = X17 <= kod: b4 = kod <= X18: c4 = X19 <= kod: d4 = kod <= X20

cp3 = (a And b) Or (c And d) Or (a1 And b1) Or (c1 And d1) Or (a2 And b2) Or (c2 And d2) Or (a3 And b3) Or (c3 And d3) Or (a4 And b4) Or (c4 And d4)

End Function

'Кодовая таблица Cp866

Function cp4(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim c As Boolean, d As Boolean

Const x1 = 128, X2 = 175

Const x3 = 224, x4 = 241

a = x1 <= kod: b = kod <= X2

c = x3 <= kod: d = kod <= x4

cp4 = (a And b) Or (c And d)

End Function

'Кодовая таблица Mac

Function cp5(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim c As Boolean, d As Boolean

Const x1 = 128, X2 = 159

Const x3 = 221, x4 = 254

a = x1 <= kod: b = kod <= X2

c = x3 <= kod: d = kod <= x4

cp5 = (a And b) Or (c And d)

End Function

'Кодовая таблица ISO

Function cp6(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim c As Boolean, d As Boolean

Const x1 = 160, X2 = 240

Const x3 = 176, x4 = 238

a = x1 = kod: b = kod = X2

c = x3 <= kod: d = kod <= x4

cp6 = (a And b) Or (c And d)

End Function

'Кодовая таблица Unicode (младшие разряды)

Function cp7(kod As Byte) As Boolean

Dim a As Boolean, b As Boolean

Dim c As Boolean, d As Boolean

Const x1 = 1, X2 = 81

Const x3 = 16, x4 = 79

a = x1 = kod: b = kod = X2

c = x3 <= kod: d = kod <= x4

cp7 = a Or b Or (c And d)

End Function

'Продолжение Unicode (старшие разряды(04))

Function cp71(symb As String) As Boolean

Dim k As Byte

Dim a As Boolean

Const x1 = 4

k = AscB(symb)

a = x1 = k

cp71 = a

End Function

Модуль 10

Описание пользовательских типов данных

Typeparam

prop As String

vol(7) As Single

End Type

Type codepage

name As String

vol As Integer

End Type

Модуль11

Процедура разбивки строки на слова с последующей записью в массив

Sub sep(str As String, par() As String, howpar As Integer)

Dim p As Integer, q As Integer, r As Integer

Dim dlina As Integer

Dim sp As String

Dim slovo As String

Dim HT As String * 1

HT = Chr(9) '09-код символа "горизонтальная табуляция"

str = str & HT

dlina = Len(str)

p = 1: q = 0

Do While p < dlina

r = InStr(p, str, HT)

slovo = Mid(str, p, r - p)

ReDim Preserve par(q)

par(q) = slovo

q = q + 1

p = r + 1

Loop

howpar = q

EndSub

Модуль 12

Процедура преобразования строки в запись(элементы записи могут быть типа String и Single)

Sub seps(str As String, par As param, howpar As Integer)

Dim p As Integer, q As Integer, r As Integer

Dim dlina As Integer

Dim sp As String, smb As String

Dim HT As String * 1

HT = Chr(9)

dlina = Len(str)

If dlina = 0 Then

Exit Sub

End If

r = InStr(str, HT)

par.prop = Left(str, r - 1)

sp = Right(str, dlina - r) & HT

dlina = dlina - r + 1

p = 1: q = 0

Do While p < dlina

r = InStr(p, sp, HT)

smb = Mid(sp, p, r - p)

If smb = "-" Then

par.vol(q) = 0

Else:

par.vol(q) = CSng(smb)

End If

q = q + 1

p = r + 1

Loop

howpar = q

End Sub

Модуль13

Перекодирование кодов символов из исходной кодировки в заданную 1251

'Перекодирование кода символа из cpКОИ-8R в cp1251

Function cpKoiTo1251(code As Byte) As Byte

Dim c As Byte

c = code

Select Case code

Case 225 To 226c = code - 33Case 228 To 229c = code - 32Case 233 To 240c = code - 33Case 242 To 245c = code - 34Case 193 To 194c = code + 31Case 196 To 197c = code + 32Case 201 To 208c = code + 31Case 210 To 213c = code + 30Case 253c = 217Case 255c = 218Case 249c = 219 Case 247c = 194Case 231c = 195Case 179c = 168Case 246c = 198Case 250c = 199Case 230c = 212Case 232c = 213Case 227c = 214Case 254c = 215Case 251c = 216Case 224c = 222 Case 163c = 184Case 214c = 230Case 218c = 231Case 198c = 244Case 200c = 245Case 195c = 246Case 222c = 247Case 219c = 248Case 221c = 249Case 223c = 250Case 252c = 221 Case 242c = 223Case 215c = 226Case 199c = 227 Case 209c = 255Case 217c = 251Case 216c = 252Case 220c = 253Case 192c = 254Case 248c = 220

End Select

cpKoiTo1251 = c

End Function

'перекодирование кода символа из cpOEM в cp1251

Function cpOEMTo1251(code As Byte) As Byte

Dim c As Byte

c = code

Select Case code

Case 161c = 192Case 163c = 193Case 236c = 194Case 173c = 195Case 167c = 196Case 169c = 197Case 133c = 168Case 234c = 198Case 244c = 199Case 184c = 200Case 190c = 201Case 199c = 202Case 209c = 203Case 211c = 204Case 213c = 205Case 215c = 206Case 221c = 207 Case 229c = 242Case 231c = 243Case 170c = 244Case 181c = 245Case 164c = 246Case 251c = 247Case 245c = 248Case 249c = 249Case 237c = 250Case 241c = 251Case 158c = 252Case 247c = 253Case 150c = 254Case 222c = 255Case 232c = 211Case 171c = 212 Case 226c = 208 Case 168c = 229Case 132c = 184Case 233c = 230Case 243c = 231Case 183c = 232Case 189c = 233Case 198c = 234Case 208c = 235Case 210c = 236Case 212c = 237Case 214c = 238Case 216c = 239Case 225c = 240Case 227c = 241Case 228c = 209Case 230c = 210Case 166c = 228 Case 182c = 213Case 165c = 214Case 152c = 215Case 246c = 216Case 250c = 217Case 238c = 218Case 242c = 219Case 159c = 220Case 248c = 221Case 157c = 222Case 224c = 223Case 160c = 224Case 162c = 225Case 235c = 226Case 172c = 227

End Select

cpOEMTo1251 = c

End Function

'перекодирование кода символа из cp866 в cp1251

Function cp866To1251(code As Byte) As Byte

Dim c As Byte

c = code

Select Case code

Case 128 To 175

c = code + 64

Case 224 To 239

c = code + 16

Case 240

c = 168

Case 241

c = 184

End Select

cp866To1251 = c

End Function

'перекодирование кода символа из Unicode в cp1251

Function cpUnicodeTo1251(code As Byte) As Byte

Dim c As Byte

c = code

Select Case code

Case 16 To 79

c = code + 176

Case 1

c = 168

Case 81

c = 184

End Select

cpUnicodeTo1251 = c

End Function

'перекодирование кода символа из cpMAC в cp1251

Function cpMACTo1251(code As Byte) As Byte

Dim c As Byte

c = code

Select Case code

Case 128 To 159

c = code + 64

Case 224 To 254

c = code

Case 221

c = 168

Case 222

c = 184

Case 223

c = 255

End Select

cpMACTo1251 = c

End Function

'перекодирование кода символа из cpISO в cp1251

Function cpISOTo1251(code As Byte) As Byte

Dim c As Byte

c = code

Select Case code

Case 176 To 239

c = code + 16

Case 160

c = 168

Case 240

c = 184

End Select

cpISOTo1251 = c

End Function

Литература

· Стеценко А.А. Структуры и алгоритмы обработки данных – Методические указания к практическим и лабораторным занятиям.: Чебоксары 2009.

· Стеценко А.А. Структуры и типы данных – учебное пособие.: Чебоксары 2009.

· Электронный учебник по VBA. Режим доступа: http://www.mini-soft.ru/soft/vba