{ WINUTF.PAS              Copyright (c) TrSek alias Zdeno Sekerak  }
{ Converts a text file encoded in the Win1250 (Latin2) character   }
{ set, or in UTF-8, into a file of 16-bit Unicode values that is   }
{ prefixed with the $FEFF mark.                                    }
{                                                                  }
{ Date: 06.06.2005                           http://www.trsek.com  }

program Win1250_UTF_UNICODE;
uses crt,dos;

const
  WIN_1250 = 'WIN1250.txt';   { conversion table for Win1250 (Latin2) }
  WIN_UTF8 = 'UTF8.txt';      { conversion table for UTF-8 }
  VST_TXT1 = 'v_W1250.txt';   { sample input text encoded in Win-1250 }
  VST_TXT2 = 'v_UTF8.txt';    { sample input text encoded in UTF-8 }
  VYS_TXT  = 'Unicode.txt';   { output text in Unicode }

var
  fin:file of byte;           { input, read one byte at a time }
  fout:file of word;          { output, written one 16-bit word at a time }
  win:array[0..255] of word;  { table loaded from WIN_1250 }
  utf:array[0..255] of word;  { table loaded from WIN_UTF8 }
  vstup:string;               { input file name typed by the user }
  choice:byte;                { menu selection: 1 = Win1250, anything else = UTF-8 }
  ch:byte;                    { byte currently being converted }
  w:word;                     { pending UTF-8 lead byte shifted into the high byte, or 0 }

{ Converts a hexadecimal string (without any prefix) to a word by    }
{ handing it to Val with a '$' prefix.                               }
{ Returns 0 when the text is not a valid hexadecimal word, so the    }
{ result never depends on what Val leaves in its output on failure.  }
function HexToWord(s:string):word;
var hex:word;
    err:integer;
begin
  Val('$'+s,hex,err);
  if err <> 0 then
    hex := 0;
  HexToWord := hex;
end;

{ Loads a conversion table from the text file "sour" into "win".     }
{ Line number i of the file (counting from 0) is parsed as a hex     }
{ number and stored in win[i].  Entries for which the file has no    }
{ line are set to 0.  If the file cannot be opened, an error message }
{ is printed and the program halts with exit code 1.                 }
procedure LoadCodePage(sour:string; var win:array of word);
var i:integer;
    fw:text;
    pom:string;
begin
  Assign(fw, sour);
  { switch off automatic I/O checking so a missing file can be reported }
  {$I-} ReSet(fw); {$I+}
  if IOResult <> 0 then
  begin
    WriteLn('Chyba: subor ', sour, ' sa neda otvorit.');
    Halt(1);
  end;

  { High(win) keeps the loop inside whatever array the caller passed }
  for i:=0 to High(win) do
  begin
    if eof(fw) then
      win[i] := 0
    else
    begin
      readln(fw,pom);
      win[i] := HexToWord(pom);
    end;
  end;

  Close(fw);
end;

{ Looks up the value w in the global "utf" table and returns its     }
{ index.  When the value occurs more than once the highest index is  }
{ returned; when it does not occur at all the result is 0.           }
function NajdiUTF(w:word):byte;
var i:integer;
begin
  NajdiUTF := 0;
  { scanning downward lets us stop at the first hit and still return }
  { the highest matching index }
  for i:=255 downto 0 do
    if utf[i] = w then
    begin
      NajdiUTF := i;
      Exit;
    end;
end;

begin
  Writeln('Program konvertuje textovy subor zo sady Win1250 (Latin2), UTF do Unicode.');
  WriteLn('Pozadovane znakove sady su definovane v suboroch ', WIN_1250, ' a ', WIN_UTF8, '.');
  WriteLn;
  WriteLn('Vyber:');
  WriteLn('1. Win1250 -> Unicode (', VST_TXT1,')');
  WriteLn('2. UTF-8 -> Unicode (', VST_TXT2,')');
  ReadLn(choice);

  { name of the file to convert }
  Write('Zadaj meno suboru:');
  ReadLn(vstup);

  { load both conversion tables }
  LoadCodePage(WIN_1250, win);
  LoadCodePage(WIN_UTF8, utf);

  { open the encoded input text }
  Assign(fin, vstup);
  {$I-} ReSet(fin); {$I+}
  if IOResult <> 0 then
  begin
    WriteLn('Chyba: subor ', vstup, ' sa neda otvorit.');
    Halt(1);
  end;

  { create the file the converted text is written to }
  Assign(fout, VYS_TXT);
  {$I-} ReWrite(fout); {$I+}
  if IOResult <> 0 then
  begin
    Close(fin);
    WriteLn('Chyba: subor ', VYS_TXT, ' sa neda vytvorit.');
    Halt(1);
  end;

  { Unicode mark written as the first word of the output }
  w := $FEFF;
  Write(fout, w);
  w := 0;

  { convert the input byte by byte }
  while not eof(fin) do
  begin
    Read(fin,ch);

    if choice = 1 then
      { Win1250: the byte indexes the win table directly }
      Write(fout, win[ch])
    else
    begin
      { UTF-8: a character is encoded in one or two bytes }
      if ch in [$C2, $C3, $C4, $C5] then
        { first byte of a two-byte sequence: keep it in the high byte }
        { of w and wait for the next byte }
        w := word(ch) * $100
      else
      begin
        { combine with the pending lead byte (if any), find the value }
        { in the utf table and emit the win entry at the same index }
        Write(fout, win[NajdiUTF(w+ch)]);
        w := 0;
      end;
    end;
  end;

  { close the files }
  Close(fin);
  Close(fout);

  WriteLn('Hotovo. Vysledok je v subore ', VYS_TXT);
  ReadLn;
end.