source: projects/synaptic/trunk/intltool-merge.in @ 280

Revision 280, 33.3 KB checked in by yasumichi, 15 years ago (diff)

first import

Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Merger
6#
7#  Copyright (C) 2000, 2003 Free Software Foundation.
8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
# Identity of this tool; these values are interpolated into the --version
# and --help output and into the "Try ... --help" hint.
my $PROGRAM = "intltool-merge";
my $PACKAGE = "intltool";
my $VERSION = "0.33";
39
40## Loaded modules
41use strict;
42use Getopt::Long;
43use Text::Wrap;
44use File::Basename;
45
# File-level parser state.  $leading_space accumulates whitespace-only text
# seen by traverse() so that translated copies of an element can be indented.
# NOTE(review): the other variables are not referenced in the part of the
# file reviewed here -- confirm against the rest of the file before removing.
my $must_end_tag      = -1;
my $last_depth        = -1;
my $translation_depth = -1;
my @tag_stack = ();
my @entered_tag = ();
my @translation_strings = ();
my $leading_space = "";
53
## Scalars used by the option stuff
# Each flag is 0 until the matching command-line option is seen.
my $HELP_ARG = 0;
my $VERSION_ARG = 0;
my $BA_STYLE_ARG = 0;
my $XML_STYLE_ARG = 0;
my $KEYS_STYLE_ARG = 0;
my $DESKTOP_STYLE_ARG = 0;
my $SCHEMAS_STYLE_ARG = 0;
my $RFC822DEB_STYLE_ARG = 0;
my $QUIET_ARG = 0;
my $PASS_THROUGH_ARG = 0;
my $UTF8_ARG = 0;
my $MULTIPLE_OUTPUT = 0;
my $cache_file;

## Handle options
# GetOptions returns false when the command line cannot be parsed; in that
# case print the "Try --help" hint and stop.  The failure handler used to be
# `&error', a sub that is not defined in the visible part of this file (the
# helper defined here is print_error), so a bad option aborted with
# "Undefined subroutine &main::error called" instead of the hint.
GetOptions
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "multiple-output|m" => \$MULTIPLE_OUTPUT,
 "cache|c=s" => \$cache_file
 ) or print_error();
87
# Positional arguments (PO directory, input file, output file); they are
# assigned from @ARGV by preparation().
my $PO_DIR;
my $FILE;
my $OUTFILE;

# Language code -> path of its .po file; filled in by gather_po_files().
my %po_files_by_lang = ();
# Translation database, indexed with the two-part key ($lang, $msgid).
my %translations = ();
# iconv program used for charset conversion; the ICONV and INTLTOOL_ICONV
# environment variables override the built-in default path.
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";

# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

# XML quoted string contents
# (a regex fragment matching any run of characters other than a double quote)
my $q = "[^\\\"]*";
101
102## Check for options.
103
# Merge styles in the order they are tested.  Each entry holds the option
# flag, the merge routine, and whether utf8_sanity_check must run first
# (true for every style except rfc822deb).
my @merge_styles = (
    [ $BA_STYLE_ARG,        \&ba_merge_translations,        1 ],
    [ $XML_STYLE_ARG,       \&xml_merge_output,             1 ],
    [ $KEYS_STYLE_ARG,      \&keys_merge_translations,      1 ],
    [ $DESKTOP_STYLE_ARG,   \&desktop_merge_translations,   1 ],
    [ $SCHEMAS_STYLE_ARG,   \&schemas_merge_translations,   1 ],
    [ $RFC822DEB_STYLE_ARG, \&rfc822deb_merge_translations, 0 ],
);

# The first style whose flag is set wins, exactly as in an if/elsif chain.
my ($selected_style) = grep { $_->[0] } @merge_styles;

if ($VERSION_ARG)
{
    print_version();
}
elsif ($HELP_ARG)
{
    print_help();
}
elsif ($selected_style && @ARGV > 2)
{
    # A merge needs all three positional arguments.
    my (undef, $merge_sub, $needs_utf8_check) = @{ $selected_style };

    utf8_sanity_check() if $needs_utf8_check;
    preparation();
    print_message();
    $merge_sub->();
    finalize();
}
else
{
    # No style given, or too few arguments: show the usage text.
    print_help();
}

exit;
165
## Print the program name, version and copyright notice, then exit.
sub print_version
{
    my $banner = <<_EOF_;
${PROGRAM} (${PACKAGE}) ${VERSION}
Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.

Copyright (C) 2000-2003 Free Software Foundation, Inc.
Copyright (C) 2000-2001 Eazel, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
_EOF_

    print $banner;
    exit;
}
180
## Print the usage summary on standard output, then exit.
sub print_help
{
    my $usage = <<_EOF_;
Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
Generates an output file that includes some localized attributes from an
untranslated source file.

Mandatory options: (exactly one must be specified)
  -b, --ba-style         includes translations in the bonobo-activation style
  -d, --desktop-style    includes translations in the desktop style
  -k, --keys-style       includes translations in the keys style
  -s, --schemas-style    includes translations in the schemas style
  -r, --rfc822deb-style  includes translations in the RFC822 style
  -x, --xml-style        includes translations in the standard xml style

Other options:
  -u, --utf8             convert all strings to UTF-8 before merging
                         (default for everything except RFC822 style)
  -p, --pass-through     deprecated, does nothing and issues a warning
  -m, --multiple-output  output one localized file per locale, instead of
                         a single file containing all localized elements
  -c, --cache=FILE       specify cache file name
                         (usually \$top_builddir/po/.intltool-merge-cache)
  -q, --quiet            suppress most messages
      --help             display this help and exit
      --version          output version information and exit

Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
or send email to <xml-i18n-tools\@gnome.org>.
_EOF_

    print $usage;
    exit;
}
214
215
## Sub for printing error messages
# Prints the "Try --help" hint on STDERR and terminates the program.
# The exit status is non-zero so that callers such as make rules can detect
# the usage error; a bare `exit' would report success (status 0).
sub print_error
{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}
222
223
# Announce which output file is being generated (silent with --quiet).
sub print_message
{
    if (!$QUIET_ARG)
    {
        print "Merging translations into $OUTFILE.\n";
    }
}
228
229
# Pick up the three positional arguments, then locate the .po files and
# build (or load) the translation database.
sub preparation
{
    ($PO_DIR, $FILE, $OUTFILE) = @ARGV[0 .. 2];

    gather_po_files();
    get_translation_database();
}
239
240# General-purpose code for looking up translations in .po files
241
# Derive the language code from a .po path: "dir/de.po" becomes "de".
# A path without a directory part or without a .po suffix comes back
# unchanged.
sub po_file2lang
{
    my $lang = shift;

    $lang =~ s/^.*\/(.*)\.po$/$1/;

    return $lang;
}
248
# Record every *.po file found directly in $PO_DIR, keyed by the language
# code derived from its file name.
sub gather_po_files
{
    my @po_paths = glob "$PO_DIR/*.po";

    foreach my $po_path (@po_paths) {
        my $lang = po_file2lang($po_path);
        $po_files_by_lang{$lang} = $po_path;
    }
}
255
# Look up a character-set name in the charset alias file.
#
# The alias file is taken from the G_CHARSET_ALIAS environment variable, or
# /usr/lib/charset.alias when that is unset.  Comment lines (starting with
# '#') are skipped.  For the first line whose second column is $encoding
# (case-insensitive), the first column is returned.
#
# Parameter: $encoding - the name to look up.
# Returns:   the name from the first column of the matching line, or
#            $encoding itself when the file cannot be opened or has no match.
sub get_local_charset
{
    my ($encoding) = @_;
    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";

    # seek character encoding aliases in charset.alias (glib)

    # A missing or unreadable alias file is not an error.
    open my $alias_fh, '<', $alias_file or return $encoding;

    my $found;
    while (my $line = <$alias_fh>)
    {
        next if $line =~ /^\#/;

        # \Q...\E keeps the name from being interpreted as regex syntax.
        if ($line =~ /^\s*([-._a-zA-Z0-9]+)\s+\Q$encoding\E\b/i)
        {
            $found = $1;
            last;    # leave the loop so the handle is closed below
        }
    }

    close $alias_fh;

    # if not found, return input string

    return defined $found ? $found : $encoding;
}
278
# Determine the character encoding declared in a .po file.
#
# Scans the file for the first "Content-Type: ...; charset=NAME\n" header
# line.  When none is found, ISO-8859-1 is assumed (with a warning unless
# --quiet).  iconv is then asked whether it knows the name; if that command
# fails, the name is run through get_local_charset().
#
# Parameter: $in_po_file - path of the .po file.
# Returns:   the encoding name.
# Dies when the .po file cannot be opened.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    # Three-argument open: the path is never interpreted as a mode or pipe.
    open my $po_fh, '<', $in_po_file
        or die "can't open $in_po_file: $!";
    while (my $line = <$po_fh>)
    {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        if ($line =~ /Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
        {
            $encoding = $1;
            last;
        }
    }
    close $po_fh;

    if (!$encoding)
    {
        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
        $encoding = "ISO-8859-1";
    }

    # Probe conversion of empty input: a non-zero status means iconv
    # rejected the encoding name (or could not be run).
    system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
    if ($?) {
        $encoding = get_local_charset($encoding);
    }

    return $encoding
}
309
# Force UTF-8 conversion on, and warn if the removed --pass-through option
# was given.
sub utf8_sanity_check
{
    if ($PASS_THROUGH_ARG)
    {
        print STDERR "Warning: option --pass-through has been removed.\n";
    }

    $UTF8_ARG = 1;
}
315
# Fill the translation database: through the cache when --cache was given,
# otherwise straight from the .po files.
sub get_translation_database
{
    if (!$cache_file) {
        create_translation_database();
    } else {
        get_cached_translation_database();
    }
}
324
# Return the smallest -M age (in days, relative to script start) among the
# known .po files, i.e. the age of the most recently modified one.
# Returns 0 when there are no .po files.
sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang)
    {
        my $file_age = -M $file;

        # -M yields undef when the file cannot be examined; skip it rather
        # than compare an undefined value.
        next if !defined $file_age;

        # Test definedness, not truth: an age of exactly 0 is a valid
        # minimum and must not be overwritten by an older file.
        $newest_age = $file_age
            if !defined $newest_age || $file_age < $newest_age;
    }

    return defined $newest_age ? $newest_age : 0;
}
339
# Build the translation database from the .po files and write it to
# $cache_file as one string: all keys and values joined by "\x01".
# Dies when the cache file cannot be written.
sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    create_translation_database();

    # `or', not `||': with `||' the die is attached to the file name string
    # (always true), so a failed open went unnoticed.
    open my $cache_fh, '>', $cache_file
        or die "can't write cache file $cache_file: $!";
    print {$cache_fh} join "\x01", %translations;

    # Buffered write errors only surface when the handle is closed.
    close $cache_fh
        or die "can't finish writing cache file $cache_file: $!";
}
350
# Load the translation database from $cache_file, which holds the keys and
# values of %translations joined by "\x01" (as written by create_cache).
# Dies when the cache file cannot be read.
sub load_cache
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    # `or', not `||': with `||' the die is attached to the file name string
    # (always true), so a failed open went unnoticed.
    open my $cache_fh, '<', $cache_file
        or die "can't read cache file $cache_file: $!";
    my $contents = do { local $/; <$cache_fh> };    # slurp the whole file
    close $cache_fh;

    # An empty cache file reads as undef; treat it as "no translations".
    $contents = "" if !defined $contents;

    %translations = split "\x01", $contents;
}
364
# Use the cache file when it exists and is at least as recent as the newest
# .po file; otherwise (re)generate it.
sub get_cached_translation_database
{
    my $cache_age    = -M $cache_file;
    my $cache_exists = defined $cache_age;

    # Smaller -M age means more recently modified.
    if ($cache_exists && $cache_age <= get_newest_po_age())
    {
        load_cache();
        return;
    }

    print "Found too-old cached translation database\n"
        if $cache_exists && !$QUIET_ARG;

    create_cache();
}
380
# Parse every known .po file and fill %translations.
#
# Entries are stored under the two-part key ($lang, $msgid).  Fuzzy entries
# and entries with an empty msgid or msgstr are not stored.  When $UTF8_ARG
# is set, files whose declared encoding is not UTF-8 are read through iconv.
# Dies when a .po file (or the iconv pipe) cannot be opened.
sub create_translation_database
{
    for my $lang (keys %po_files_by_lang)
    {
        my $po_file = $po_files_by_lang{$lang};
        my $po_fh;

        if ($UTF8_ARG)
        {
            my $encoding = get_po_encoding ($po_file);

            if (lc $encoding eq "utf-8")
            {
                open ($po_fh, "<", $po_file)
                    or die "can't open $po_file: $!";
            }
            else
            {
                print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;

                # Read the file through an iconv pipeline (run via the shell).
                open ($po_fh, "$iconv -f $encoding -t UTF-8 $po_file|")
                    or die "can't run $iconv on $po_file: $!";
            }
        }
        else
        {
            open ($po_fh, "<", $po_file)
                or die "can't open $po_file: $!";
        }

        my $nextfuzzy = 0;    # a "#, fuzzy" flag precedes the next msgid
        my $inmsgid = 0;      # continuation lines belong to the msgid
        my $inmsgstr = 0;     # continuation lines belong to the msgstr
        my $msgid = "";
        my $msgstr = "";

        while (<$po_fh>)
        {
            $nextfuzzy = 1 if /^#, fuzzy/;

            if (/^msgid "((\\.|[^\\])*)"/ )
            {
                # A new entry starts: store the one collected so far.
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    # Leave $msgid empty so the fuzzy entry is never stored.
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($1);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }

            if (/^msgstr "((\\.|[^\\])*)"/)
            {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }

            # Continuation line of a multi-line msgid or msgstr.
            if (/^"((\\.|[^\\])*)"/)
            {
                $msgid .= unescape_po_string($1) if $inmsgid;
                $msgstr .= unescape_po_string($1) if $inmsgstr;
            }
        }

        # Store the last entry of the file.
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;

        # Release the file (or reap the iconv child) before the next language.
        close $po_fh;
    }
}
449
# Hook called after every merge; there is nothing to clean up.
sub finalize
{
    return;
}
453
# Convert one backslash escape sequence, as found in a .po string, into the
# character it stands for.  Sequences that are not recognised are returned
# unchanged.
sub unescape_one_sequence
{
    my $escape = shift;

    # Fixed escapes and their replacement characters.
    my %char_for = (
        "\\\\" => "\\",
        "\\\"" => "\"",
        "\\n"  => "\n",
        "\\r"  => "\r",
        "\\t"  => "\t",
        "\\b"  => "\b",
        "\\f"  => "\f",
        "\\a"  => "\a",
        "\\v"  => chr(11),    # vertical tab, see ascii(7)
    );
    return $char_for{$escape} if exists $char_for{$escape};

    # Numeric escapes: \xHH (hexadecimal) and \OOO (octal).
    if ($escape =~ /\\x([0-9a-fA-F]{2})/) {
        return chr(hex($1));
    }
    if ($escape =~ /\\([0-7]{3})/) {
        return chr(oct($1));
    }

    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489

    return $escape;
}
475
# Return a copy of a .po string with every backslash escape (\xHH, \OOO or
# backslash plus one character) replaced via unescape_one_sequence.
sub unescape_po_string
{
    my $escaped = shift;

    (my $plain = $escaped) =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;

    return $plain;
}
484
## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
## > in the entity. For further info please look at #84738.
#
# Replace the XML entities &apos; &quot; &lt; and &amp; in a string with the
# characters they stand for and return the result.
#
# &amp; is decoded LAST.  Decoding it earlier turns "&amp;lt;" into "&lt;",
# which the following substitution would decode a second time into "<".
# NOTE(review): the decoded text is used as a lookup key into the
# translation database; confirm that the tool producing the msgids decodes
# entities in the same order.
sub entity_decode
{
    my ($text) = @_;

    $text =~ s/&apos;/'/g; # '
    $text =~ s/&quot;/"/g; # "
    $text =~ s/&lt;/</g;
    $text =~ s/&amp;/&/g;

    return $text;
}
498 
# entity_encode: (string)
#
# Return the string with the characters that need escaping in XML replaced
# by entities.  The string is processed one unpacked 'C' value at a time and
# each value is mapped by entity_encode_int_minimalist.

sub entity_encode
{
    my $raw = shift;

    my $encoded = '';

    # with UTF-8 we only encode minimalistic
    for (unpack ('C*', $raw)) {
        # the helper reads the current value from $_
        $encoded .= entity_encode_int_minimalist();
    }

    return $encoded;
}
512
# Map the character code held in $_ to its XML entity when it is one of
# " & ' < (codes 34, 38, 39, 60); any other code is returned as the
# character itself.
sub entity_encode_int_minimalist
{
    my %entity_for = (
        34 => "&quot;",
        38 => "&amp;",
        39 => "&apos;",
        60 => "&lt;",
    );

    my $code = $_ + 0;
    return exists $entity_for{$code} ? $entity_for{$code} : chr $_;
}
521
# Return the entity-encoded translation of $string for $lang, or $string
# itself (unmodified) when the database has no translation for it.
sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};

    return $translation ? entity_encode ($translation) : $string;
}
530
531## XML (bonobo-activation specific) merge code
532
# Merge translations into a bonobo-activation style XML file.
#
# Reads $FILE completely and writes $OUTFILE.  The input is consumed one
# attribute-bearing tag at a time.  Each such tag is written once with the
# leading underscore removed from marked attributes (_attr="...").  For every
# language that has a translation for at least one of those attribute values,
# a further copy of the tag is written whose name="..." value gets "-LANG"
# appended and whose marked attributes hold the translated text.
# Dies when either file cannot be opened.
sub ba_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";

    # Each iteration removes, from the front of $source, the text before the
    # next tag with attributes ($1), the tag itself including its leading
    # blanks ($2), and an optional rest of line.
    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
    {
        print OUTPUT $1;

        my $node = $2 . "\n";

        # Unmark the attributes one by one, collecting their decoded values.
        my @strings = ();
        $_ = $node;
        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
             push @strings, entity_decode($3);
        }
        print OUTPUT;

        # Languages that translate at least one of the collected strings.
        my %langs;
        for my $string (@strings)
        {
            for my $lang (keys %po_files_by_lang)
            {
                $langs{$lang} = 1 if $translations{$lang, $string};
            }
        }

        for my $lang (sort keys %langs)
        {
            # Start again from the marked tag: tag the name with the
            # language and substitute the translated attribute values.
            $_ = $node;
            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
            print OUTPUT;
        }
    }

    # Whatever follows the last matching tag is copied through.
    print OUTPUT $source;

    close OUTPUT;
}
581
582
583## XML (non-bonobo-activation) merge code
584
585
# Build the attribute part of a start tag.
#
# Parameters (positional):
#   1. HASH ref mapping attribute names to their values; a value may still
#      carry its surrounding quotes and whitespace, which are stripped here
#   2. do_translate flag (default 0): when true, attributes whose name starts
#      with "_" are unmarked and, if a language is given, translated
#   3. language code (default ""): language to translate marked attributes to
#   4. SCALAR ref (optional): set to 2 when a marked attribute is seen
# Returns a string of the form ' key="value" ...', with the attributes in
# reverse sorted key order.
sub getAttributeString
{
    my $sub = shift;
    my $do_translate = shift || 0;
    my $language = shift || "";
    my $result = "";
    my $translate = shift;
    foreach my $e (reverse(sort(keys %{ $sub }))) {
        my $key    = $e;
        my $string = $sub->{$e};
        my $quote = '"';

        $string =~ s/^[\s]+//;
        $string =~ s/[\s]+$//;

        # Re-emit the value with the quote character it had in the source.
        if ($string =~ /^'.*'$/)
        {
            $quote = "'";
        }
        $string =~ s/^['"]//g;
        $string =~ s/['"]$//g;

        if ($do_translate && $key =~ /^_/) {
            $key =~ s|^_||g;
            if ($language) {

                # Handle translation
                #
                my $decode_string = entity_decode($string);
                my $translation = $translations{$language, $decode_string};
                if ($translation) {
                    $translation = entity_encode($translation);
                    $string = $translation;
                    $$translate = 2;
                } else {
                    $$translate = 2; # we still want translations for deep nesting (FIXME: this will cause
                                     # problems since we might get untranslated duplicated entries, but with xml:lang set)
                    # Fix would be to set it here to eg. 3, and do a check in traverse() to see if any of the containing tags
                    # really need translation, and only emit "translation" if there is (this means parsing same data twice)
                }
            } else {
                 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" if $translate == 2
            }
        }

        $result .= " $key=$quote$string$quote";
    }
    return $result;
}
637
# Serialise the contents of one XML::Parser tree node back into a string that
# can be used as a translation lookup key.
#   The argument is the node's content list: the attribute hash first, then
#   (type, content) pairs where a false type marks text and any other type is
#   a child element name.
#   Nested translatable tags are not supported (i.e.
#   <_blah>this <_doh>doesn't</_doh> work</_blah>).
#

sub getXMLstring
{
    my ($ref) = @_;
    my @list   = @{ $ref };
    my $result = "";
    my $attrs  = $list[0];

    for (my $i = 1; $i < scalar(@list); $i += 2) {
        my ($type, $content) = @list[$i, $i + 1];

        if ($type) {
            # Child element: emit its tag, its attributes (untranslated) and,
            # recursively, its contents; empty elements become <tag/>.
            $result .= "<$type";
            $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
            my $inner = $content ? getXMLstring($content) : "";
            $result .= $inner ? ">" . $inner . "</$type>" : "/>";
            next;
        }

        # Text: false content (empty string, "0") contributes nothing.
        next unless $content;

        # lets strip the whitespace here, and *ONLY* here
        unless ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)) {
            $content =~ s/\s+/ /gs;
        }
        $result .= $content;
    }

    return $result;
}
686
# Write a list of child nodes through traverse().
#   $content is a flat list of (type, content) pairs.  When $singlelang is
#   true, $MULTIPLE_OUTPUT is forced to 1 for the duration of each traverse()
#   call and restored afterwards.
sub translate_subnodes
{
    my ($fh, $content, $language, $singlelang) = @_;
    $language   ||= "";
    $singlelang ||= 0;

    my @nodes = @{ $content };

    for (my $i = 0; $i < scalar(@nodes); $i += 2) {
        my ($type, $rest) = @nodes[$i, $i + 1];

        if (!$singlelang) {
            traverse($fh, $type, $rest, $language);
            next;
        }

        my $saved_multiple_output = $MULTIPLE_OUTPUT;
        $MULTIPLE_OUTPUT = 1;
        traverse($fh, $type, $rest, $language);
        $MULTIPLE_OUTPUT = $saved_multiple_output;
    }
}
713
# Write one node of the parsed XML tree to $fh, adding translations.
#
# Parameters (positional):
#   1. output file handle
#   2. node name; a false value means the node is text
#   3. node content: the text itself, or for an element an ARRAY ref holding
#      the attribute hash followed by (type, content) pairs
#   4. language code (default ""): the language being generated
#
# Text is printed as is.  An element is translatable when its name starts
# with "_" or when getAttributeString reports a marked attribute; such an
# element is written once and then, for each language, once more with an
# xml:lang attribute.  Other elements are written with their children
# traversed recursively.
sub traverse
{
    my $fh = shift;
    my $nodename = shift;
    my $content = shift;
    my $language = shift || "";

    if (!$nodename) {
        # Text node.  Whitespace-only text is also remembered so translated
        # copies of a following element can reuse it as indentation.
        if ($content =~ /^[\s]*$/) {
            $leading_space .= $content;
        }
        print $fh $content;
    } else {
        # element
        my @all = @{ $content };
        my $attrs = shift @all;
        # 0 = nothing to translate, 1 = element marked with "_",
        # 2 = set by getAttributeString for a marked attribute
        my $translate = 0;
        my $outattr = getAttributeString($attrs, 1, $language, \$translate);

        if ($nodename =~ /^_/) {
            $translate = 1;
            $nodename =~ s/^_//;
        }
        my $lookup = '';
        print $fh "<$nodename", $outattr;
        if ($translate) {
            # The serialised contents are the key into the translations.
            $lookup = getXMLstring($content);
            if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
                $lookup =~ s/^\s+//s;
                $lookup =~ s/\s+$//s;
            }

            if ($lookup || $translate == 2) {
                my $translation = $translations{$language, $lookup};
                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
                    # One file per language: write only the translated form.
                    $translation = $lookup if (!$translation);
                    print $fh " xml:lang=\"", $language, "\"" if $language;
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1);
                    } else {
                        print $fh $translation;
                    }
                    print $fh "</$nodename>";

                    return; # this means there will be no same translation with xml:lang="$language"...
                            # if we want them both, just remove this "return"
                } else {
                    # Write the untranslated element.
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1);
                    } else {
                        print $fh $lookup;
                    }
                    print $fh "</$nodename>";
                }
            } else {
                print $fh "/>";
            }

            # Append one translated copy of the element per language (only
            # the current language when $MULTIPLE_OUTPUT is set).
            for my $lang (sort keys %po_files_by_lang) {
                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
                        next;
                    }
                    if ($lang) {
                        # Handle translation
                        #
                        my $translate = 0;
                        my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
                        my $translation = $translations{$lang, $lookup};
                        if ($translate && !$translation) {
                            $translation = $lookup;
                        }

                        if ($translation || $translate) {
                            print $fh "\n";
                            # Keep only what follows the last newline.
                            $leading_space =~ s/.*\n//g;
                            print $fh $leading_space;
                            print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
                            if ($translate == 2) {
                               translate_subnodes($fh, \@all, $lang, 1);
                            } else {
                                print $fh $translation;
                            }
                            print $fh "</$nodename>";
                        }
                    }
            }

        } else {
            # Not translatable: recurse into the children, or write an empty
            # element when there are none.
            my $count = scalar(@all);
            if ($count > 0) {
                print $fh ">";
                my $index = 0;
                while ($index < $count) {
                    my $type = $all[$index];
                    my $rest = $all[$index+1];
                    traverse($fh, $type, $rest, $language);
                    $index += 2;
                }
                print $fh "</$nodename>";
            } else {
                print $fh "/>";
            }
        }
    }
}
821
# Parser handler for the start of a CDATA section: append a new text entry
# (type 0) holding the original markup to the current node list.
sub intltool_tree_cdatastart
{
    my ($expat) = @_;
    my $current = $expat->{Curlist};

    push @{ $current }, 0 => $expat->original_string();
}
830
# Parser handler for the end of a CDATA section: append the original markup
# to the last entry of the current node list.
sub intltool_tree_cdataend
{
    my ($expat) = @_;
    my $current = $expat->{Curlist};
    my $last    = $#{ $current };

    $current->[$last] .= $expat->original_string();
}
839
# Parser handler for character data.  The text passed in by the parser is
# ignored; the original source string is stored instead so that escaped
# entities (also inside CDATA sections) are retained.  The string is appended
# to the preceding text entry when there is one, otherwise a new text entry
# (type 0) is added to the current node list.
sub intltool_tree_char
{
    my ($expat, $text) = @_;
    my $current = $expat->{Curlist};
    my $last    = $#{ $current };

    my $follows_text = ($last > 0 and $current->[$last - 1] eq '0');

    unless ($follows_text) {
        push @{ $current }, 0 => $expat->original_string();
        return;
    }

    $current->[$last] .= $expat->original_string();
}
856
# Parser handler for a start tag.
#
# Builds the node for the new element in the layout of XML::Parser's Tree
# style: the node (an ARRAY ref whose first item is the attribute hash) is
# appended, after the tag name, to the current list, the current list is
# pushed on $expat->{Lists}, and the new node becomes the current list.
#
# The attribute hash is filled from the original source string of the tag
# rather than from the parsed values, so escaped entities are retained; each
# value is stored together with its surrounding quotes.
sub intltool_tree_start
{
    my $expat    = shift;
    my $tag      = shift;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values.  We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    #
    my $source         = $expat->original_string();

    # Remove leading tag.
    #
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    # Every pass must remove an attribute from the front of $source; the loop
    # stops at the first position where no complete attribute is found, so an
    # unterminated quote cannot make it spin.
    #
    while (1)
    {
       if ($source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s)
       {
           push @origlist, $1;
           push @origlist, '"' . $2 . '"';
       }
       elsif ($source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s)
       {
           push @origlist, $1;
           push @origlist, "'" . $2 . "'";
       }
       else
       {
           last;
       }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}
903
# Parse an XML file with XML::Parser (Tree style, with the intltool_tree_*
# handlers installed for Char, Start, CdataStart and CdataEnd) and return the
# resulting tree.  Returns nothing when no file name is given; dies when the
# file does not exist or XML::Parser cannot be loaded.
sub readXml
{
    my ($filename) = @_;
    return unless $filename;

    die "ERROR Cannot find filename: $filename\n" unless -f $filename;

    # XML::Parser is loaded on demand so the other merge styles work
    # without it.
    my $loaded = eval 'require XML::Parser';
    die "You must have XML::Parser installed to run $0\n\n" unless $loaded;

    my $parser = XML::Parser->new(Style => 'Tree');
    $parser->setHandlers(Char => \&intltool_tree_char);
    $parser->setHandlers(Start => \&intltool_tree_start);
    $parser->setHandlers(CdataStart => \&intltool_tree_cdatastart);
    $parser->setHandlers(CdataEnd => \&intltool_tree_cdataend);

# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
# would be:
# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
# 0, "Howdy",  ref, [{}]], 0, "do" ] ]

    my $tree = $parser->parsefile($filename);
    return $tree;
}
929
# Write the XML declaration and, if the input has one, its DOCTYPE
# declaration to an output handle.
#
# Parameters:
#   $infile - path of the XML file whose DOCTYPE is copied
#   $fh     - output file handle
# Dies when $infile does not exist or cannot be opened.
sub print_header
{
    my $infile = shift;
    my $fh = shift;
    my $source;

    if(!-f $infile) {
        die "ERROR Cannot find filename: $infile\n";
    }

    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
    {
        # Read the file named by the argument (this used to open the global
        # $FILE, ignoring the parameter that had just been checked).
        local $/;
        open my $doc_fh, '<', $infile or die "can't open $infile: $!";
        $source = <$doc_fh>;
        close $doc_fh;
    }
    $source = '' if !defined $source;    # empty file

    # DOCTYPE with an internal subset.  The "[" must come before the first
    # ">" of the declaration and the match ends at the first "]>", so a
    # subset-less DOCTYPE followed later by a CDATA section ("<![CDATA[..]]>")
    # is no longer swallowed up to that section's end.
    if ($source =~ /(<!DOCTYPE[^\[>]*\[.*?\]\s*>)/s)
    {
        print $fh "$1\n";
    }
    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
    {
        print $fh "$1\n";
    }
}
956
# Write a parsed tree to $fh: take the root element's name and content off
# the front of the tree (the tree array is modified) and hand them to
# traverse().
sub parseTree
{
    my ($fh, $ref, $language) = @_;
    $language ||= "";

    my ($name, $cont) = splice @{ $ref }, 0, 2;

    traverse($fh, $name, $cont, $language);
}
967
# Produce the merged XML output.
#
# With --multiple-output, one file LANG/$OUTFILE is written per language
# (creating the LANG directory when missing).  In every case $OUTFILE itself
# is then written with no language selected.
sub xml_merge_output
{
    if ($MULTIPLE_OUTPUT) {
        my @langs = sort keys %po_files_by_lang;

        foreach my $lang (@langs) {
            unless (-e $lang) {
                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
            }

            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
            my $lang_tree = readXml($FILE);
            print_header($FILE, \*OUTPUT);
            parseTree(\*OUTPUT, $lang_tree, $lang);
            close OUTPUT;

            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
        }
    }

    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
    my $main_tree = readXml($FILE);
    print_header($FILE, \*OUTPUT);
    parseTree(\*OUTPUT, $main_tree);
    close OUTPUT;

    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
}
992
# Merge translations into a keys-style file.
#
# Lines of the form "_Key=value" are written as "Key=value", followed by one
# "[LANG]Key=translation" line for every language that translates the value.
# All other lines are copied unchanged.
sub keys_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;

    while (<INPUT>)
    {
        if (s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            my $string = $3;

            # The unmarked line goes out first and serves as the template
            # for the localized lines.
            print OUTPUT;
            my $template = $_;

            foreach my $lang (sort keys %po_files_by_lang)
            {
                my $translation = $translations{$lang, $string};
                next if !$translation;

                (my $localized = $template) =~ s/(\w+)=.*/[$lang]$1=$translation/;
                print OUTPUT $localized;
            }
        }
        else
        {
            print OUTPUT;
        }
    }

    close OUTPUT;
    close INPUT;
}
1027
# Merge translations into a desktop-style file.
#
# Lines of the form "_Key=value" are written as "Key=value", followed by one
# "Key[LANG]=translation" line for every language that translates the value.
# All other lines are copied unchanged.
sub desktop_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;

    while (<INPUT>)
    {
        if (s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            my $string = $3;

            # The unmarked line goes out first and serves as the template
            # for the localized lines.
            print OUTPUT;
            my $template = $_;

            foreach my $lang (sort keys %po_files_by_lang)
            {
                my $translation = $translations{$lang, $string};
                next if !$translation;

                (my $localized = $template) =~ s/(\w+)=.*/${1}[$lang]=$translation/;
                print OUTPUT $localized;
            }
        }
        else
        {
            print OUTPUT;
        }
    }

    close OUTPUT;
    close INPUT;
}
1062
sub schemas_merge_translations
{
    # Merge translations into a GConf-style .schemas file.
    #
    # The whole of $FILE is read into memory.  Each <locale name="C">
    # block is copied to $OUTFILE unchanged and is followed by one
    # <locale name="LANG"> block for every language in %po_files_by_lang
    # that has a translation for at least one of the block's <default>,
    # <short> or <long> strings.  The whitespace found between the
    # elements of the C block is reused for the generated blocks.
    #
    # Dies, naming the file and the system error, when either file cannot
    # be opened or the output cannot be closed cleanly.

    my $source;

    {
        local $/; # slurp mode
        open my $in, '<', $FILE or die "can't open $FILE: $!";
        $source = <$in>;
        close $in;
    }

    open my $out, '>', $OUTFILE or die "can't open $OUTFILE: $!";

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    # ($w is defined outside this sub.)
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    # Each iteration removes everything up to and including the next
    # C locale block from $source, so the loop ends when none is left.
    while ($source =~ s/
                        (.*?)
                        (\s+)(<locale\ name="C">(\s*)
                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
                        <\/locale>)
                       //sx)
    {
        # Copy the captures before any other pattern match can disturb
        # them.  Optional groups that did not take part in the match are
        # undef; test with defined() so that a literal "0" value is kept
        # instead of being turned into the empty string.
        my ($preceding_text, $locale_start_spaces, $c_default_block,
            $default_spaces, $default_string,
            $short_spaces,   $short_string,
            $long_spaces,    $long_string,
            $locale_end_spaces) =
            map { defined $_ ? $_ : '' }
                ($1, $2, $3, $4, $6, $7, $9, $10, $12, $13);

        print {$out} $preceding_text;
        print {$out} "$locale_start_spaces$c_default_block";

        # Collapse whitespace runs and run entity_decode() (defined
        # elsewhere in this file) before looking the strings up.
        $default_string =~ s/\s+/ /g;
        $default_string = entity_decode($default_string);
        $short_string =~ s/\s+/ /g;
        $short_string = entity_decode($short_string);
        $long_string =~ s/\s+/ /g;
        $long_string = entity_decode($long_string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $default_translation = $translations{$lang, $default_string};
            my $short_translation = $translations{$lang, $short_string};
            my $long_translation  = $translations{$lang, $long_string};

            # Nothing translated for this language: emit no block at all.
            next if (!$default_translation && !$short_translation &&
                     !$long_translation);

            print {$out} "\n$locale_start_spaces<locale name=\"$lang\">";

            print {$out} "$default_spaces";

            if ($default_translation)
            {
                $default_translation = entity_encode($default_translation);
                print {$out} "<default>$default_translation</default>";
            }

            print {$out} "$short_spaces";

            if ($short_translation)
            {
                $short_translation = entity_encode($short_translation);
                print {$out} "<short>$short_translation</short>";
            }

            print {$out} "$long_spaces";

            if ($long_translation)
            {
                $long_translation = entity_encode($long_translation);
                print {$out} "<long>$long_translation</long>";
            }

            print {$out} "$locale_end_spaces</locale>";
        }
    }

    # Whatever follows the last C locale block.
    print {$out} $source;

    # Buffered write errors only surface when the handle is closed.
    close $out or die "can't write $OUTFILE: $!";
}
1155
sub rfc822deb_merge_translations
{
    # Merge translations into a Debian rfc822-style file.
    #
    # The whole of $FILE is read into memory and scanned field by field.
    # Every field is written to $OUTFILE with its leading underscores
    # removed.  For a field marked with one or more underscores, an extra
    # "Field-LANG.ENCODING: translation" field is written for each
    # language in %po_files_by_lang, but only when every piece returned by
    # rfc822deb_split() has a translation in %translations.  Unmarked
    # fields whose name starts with "#" are dropped.
    #
    # The encoding is 'UTF-8' when $UTF8_ARG is set, otherwise the value
    # returned by get_po_encoding() for that language's PO file.
    #
    # Side effect: sets $Text::Wrap::huge and $Text::Wrap::break.
    #
    # Dies, naming the file and the system error, when either file cannot
    # be opened or the output cannot be closed cleanly.

    my %encodings = ();
    for my $lang (keys %po_files_by_lang) {
        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
    }

    my $source;

    $Text::Wrap::huge = 'overflow';
    $Text::Wrap::break = qr/\n|\s(?=\S)/;

    {
        local $/; # slurp mode
        open my $in, '<', $FILE or die "can't open $FILE: $!";
        $source = <$in>;
        close $in;
    }

    open my $out, '>', $OUTFILE or die "can't open $OUTFILE: $!";

    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
    {
        my $sep = $1;
        my $field = $3;
        my $non_translated_line = $3.$4;
        my $string = $5;
        my $underscore = length($2);

        # Unmarked "#..." entries are comments and are not copied.
        next if $underscore == 0 && $non_translated_line =~ /^#/;

        #  Remove [] dummy strings
        my $stripped = $string;
        $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore == 2;
        $stripped =~ s/\[\s[^\[\]]*\]$//;
        $non_translated_line .= $stripped;

        print {$out} $sep . $non_translated_line;

        # Only marked fields get per-language copies.
        next unless $underscore;

        my @str_list = rfc822deb_split($underscore, $string);

        LANG:
        for my $lang (sort keys %po_files_by_lang)
        {
            my $str_translated = '';
            my $first = 1;

            for my $str (@str_list)
            {
                my $translation = $translations{$lang, $str};

                # A single missing piece means the field is not
                # translated for this language at all.
                next LANG if !$translation;

                #  $translation may also contain [] dummy
                #  strings, mostly to indicate an empty string
                $translation =~ s/\[\s[^\[\]]*\]$//;

                if ($underscore == 2)
                {
                    # Comma separated list: rejoin the items.
                    $str_translated .= ($first ? '' : ', ') . $translation;
                }
                elsif ($first)
                {
                    # Short form on the first line.
                    $str_translated .=
                        Text::Tabs::expand($translation) .
                        "\n";
                }
                else
                {
                    # Long description paragraph: wrap it with a leading
                    # space and end it with a " ." separator line.
                    $str_translated .= Text::Tabs::expand(
                        Text::Wrap::wrap(' ', ' ', $translation)) .
                        "\n .\n";
                }
                $first = 0;

                #  To fix some problems with Text::Wrap::wrap
                $str_translated =~ s/(\n )+\n/\n .\n/g;
            }

            # Drop the separator after the last paragraph and any
            # trailing whitespace.
            $str_translated =~ s/\n \.\n$//;
            $str_translated =~ s/\s+$//;

            # Build the field from the captured name rather than by
            # re-matching the untranslated line, so a name containing
            # non-word characters cannot cause the untranslated line to
            # be printed a second time.
            print {$out} $sep . $field . '-' . $lang . '.' .
                         $encodings{$lang} . ': ' . $str_translated;
        }
    }
    print {$out} "\n";

    # Buffered write errors only surface when the handle is closed.
    # (The input handle was already closed right after slurping.)
    close $out or die "can't write $OUTFILE: $!";
}
1263
sub rfc822deb_split
{
    # Split the value of a Debian rfc822-style field into the pieces that
    # are handled one by one.
    #
    # Arguments:
    #   1. the field kind: 1 selects the short/long description layout
    #      described below, any other value means a comma separated list
    #   2. the raw field value
    #
    # One leading space or tab is removed from every line first.
    #
    # Kind other than 1: returns the items of the comma separated list.
    #
    # Kind 1: a value without a newline is returned as is.  Otherwise the
    # first line is the short form and the remaining lines are the long
    # description, whose paragraphs are separated by a line holding a
    # single dot.  The result is the short form followed by one string
    # per paragraph.  Within a paragraph, ordinary lines are joined with
    # a single space, while lines that still start with whitespace are
    # kept on lines of their own.
    my ($kind, $value) = @_;

    $value =~ s/^[ \t]//mg;

    unless ($kind eq 1)
    {
        return (split(/, */, $value, 0));
    }

    my $first_newline = index($value, "\n");
    if ($first_newline < 0)
    {
        return ($value);
    }

    # Short form first, everything after it is the long description.
    my @pieces    = (substr($value, 0, $first_newline));
    my $remainder = substr($value, $first_newline + 1);
    my $paragraph = '';

    foreach my $row (split(/\n/, $remainder))
    {
        chomp $row;

        if ($row =~ /^\.\s*$/)
        {
            #  Paragraph separator: store what has been collected
            $paragraph =~ s/\s*$//;
            push(@pieces, $paragraph);
            $paragraph = '';
            next;
        }

        # True when the paragraph has text that does not end in a newline.
        my $needs_glue = length($paragraph) && $paragraph !~ /\n$/;

        if ($row =~ /^\s/)
        {
            #  Indented line, kept verbatim on a line of its own
            $row =~ s/\s+$//;
            $paragraph .= ($needs_glue ? "\n" : '') . $row . "\n";
        }
        else
        {
            #  Ordinary line, appended to the running text
            $paragraph .= ($needs_glue ? ' ' : '') . $row;
        }
    }

    # Store the final paragraph unless it is empty.
    $paragraph =~ s/\s*$//;
    if (length($paragraph))
    {
        push(@pieces, $paragraph);
    }

    return @pieces;
}
1315
Note: See TracBrowser for help on using the repository browser.