source: projects/synaptic/trunk/intltool-extract.in @ 280

Revision 280, 13.6 KB checked in by yasumichi, 15 years ago (diff)

first import

Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001, 2003 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
my $PROGRAM = 'intltool-extract';
my $PACKAGE = 'intltool';
my $VERSION = '0.33';

## Loaded modules
use strict;
use File::Basename;
use Getopt::Long;

## Command-line option state, filled in by GetOptions below.
## Every flag starts out as the (false) string "0"; --srcdir defaults to ".".
my ($TYPE_ARG, $LOCAL_ARG, $HELP_ARG, $VERSION_ARG, $UPDATE_ARG, $QUIET_ARG)
    = ('0') x 6;
my $SRCDIR_ARG = '.';

## Name of the input file and of the header that gets generated from it.
my ($FILE, $OUTFILE);

## Extraction state shared by the type_* parsers and by msg_write:
##   %messages  - extracted strings (used as a set; values are empty arrays)
##   %loc       - source line number per string
##   %count     - running sequence number per string
##   %comments  - comment text attached to a string
my $gettext_type = '';
my $input;
my (%messages, %loc, %count, %comments);
my $strcount = 0;

## Use this instead of \w for XML files to handle more possible characters.
my $w = '[-A-Za-z0-9._:]';

## Turn on autoflush for the currently selected output handle.
$| = 1;

## Parse the command line; on a parse failure print the usage hint and stop.
GetOptions(
    'type=s'    => \$TYPE_ARG,
    'local|l'   => \$LOCAL_ARG,
    'help|h'    => \$HELP_ARG,
    'version|v' => \$VERSION_ARG,
    'update'    => \$UPDATE_ARG,
    'quiet|q'   => \$QUIET_ARG,
    'srcdir=s'  => \$SRCDIR_ARG,
) or error();

## Act on whatever options were given.
split_on_argument();
80
81
82## Check for options.
83## This section will check for the different options.
84
## Decide what to do based on the parsed command-line options.
##
## Precedence is: --version, --help, --local, --update, then the plain
## "FILENAME" form. With no options and no FILENAME the usage text is shown.
## When --local or --update is given without a FILENAME there is nothing to
## extract from, so the problem is reported on STDERR and the program stops
## via error() instead of trying to read an undefined file name.
sub split_on_argument {

    if ($VERSION_ARG) {
        version();

    } elsif ($HELP_ARG) {
        help();

    } elsif (!@ARGV) {
        if ($LOCAL_ARG || $UPDATE_ARG) {
            print STDERR "${PROGRAM}: missing FILENAME argument\n";
            error();
        }
        help();

    } elsif ($LOCAL_ARG) {
        # Header goes into ./tmp; no progress message in this mode.
        place_local();
        extract();

    } elsif ($UPDATE_ARG) {
        # Header goes next to the source file; no progress message.
        place_normal();
        extract();

    } else {
        place_normal();
        message();
        extract();

    }
}
111
## Take the first command-line argument as the input file and name the
## generated header after it by appending ".h".
sub place_normal {
    ($FILE) = ($ARGV[0]);
    $OUTFILE = $FILE . ".h";
}
116
## Take the first command-line argument as the input file and place the
## generated header in ./tmp, named after the input's base name plus ".h".
##
## The tmp directory is created when it does not exist yet. Dies if it
## cannot be created; a directory that appears concurrently (e.g. created
## by another process in a parallel build) is not treated as an error.
sub place_local {
    $FILE = $ARGV[0];

    # In list context fileparse returns (name, directory, suffix); only the
    # name part is wanted here.
    my ($basename) = fileparse($FILE);

    if (!-d "tmp" && !mkdir("tmp")) {
        my $mkdir_error = $!;    # keep errno before -d overwrites it
        die "can't create directory tmp: $mkdir_error" unless -d "tmp";
    }

    $OUTFILE = "./tmp/$basename.h";
}
125
## Derive $gettext_type from the --type argument: for "gettext/NAME" the
## NAME part is stored; any other value leaves $gettext_type untouched.
sub determine_type {
    my ($subtype) = $TYPE_ARG =~ /^gettext\/(.*)/;
    $gettext_type = $subtype if defined $subtype;
}
131
132## Sub for printing release information
## Print program name, package, version and copyright notice, then exit
## successfully.
sub version {
    my $banner = "${PROGRAM} (${PACKAGE}) $VERSION\n";

    # The remainder contains nothing to interpolate.
    my $notice = <<'END_NOTICE';
Copyright (C) 2000, 2003 Free Software Foundation, Inc.
Written by Kenneth Christiansen, 2000.

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
END_NOTICE

    print $banner . $notice;
    exit 0;
}
144
145## Sub for printing usage information
## Print the usage text on STDOUT, then exit successfully.
sub help {
    my $usage = <<"END_USAGE";
Usage: ${PROGRAM} [OPTION]... [FILENAME]
Generates a header file from an XML source file.

It grabs all strings between <_translatable_node> and its end tag in
XML files. Read manpage (man ${PROGRAM}) for more info.

      --type=TYPE   Specify the file type of FILENAME. Currently supports:
                    "gettext/glade", "gettext/ini", "gettext/keys"
                    "gettext/rfc822deb", "gettext/schemas",
                    "gettext/scheme", "gettext/xml"
  -l, --local       Writes output into current working directory
                    (conflicts with --update)
      --update      Writes output into the same directory the source file
                    reside (conflicts with --local)
      --srcdir      Root of the source tree
  -v, --version     Output version information and exit
  -h, --help        Display this help and exit
  -q, --quiet       Quiet mode

Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
or send email to <xml-i18n-tools\@gnome.org>.
END_USAGE

    print $usage;
    exit 0;
}
172
173## Sub for printing error messages
## Point the user at --help on STDERR and terminate.
##
## This is the failure path (it is what GetOptions falls back to when the
## command line cannot be parsed), so the process exits with a non-zero
## status; that lets calling scripts and makefiles notice the failure.
sub error {
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}
178
## Announce on STDOUT that a header is being generated, unless --quiet
## was given.
sub message {
    if (!$QUIET_ARG) {
        print "Generating C format header file for translation.\n";
    }
}
182
## Run the whole extraction: work out the file type, parse the input into
## the message tables, and write the C header to $OUTFILE.
##
## Dies if the output file cannot be opened or if closing it fails
## (buffered write errors surface at close), so "Wrote ..." is only
## printed once the header really is on disk.
sub extract {
    determine_type();

    convert();

    # msg_write prints to the global OUT handle, so the bareword handle is
    # kept; the three-argument form keeps the file name from being parsed
    # for a mode.
    open(OUT, '>', $OUTFILE)
        or die "can't open $OUTFILE for writing: $!";
    msg_write();
    close(OUT)
        or die "can't write $OUTFILE: $!";

    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
}
194
## Read $SRCDIR_ARG/$FILE into $input in one go and hand it to the parser
## matching $gettext_type. An unrecognised type leaves the message tables
## empty. Dies if the input file cannot be opened.
sub convert {
    my $path = "$SRCDIR_ARG/$FILE";

    ## Reading the file
    {
        local $/;    # slurp mode

        # Three-argument open: the path is used literally, with no
        # whitespace trimming or mode characters taken from the name.
        open(my $in, '<', $path) || die "can't open $path: $!";
        $input = <$in>;
        close($in);
    }

    # The table lives inside the sub on purpose: the main program calls
    # into this code before later file-level statements have run.
    my %parser_for = (
        ini       => \&type_ini,
        keys      => \&type_keys,
        xml       => \&type_xml,
        glade     => \&type_glade,
        scheme    => \&type_scheme,
        schemas   => \&type_schemas,
        rfc822deb => \&type_rfc822deb,
    );

    my $parser = $parser_for{$gettext_type};
    $parser->() if defined $parser;
}
213
## Decode the entities &apos;, &quot; and &amp; in the given string and
## return the result. &lt; and &gt; are deliberately left encoded.
sub entity_decode_minimal
{
    my ($text) = @_;

    my %char_for = (
        apos => "'",
        quot => '"',
        amp  => '&',
    );

    # Single pass: each entity is replaced exactly once, so the "&" that
    # comes out of "&amp;" is never looked at again.
    $text =~ s/&(apos|quot|amp);/$char_for{$1}/g;

    return $text;
}
224
## Decode the five predefined XML entities (&apos; &quot; &lt; &gt; &amp;)
## in the given string and return the result.
##
## &amp; is decoded last. Decoding it earlier would turn an escaped entity
## such as "&amp;lt;" into "&lt;" and then, wrongly, into "<".
sub entity_decode
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&lt;/</g;
    s/&gt;/>/g;
    s/&amp;/&/g;

    return $_;
}
237
## Return the C string-literal spelling of the single character in $_:
## a double quote becomes \", a newline becomes \n and a backslash becomes
## \\ (two characters). Any other character is returned unchanged.
##
## The character is taken from $_ (not from @_) so that the sub can be
## used directly as the body of a map.
sub escape_char
{
    return '\\"'  if $_ eq '"';
    return '\\n'  if $_ eq "\n";

    # '\\\\' is a two-character string: a lone backslash in the message
    # must be doubled, otherwise it would start an escape sequence in the
    # generated C source.
    return '\\\\' if $_ eq '\\';

    return $_;
}

## Escape a whole string, character by character, for use inside a C
## string literal. Returns the escaped string.
sub escape
{
    my ($string) = @_;
    return join "", map { escape_char() } split //, $string;
}
252
### For generic translatable desktop files ###
## Every line that starts with "_" and contains "=" contributes the text
## after its last "=" as a message.
sub type_ini {
    # In list context a /g match returns every captured value at once.
    my @values = ($input =~ /^_.*=(.*)$/mg);

    for my $value (@values) {
        $messages{$value} = [];
    }
}
259
### For generic translatable mime/keys files ###
## Every (optionally indented) "_key=value" line contributes its value as
## a message.
sub type_keys {
    # In list context a /g match returns every captured value at once.
    my @values = ($input =~ /^\s*_\w+=(.*)$/mg);

    for my $value (@values) {
        $messages{$value} = [];
    }
}
266
### For generic translatable XML files ###
## Two kinds of strings are collected from $input:
##   1. values of attributes whose name starts with "_"
##   2. the content of elements whose name starts with "_"
## An XML comment directly in front of either one is stored as the
## comment for that string.
##
## Match results are copied into lexical variables straight away, so the
## sub neither overwrites the caller's $_ nor depends on the capture
## variables surviving later pattern matches.
sub type_xml {

    # _attribute="value"
    while ($input =~ /(?:<!--([^>]*?)-->[^\n]*\n?[^\n]*)?\s_$w+\s*=\s*\"([^"]*)\"/sg) { # "
        my ($note, $value) = ($1, $2);

        my $msgid = entity_decode_minimal($value);
        $messages{$msgid} = [];
        $comments{$msgid} = $note if defined $note;
    }

    # <_element>content</_element>
    while ($input =~ /(?:<!--([^>]*?)-->\s*)?<_($w+)(?: xml:space="($w+)")?[^>]*>(.*?)<\/_\2>/sg) {
        my ($note, $space, $msgid) = ($1, $3, $4);

        # Unless xml:space="preserve" is given, collapse runs of
        # whitespace and trim one blank at each end.
        if (!defined($space) || $space ne "preserve") {
            $msgid =~ s/\s+/ /g;
            $msgid =~ s/^ //;
            $msgid =~ s/ $//;
        }

        $messages{$msgid} = [];
        $comments{$msgid} = $note if defined $note;
    }
}
286
### For schemas XML files ###
## Inside every <locale name="C"> element the <default>, <short> and
## <long> texts are collected as messages; an XML comment at the start of
## one of these elements becomes the comment for its text.
sub type_schemas {

    # FIXME: We should handle escaped < (less than)
    while ($input =~ /
                      <locale\ name="C">\s*
                          (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
                          (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
                          (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
                      <\/locale>
                     /sgx) {
        # [ text, comment ] for default, short and long, in that order.
        my @found = ([$3, $2], [$6, $5], [$9, $8]);

        for my $pair (@found) {
            my ($text, $note) = @$pair;

            # Skip elements that are absent or whose text is false.
            next if !$text;

            $text =~ s/\s+/ /g;
            my $msgid = entity_decode_minimal($text);

            $messages{$msgid} = [];
            $comments{$msgid} = $note if defined($note);
        }
    }
}
309
### For rfc822-style Debian configuration files ###
## Walks through $input field by field. Every field whose name starts
## with one or more underscores is split into strings by rfc822deb_split;
## each string is recorded together with the line number of its field,
## a running sequence number, and a comment made of the field name plus
## any "#" comment lines found directly above the field.
sub type_rfc822deb {

    my $lineno = 1;

    while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
    {
        my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);

        # Advance to the line the field starts on: every newline in the
        # skipped text, plus the one (if any) right before the field.
        $lineno += ($pre =~ tr/\n//) + length($newline);

        for my $str (rfc822deb_split(length($underscore), $text))
        {
            $strcount++;
            $messages{$str} = [];
            $loc{$str}      = $lineno;
            $count{$str}    = $strcount;

            # Peel "#" lines off the end of the preceding text, last one
            # first. This consumes them, so only the first string of a
            # field receives the user comment.
            my $usercomment = '';
            while ($pre =~ s/(^|\n)#([^\n]*)$//s)
            {
                $usercomment = "\n" . $2 . $usercomment;
            }
            $comments{$str} = $tag . $usercomment;
        }

        # Account for the lines covered by the field's own value.
        $lineno += ($text =~ tr/\n//);
    }
}
340
## Split the value of an rfc822-style Debian field into translatable
## strings.
##
## Arguments: the number of leading underscores of the field name, and
## the field value. Returns the list of strings.
##
## Debian defines a special way to deal with multi-line values:
##   1.  a short form (first line)
##   2.  a long description, all lines begin with a space,
##       and paragraphs are separated by a single dot on a line
## For a single underscore the short form and each reformatted paragraph
## are returned. For any other underscore count the value is treated as
## a comma separated list and its items are returned.
sub rfc822deb_split {
    my ($type, $text) = @_;

    # Remove the one blank or tab that introduces a line.
    $text =~ s/^[ \t]//mg;

    if ($type ne 1) {
        return (split(/, */, $text, 0));
    }

    my $first_break = index($text, "\n");
    if ($first_break < 0) {
        # Single-line value: nothing to split.
        return ($text);
    }

    # The first line is the short form; the rest is the long description.
    my @list = (substr($text, 0, $first_break));
    my $rest = substr($text, $first_break + 1);

    my $str = '';
    for my $line (split(/\n/, $rest))
    {
        chomp $line;

        if ($line =~ /^\.\s*$/)
        {
            #  New paragraph
            $str =~ s/\s*$//;
            push(@list, $str);
            $str = '';
            next;
        }

        # A separator is only needed between two pieces of running text.
        my $needs_separator = length($str) && $str !~ /\n$/;

        if ($line =~ /^\s/)
        {
            #  Line which must not be reformatted
            $str .= "\n" if $needs_separator;
            $line =~ s/\s+$//;
            $str .= $line . "\n";
        }
        else
        {
            #  Continuation line, remove newline
            $str .= " " if $needs_separator;
            $str .= $line;
        }
    }

    $str =~ s/\s*$//;
    push(@list, $str) if length($str);

    return @list;
}
388
### For translatable Glade XML files ###
## Collects strings from old-style Glade tags, from <items> lists, from
## new-style <property>/<atkproperty> elements marked translatable="yes"
## (together with their comments attribute, if any) and from the
## description attribute of <atkaction> elements.
sub type_glade {

    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";

    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
        my $text = $2;

        # Glade sometimes uses tags that normally mark translatable things for
        # little bits of non-translatable content. We work around this by not
        # translating strings that only includes something like label4 or window1.
        next if $text =~ /^(window|label|dialog)[0-9]+$/;

        $messages{entity_decode($text)} = [];
    }

    # One item per line inside <items>.
    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
        my @items = split(/\n/, $1);
        for my $item (@items) {
            $messages{entity_decode($item)} = [];
        }
    }

    ## handle new glade files
    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) {
        my ($note, $text) = ($2, $3);

        # Auto-generated widget names are not worth translating.
        next if $text =~ /^(window|label)[0-9]+$/;

        my $msgid = entity_decode($text);
        $messages{$msgid} = [];
        $comments{$msgid} = entity_decode($note) if defined($note);
    }

    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
        my $description = $2;
        $messages{entity_decode_minimal($description)} = [];
    }
}
418
### For Scheme source files ###
## Scans $input line by line, one character at a time. A string literal
## whose opening quote directly follows an underscore is recorded as a
## message when its closing quote is reached. The text of a ";" comment
## is remembered and attached to the next such string, unless code other
## than whitespace, parentheses or underscores comes in between.
sub type_scheme {
    # Both survive from one line to the next: a comment usually sits on
    # the line before the string it describes.
    my ($str, $trcomment);

    for my $line (split(/\n/, $input)) {
        my $length = length($line);
        my $state  = 0; # 0 - nothing, 1 - string, 2 - translatable string

        for (my $i = 0; $i < $length; $i++) {
            my $char = substr($line, $i, 1);

            if ($char eq "\"") {
                if ($state == 0) {
                    # Opening quote; "_" right before it marks the
                    # string as translatable.
                    my $marked = $i > 0 && substr($line, $i - 1, 1) eq '_';
                    $state = $marked ? 2 : 1;
                    $str = '';
                } else {
                    # Closing quote; only translatable strings are kept.
                    if ($state == 2) {
                        $comments{$str} = $trcomment if ($trcomment);
                        $messages{$str} = [];
                    }
                    $str = '';
                    $state = 0;
                    $trcomment = "";
                }
            } elsif (!$state) {
                if ($char eq ";") {
                    # The rest of the line is a comment.
                    $trcomment = substr($line, $i + 1);
                    $trcomment =~ s/^;*\s*//;
                    $i = $length;
                } elsif ($trcomment && $char !~ /\s|\(|\)|_/) {
                    # Real code in between: the comment belongs elsewhere.
                    $trcomment = "";
                }
            } else {
                if ($char eq "\\") {
                    # Inside a string: \" and \\ lose their backslash,
                    # every other escape sequence keeps it.
                    my $escaped = substr($line, $i + 1, 1);
                    if ($escaped ne "\"" && $escaped ne "\\") {
                        $str .= "\\";
                    }
                    $i++;
                }
                $str .= substr($line, $i, 1);
            }
        }
    }
}
463
## Write all collected messages to the OUT file handle as C source.
##
## When sequence numbers were recorded (%count is not empty) the messages
## are written in that order, otherwise sorted by their text. For each
## message this prints, in order: a "# LINE "FILE"" line when a location
## is known, the comment when there is one, an xgettext no-c-format
## marker when the message contains "%", and finally the message itself
## wrapped in N_(), one C string literal per line of the message.
sub msg_write {
    my @msgids;
    if (%count) {
        @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
    } else {
        @msgids = sort keys %messages;
    }

    for my $message (@msgids) {
        my $note        = $comments{$message};
        my $has_percent = ($message =~ /%/) ? 1 : 0;

        if (defined $loc{$message}) {
            # Every line printed before the message itself (this one,
            # the comment lines, the no-c-format marker) is subtracted
            # from the recorded line number.
            my $offsetlines = 1 + $has_percent;
            $offsetlines += ($note =~ tr/\n//) if defined($note);

            print OUT "# " . ($loc{$message} - $offsetlines) . " \"$FILE\"\n";
        }

        print OUT "/* " . $note . " */\n" if defined($note);
        print OUT "/* xgettext:no-c-format */\n" if $has_percent;

        # An empty message splits into no lines at all; nothing more is
        # printed for it.
        my @lines = split(/\n/, $message, -1);
        next unless @lines;

        # Each line but the last ends in \n" and the next literal starts
        # on a new, indented line.
        my $line_break = "\\n\"\n" . "             \"";
        my $literal    = join($line_break, map { escape($_) } @lines);

        print OUT "char *s = N_(\"" . $literal . "\");\n";
    }
}
516
Note: See TracBrowser for help on using the repository browser.