#!/usr/bin/python2.2
# 
# Copyright 2002, 2003 Zuza Software Foundation
# 
# This file is part of mozpotools.
#
# mozpotools is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# mozpotools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with mozpotools; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""script that converts a .po file with translations based on a .pot file
generated from a Mozilla localization .dtd back to the .dtd (but translated)
Uses the original .dtd to do the conversion as this makes sure we don't
leave out any unexpected stuff...
Reads the .dtd from stdin and the .po from the last command-line argument; writes the translated .dtd to stdout."""

import sys
import dtd
import po
import extract

# the po file name is taken from the last command-line argument; with no
# arguments sys.argv[-1] would be this script itself, so stop with a usage
# message instead of parsing the script as a po file
if len(sys.argv) < 2:
  sys.exit("usage: %s translations.po < original.dtd > translated.dtd" % sys.argv[0])

# read in the dtd (from stdin)
dtdlines = sys.stdin.readlines()
d = dtd.dtdfile()
d.fromlines(dtdlines)
# make an index of all the entities (used below as d.index)
d.makeindex()

# read in the po (aux file)
pf = open(sys.argv[-1],'r')
try:
  potlines = pf.readlines()
finally:
  # make sure the file is closed even if reading fails
  pf.close()
p = po.pofile()
p.fromlines(potlines)

def findentities(definition):
  """Collect the "&...;" style references that occur in definition.

  Returns a dictionary used as a set: each key is the text running from an
  ampersand up to and including the next semicolon, each value is 0.
  When several ampersands precede the same semicolon only the last one
  starts the reference, so a stray "&" earlier in the text is not included.
  An ampersand with no semicolon after it ends the search."""
  found = {}
  start = definition.find("&")
  while start != -1:
    end = definition.find(";", start)
    if end == -1:
      # no terminating semicolon anywhere after this ampersand
      break
    # move forward to the last ampersand in front of the semicolon
    start = definition.rfind("&", start, end)
    found[definition[start:end+1]] = 0
    # carry on looking from the semicolon
    start = definition.find("&", end)
  return found

def entitycheck(original, translation):
  """Report whether original and translation refer to the same entities.

  Both strings are scanned with findentities and the resulting
  dictionaries are compared, so order and repetition do not matter.
  Returns a true value when they match and a false value otherwise."""
  return findentities(original) == findentities(translation)

# translate the strings
# Each po element lists (in its source comments) the dtd entities it belongs
# to; every listed entity that is present in the dtd index gets its definition
# replaced by the element's msgstr.
for thepo in p.poelements:
  # there may be more than one entity due to msguniq merge
  entities = []
  mixedentities = []  # those entities which have a .label and .accesskey combined
  for sourcecomment in thepo.sourcecomments:
    # skip the first three characters (presumably the "#: " marker) and treat
    # the rest of the line as whitespace-separated entity names
    entities += extract.rstripeol(sourcecomment)[3:].split()
  # search for mixed entities...
  for entity in entities:
    if entity.endswith(".label"):
      entitybase = entity[:entity.rfind(".label")]
      # see if there is a matching accesskey, making this a mixed entity
      if entitybase + ".accesskey" in entities:
        # add both versions to the list of mixed entities
        mixedentities += [entity,entitybase+".accesskey"]
  for entity in entities:
    if d.index.has_key(entity):
      # now we need to replace the definition of entity with msgstr
      thedtd = d.index[entity] # find the dtd
      # currently let's just get the msgstr back
      # this converts the po-style string to a dtd-style string
      # (done afresh for every entity, because the mixed entity handling
      # below modifies unquotedstr)
      backslash = '\\'
      unquotedid = "\n".join([extract.extractwithoutquotes(line,'"','"',backslash,includeescapes=0)[0] for line in thepo.msgid])
      # drop a leading newline (presumably left by an empty first line of a
      # multi-line po string)
      if unquotedid[:1] == "\n": unquotedid = unquotedid[1:]
      unquotedstr = "\n".join([extract.extractwithoutquotes(line,'"','"',backslash,includeescapes=0)[0] for line in thepo.msgstr])
      if unquotedstr[:1] == "\n": unquotedstr = unquotedstr[1:]
      # check there aren't missing entities...
      if len(unquotedstr.strip()) > 0 and not entitycheck(unquotedid, unquotedstr):
        # print to stderr that the entities in the original and translation don't match
        print >>sys.stderr,"entitycheck failed in %s (file %s)" % (entity, sys.argv[-1])
        # we can also make it the same as the original
        # we lose the translation, but we avoid crash-type errors
        # this should be done when testing ; some detected errors aren't a problem,
        # so it can be commented out for final builds... (FIXME)
        # unquotedstr = unquotedid
      # handle mixed entities
      if entity.endswith(".label"):
        if entity in mixedentities:
          # mixed labels just need the & taken out
          # except that &entity; needs to be avoided...
          amppos = 0
          while amppos >= 0:
            amppos = unquotedstr.find("&",amppos)
            if amppos != -1:
              amppos += 1
              semipos = unquotedstr.find(";",amppos)
              if semipos != -1:
                if unquotedstr[amppos:semipos].isalpha():
                  # purely alphabetic text up to the ; means this is an
                  # entity reference, so keep looking after it
                  continue
              # otherwise, cut it out... only the first one need be changed
              # (see below to see how the accesskey is done)
              unquotedstr = unquotedstr[:amppos-1] + unquotedstr[amppos:]
              break

      if entity.endswith(".accesskey"):
        if entity in mixedentities:
          # mixed access keys need the key extracted from after the &
          # but we must avoid proper entities i.e. &gt; etc...
          amppos = 0
          while amppos >= 0:
            amppos = unquotedstr.find("&",amppos)
            if amppos != -1:
              amppos += 1
              semipos = unquotedstr.find(";",amppos)
              if semipos != -1:
                if unquotedstr[amppos:semipos].isalpha():
                  # what we have found is an entity, not a shortcut key...
                  continue
              if amppos >= len(unquotedstr):
                # the & is the very last character, so there is no key after
                # it; treat this as "not found" rather than indexing past
                # the end of the string
                amppos = -1
                break
              # otherwise, we found the shortcut key
              unquotedstr = unquotedstr[amppos]
              amppos = -2 # to signal it was found...
          if amppos == -1:
            # couldn't handle, but shouldn't use mixed definition
            # (this moves on to the next entity, leaving the dtd unchanged)
            continue
      # finally set the new definition in the dtd, but not if it's empty
      if len(unquotedstr) > 0:
        thedtd.definition = extract.eitherquotestr(unquotedstr)
# not really

# send the (now translated) dtd to stdout
sys.stdout.writelines(d.tolines())

# done

