#!/usr/bin/env python

import sys, signal
import thread, threading, time
import xml.parsers.expat
import codecs
from threading import Thread

# Directory prefix of the help sources.
# NOTE(review): embed_href() hard-codes 'source/' instead of using this
# value - confirm whether the two are meant to stay in sync.
root="source/"
# NOTE(review): not referenced in the visible part of the file; presumably
# the upper bound for worker threads - confirm against the driver code.
max_threads = 25

# Rows describing the known pages.  get_link_filename()/get_link_name() read
# row[0] as the text matched against a link, row[1] as the file name and
# row[2] as the link name.  The list starts with one empty placeholder row.
titles = [[]]

# Rows of the localization (SDF) file, one list of tab separated fields per
# line, appended by load_localization_data().  get_localized_text() compares
# field 4 with the paragraph id and returns field 10.
localization_data = [[]]

# [redirect page, target page] pairs collected by Bookmark.get_all().
redirects = []

# list of elements that we can directly convert to wiki text
#
# 'start' holds the wiki markup emitted when the element opens, 'end' the
# markup emitted when it closes; both are looked up by element name in
# Paragraph.start_element() / Paragraph.end_element().
replace_element = \
    {'start':{'br': '<br/>',
              'emph': "'''",
              'help-id-missing': "'''Missing help ID.'''"
             },
     'end':  {'br': '',
              'emph': "'''",
              'help-id-missing': ""
             }
    }

# Wiki markup wrapped around a paragraph, keyed by the paragraph role.
# Paragraph.get_all() prepends the 'start' entry and appends the 'end' entry;
# the generic 'heading' role is mapped to 'heading1'..'heading6' there before
# the lookup.  Both sub-dictionaries must contain the same keys.
replace_paragraph_role = \
    {'start':{'code': '<code>',
              'codeintip': '<code>',
              'example': '<code>',
              'heading1': '= ',
              'heading2': '== ',
              'heading3': '=== ',
              'heading4': '==== ',
              'heading5': '===== ',
              'heading6': '====== ',
              'head1': '= ', # used only in one file, probably in error?
              'head2': '== ', # used only in one file, probably in error?
              'listitem': '',
              'note': '{{Note|',
              'null': '', # special paragraph for Variable, CaseInline, etc.
              'paragraph': '',
              'related': '', # used only in one file, probably in error?
              'relatedtopics': '', # used only in one file, probably in error?
              'tablecontent': '| ',
              'tablehead': '! scope="col" | ',
              'tip': '{{Tip|',
              'warning': '{{Warning|',
             },
     'end':{'code': '</code>\n\n',
            'codeintip': '</code>\n\n',
            'example': '</code>\n\n',
            'heading1': ' =\n\n',
            'heading2': ' ==\n\n',
            'heading3': ' ===\n\n',
            'heading4': ' ====\n\n',
            'heading5': ' =====\n\n',
            'heading6': ' ======\n\n',
            'head1': ' =\n\n', # used only in one file, probably in error?
            'head2': ' ==\n\n', # used only in one file, probably in error?
            'listitem': '\n',
            'note': '}}\n\n',
            'null': '', # special paragraph for Variable, CaseInline, etc.
            'paragraph': '\n\n',
            'related': '\n\n', # used only in one file, probably in error?
            'relatedtopics': '\n\n', # used only in one file, probably in error?
            'tablecontent': '\n\n',
            'tablehead': '\n\n',
            'tip': '}}\n\n',
            'warning': '}}\n\n',
           }
    }

# Section ids that are rendered as a wiki template instead of plain content:
# Section.get_all() wraps the section text in '{{<template>|' ... '}}'.
section_id_mapping = \
    {'relatedtopics': 'RelatedTopics'}

# text snippets that we need to convert
# Each entry is [text to find, replacement]; applied in order by
# replace_text().
replace_text_list = \
    [["$[officename]", "{{ProductName}}"],
     ["%PRODUCTNAME", "{{ProductName}}"]
    ]

# NOTE(review): not referenced in the visible part of the file; presumably
# the prefixes that identify a symbolic help id - confirm against the caller.
help_id_patterns = [
        "HID",
        "SID",
        "FID",
        "RID",
        "TP_",
        "MD_",
        "FN_",
        "DLG_",
        "SW_",
        "UID",
        "basctl_",
        "basic_",
        "chart2_",
        "dbaccess_",
        "extensions_",
        "filter_",
        "formula_",
        "fpicker_",
        "framework_",
        "goodies_"
        ]

# Upper-cased rows of helpers/help_hid.lst, filled by load_all_help_ids();
# get_help_id() matches row[0] and returns row[1].  The list starts with one
# empty placeholder row.
all_help_id_mappings = [[]]

def load_all_help_ids():
    """Load the help id table from "helpers/help_hid.lst".

    Every line is stripped, upper-cased and split on ','.  Lines that yield
    at least two fields are appended to the module level
    all_help_id_mappings list; all other lines are ignored.

    Raises whatever codecs.open() raises when the file cannot be read.
    """
    # 'with' closes the file even when reading or decoding fails
    with codecs.open("helpers/help_hid.lst", "r", "utf-8") as hid_file:
        for line in hid_file:
            ids = line.strip().upper().split(",")
            if len(ids) >= 2:
                all_help_id_mappings.append(ids)

def get_help_id_res2(name):
    """Look up a help id in "helpers/hid.lst".

    Every line of the file is stripped, upper-cased and split on single
    spaces; the second field of the first line whose first field equals
    'name' is returned.  The file content is upper-cased, 'name' is compared
    as given.

    Returns the string "0" when no line matches.
    """
    # 'with' closes the file on the early return as well as on errors; the
    # function is called once per failed lookup, so leaked handles add up
    with codecs.open("helpers/hid.lst", "r", "utf-8") as hid_file:
        for line in hid_file:
            ids = line.strip().upper().split(" ")
            if len(ids) >= 2 and ids[0] == name:
                return ids[1]
    # if none found
    return "0"

def get_help_id(name):
    """Translate a symbolic help id into the id stored in the mapping tables.

    The name is stripped, "cui_" is rewritten to "svx_" and the result is
    upper-cased.  The in-memory table all_help_id_mappings is searched first;
    when it has no matching row the lookup falls back to get_help_id_res2().
    """
    wanted = name.strip().replace("cui_","svx_").upper()
    for mapping in all_help_id_mappings:
        if len(mapping) < 2:
            # placeholder or incomplete row
            continue
        if mapping[0].strip() == wanted:
            return mapping[1].strip()
    return get_help_id_res2(wanted)

def get_link_filename(link, name):
    """Return the file name registered in 'titles' for a link.

    link - the href of the link; used as the search text unless it contains
           "http", in which case 'name' is searched instead
    name - the name of the link

    The first row of 'titles' whose first field contains the search text
    wins and its stripped second field is returned.  When nothing matches,
    'link' is returned unchanged.
    """
    text = link
    if link.find("http") >= 0:
        text = name
    for title in titles:
        try:
            if title[0].find(text) >= 0:
                return title[1].strip()
        except (IndexError, AttributeError, TypeError):
            # placeholder or malformed row; skip it, but do not hide
            # unrelated failures (or Ctrl-C) like a bare except would
            continue
    return link

def get_link_name(link):
    """Return the link name registered in 'titles' for a link.

    The first row of 'titles' whose first field contains 'link' wins and its
    stripped third field is returned.  Rows with fewer than three fields are
    skipped; 'titles' starts out with an empty placeholder row, which used to
    raise an IndexError here.  When nothing matches, 'link' is returned
    unchanged.
    """
    for title in titles:
        if len(title) >= 3 and title[0].find(link) >= 0:
            return title[2].strip()
    return link

def replace_text(text):
    """Apply every [old, new] pair of replace_text_list to 'text', in order,
    and return the result."""
    for entry in replace_text_list:
        # str.replace() leaves the text alone when there is nothing to replace
        text = text.replace(entry[0], entry[1])
    return text

def load_localization_data(sdf_file):
    """Read a localization (SDF) file into localization_data.

    sdf_file - path of the UTF-8 encoded file

    Every line is stripped; lines starting with '#' are skipped, all other
    lines are split on tabs and appended to the module level
    localization_data list.

    Loading is best effort: when the file cannot be opened, read or decoded
    the function returns silently and keeps the rows appended so far.
    """
    try:
        # 'with' closes the file even when reading stops half way
        with codecs.open(sdf_file, "r", "utf-8") as sdf:
            for line in sdf:
                line = line.strip()
                # TODO: Check if multiple \t needs to be merged
                if line.startswith("#"):
                    continue
                localization_data.append(line.split("\t"))
    except (IOError, OSError, UnicodeError):
        # missing or unreadable localization is not fatal; anything else
        # (programming errors, Ctrl-C) is no longer swallowed
        return

def replace_gt_lt(str,char,replace):
    """Replace every occurrence of 'char' that is not escaped by a backslash.

    str     - the text to process
    char    - the token to look for (normally "<" or ">")
    replace - the text that is put in place of each unescaped token

    An occurrence counts as escaped when the character right in front of it
    is a backslash; escaped occurrences are left untouched.  A token at the
    very start of the text is never escaped.

    The scan runs over the original text only, so an empty replacement, a
    replacement that itself contains the token, or a token longer than one
    character are all handled.  An empty token returns the text unchanged.
    """
    if not char:
        return str

    pieces = []
    copied_up_to = 0
    pos = str.find(char)
    while pos >= 0:
        if pos == 0 or str[pos-1] != '\\':
            pieces.append(str[copied_up_to:pos])
            pieces.append(replace)
            copied_up_to = pos + len(char)
        # continue behind this occurrence, never inside the replacement
        pos = str.find(char, pos + len(char))
    pieces.append(str[copied_up_to:])
    return "".join(pieces)


def get_localized_text(id, text):
    """Return the localized, wiki-ready text for a paragraph id.

    id   - paragraph id; compared (stripped) with field 4 of every row in
           localization_data
    text - not used

    The text is taken from field 10 of the first matching row that has more
    than 10 fields.  Escape sequences of the localization format are
    resolved, unescaped '<' and '>' become '&lt;' / '&gt;' and the escaped
    forms '\\<' and '\\>' become real angle brackets.

    Returns "" when no row matches.
    """
    # Note: The order is important
    substitutions = (
            ("\\\"","\""),
            ("& Chr(13)&","<br>"),
            ("& Chr(13) &","<br>"),
            ("&","&amp;"),
            ("\\n","\n"),
            ("\\t","\t"),
            ("\\\\<","&lt;"),
            ("\\\\>","&gt;"),
            )
    for row in localization_data:
        if len(row) <= 10 or row[4].strip() != id.strip():
            continue

        localized = row[10]
        for old, new in substitutions:
            localized = localized.replace(old, new)
        localized = replace_gt_lt(localized,"<","&lt;")
        localized = replace_gt_lt(localized,">","&gt;")
        # Finally replace the \< and \> tokens
        return localized.replace("\\<","<").replace("\\>",">")
    return ""

def get_localized_objects(parser, loc_text, attrs):
    """Parse the localized text 'loc_text' with LocalizedText and return the
    list of objects it produced.

    parser - parser object handed on to LocalizedText
    attrs  - attributes for the paragraph that wraps the localized text
    """
    return LocalizedText(parser, loc_text, attrs).parse()

def href_to_fname_id(href):
    """Split an href of the form 'file#id' into its two parts.

    Double quotes are removed from 'href' first.  The split happens at the
    first '#'.

    Returns the list [fname, id].  When there is no '#', a warning is written
    to stderr and [link, ''] is returned.
    """
    link = href.replace('"', '')
    if "#" not in link:
        # the trailing newline keeps the warning from running into the next
        # message on stderr
        sys.stderr.write('Reference without a "#" in "%s".\n'% link)
        return [link, '']

    fname, _, anchor = link.partition("#")
    return [fname, anchor]

# Base class for all the elements
#
# self.name - name of the element, to drop the self.child_parsing flag
# self.objects - collects the child objects that are constructed during
#                parsing of the child elements
# self.child_parsing - flag whether we are parsing a child, or the object
#                      itself
# self.parent - parent object
class ElementBase:
    """Base of all element classes; keeps the tree of child objects and
    knows which object is currently being parsed."""

    def __init__(self, name, parent):
        self.parent = parent
        self.name = name
        self.child_parsing = False
        self.objects = []

    def start_element(self, parser, name, attrs):
        # nothing to do by default; subclasses create their children here
        pass

    def end_element(self, parser, name):
        # our own closing tag: hand control back to the parent
        if name != self.name:
            return
        self.parent.child_parsing = False

    def char_data(self, parser, data):
        # character data is ignored by default
        pass

    # the innermost object that is being parsed right now
    def get_curobj(self):
        if not self.child_parsing:
            return self
        return self.objects[-1].get_curobj()

    # start parsing a child element
    def parse_child(self, child):
        self.objects.append(child)
        self.child_parsing = True

    # construct the wiki representation of this object, including the objects
    # held in self.objects (here only the text of the objects)
    def get_all(self):
        return u''.join(child.get_all() for child in self.objects)

    # for handling variables, and embedding in general
    # id - the variable name we want to get
    # returns the first match found in the children, or None
    def get_variable(self, id):
        for child in self.objects:
            if child is None:
                continue
            found = child.get_variable(id)
            if found is not None:
                return found
        return None

    # embed part of another file into current structure
    def embed_href(self, parent_parser, fname, id):
        # parse another xhp; embeds inside that file are not followed
        parser = XhpParser('source/' + fname, False, parent_parser.current_app, parent_parser.wiki_page_name)
        var = parser.get_variable(id)

        if var is not None:
            self.objects.append(var)
            return

        # NOTE(review): the parser above is created with follow_embed=False,
        # so this warning looks unreachable - confirm whether
        # parent_parser.follow_embed was meant.
        if parser.follow_embed:
            sys.stderr.write('Cannot find reference "#%s" in "%s".\n'% \
                    (id, fname))

    # report an element that this object does not know how to handle
    def unhandled_element(self, parser, name):
        filename = parser.filename if parser else "Localization File"
        sys.stderr.write('Warning: Unhandled element "%s" in "%s" (%s)\n'% \
                        (name, self.name, filename))

class XhpFile(ElementBase):
    """Top-most object of a parsed xhp file.

    self.depth - heading depth handed to new paragraphs and sections; updated
                 from every paragraph that is a direct child
    """

    def __init__(self):
        ElementBase.__init__(self, None, None)
        self.depth=1

    def start_element(self, parser, name, attrs):
        if name in ('body', 'helpdocument'):
            # ignored, we flatten the structure
            return

        if name in ('embed', 'embedvar'):
            if parser.follow_embed:
                fname, anchor = href_to_fname_id(attrs['href'])
                self.embed_href(parser, fname, anchor)
            return

        if name == 'bookmark':
            child = Bookmark(attrs, self, 'div', parser)
        elif name == 'comment':
            child = Comment(attrs, self)
        elif name == 'list':
            child = List(attrs, self)
        elif name == 'meta':
            child = Meta(attrs, self)
        elif name == 'paragraph':
            child = Paragraph(attrs, self, self.depth)
            # the paragraph may carry a deeper level than we had so far
            self.depth = child.depth
        elif name == 'section':
            child = Section(attrs, self, self.depth)
        elif name == 'sort':
            child = Sort(attrs, self)
        elif name == 'switch':
            child = Switch(attrs, self, parser.embedding_app)
        elif name == 'table':
            child = Table(attrs, self)
        else:
            self.unhandled_element(parser, name)
            return

        self.parse_child(child)

class LocalizedText(ElementBase):
    """Parses one localized text snippet as if it were the content of a
    <paragraph> element."""

    def __init__(self, parser, data, attrs):
        # Initialized with some 'tag' such that the parser
        # never needs to access the parent (which in this
        # case is null)
        ElementBase.__init__(self, 'localizedtext', None)
        self.parser = parser
        self.attrs = attrs
        self.follow_embed = True
        self.head_obj = None
        # the closing </paragraph> is deliberately not appended
        self.data = u'<?xml version="1.0" encoding="UTF-8"?><paragraph>' + data
        self.xml = self.data.encode('utf-8')

    # run the expat parser over the snippet; returns the objects collected by
    # the wrapping paragraph
    def parse(self):
        expat_parser = xml.parsers.expat.ParserCreate()
        expat_parser.CharacterDataHandler = self.char_data
        expat_parser.EndElementHandler = self.end_element
        expat_parser.StartElementHandler = self.start_element
        try:
            expat_parser.Parse(self.xml)
        except:
            # TODO: Check different exceptions
            # show the offending text on stderr and stdout, then re-raise
            sys.stderr.write('Trying to parse: '+self.xml+'\n')
            sys.stdout.write(self.xml + '\n')
            raise
        return self.objects[0].objects

    def start_element(self, name, attrs):
        if name == 'paragraph':
            # the wrapper added in __init__; uses the attributes we were given
            self.parse_child(Paragraph(self.attrs, self, 0))
        elif self.child_parsing:
            self.get_curobj().start_element(self.parser, name, attrs)
        else:
            self.unhandled_element(None, name)

    # NOTE(review): char_data() and end_element() hand 'self' to the child as
    # the parser, while start_element() hands over self.parser - confirm that
    # this difference is intended.
    def char_data(self, data):
        if not self.child_parsing:
            # Should never occur
            self.unhandled_element(None,"Unhandled Data:"+data)
            return
        self.get_curobj().char_data(self, data)

    def end_element(self, name):
        if not self.child_parsing:
            return
        self.get_curobj().end_element(self, name)

class Bookmark(ElementBase):
    """A <bookmark> element: an anchor inside the page and, for '.uno:'
    help ids, the source of redirect pages.

    self.type     - 'div' or 'span', the kind of anchor that is written
    self.id       - id of the anchor
    self.app      - raw application name of the file, '' when there is none
    self.redirect - name of the redirect ('' when no redirect is needed)
    self.target   - wiki page the redirect points to
    """

    def __init__(self, attrs, parent, type, parser):
        ElementBase.__init__(self, 'bookmark', parent)

        self.type = type

        self.id = attrs['id']
        self.app = ''
        self.redirect = ''
        self.target = ''

        # let's construct the name of the redirect, so that we can point
        # to the wikihelp directly from the LO code; wiki then takes care of
        # the correct redirect
        branch = attrs['branch']
        if branch.startswith('hid/') and (parser.current_app_raw != '' or parser.follow_embed):
            name = branch[len('hid/'):]

            if name.startswith('.uno:'):
                self.app = parser.current_app_raw
                self.redirect = name
                self.target = parser.wiki_page_name

    # returns the anchor markup; as a side effect registers the redirect(s)
    # in the module level 'redirects' list on every call
    def get_all(self):
        # first of all, we need to create a redirect page for this one
        if self.redirect != '' and self.target != '':
            if self.app != '':
                redirects.append(['%s/%s'% (self.app, self.redirect), self.target])
            else:
                # no application known: register the redirect for all of them
                for i in ['swriter', 'scalc', 'simpress', 'sdraw', 'smath', \
                        'schart', 'sbasic', 'sdatabase']:
                    redirects.append(['%s/%s'% (i, self.redirect), self.target])

        # then we also have to setup ID inside the page
        if self.type == 'div':
            return '<div id="%s"></div>\n'% self.id
        if self.type == 'span':
            return '<span id="%s"></span>'% self.id

        # newline added so that the message does not run into the next one
        sys.stderr.write('Unknown bookmark type "%s"\n'% self.type)
        return ''

class Image(ElementBase):
    """An <image> element, rendered as a wiki [[Image:...]] link.

    self.src     - value of the 'src' attribute
    self.width, self.height - size attributes; both "" unless both are given
    self.align   - always 'left'
    self.alt     - True while the <alt> child is being parsed
    self.alttext - text collected from the <alt> child
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'image', parent)
        self.src     = attrs['src']
        try:
            width, height = attrs['width'], attrs['height']
        except KeyError:
            # the size is only used when both attributes are present
            width = height = ""
        self.width   = width
        self.height  = height
        self.align   = 'left'
        self.alt     = False
        self.alttext = ""

    def start_element(self, parser, name, attrs):
        if name == 'alt':
            self.alt = True
        else:
            self.unhandled_element(parser, name)

    def end_element(self, parser, name):
        ElementBase.end_element(self, parser, name)

        if name == 'alt':
            self.alt = False

    def char_data(self, parser, data):
        # only the text inside <alt> is of interest
        if self.alt:
            self.alttext = self.alttext + data

    def get_all(self):
        parts = ["[[Image:", self.src, "|border|", self.align, "|"]
        if len(self.width):
            parts.extend([self.width, "x", self.height, "|"])
        parts.extend([self.alttext, "]]"])
        return "".join(parts)

    # <alt> is handled by the image itself, there are no child objects
    def get_curobj(self):
        return self

class Comment(ElementBase):
    """A <comment> element; its text ends up in a wiki/HTML comment."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'comment', parent)
        self.text = ''

    def char_data(self, parser, data):
        # the parser may deliver the text in several pieces
        self.text += data

    def get_all(self):
        return ''.join(['<!-- ', self.text, ' -->'])

class Text:
    """A piece of plain text in the object tree.

    Not derived from ElementBase; it only provides the two methods that are
    called on every child object, get_all() and get_variable().
    """

    def __init__(self, text):
        """Store 'text' after running it through replace_text()."""
        self.wikitext = replace_text(text)

    def get_all(self):
        """Return the stored wiki text."""
        return self.wikitext

    def get_variable(self, id):
        """Plain text never contains a variable; always returns None."""
        return None

class TableCell(ElementBase):
    """A <tablecell> element; a plain container for its children."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'tablecell', parent)

    def start_element(self, parser, name, attrs):
        if name in ('embed', 'embedvar'):
            # the href is split even when embeds are not followed
            fname, anchor = href_to_fname_id(attrs['href'])
            if parser.follow_embed:
                self.embed_href(parser, fname, anchor)
            return

        if name == 'bookmark':
            child = Bookmark(attrs, self, 'div', parser)
        elif name == 'comment':
            child = Comment(attrs, self)
        elif name == 'paragraph':
            child = Paragraph(attrs, self, 0)
        elif name == 'section':
            # FIXME depth, should we use something better than 0?
            child = Section(attrs, self, 0)
        else:
            self.unhandled_element(parser, name)
            return

        self.parse_child(child)

class TableRow(ElementBase):
    """A <tablerow> element; only <tablecell> children are accepted."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'tablerow', parent)

    def start_element(self, parser, name, attrs):
        if name != 'tablecell':
            self.unhandled_element(parser, name)
            return
        self.parse_child(TableCell(attrs, self))

    # the wiki row separator followed by the text of the cells
    def get_all(self):
        return '|-\n' + ElementBase.get_all(self)

class Table(ElementBase):
    """A <table> element, rendered as a wiki table with a border."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'table', parent)

    def start_element(self, parser, name, attrs):
        if name == 'tablerow':
            self.parse_child(TableRow(attrs, self))
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # + ' align="left"' etc.?
        rows = ElementBase.get_all(self)
        return '{| border="1"\n' + rows + '|}\n\n'

class ListItem(ElementBase):
    """A <listitem> element of a List.

    The markup depends on the parent list: '<li>...</li>' when the list has a
    'startwith' value above 0, '#' for an ordered list, '*' otherwise.
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'listitem', parent)

    def start_element(self, parser, name, attrs):
        if name == 'bookmark':
            self.parse_child(Bookmark(attrs, self, 'span', parser))
        elif name == 'embed' or name == 'embedvar':
            (fname, id) = href_to_fname_id(attrs['href'])
            if parser.follow_embed:
                self.embed_href(parser, fname, id)
        elif name == 'paragraph':
            self.parse_child(Paragraph(attrs, self, 0))
        else:
            self.unhandled_element(parser, name)

    # Returns the item text with the list markup around it, "" for an item
    # without children.
    #
    # The text used to be built in a loop that added the complete item text
    # once per child object, so an item with several children was repeated,
    # and the closing '</li>' was computed but never written.
    def get_all(self):
        if not self.objects:
            return ""

        prefix = '*'
        postfix = ''
        if self.parent.startwith > 0:
            prefix = '<li>'
            postfix = '</li>'
        elif self.parent.type == 'ordered':
            prefix = '#'

        # add the text itself, exactly once
        return prefix + ElementBase.get_all(self) + postfix

class List(ElementBase):
    """A <list> element.

    self.type      - value of the 'type' attribute (read by ListItem)
    self.startwith - number the list starts with; 0 when the attribute is
                     missing or not a number.  A value above 0 switches the
                     output to an html <ol start="..."> list.
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'list', parent)

        self.type = attrs['type']
        try:
            self.startwith = int(attrs['startwith'])
        except (KeyError, TypeError, ValueError):
            # attribute missing or not a number
            self.startwith = 0

    def start_element(self, parser, name, attrs):
        if name == 'listitem':
            self.parse_child(ListItem(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        items = ElementBase.get_all(self)
        if self.startwith > 0:
            return '<ol start="%d">\n'% self.startwith + items + '</ol>\n'
        # wiki list; an empty line ends it
        return "" + items + '\n'

# we ignore the entire <meta> part of xhp
# TODO - do we need it for something?
class Meta(ElementBase):
    """Placeholder for the <meta> element.

    Nothing is overridden, so the inherited ElementBase handlers apply:
    nested start tags and character data are dropped and the object
    contributes an empty string to the output.
    """
    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'meta', parent)

class Section(ElementBase):
    """A <section> element.

    self.depth - heading depth for paragraphs and nested sections; updated
                 from every paragraph that is a direct child
    self.id    - id of the section; it can be embedded by that id
    """

    def __init__(self, attrs, parent, depth):
        ElementBase.__init__(self, 'section', parent)
        self.depth = depth
        self.id = attrs['id']

    def start_element(self, parser, name, attrs):
        if name == 'bookmark':
            self.parse_child(Bookmark(attrs, self, 'div', parser))
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        elif name == 'embed' or name == 'embedvar':
            (fname, id) = href_to_fname_id(attrs['href'])
            if parser.follow_embed:
                self.embed_href(parser, fname, id)
        elif name == 'list':
            self.parse_child(List(attrs, self))
        elif name == 'paragraph':
            para = Paragraph(attrs, self, self.depth)
            self.depth = para.depth
            self.parse_child(para)
        elif name == 'section':
            # sections can be nested
            self.parse_child(Section(attrs, self, self.depth))
        elif name == 'switch':
            self.parse_child(Switch(attrs, self, parser.embedding_app))
        elif name == 'table':
            self.parse_child(Table(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # some of the section ids are used as real id's, some of them have
        # function (like relatetopics), and have to be templatized
        mapping = section_id_mapping.get(self.id, '')
        content = ElementBase.get_all(self)
        if mapping == '':
            return '' + content
        return '{{%s|'% mapping + content + '}}\n\n'

    # a variable inside the section wins over the section itself
    def get_variable(self, id):
        var = ElementBase.get_variable(self, id)
        if var is not None:
            return var
        if id == self.id:
            return self
        return None

class Sort(ElementBase):
    """A <sort> element; its sections are written ordered by their id.

    self.order - value of the 'order' attribute, 'asc' when it is missing
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'sort', parent)

        self.order = attrs.get('order', 'asc')

    def start_element(self, parser, name, attrs):
        if name == 'section':
            # FIXME depth, should we use something better than 0?
            self.parse_child(Section(attrs, self, 0))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # NOTE(review): 'asc' selects reverse=True, i.e. the sections come
        # out with descending ids - this looks inverted; the behaviour is
        # kept as it was, confirm the intent before changing it.
        rev = (self.order == 'asc')
        self.objects = sorted(self.objects, key=lambda obj: obj.id, reverse=rev)

        return ElementBase.get_all(self)

class Link(ElementBase):
    """A <link> element, rendered as an external or an internal wiki link.

    self.link         - value of the 'href' attribute
    self.default_name - the 'name' attribute, or the part of the href behind
                        the last '/' when there is no such attribute
    self.lname        - target page as found by get_link_filename()
    self.wikitext     - the link text collected from the character data
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'link', parent)

        self.link = attrs['href']
        if 'name' in attrs:
            self.default_name = attrs['name']
        else:
            self.default_name = self.link[self.link.rfind("/")+1:]
        # the target page comes from the titles table, not from the name
        self.lname = get_link_filename(self.link, self.default_name)
        self.wikitext = ""

    def char_data(self, parser, data):
        self.wikitext = self.wikitext + data

    def get_all(self):
        # a link without text shows its name
        if self.wikitext == "":
            self.wikitext = self.default_name

        self.wikitext = replace_text(self.wikitext)
        if self.link.find("http") >= 0:
            return "["+self.link+" "+self.wikitext+"]"
        return "[["+self.lname+"|"+self.wikitext+"]]"

class SwitchInline(ElementBase):
    """A <switchinline> element: text that depends on the system ('sys') or
    on the application ('appl').

    self.switch        - value of the 'select' attribute
    self.embedding_app - application the text is embedded into, '' if none
    """

    def __init__(self, attrs, parent, app):
        ElementBase.__init__(self, 'switchinline', parent)
        self.embedding_app = app
        self.switch = attrs['select']

    def start_element(self, parser, name, attrs):
        if name == 'defaultinline':
            self.parse_child(CaseInline(attrs, self, True))
        elif name == 'caseinline':
            self.parse_child(CaseInline(attrs, self, False))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        if len(self.objects) == 0:
            return ''

        if self.switch == 'sys':
            system = {'MAC':'', 'UNIX':'', 'WIN':'', 'default':''}
            for i in self.objects:
                if i.case in system:
                    system[i.case] = i.get_all()
                elif i.case == 'OS2' or i.case == 'HIDE_HERE':
                    # OS2: ignore, there is only one mention of OS2, which
                    # is a 'note to translators', and no meat
                    # HIDE_HERE: do what the name suggest ;-)
                    continue
                else:
                    sys.stderr.write('Unhandled "%s" case in "sys" switchinline.\n'% \
                            i.case )

            # only the cases that have text become template parameters
            text = '{{System'
            for case, param in (('default', 'default'), ('MAC', 'mac'), \
                                ('UNIX', 'unx'), ('WIN', 'win')):
                if system[case] != '':
                    text = '%s|%s=%s'% (text, param, system[case])
            return text + '}}'

        if self.switch == 'appl':
            # we want directly use the right text, when inlining something
            # 'shared' into an 'app'
            if self.embedding_app != '':
                # NOTE(review): the default case is not used when no case
                # matches the embedding application - confirm the intent.
                for i in self.objects:
                    if i.case == self.embedding_app:
                        return i.get_all()
                return ''

            appls = {'BASIC':'Basic', 'CALC':'Calc', \
                     'CHART':'Chart', 'DRAW':'Draw', \
                     'IMAGE':'Draw', 'IMPRESS': 'Impress', \
                     'MATH':'Math', 'WRITER':'Writer', \
                     'OFFICE':'', 'default':''}
            text = ''
            default = ''
            for i in self.objects:
                # the handler covers the lookup as well as get_all(); kept
                # that wide on purpose, narrowing it could turn failures
                # that are only reported today into a crash
                try:
                    app = appls[i.case]
                    content = i.get_all()
                    if content == '':
                        continue
                    if app == '':
                        default = content
                    else:
                        text = text + '{{WhenIn%s|%s}}'% (app, content)
                except:
                    sys.stderr.write('Unhandled "%s" case in "appl" switchinline.\n'% \
                            i.case)

            if text == '':
                return default
            if default != '':
                text = text + '{{WhenDefault|%s}}'% default
            return text

        return ''

class Case(ElementBase):
    """One <case> or <default> branch of a <switch>.

    self.case - value of the 'select' attribute, 'default' for <default>
    """

    def __init__(self, attrs, parent, is_default):
        ElementBase.__init__(self, 'case', parent)

        if not is_default:
            self.case = attrs['select']
        else:
            # <default> has no 'select'; it also ends with another tag
            self.case = 'default'
            self.name = 'default'

    def start_element(self, parser, name, attrs):
        if name in ('embed', 'embedvar'):
            if parser.follow_embed:
                fname, anchor = href_to_fname_id(attrs['href'])
                self.embed_href(parser, fname, anchor)
            return

        if name == 'bookmark':
            child = Bookmark(attrs, self, 'div', parser)
        elif name == 'comment':
            child = Comment(attrs, self)
        elif name == 'list':
            child = List(attrs, self)
        elif name == 'paragraph':
            # FIXME depth, should we use something better than 0?
            child = Paragraph(attrs, self, 0)
        elif name == 'section':
            # FIXME depth, should we use something better than 0?
            child = Section(attrs, self, 0)
        elif name == 'table':
            child = Table(attrs, self)
        else:
            self.unhandled_element(parser, name)
            return

        self.parse_child(child)

class Switch(SwitchInline):
    """A <switch> element; like SwitchInline, but with <case> / <default>
    children that can hold whole paragraphs."""

    def __init__(self, attrs, parent, app):
        SwitchInline.__init__(self, attrs, parent, app)
        self.name = 'switch'

    def start_element(self, parser, name, attrs):
        # always follow the application of the parser that feeds us
        self.embedding_app = parser.embedding_app

        if name not in ('case', 'default'):
            self.unhandled_element(parser, name)
            return
        self.parse_child(Case(attrs, self, name == 'default'))

class Item(ElementBase):
    """An <item> element; its text is wrapped in markup chosen by the
    'type' attribute.

    self.type - value of the 'type' attribute
    self.text - the collected character data
    """

    # markup written in front of ('start') and behind ('end') the text
    replace_type = \
            {'start':{'input': '<code>',
                      'keycode': '{{KeyCode|',
                      'literal': '<code>',
                      'menuitem': '{{MenuItem|',
                      'productname': ''
                     },
             'end':{'input': '</code>',
                    'keycode': '}}',
                    'literal': '</code>',
                    'menuitem': '}}',
                    'productname': ''
                   }}

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'item', parent)

        self.type = attrs['type']
        self.text = ''

    def char_data(self, parser, data):
        self.text = self.text + data

    # an unknown type is reported on stderr and the text is written without
    # any markup
    def get_all(self):
        start = self.replace_type['start']
        end = self.replace_type['end']
        if self.type in start and self.type in end:
            return start[self.type] + \
                   replace_text(self.text) + \
                   end[self.type]

        sys.stderr.write('Unhandled item type "%s".\n'% self.type)
        return replace_text(self.text)


class Paragraph(ElementBase):
    """A <paragraph> element; also the base of Variable and CaseInline.

    self.role  - 'role' attribute, 'paragraph' when missing
    self.id    - 'id' attribute, "" when missing; used to look up the
                 localized text
    self.level - 'level' attribute as a number, 0 when missing or invalid
    self.depth - the larger one of the depth we were given and self.level
    self.is_first - True when the parent had no objects yet when this
                    paragraph was created
    self.localized_objects - objects parsed from the localized text; when
                             present they replace self.objects in get_all()
    """

    def __init__(self, attrs, parent, depth):
        ElementBase.__init__(self, 'paragraph', parent)

        self.role = attrs.get('role', 'paragraph')
        self.id = attrs.get('id', "")

        try:
            self.level = int(attrs['level'])
        except (KeyError, TypeError, ValueError):
            # attribute missing or not a number
            self.level = 0
        self.depth = max(depth, self.level)

        self.is_first = (len(self.parent.objects) == 0)
        self.localized_objects = []

    def start_element(self, parser, name, attrs):
        if name == 'ahelp':
            # TODO extended tips are ignored for now, just the text is used
            # verbatim
            pass
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        elif name == 'embedvar':
            if parser.follow_embed:
                (fname, id) = href_to_fname_id(attrs['href'])
                self.embed_href(parser, fname, id)
        elif name == 'image':
            self.parse_child(Image(attrs, self))
        elif name == 'item':
            self.parse_child(Item(attrs, self))
        elif name == 'link':
            self.parse_child(Link(attrs, self))
        elif name == 'switchinline':
            self.parse_child(SwitchInline(attrs, self, parser.embedding_app))
        elif name == 'variable':
            self.parse_child(Variable(attrs, self, self.depth))
        elif name in replace_element['start']:
            # elements that map directly to a piece of wiki text
            self.objects.append(Text(replace_element['start'][name]))
        else:
            self.unhandled_element(parser, name)

    def end_element(self, parser, name):
        ElementBase.end_element(self, parser, name)

        # closing markup of the directly converted elements; all other
        # names are ignored here
        if name in replace_element['end']:
            self.objects.append(Text(replace_element['end'][name]))

    def char_data(self, parser, data):
        if self.role == 'paragraph' or self.role == 'heading':
            # collapse the leading whitespace to one space, drop line breaks
            if data != '' and data[0] == ' ':
                data = ' ' + data.lstrip()
            data = data.replace('\n', ' ')

        # once the localized text is in place the original text is dropped
        if len(self.localized_objects):
            return

        loc_text = u''
        if len(self.id):
            loc_text = get_localized_text(self.id, data)

        if len(loc_text):
            attrs = {'role':self.role,
                     'level':self.level}
            self.localized_objects = get_localized_objects(parser, loc_text, attrs)
        else:
            self.objects.append(Text(data))

    def get_all(self):
        # Localization objects present, drop the other objects
        if len(self.localized_objects):
            self.objects = self.localized_objects

        role = self.role
        if role == 'heading':
            # there is nothing below heading6
            role = 'heading%d'% min(self.depth, 6)
        if ( role == 'tablecontent' or role == 'tablehead' ) and not self.is_first:
            # only the first paragraph of a cell carries the cell markup
            role = 'paragraph'

        has_content = len(self.objects) > 0

        # prepend the markup according to the role
        text = ''
        if has_content:
            if role in replace_paragraph_role['start']:
                text = text + replace_paragraph_role['start'][role]
            else:
                sys.stderr.write( "Unknown paragraph role start: " + role + "\n" )

        # the text itself
        text = text + ElementBase.get_all(self).strip()

        # append the markup according to the role
        if has_content:
            if role in replace_paragraph_role['end']:
                text = text + replace_paragraph_role['end'][role]
            else:
                sys.stderr.write( "Unknown paragraph role end: " + role + "\n" )

        return text

class Variable(Paragraph):
    """Paragraph subclass named 'variable', identified by attrs['id']."""

    def __init__(self, attrs, parent, depth):
        Paragraph.__init__(self, attrs, parent, depth)
        self.id = attrs['id']
        self.name = 'variable'

    def get_variable(self, id):
        """Return this object when `id` equals its own id, otherwise None."""
        return self if id == self.id else None

class CaseInline(Paragraph):
    """Paragraph subclass for an inline case or default branch.

    The role is always 'null' and the paragraph depth passed to Paragraph is
    0.  A default branch is named 'defaultinline' with case 'default';
    otherwise the name is 'caseinline' and the case is attrs['select'].
    """

    def __init__(self, attrs, parent, is_default):
        Paragraph.__init__(self, attrs, parent, 0)
        self.role = 'null'

        if not is_default:
            self.name = 'caseinline'
            self.case = attrs['select']
            return

        self.name = 'defaultinline'
        self.case = 'default'

class XhpParser:
    """Parse one help file with expat into an object tree under an XhpFile.

    The constructor reads `filename` as UTF-8 and parses it immediately.
    Every expat callback is forwarded to the object that head_obj currently
    reports through get_curobj(), with this parser passed as first argument.

    Attributes set by the constructor:
      head_obj        -- the XhpFile at the top of the object tree
      filename, follow_embed, wiki_page_name -- stored constructor arguments
      current_app_raw -- application directory found in the path (e.g.
                         'swriter'), or '' when none matches
      current_app     -- upper-case name paired with it (e.g. 'WRITER'), or ''
      embedding_app   -- `embedding_app` argument, or current_app when the
                         argument is the empty string
    """

    def __init__(self, filename, follow_embed, embedding_app, wiki_page_name):
        self.head_obj = XhpFile()
        self.filename = filename
        self.follow_embed = follow_embed
        self.wiki_page_name = wiki_page_name

        # Work out the application from the '/<dir>/' component of the path;
        # the first matching entry wins.
        self.current_app = ''
        self.current_app_raw = ''
        known_apps = (('sbasic', 'BASIC'), ('scalc', 'CALC'),
                      ('sdatabase', 'DATABASE'), ('sdraw', 'DRAW'),
                      ('schart', 'CHART'), ('simpress', 'IMPRESS'),
                      ('smath', 'MATH'), ('swriter', 'WRITER'))
        for raw_name, app_name in known_apps:
            if filename.find('/%s/' % raw_name) >= 0:
                self.current_app_raw = raw_name
                self.current_app = app_name
                break

        if embedding_app != '':
            self.embedding_app = embedding_app
        else:
            self.embedding_app = self.current_app

        # Read everything and close the file before parsing, so the handle is
        # released even when reading or parsing raises and is not held open
        # for the duration of the parse.
        with codecs.open(filename, "r", "utf-8") as xhp:
            buf = xhp.read()

        p = xml.parsers.expat.ParserCreate()

        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.char_data

        # NOTE(review): Parse() is called without isfinal, exactly as before;
        # confirm whether end-of-document errors should be reported.
        p.Parse(buf.encode('utf-8'))

    def start_element(self, name, attrs):
        """expat start-tag callback; forwarded to the current object."""
        self.head_obj.get_curobj().start_element(self, name, attrs)

    def end_element(self, name):
        """expat end-tag callback; forwarded to the current object."""
        self.head_obj.get_curobj().end_element(self, name)

    def char_data(self, data):
        """expat character-data callback; forwarded to the current object."""
        self.head_obj.get_curobj().char_data(self, data)

    def get_all(self):
        """Return whatever head_obj.get_all() produces for the parsed file."""
        return self.head_obj.get_all()

    def get_variable(self, id):
        """Return head_obj.get_variable(id)."""
        return self.head_obj.get_variable(id)

def loadallfiles(filename):
    """Append the entries of a ';'-separated listing to the global `titles`.

    `filename` is read as UTF-8.  Each line is split on its first two ';'
    characters, giving at most three fields, and the resulting list is
    appended to `titles`.  Fields are not stripped, so the last field of a
    line keeps its trailing newline.
    """
    global titles
    # The context manager closes the listing even if reading fails; the
    # handle used to be left open.
    with codecs.open(filename, "r", "utf-8") as listing:
        titles.extend(line.split(";", 2) for line in listing)

def signal_handler(signal, frame):
    """SIGINT handler: write a notice to stderr and raise SystemExit(1).

    Inside this function the `signal` parameter shadows the signal module.
    """
    sys.stderr.write('Exiting...\n')
    raise SystemExit(1)

signal.signal(signal.SIGINT, signal_handler)

class WikiConv2(Thread):
    """Worker thread that converts one help file into one wiki text file.

    inputfile      -- path of the help file handed to XhpParser
    wiki_page_name -- wiki page name handed to XhpParser
    outputfile     -- path the generated wiki text is written to (UTF-8)
    """

    def __init__(self, inputfile, wiki_page_name, outputfile):
        Thread.__init__(self)
        self.inputfile = inputfile
        self.wiki_page_name = wiki_page_name
        self.outputfile = outputfile

    def run(self):
        """Parse the input file and write the resulting wiki text."""
        parser = XhpParser(self.inputfile, True, '', self.wiki_page_name)
        # Render the page before creating the output file.  The main loop
        # skips pages whose output file already exists, so a failed
        # conversion must not leave an empty file behind.
        text = parser.get_all()
        with codecs.open(self.outputfile, "wb", "utf-8") as out:
            out.write(text)

# Main Function
#
# Load the help ids, the title listing and (when a command-line argument is
# given) the localization data, then start one WikiConv2 worker per listed
# page whose output file under wiki/ does not exist yet.
load_all_help_ids()
loadallfiles("alltitles.csv")
if len(sys.argv) > 1:
    load_localization_data(sys.argv[1])

for title in titles:
    # Throttle: wait while more than max_threads threads are alive.
    while threading.active_count() > max_threads:
        time.sleep(0.001)

    # Entries without at least an input file and a page name (such as the
    # empty placeholder `titles` starts with) are ignored.
    if len(title) <= 1:
        continue

    infile = title[0].strip()
    page_name = title[1].strip()
    outfile = "wiki/" + page_name

    # An output file that can be opened for reading counts as already
    # converted and is skipped.
    try:
        existing = open(outfile, "r")
    except IOError:
        existing = None

    if existing is not None:
        existing.close()
        print("Warning: Skipping: " + infile + " > " + outfile)
        continue

    # A failure to start the worker is reported and the loop moves on.  It
    # used to fall through to the "Skipping" branch and call close() on an
    # undefined or stale file object.
    try:
        WikiConv2(infile, page_name, outfile).start()
    except Exception:
        print('Failed to convert "%s" into "%s".\n' % (page_name, outfile))

time.sleep(0.1)

# vim:set shiftwidth=4 softtabstop=4 expandtab: