#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

#This digs through a pile of Bugzilla instances and populates the cwd with a
#big collection of bug-docs in per-filetype dirs, named after the bug-id with
#a prefix to indicate which bug-tracker they came from, e.g.
#
#fdo<bugid>-X.suffix
#rhbz<bugid>-X.suffix
#moz<bugid>-X.suffix
#
#where X is the position of the attachment within the bug (attachments of
#every type are counted, so the numbers of the downloaded files may have gaps)

from __future__ import print_function
import feedparser
import base64
import re
import os, os.path
import sys
try:
    from urllib.request import urlopen
except:
    from urllib import urlopen
try:
    import xmlrpc.client as xmlrpclib
except:
    import xmlrpclib
from xml.dom import minidom
from xml.sax.saxutils import escape

def urlopen_retry(url):
    """Open *url*, trying again a few times when the request fails.

    The initial attempt may be followed by up to three retries.  Each
    IOError is reported on stdout; when the final retry fails as well the
    IOError is re-raised to the caller.

    Returns whatever urlopen() returns for the first successful attempt.
    """
    retries_left = 3
    while True:
        try:
            return urlopen(url)
        except IOError as error:
            print("caught IOError: " + str(error))
            if retries_left == 0:
                # nothing left to try, let the caller see the failure
                raise
            retries_left -= 1
            print("retrying...")

def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one bug whose type matches *mimetype*.

    url      -- show_bug style URL; the bug id is the text after the '='
    mimetype -- MIME type to collect (compared case-insensitively)
    prefix   -- tracker prefix used in the file names, e.g. "fdo"
    suffix   -- file extension, also the name of the output directory

    The bug is fetched as XML (url + "&ctype=xml") and every matching
    attachment is written to <suffix>/<prefix><id>-<n>.<suffix>, where n is
    the 1-based position of the attachment within the bug.  Nothing is
    fetched when <suffix>/<prefix><id>-1.<suffix> already exists.

    Exceptions from the download, the XML parser, the base64 decoder or the
    file system are propagated to the caller.
    """
    bugid = url.rsplit('=', 2)[1]
    print("id is " + prefix + bugid + " " + suffix)
    if os.path.isfile(suffix + '/' + prefix + bugid + '-1.' + suffix):
        print("assuming " + bugid + " is up to date")
        return

    print("parsing " + bugid)
    sock = urlopen_retry(url+"&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        # close the connection even when the XML turns out to be malformed
        sock.close()

    attachmentid = 0
    for attachment in dom.getElementsByTagName('attachment'):
        attachmentid += 1
        print(" mimetype is", end=' ')
        # The children are visited in document order: the 'type' child is
        # expected to be seen before 'data', so a mismatch can abandon the
        # attachment before its payload is looked at.
        for node in attachment.childNodes:
            if node.nodeName == 'type':
                if not node.firstChild:
                    # empty <type/> element: no mimetype to compare against
                    print('unknown, skipping')
                    break
                print(node.firstChild.nodeValue, end=' ')
                if node.firstChild.nodeValue.lower() != mimetype.lower():
                    print('skipping')
                    break
            elif node.nodeName == 'data':
                # check if attachment is deleted (i.e. https://bugs.kde.org/show_bug.cgi?id=53343&ctype=xml)
                if not node.firstChild:
                    print('deleted attachment, skipping')
                    continue

                download = suffix + '/' + prefix + bugid + '-' + str(attachmentid) + '.' + suffix
                print('downloading as ' + download)
                # Decode before creating the file: a broken payload must not
                # leave an empty file behind that later looks "up to date".
                payload = base64.b64decode(node.firstChild.nodeValue)
                with open(download, 'wb') as f:
                    f.write(payload)
                break

def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one Novell bug that match *mimetype*.

    url      -- show_bug style URL; the bug id is the text after the '='
    mimetype -- MIME type to collect (compared case-insensitively)
    prefix   -- tracker prefix used in the file names
    suffix   -- file extension, also the name of the output directory

    The bug is fetched as XML and its comments are scanned for the text
    "Created an attachment (id=N)".  Each attachment found that way is
    requested from the novellattach URL and stored as
    <suffix>/<prefix><id>-<n>.<suffix> when the Content-Type reported by the
    server matches; n counts the attachment comments of the bug from 1.
    Nothing is fetched when <suffix>/<prefix><id>-1.<suffix> already exists.

    Exceptions from the downloads, the XML parser or the file system are
    propagated to the caller.
    """
    bugid = url.rsplit('=', 2)[1]
    print("id is " + prefix + bugid + " " + suffix)
    if os.path.isfile(suffix + '/' + prefix + bugid + '-1.' + suffix):
        print("assuming " + bugid + " is up to date")
        return

    print("parsing " + bugid)
    sock = urlopen_retry(url+"&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        # close the connection even when the XML turns out to be malformed
        sock.close()

    attachmentid = 0
    for comment in dom.getElementsByTagName('thetext'):
        # an empty comment has no text node and cannot announce an attachment
        if comment.firstChild is None:
            continue
        commentText = comment.firstChild.nodeValue
        match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
        if not match:
            continue

        attachmentid += 1

        download = suffix + '/' + prefix + bugid + '-' + str(attachmentid) + '.' + suffix
        if os.path.isfile(download):
            print("assuming " + download + " is up to date")
            continue

        realAttachmentId = match.group(1)
        handle = urlopen_retry(novellattach + realAttachmentId)
        if not handle:
            print("attachment %s is not accessible" % realAttachmentId)
            continue

        try:
            print(" mimetype is", end=' ')

            info = handle.info()
            # Probe with hasattr(): plain attribute access raises
            # AttributeError on message objects that only offer gettype().
            if hasattr(info, 'get_content_type'):
                remoteMime = info.get_content_type()
            else:
                remoteMime = info.gettype()
            print(remoteMime, end=' ')
            # get_content_type() reports the type in lower case, so a
            # case-sensitive comparison would never match mixed-case
            # mimetypes such as '...macroEnabled.12'.
            if remoteMime.lower() != mimetype.lower():
                print("skipping")
                continue

            print('downloading as ' + download)
            payload = handle.read()
        finally:
            # release the connection whether or not the attachment was kept
            handle.close()

        with open(download, 'wb') as f:
            f.write(payload)

def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
    """Collect the attachments of type *mimetype* from an XML-RPC Bugzilla.

    rpcurl   -- URL of the tracker's XML-RPC endpoint
    showurl  -- URL prefix to which a bug id is appended to address one bug
    mimetype -- MIME type to search for and to download
    prefix   -- tracker prefix used in the file names, e.g. "rhbz"
    suffix   -- file extension, also the name of the output directory

    The output directory is created when missing.  Bug.search is then asked
    for all bugs having an attachment of the requested type and every bug of
    the result is handed to get_from_bug_url_via_xml().  An XML-RPC fault is
    reported on stdout and ends the function; any other exception is
    propagated to the caller.
    """
    try:
        os.mkdir(suffix)
    except OSError:
        # Normally the directory is left over from an earlier run.  If it
        # really could not be created, writing the first download fails
        # with a clearer error.
        pass
    try:
        proxy = xmlrpclib.ServerProxy(rpcurl)
        query = {
            'column_list': 'bug_id',
            'query_format': 'advanced',
            'field0-0-0': 'attachments.mimetype',
            'type0-0-0': 'equals',
            'value0-0-0': mimetype,
        }
        result = proxy.Bug.search(query)
        bugs = result['bugs']
        print(str(len(bugs)) + ' bugs to process')
        for bug in bugs:
            url = showurl + str(bug['id'])
            get_from_bug_url_via_xml(url, mimetype, prefix, suffix)
    except xmlrpclib.Fault as err:
        print("A fault occurred")
        print("Fault code: %s" % err.faultCode)
        print(err.faultString)

def get_through_rss_query_url(url, mimetype, prefix, suffix):
    """Process every bug listed in the RSS feed found at *url*.

    url      -- complete query URL that yields an RSS feed of bugs
    mimetype -- MIME type of the attachments to download
    prefix   -- tracker prefix used in the file names; "novell" selects the
                comment-scraping download routine
    suffix   -- file extension, also the name of the output directory

    The output directory is created when missing.  The 'id' of each feed
    entry is used as the URL of the bug.  A bug that fails is reported on
    stdout and skipped so that one broken bug does not end the whole run.
    """
    try:
        os.mkdir(suffix)
    except OSError:
        # Normally the directory is left over from an earlier run.  If it
        # really could not be created, the downloads below report it.
        pass
    d = feedparser.parse(url)

    #Getting detailed bug information and downloading an attachment body is not possible without logging in to Novell bugzilla
    #get_novell_bug_via_xml function is a workaround for that situation
    get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml

    for entry in d['entries']:
        bug_url = entry['id']
        try:
            get_bug_function(bug_url, mimetype, prefix, suffix)
        except Exception:
            # Only ordinary errors are swallowed: Ctrl+C (KeyboardInterrupt)
            # and SystemExit are not Exception subclasses and still end the run.
            print(bug_url + " failed: " + str(sys.exc_info()[0]))

def get_through_rss_query(queryurl, mimetype, prefix, suffix):
    """Search a Bugzilla for bugs with *mimetype* attachments and fetch them.

    queryurl -- URL of the tracker's buglist.cgi
    mimetype -- MIME type to search for and to download
    prefix   -- tracker prefix used in the file names, e.g. "fdo"
    suffix   -- file extension, also the name of the output directory

    Builds the advanced-search URL that returns the matching bugs as an RSS
    feed, prints it and passes it on to get_through_rss_query_url().
    """
    # imported here so that the function works with Python 3 and Python 2
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    # The mimetype ends up in a URL query string and therefore has to be
    # percent-encoded: a literal '+' (as in 'image/svg+xml') would be read as
    # a space by the server and the search would find nothing.
    url = (queryurl
           + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0='
           + quote(mimetype, safe='/')
           + '&ctype=rss')
    print('url is ' + url)
    get_through_rss_query_url(url, mimetype, prefix, suffix)

def get_launchpad_bugs(prefix):
    """Download interesting attachments from Ubuntu bugs on Launchpad.

    prefix -- tracker prefix used in the file names, e.g. "lp"

    Walks the bug tasks of a fixed list of Ubuntu source packages and saves
    every attachment whose content type is a key of the module-level
    mimetypes table as <suffix>/<prefix><id>-<n>.<suffix>, where suffix is
    the extension from that table and n is the 1-based position of the
    attachment within the bug.  Once a file of a bug is found on disk the
    remaining attachments of that bug are skipped.

    Raises ImportError when launchpadlib is not installed.
    """
    #launchpadlib python module is required to download launchpad attachments
    from launchpadlib.launchpad import Launchpad

    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
    ubuntu = launchpad.distributions["ubuntu"]

    #since searching bugs having attachments with specific mimetypes is not available in launchpad API
    #we're iterating over all bugs of the most interesting source packages
    for pkg in ["libreoffice", "openoffice.org", "abiword", "gnumeric", "koffice", "calligra"]:
        srcpkg = ubuntu.getSourcePackage(name=pkg)
        pkgbugs = srcpkg.searchTasks(status=[
            "New", "Fix Committed", "Invalid", "Won't Fix", "Confirmed",
            "Triaged", "In Progress", "Incomplete",
            "Incomplete (with response)", "Incomplete (without response)",
            "Fix Released", "Opinion", "Expired"])

        for bugtask in pkgbugs:
            bug = bugtask.bug
            bugid = str(bug.id)
            print("parsing " + bugid + " status: " + bugtask.status + " title: " + bug.title[:50])
            attachmentid = 0
            for attachment in bug.attachments:
                attachmentid += 1
                handle = attachment.data.open()
                if handle.content_type not in mimetypes:
                    #print "skipping"
                    continue

                suffix = mimetypes[handle.content_type]
                if not os.path.isdir(suffix):
                    try:
                        os.mkdir(suffix)
                    except OSError:
                        # if the directory really cannot be created, the
                        # open() below reports the problem
                        pass

                download = suffix + '/' + prefix + bugid + '-' + str(attachmentid) + '.' + suffix

                if os.path.isfile(download):
                    print("assuming " + bugid + " is up to date")
                    break

                print('mimetype is ' + handle.content_type + ' downloading as ' + download)

                # The attachments are binary documents: text mode would
                # reject bytes on Python 3 and translate line endings on
                # Windows.
                with open(download, "wb") as f:
                    f.write(handle.read())

# Endpoints of the bug trackers known to this script.  The buglist.cgi URLs
# are meant for get_through_rss_query().  Red Hat is queried through XML-RPC
# instead, which needs the RPC endpoint (redhatrpc) plus the show_bug.cgi URL
# prefix (redhatbug) to which a bug id is appended.
freedesktop = 'http://bugs.libreoffice.org/buglist.cgi' # downloads from here get the "fdo" prefix
abisource = 'http://bugzilla.abisource.com/buglist.cgi' # added for abiword
gnome = 'http://bugzilla.gnome.org/buglist.cgi' # added for gnumeric
kde = 'http://bugs.kde.org/buglist.cgi' # added for koffice/calligra
openoffice = 'https://issues.apache.org/ooo/buglist.cgi'
redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
mozilla = 'https://bugzilla.mozilla.org/buglist.cgi' # defined, but not passed to any of the queries at the end of this file

# Novell Bugzilla requires users to log in to get details of a bug such as
# the attachment bodies.  As a dirty workaround, we parse the comments for
# "Created an attachment (id=xxxxxx)" and download the attachments manually
# from the novellattach URL, to which the attachment id is appended.
# python-bugzilla claims to support the Novell Bugzilla login, but it is not
# working right now and the Novell Bugzilla login system is a nightmare.
novellattach = 'https://bugzilla.novell.com/attachment.cgi?id='
novell = 'https://bugzilla.novell.com/buglist.cgi'

# Maps each MIME type that is worth collecting to the file extension used for
# its downloads.  The extension doubles as the name of the output directory,
# so MIME types that share an extension end up in the same directory.
mimetypes = {
# ODF
    'application/vnd.oasis.opendocument.base': 'odb',
    'application/vnd.oasis.opendocument.database': 'odb',
    'application/vnd.oasis.opendocument.chart': 'odc',
    'application/vnd.oasis.opendocument.chart-template': 'otc',
    'application/vnd.oasis.opendocument.formula': 'odf',
    'application/vnd.oasis.opendocument.formula-template': 'otf',
    'application/vnd.oasis.opendocument.graphics': 'odg',
    'application/vnd.oasis.opendocument.graphics-template': 'otg',
    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
    'application/vnd.oasis.opendocument.presentation': 'odp',
    'application/vnd.oasis.opendocument.presentation-template': 'otp',
    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
    'application/vnd.oasis.opendocument.text': 'odt',
    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
    'application/vnd.oasis.opendocument.text-master': 'odm',
    'application/vnd.oasis.opendocument.text-template': 'ott',
    'application/vnd.oasis.opendocument.text-web': 'oth',
# OOo XML
    'application/vnd.sun.xml.base': 'odb',
    'application/vnd.sun.xml.calc': 'sxc',
    'application/vnd.sun.xml.calc.template': 'stc',
    'application/vnd.sun.xml.chart': 'sxs',
    'application/vnd.sun.xml.draw': 'sxd',
    'application/vnd.sun.xml.draw.template': 'std',
    'application/vnd.sun.xml.impress': 'sxi',
    'application/vnd.sun.xml.impress.template': 'sti',
    'application/vnd.sun.xml.math': 'sxm',
    'application/vnd.sun.xml.writer': 'sxw',
    'application/vnd.sun.xml.writer.global': 'sxg',
    'application/vnd.sun.xml.writer.template': 'stw',
    'application/vnd.sun.xml.writer.web': 'stw',
# MSO
    'application/rtf': 'rtf',
    'text/rtf': 'rtf',
    'application/msword': 'doc',
    'application/vnd.ms-powerpoint': 'ppt',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.ms-excel.sheet.binary.macroEnabled.12': 'xlsb',
    'application/vnd.ms-excel.sheet.macroEnabled.12': 'xlsm',
    'application/vnd.ms-excel.template.macroEnabled.12': 'xltm',
    'application/vnd.ms-powerpoint.presentation.macroEnabled.12': 'pptm',
    'application/vnd.ms-powerpoint.slide.macroEnabled.12': 'sldm',
    'application/vnd.ms-powerpoint.slideshow.macroEnabled.12': 'ppsm',
    'application/vnd.ms-powerpoint.template.macroEnabled.12': 'potm',
    'application/vnd.ms-word.document.macroEnabled.12': 'docm',
    'application/vnd.ms-word.template.macroEnabled.12': 'dotm',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    # NOTE(review): 'ppotx' looks like a typo for 'potx'.  Fixing it renames
    # the output directory and the downloaded files, so confirm that nothing
    # depends on the current name before changing it.
    'application/vnd.openxmlformats-officedocument.presentationml.template': 'ppotx',
    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
    'application/vnd.openxmlformats-officedocument.presentationml.slide': 'sldx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
    'application/vnd.visio': 'vsd',
    'application/vnd.visio.xml': 'vdx',
    'application/x-mspublisher': 'pub',
# W3C
    'application/xhtml+xml': 'xhtml',
    'application/mathml+xml': 'mml',
    'text/html': 'html',
    'application/docbook+xml': 'docbook',
# misc
    'text/csv': 'csv',
    'text/spreadsheet': 'slk',
    'application/vnd.corel-draw': 'cdr',
    'application/vnd.lotus-wordpro': 'lwp',
    'application/vnd.lotus-1-2-3': 'wks',
    'application/vnd.wordperfect': 'wpd',
    'application/wordperfect5.1': 'wpd',
    'application/vnd.ms-works': 'wps',
    'application/clarisworks' : 'cwk',
    'application/macwriteii' : 'mw',
    'application/vnd.apple.keynote': 'key',
    'application/x-iwork-keynote-sffkey': 'key',
    'application/x-hwp': 'hwp',
    'application/x-aportisdoc': 'pdb',
    'application/prs.plucker' : 'pdb_plucker',
    'application/vnd.palm' : 'pdb_palm',
    'application/x-sony-bbeb' : 'lrf',
    'application/x-pocket-word': 'psw',
    'application/x-t602': '602',
    'application/x-fictionbook+xml': 'fb2',
# binfilter
    'application/x-starcalc': 'sdc',
    'application/vnd.stardivision.calc': 'sdc5',
    'application/x-starchart': 'sds',
    'application/vnd.stardivision.chart': 'sds5',
    'application/x-stardraw': 'sdd_d',
    'application/vnd.stardivision.draw': 'sda5',
    'application/x-starimpress': 'sdd_i',
    'application/vnd.stardivision.impress': 'sdd5',
    'application/vnd.stardivision.impress-packed': 'sdp5',
    'application/x-starmath': 'smf',
    'application/vnd.stardivision.math': 'smf5',
    'application/x-starwriter': 'sdw',
    'application/vnd.stardivision.writer': 'sdw5',
    'application/vnd.stardivision.writer-global': 'sgl5',
# relatively uncommon image mimetypes
    'image/x-freehand': 'fh',
    'image/cgm': 'cgm',
    'image/tiff': 'tiff',
    'image/vnd.dxf': 'dxf',
    'image/x-emf': 'emf',
    'image/x-targa': 'tga',
    'image/x-sgf': 'sgf',
    'image/x-svm': 'svm',
    'image/x-wmf': 'wmf',
    'image/x-pict': 'pict',
    'image/x-cmx': 'cmx',
    'image/svg+xml': 'svg',
    'image/x-MS-bmp': 'bmp',
    'image/x-wpg': 'wpg',
    'image/x-eps': 'eps',
    'image/x-met': 'met',
    'image/x-portable-bitmap': 'pbm',
    'image/x-photo-cd': 'pcd',
    'image/x-pcx': 'pcx',
    'image/x-portable-graymap': 'pgm',
    'image/x-portable-pixmap': 'ppm',
    'image/vnd.adobe.photoshop': 'psd',
    'image/x-cmu-raster': 'ras',
    'image/x-xbitmap': 'xbm',
    'image/x-xpixmap': 'xpm',
}

# Very common MIME types in the same "MIME type -> file extension" form as
# the mimetypes table.  Disabled for now: nothing in this file iterates over
# this table, because it would download gigs of pngs/jpegs...
common_noncore_mimetypes = {
# graphics
    'image/gif': 'gif',
    'image/jpeg': 'jpeg',
    'image/png': 'png',
# pdf, etc.
    'application/pdf': 'pdf',
}

# Walk the trackers one after the other; each tracker is asked for every
# mimetype of the table before the next tracker is contacted.
for mimetype, extension in mimetypes.items():
    get_through_rss_query(freedesktop, mimetype, "fdo", extension)

# Red Hat is the only tracker that is queried through XML-RPC
for mimetype, extension in mimetypes.items():
    get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)

# the remaining Bugzilla instances, all of them queried through RSS
for queryurl, prefix in (
        (openoffice, "ooo"),
        (novell, "novell"),
        (gnome, "gnome"),
        (abisource, "abi"),
        (kde, "kde")):
    for mimetype, extension in mimetypes.items():
        get_through_rss_query(queryurl, mimetype, prefix, extension)

# Launchpad needs the optional launchpadlib module
try:
    get_launchpad_bugs("lp")
except ImportError:
    print("launchpadlib unavailable, skipping Ubuntu tracker")

# vim:set shiftwidth=4 softtabstop=4 expandtab:
