###############################################################################
# Local Security Check Automation Framework
#
# Authors:
# Veerendra GG <veerendragg@secpod.com>
#
# Revision 1.0
# Date: 2008/12/26
#
# Copyright:
# Copyright (c) 2009 SecPod , http://www.secpod.org
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
# (or any later version), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
###############################################################################

import re
import os
import sys
import string

from common import utils


## SUSE products this parser generates checks for.
## Key   : product name as it is looked up by Parser.getAffectedProduct().
## Value : release string, as used in gather-package-list.nasl
##         to set "ssh/login/release".
## Products that are commented out below are recognised names that are
## currently not supported; getAffectedProduct() ignores anything that is
## not a key of this dictionary.
os_map = {

    'openSUSE 10.1' : 'openSUSE10.1',
    'openSUSE 10.2' : 'openSUSE10.2',
    'openSUSE 10.3' : 'openSUSE10.3',
    'openSUSE 11.0' : 'openSUSE11.0',
    'openSUSE 11.1' : 'openSUSE11.1',
    'openSUSE 11.2' : 'openSUSE11.2',

    'SUSE SLES 9' : 'SLES9.0',   # SUSE Linux Enterprise Server 9
    'SUSE SLES 10' : 'SLES10.0', # SUSE Linux Enterprise Server 10
    'SLES 11':'SLES11.0',        # SUSE Linux Enterprise Server 11


#    'SLE 11':'SLE11.0',         # New SUSE Linux Enterprise

#    'SUSE SLED 10': 'SLED10',
#    'SUSE LINUX 10.1' : 'SL10.1',
#    'Novell Linux POS 9' : 'NLPOS9',
#    'SLE SDK 10 SP1' : 'SLESDK10SP1',
#    'SLE SDK 10 SP2' : 'SLESDK10SP2',
#    'SLE SDK 10 SP3' : 'SLESDK10SP3', # New

#    'SUSE SLE 10 DEBUGINFO':'SLEDe10',

#    'Novell Linux Desktop 9' : 'NLDk9',
#    'Novell Linux Desktop 9 SDK' : 'NLDk9SDK',

#    'Open Enterprise Server' : 'OES',
#    'SuSE Linux Enterprise Server 8' : 'SLESSr8',
#    'SUSE Linux Enterprise Server 10 SP1' : 'LES10SP1',
#    'SUSE Linux Enterprise Server 10 SP2' : 'LES10SP2',
#    'SUSE Linux Enterprise Server 10 SP3' : 'LES10SP3', # New
#    'SUSE Linux Enterprise Desktop 10 SP1' : 'SLESDk10SP1',
#    'SUSE Linux Enterprise Desktop 10 SP2' : 'SLESDk10SP2',
#    'SUSE Linux Enterprise Desktop 10 SP3' : 'SLESDk10SP3', # New
#    'SUSE Linux Enterprise Server RT Solution 10' : 'SLESRTSol10',

#    'SUSE Linux Enterprise 10 SP2 DEBUGINFO' : 'SLEDe10SP2',
#    'SUSE Linux Enterprise 10 SP3 DEBUGINFO' : 'SLEDe10SP3', # New


}

## RPM file-name suffixes; Parser._getRPMDict() hands this list to
## utils.stripIt() together with the collected RPM names.
strip_val = ['.i586.rpm', '.x86_64.rpm', '.noarch.rpm']

## Prefix that Parser._cacheRPM() puts in front of the relative
## "/Download?buildid=..." links it finds, to form complete URLs.
append_url = 'http://download.novell.com'

## Links containing one of these fragments are not advisories;
## Parser.fetchHTML() skips them.
skip_list = ['_sr.htm', 'security_summary']


class Parser:
    """
    SUSE security advisory parser.

    parser() extracts the fields of one advisory page and stores them in
    the attributes listed below.
    """

    ## Parsed advisory fields with their initial values.
    ## parser() assigns fresh values to them; it never mutates the
    ## defaults declared here.
    AdvID = ''          # advisory ID string returned by getAdvID()
    Description = ''    # formatted text returned by getDescription()
    Packages = {}       # result of getRPM(): release -> RPM name list
    CVEs = ''           # string returned by getCVE()
    Name = ''           # Product + " " + AdvID
    Summary = ''        # same value as Product
    Platforms = ''      # affected product names, finally joined with ", "
    Product = []        # becomes the string returned by getAffectedPackage()
    Html_content = ''   # advisory text with '\r\n' replaced by '\n'
    XREF = []           # [ID prefix, 'second-third'] parts of AdvID
    FileName = ''       # 'suse_' + second and third parts of AdvID


    def fetchHTML(self, year, debug=0):
        """
        Fetch the advisory index page and cache the advisories of one year.

        The index is read from self.main_url (with surrounding '/' removed
        and '.html' appended); every advisory link on a line that also
        mentions `year` is downloaded into self.html_cache unless a file
        with that name already exists there. self.main_url and
        self.html_cache are not defined in this class and must have been
        set on the instance beforehand.

        year  : string, concatenated into the search pattern
        debug : when true, progress messages are printed

        A failure while fetching a single advisory is reported and ignored;
        any other exception ends the program through sys.exit().
        """

        try:
            index_url = self.main_url.strip('/') + '.html'
            index_path = os.path.join(self.html_cache,
                                      os.path.basename(index_url.strip()))
            index_file = utils.fetchFiles(index_url, index_path, debug)

            year_pattern = '(linux/security/.*.html?).*' + year + '.*'
            advisories = re.findall(year_pattern,
                                    utils.stringFormat(index_file))
            if not advisories:
                print "ERROR: No Advisories are available for specified year"+\
                                                                    " : ", year

            for adv in advisories:
                ## Summary reports and similar pages are not advisories
                if [tag for tag in skip_list if tag in adv]:
                    if debug:
                        print "Skipped SUSE Security Advisories : ", adv
                    continue

                adv_name = os.path.basename(adv)
                adv_url = self.main_url + adv_name
                cache_path = os.path.join(self.html_cache,
                                          os.path.basename(adv_url.strip()))

                ## Already cached, nothing to download
                if os.path.isfile(cache_path):
                    continue

                if debug:
                    print "Fetching SUSE Advisory..." + adv_name
                try:
                    utils.fetchFiles(adv_url, cache_path, debug)
                except Exception, msg:
                    print 'ERROR: Error fething the url %s' % msg

        except Exception, msg:
            print "Exception in : suse -> Parser(Class) -> fetchHTML method()"
            sys.exit(msg)


    def _findAll(self, regex):
        """
        Returns Matched data
        """
        return regex.findall(self.Html_content, re.IGNORECASE)


    def getCVE(self, debug=0):
        """
        Returns CVE list
        """
        if debug:
            print "Getting CVE List..."

        cve_regex = re.compile('CVE-[0-9]+-[0-9]+')
        can_regex = re.compile('CAN-[0-9]+-[0-9]+')

        cve_list = self._findAll(cve_regex)
        cve_list.extend(self._findAll(can_regex))

        cve_list = utils.removeDups(cve_list)

        if cve_list:
            cve_list = '", "'.join(cve_list)
        else:
            cve_list = ''

        return cve_list


    def getAdvID(self, debug=0):
        """
        Returns SUSE Security Advisory ID
        """

        if debug:
            print "Getting Advisory ID..."

        adv_id_regex =  re.compile('Announcement ID.*(SUSE.*)')
        adv_id = self._findAll(adv_id_regex)

        if not adv_id:
            return ''

        return adv_id[0].strip()


    def getAffectedPackage(self, debug=0):
        """
        Returns Affected Packages/RPM's
        """

        if debug:
            print "Getting Affected Packages/RPM List..."

        pkg_regex =  re.compile("  Package:(.*)", re.IGNORECASE)
        pkg = self._findAll(pkg_regex)

        if pkg:
            pkg = pkg[0].strip()
        else:
            pkg = ''

        return pkg


    def getImpact(self, debug=0):
        """
        Returns Vulnerability Impact
        """

        if debug:
            print "Getting Vulnerability Impact..."

        impact_regex =  re.compile("Vulnerability Type: (.*)", re.IGNORECASE)
        impact = self._findAll(impact_regex)

        if impact:
            impact = impact[0].strip()
        else:
            impact = ''

        return impact


    def getDescription(self, debug=0):
        """
        Returns Vulnerability Description
        """

        if debug:
            print "Getting Vulnerability Description..."

        desc_regex = re.compile\
        ('(?s)1\) Problem Description and Brief Discussion(.*)2\) Solution',
                                                              re.IGNORECASE)
        description = self._findAll(desc_regex)

        if description:
            tmp = description[0].strip()
            description = ''
            for desc in tmp.split('\n'):
                desc = desc.strip()

                if "href=" in desc:
                    desc = re.sub('\(?<a href.*">', "", desc)
                    desc = re.sub("</a>?\)?", "", desc.strip())

                ## If blank line encounters, don't add spaces
                if desc:
                    description = description + "\n  " + desc
                else:
                    description = description + "\n" + desc

            description = description.replace('"'," &qt ")

        if not description:
            description = ''

        return description


    def getAffectedProduct(self, debug=0):
        """
        Returns Affected Product/Platform
        """

        ## Get Affected Product/Platform
        prod_list =  re.findall("(?s)Affected Products:(.*) Vulnerability Type"
                                                           , self.Html_content)
        prod_list = ''.join(prod_list).split('\n')

        ## Remove Dulipcate Product/Platform, If any
        prod_list = utils.removeDups(prod_list)
        if debug:
            print "Affected Products are : (%s)" %(prod_list)

        ## Don't include Product/Platform, If not in "os_map" Dict
        ref_list = []
        for i in prod_list:
            if os_map.has_key(i):
                ref_list.append(i)
            elif debug and i:
                  print "UPDATE: Not Generating Code for (%s) OS" %(i)
                  print "If Needed to generate code, then "+ \
                        "add into dict variable os_map in parser"
        if ref_list and debug:
            print "Generating Code for (%s) Products " %(ref_list)

        return ref_list


    def _getRPMDict(self, prod_key_dict, debug=0):
        """
        Return Dictionary containg OS and the corresponding RPM list
        """

        if debug:
            print "Started Executing _getRPMDict() Function..."

        if not prod_key_dict:
            return ''

        final_dict = {}
        rpm_found = False

        ## Get Dictionary containing Link and RPMs
        link_rpm_dict = self._cacheRPM(prod_key_dict, debug)

        ## Set rpm_found to True, If it finds RPMs in any of the links
        for i in link_rpm_dict.keys():
            if link_rpm_dict[i]:
                rpm_found = True
                break

        ## return if it din't find any RPMs
        if not rpm_found:
            if debug:
                print "ERROR: RPMs Not Found..."
            return ''

        ## Map 2 Dictionaries,
        ## 1)OS and URL List   2)URL and RPMs List
        for os in prod_key_dict.keys():
            os_rpm_list = []
            for url in prod_key_dict[os]:
                if link_rpm_dict.has_key(url):
                    link_rpm_dict[url] = utils.stripIt(link_rpm_dict[url],\
                                                                 strip_val)
                    for each_rpm in link_rpm_dict[url]:
                        if each_rpm not in os_rpm_list:
                            if debug:
                                print "Adding (%s) into (%s)" %(each_rpm, os)
                            os_rpm_list.append(each_rpm)

            ## Mapping for OS in os_map
            if os_map.has_key(os):
                os = os_map[os]
                ## Adding rpm list into the OS
                final_dict[os] = os_rpm_list
                if debug:
                    print "(%s) OS has (%s) RPMS" %(os, os_rpm_list)

        if debug:
            print "Existing _getRPMDict() Function..."

        return final_dict


    def _cacheRPM(self, prod_key_dict, debug=0):
        """
        Collect the RPM names behind every link of every product.

        prod_key_dict : dictionary of product name -> list of links
        debug         : when true, progress messages are printed

        A link that contains '.rpm' is taken to be the RPM itself and
        contributes its base name. Any other link is fetched with
        utils.getHTMLCon(), its "/Download?buildid=..." sub links are
        fetched in turn, and the i586 / x86_64 / noarch RPM names found
        there are collected.

        Returns a dictionary of link -> list of RPM names. Links for which
        nothing was found have no entry.
        """

        visited_links_dict = {}
        if debug:
            print "Started Executing _cacheRPM() Function..."

        for each_os in prod_key_dict.keys():
            for main_url in prod_key_dict[each_os]:
                if debug:
                    print "Getting RPM list For %s:OS from %s link" \
                                                 %(each_os, main_url)
                ## Link already handled for another product, reuse the entry
                if main_url in visited_links_dict.keys():
                    if debug:
                        print "Cached Link : %s" %(main_url)
                    continue
                ## Get RPM list for OpenSUSE
                ## (the link points at the RPM file, no page to fetch)
                elif '.rpm' in main_url:
                    if debug:
                        print "Generating RPM list for openSUSE..."
                    rpm = os.path.basename(main_url)
                    if rpm:
                        visited_links_dict[main_url] = [rpm]
                    continue

                ## Get Links
                ## NOTE(review): the result of getHTMLCon() is used without
                ## a check here, unlike for the sub links below - confirm it
                ## always returns a string.
                data = utils.getHTMLCon(main_url)
                sub_links = re.findall('(/Download\?buildid=.*~)"', data, \
                                                          re.IGNORECASE)
                ## Remove Duplicate Links
                sub_links = utils.removeDups(sub_links)
                ## Nothing is stored for this link, so it is fetched again
                ## if another product refers to it
                if not sub_links:
                    continue

                ## Generate complete URL
                new_sub_links = []
                for i in sub_links:
                    new_sub_links.append(append_url + i)

                if debug:
                    print "Sub URL for URL : ", main_url
                    for i in new_sub_links:
                        print "Sub Links : ", i

                ## Visit each URL and Get the RPM list
                ## tmp_list : sub links visited so far for this main_url
                ## rpm      : RPM names gathered over all of its sub links
                tmp_list = []
                rpm = []
                for each_link in new_sub_links:
                    if debug:
                        print "Visiting and Getting rpm list from : ",each_link
                    if each_link in tmp_list:
                        continue
                    else:
                        tmp_list.append(each_link)
                    data = utils.getHTMLCon(each_link)
                    if not data:
                        if debug:
                            print "ERROR: Din't find contents for : ",each_link
                        continue
                    ## Get RPMs
                    rpms = re.findall('.*.rpm', data)
                    if debug:
                        if not rpms:
                            print "ERROR: Din't find rpms for : ", each_link
                    ## NOTE(review): only the first matching line (rpms[0])
                    ## is examined; presumably all RPM names sit in <td>
                    ## cells on one line - confirm against the page layout.
                    if rpms:
                       tmp = re.sub('</td>','', rpms[0])
                       for i in tmp.split('<td>'):
                           ## Ignore for other platforms,
                           ## other then i586 and x86_64
                           if 'i586.rpm' in i or "x86_64.rpm" in i or \
                                                     'noarch.rpm' in i:
                               rpm.append(i)
                               if debug:
                                   print "Found RPM : ", i
                           elif debug and '.rpm' in i:
                               print "Found rpm Other then i586 or x86_64 RPM"\
                                                        +" or noarch.rpm:", i
                    if debug:
                        print "For Sub URL: %s \nFound RPMS : (%s)" \
                                                    %(each_link, rpm)
                ## Only links that produced at least one RPM are recorded
                if rpm:
                    rpm = utils.removeDups(rpm)
                    ## Cache Visited links and RPMs
                    visited_links_dict[main_url] = rpm
                    if debug:
                        print "For Main URL : %s\n Found All RPM : (%s)" \
                                                          %(main_url, rpm)

        if debug:
            print "Existing _cacheRPM() Function..."
        return visited_links_dict


    def _getOsLinkDict(self, prod_list, debug=0):
        """
        Returns Dictionary containg OS and Links
        """

        os_link_dict = {}

        ## Get Specific portion from the Data
        pkgData = self.Html_content[self.Html_content.find('4) Package'+\
                                         ' Location and Checksums\n\n'):]
        if pkgData == -1 and debug:
            print "Did not get Package description"

        if not (prod_list and pkgData):
            return {}

        for i in prod_list:
            key_list = []
            ## Get links of Open SUSE
            if 'openSUSE ' in i:
                if debug:
                    print "Getting links for openSUSE..."
                key = re.findall('update/' + i.split()[1] + \
                                 '/rpm/i586.*i586.rpm' ,self.Html_content)
                if key:
                    key_list.extend(key)
                key = re.findall('update/' + i.split()[1] + \
                                 '/rpm/x86_64.*x86_64.rpm' ,self.Html_content)
                if key:
                    key_list.extend(key)

                if key_list:
                    os_link_dict[i] = key_list
                continue

            ## Get links for other platforms
            searchKey = re.findall(r'(?s)'+ i + '(.*)a>', pkgData, \
                                                      re.IGNORECASE)
            searchKey = ''.join(searchKey)
            if not searchKey:
                continue
            lines = searchKey.replace('\n', '\r\n')
            lines = lines.split('\r')

            for j in lines:
                j = j.strip(' ')
                if j == '\n':
                    break

                if not 'href=' in j:
                    continue

                key = re.findall('<a href="(.*)">', j)
                key = ''.join(key)
                key = re.sub('&amp;', '&', key)
                if key:
                    if key not in key_list:
                        key_list.append(key)
                        if debug:
                            print "Getting (%s) link for (%s) OS" %(key, i)

            ## Create Dictionary containing OS and corresponding links
            if key_list:
                os_link_dict[i] = key_list
                if debug:
                    print "(%s) OS has " %(i)
                    for l in key_list:
                        print "%s Links" %(l)
        if debug:
            print "Final OS Links Dict", os_link_dict

        return os_link_dict


    def getRPM(self, prod_list,  debug=0):
        """
        Returns Affected Package Dictionary
        """

        if debug:
            print "Getting RPM List..."

        ## Get Final Dictionary containing OS and RPM list
        os_link_dict = self._getOsLinkDict(prod_list, debug)
        package_dict = self._getRPMDict(os_link_dict, debug)
        return package_dict


    def formatReference(self, main_url, file_name):
        """
        Build the reference URL of an advisory.

        Returns main_url and file_name joined by exactly one '/' when
        main_url does not already end with one.
        """
        if main_url.endswith('/'):
            return main_url + file_name

        return main_url + '/' + file_name


    def parser(self, html_content, debug=0):
        """
        Main parser function, builds the parser object
        by invoking parse functions
        """

        try:
            if debug:
                print "SUSE Parser Initiated..."

            self.Html_content = html_content.replace('\r\n', '\n')
            if (string.find(self.Html_content,'Solved') > -1 ):
                if debug:
                    print "Skipping Advisory..., it does not have Advisory ID"
                return False

            self.CVEs = self.getCVE(debug)

            self.Platforms = self.getAffectedProduct(debug)
            if not self.Platforms or self.Platforms == []:
                if debug:
                    print "ERROR: Required Products not found..."
                return False

            self.Packages = self.getRPM(self.Platforms, debug)
            if not self.Packages or self.Packages == '':
                if debug:
                    print "ERROR: Required Packages not found..."
                return False

            self.Impact = self.getImpact(debug)

            self.Description = self.getDescription(debug)
            if not self.Description or self.Description == '':
                if debug:
                    print "ERROR: Description not found..."
                return False

            self.AdvID = self.getAdvID(debug)
            if not self.AdvID or self.AdvID == '':
                if debug:
                    print "ERROR: Advisory ID not found..."
                return False

            self.Product = self.getAffectedPackage(debug)
            if not self.Product or self.Product == '':
                if debug:
                    print "ERROR: Required Products not found..."
                return False

            self.Platforms = ", ".join(self.Platforms)

            self.Summary = self.Product

            self.Name = self.Product + " " + self.AdvID

            ## Construct File Name
            (name, value1, value2) = self.AdvID.split(':')
            self.FileName = "_".join(['suse', value1 , value2])

            ## Set XREF
            self.XREF = [name, '-'.join([value1, value2])]

            if debug:
                print "All mandatory attributes are parsed: ", self.AdvID

            return True

        except Exception, msg:
            print 'Exception in Parser suse -> Parser -> parser() Method '
            sys.exit(msg)
