#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4
#
# Copyright (c) 2014, 2015, 2016 Samsung Electronics.Co.Ltd.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; version 2 of the License
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
import os
import re
import sys
import time
import requests
import logging
import subprocess
from bs4 import BeautifulSoup
from xml.etree.ElementTree import fromstring
#from gevent.monkey import patch_all; patch_all()
#from gevent.pool import Pool


# PROFILE packs four ';'-separated fields, in this order:
# report name, manifest index URL, git cache root, filename regex.
(name,
 manifest_url,
 git_cache_home,
 regex) = os.environ.get('PROFILE').split(';')

# Job workspace and Gerrit connection settings; each one is None when the
# corresponding environment variable is not set.
workspace = os.environ.get('WORKSPACE')
gerrit_username = os.environ.get('GERRIT_USERNAME')
gerrit_hostname = os.environ.get('GERRIT_HOSTNAME')
gerrit_sshport = os.environ.get('GERRIT_SSHPORT')


def init_logger():
    """Attach a console handler to the root logger and enable DEBUG output."""
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    root.addHandler(logging.StreamHandler())


def init_git_cache_home():
    """Make sure the git cache root ``git_cache_home`` exists.

    Missing parent directories are created as well.  A path that already
    exists is left untouched.

    Raises:
        OSError: if the directory is missing and cannot be created.
    """
    try:
        os.makedirs(git_cache_home)
    except OSError:
        # makedirs also fails when the path is already there (for instance
        # created by a concurrent job between two runs of this check);
        # only a genuine creation failure is re-raised.
        if not os.path.exists(git_cache_home):
            raise


def init():
    """Prepare the process for a run: logging first, then the git cache.

    Logging is configured before the cache directory is created so that an
    error raised while creating it can already be logged at DEBUG level.
    """
    init_logger()
    init_git_cache_home()


def clone_all_projects(projects):
    """Run ``clone_project`` on every entry of ``projects``, in order."""
    # Blocking, one project at a time.  A gevent variant is kept disabled:
    #   pool = Pool(8)
    #   pool.map(clone_project, projects)
    for entry in projects:
        clone_project(entry)


def clone_project(project):
    """Bring one project's cached clone up to date and check out its revision.

    The project is cloned from Gerrit over ssh into
    ``git_cache_home/<gerrit_path>`` when no cache directory exists yet;
    otherwise the existing clone is refreshed with ``git fetch origin``.
    The process working directory is not changed.

    Args:
        project: ``(gerrit_path, revision)`` tuple.  ``gerrit_path`` is used
            both as the path in the Gerrit URL and as the sub-directory below
            ``git_cache_home``; ``revision`` is handed to ``git checkout``.

    Raises:
        OSError: if ``git`` cannot be started, e.g. because it is not
            installed or the cache directory is missing after a failed clone.
    """
    gerrit_path, revision = project
    gerrit_url = 'ssh://{0}@{1}:{2}/{3}'.format(gerrit_username,
                                                gerrit_hostname,
                                                gerrit_sshport,
                                                gerrit_path)
    git_cache_dir = '{0}/{1}'.format(git_cache_home, gerrit_path)
    logging.debug(gerrit_path)

    # Each step is (argument list, working directory).  Argument lists keep
    # manifest/environment values away from a shell.
    if os.path.exists(git_cache_dir):
        # Cache hit: refresh from the remote the clone was created from.
        # Fetching from git_cache_dir itself would transfer nothing new.
        # Assumes the cached clone still has the 'origin' remote that
        # 'git clone' configured.
        steps = [(['git', 'fetch', 'origin'], git_cache_dir)]
    else:
        steps = [(['git', 'clone', gerrit_url, git_cache_dir], None)]
    steps.append((['git', 'checkout', revision], git_cache_dir))

    for cmd, cwd in steps:
        logging.debug(' '.join(cmd))
        status = subprocess.call(cmd, cwd=cwd)
        if status != 0:
            # Best effort, as before: report the failure and carry on.
            logging.warning('%s exited with status %s',
                            ' '.join(cmd[:2]), status)


def get_text_from_url(url, timeout=10, max_retry=10):
    """Return the body of ``url`` as text, retrying on transient failures.

    Args:
        url: address to GET.
        timeout: timeout value passed to ``requests.get``.
        max_retry: maximum number of attempts.

    Returns:
        The response text of the first attempt answered with HTTP 200.

    Raises:
        Exception: if no attempt was answered with HTTP 200, or if a request
            failed with anything other than a timeout or connection error.
    """
    failure = 'Can\'t access url : {0}'.format(url)

    for attempt in range(max_retry):
        if attempt:
            # Pause before every retry, whatever made the previous attempt
            # fail, so an unreachable server is not hit in a tight loop.
            time.sleep(0.5)
        try:
            response = requests.get(url, timeout=timeout)
        except (requests.exceptions.Timeout,
                requests.exceptions.ConnectionError) as e:
            # Transient network problem: try again.
            logging.debug(e)
            continue
        except Exception as e:
            # Anything else is not expected to go away by retrying.
            logging.debug(e)
            raise Exception(failure)

        if response.status_code == 200:
            return response.text
        logging.debug('HTTP %s from %s', response.status_code, url)

    raise Exception(failure)


def get_manifests(manifest_url):
    """Return the URLs of the XML files linked from a manifest index page.

    A trailing '/' is appended to ``manifest_url`` when it is missing.  Every
    ``<a>`` whose href ends in 'xml' is returned as ``manifest_url + href``.
    """
    base_url = manifest_url
    if not base_url.endswith('/'):
        base_url = base_url + '/'
    logging.debug(base_url)

    page = BeautifulSoup(get_text_from_url(base_url), 'html.parser')
    xml_links = page.findAll('a', attrs={'href': re.compile('xml$')})
    return ['{0}{1}'.format(base_url, link['href']) for link in xml_links]


def get_gerrit_prj_list(manifests):
    """Collect the unique (path, revision) pairs declared by the manifests.

    Args:
        manifests: iterable of manifest XML URLs.

    Returns:
        List of ``(path, revision)`` tuples without duplicates, in order of
        first appearance.  ``<project>`` elements directly below the root
        whose ``path`` or ``revision`` attribute is missing or empty are
        skipped.
    """
    result = []
    seen = set()
    for manifest in manifests:
        tree = fromstring(get_text_from_url(manifest))
        found = tree.findall('project')
        logging.debug('total found : {}'.format(len(found)))
        for project in found:
            # .get() so that an absent attribute skips the element instead
            # of raising KeyError.
            entry = (project.attrib.get('path'),
                     project.attrib.get('revision'))
            if all(entry) and entry not in seen:
                seen.add(entry)
                result.append(entry)

    logging.debug('total projects : {0}'.format(len(result)))
    return result


def find_all_incorrect_filenames(projects,
                                 regex=r'"(.*@.*|.*\.\.\..*)"',
                                 output_filename='incorrect_filenames.csv'):
    """Append the paths matching ``regex`` in every cached project to a CSV.

    ``find`` is run inside each project's directory below ``git_cache_home``
    and one ``gerrit_path,relative_path`` row is appended per matching path.
    The process working directory is not changed.

    Args:
        projects: iterable of ``(gerrit_path, revision)`` tuples; only
            ``gerrit_path`` is used.
        regex: POSIX extended regex for ``find -regex``.  It is interpolated
            into a shell command line as-is, so it must carry its own shell
            quoting (the default is wrapped in double quotes).
        output_filename: CSV file to append to.  A relative name is resolved
            against ``workspace``, or against the current directory when
            ``workspace`` is not set.

    Raises:
        subprocess.CalledProcessError: if ``find`` exits with a non-zero
            status.
        OSError: if a project's cache directory does not exist.
    """
    for gerrit_path, _revision in projects:
        git_cache_dir = '{0}/{1}'.format(git_cache_home, gerrit_path)
        logging.debug(git_cache_dir)

        # NOTE(review): regex reaches the shell unescaped; it comes from the
        # PROFILE environment variable and must stay operator-controlled.
        cmd = 'find . -regextype posix-extended -regex {0}'.format(regex)
        outs = subprocess.check_output(cmd, shell=True, cwd=git_cache_dir)
        if not isinstance(outs, str):
            # Python 3 returns bytes; undecodable names must not abort the
            # scan, so bad bytes are replaced.
            outs = outs.decode('utf-8', 'replace')

        # One path per line; splitting on lines (not on any whitespace)
        # keeps names containing spaces in one piece.  [2:] drops the
        # leading './' that find prints.
        matches = [line[2:] for line in outs.splitlines() if line]
        if not matches:
            continue

        logging.debug('write {0} for {1}'.format(output_filename,
                                                 gerrit_path))
        target = os.path.join(workspace or os.curdir, output_filename)
        with open(target, 'a') as f:
            f.writelines('{0},{1}\n'.format(gerrit_path, match)
                         for match in matches)


def main():
    """Run the whole check and print how long it took.

    Steps: initialise, list the manifest XML files, collect the
    (path, revision) pairs they declare, clone or refresh each project, then
    write the matching file names to '<name>.csv'.
    """
    init()

    started_at = time.time()

    # manifest xml urls -> project/commit list
    projects = get_gerrit_prj_list(get_manifests(manifest_url))

    # bring every project into the git cache
    clone_all_projects(projects)

    # report the incorrect file names
    report_name = '{}.csv'.format(name)
    find_all_incorrect_filenames(projects, regex, report_name)

    elapsed = time.time() - started_at
    print('Elapsed time : {0}'.format(elapsed))


# Script entry point: exit status is main()'s return value on success and 1
# on interruption or on any unhandled error.
if __name__ == '__main__':
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # str(KeyboardInterrupt()) is empty, so state what happened.
        logging.error('Interrupted by user')
        sys.exit(1)
    except Exception:
        # ERROR level plus traceback: a fatal failure must be visible even
        # if it happened before the DEBUG logger was set up.
        logging.exception('Unhandled error')
        sys.exit(1)
