# Source code for esg_search

import os
import zipfile
import logging
import shutil
import filecmp
import errno
import ConfigParser
import tarfile
import yaml
import requests
from clint.textui import progress
from git import Repo
from esgf_utilities import esg_functions
from esgf_utilities import pybash
from esgf_utilities import esg_property_manager
from esgf_utilities import esg_version_manager
from base import esg_tomcat_manager
import solr

logger = logging.getLogger("esgf_logger" +"."+ __name__)

# Load the global ESGF configuration once at import time.
# yaml.safe_load restricts the parse to plain data (the config file is simple
# key/value YAML); bare yaml.load can construct arbitrary Python objects.
with open(os.path.join(os.path.dirname(__file__), os.pardir, 'esg_config.yaml'), 'r') as config_file:
    config = yaml.safe_load(config_file)

def write_search_service_install_log(search_web_service_dir, esg_search_version):
    """Record the esg-search webapp in the install manifest and publish its
    index/publishing service properties to the node property store."""
    esg_functions.write_to_install_manifest("webapp:esg-search", search_web_service_dir, esg_search_version)

    # (property name, value) pairs, written in a fixed order.
    properties = [
        ("index_service_endpoint", "http://{}/esg-search/search".format(esg_functions.get_esgf_host())),
        ("index_service_app_home", search_web_service_dir),
        ("index_master_port", "8984"),
        ("index_slave_port", "8983"),
        ("index_timeout_connection", "2000"),
        ("index_timeout_read_datasets", "10000"),
        ("index_timeout_read_files", "60000"),
        ("publishing_service_endpoint", "https://{}/esg-search/remote/secure/client-cert/hessian/publishingService".format(esg_functions.get_esgf_host())),
        ("publishing_service_app_home", search_web_service_dir),
        ("esgf_publisher_resources_home", config["esg_config_dir"]),
        ("esgf_publisher_resources_repo", "https://github.com/ESGF/esgf-publisher-resources.git"),
    ]
    for property_name, property_value in properties:
        esg_property_manager.set_property(property_name, property_value)

def setup_publisher_resources():
    """Clone the ESGF publisher-resources repository into the config
    directory, unless a checkout is already present (idempotent)."""
    repo_url = "https://github.com/ESGF/esgf-publisher-resources.git"
    checkout_dir = os.path.join(config["esg_config_dir"], "esgf-publisher-resources")
    if os.path.exists(checkout_dir):
        return
    Repo.clone_from(repo_url, checkout_dir)


def search_startup_hook():
    """Startup hook for esg-search: make sure the auto-fetch flag for
    publisher resources exists, fetching the resources the first time."""
    print("esg-search startup hook")
    try:
        # Probe for the property; its value is not used here.
        esg_property_manager.get_property("index_auto_fetch_pub_resources")
    except ConfigParser.NoOptionError:
        # First run: default the flag to "true" and pull the resources now.
        esg_property_manager.set_property("index_auto_fetch_pub_resources", "true")
        setup_publisher_resources()


#--------------------
# Lifecycle functions
#--------------------
def start_search_services():
    """Seed facets.properties from the deployed webapp when the config dir
    lacks one, then start every configured Solr shard."""
    print("Starting search services...")
    facets_in_config = "{}/facets.properties".format(config["esg_config_dir"])
    facets_in_webapp = "{}/webapps/esg-search/WEB-INF/classes/esg/search/config/facets.properties".format(config["tomcat_install_dir"])
    # Only copy the webapp's default facets file if the config dir has none.
    if os.path.exists(facets_in_webapp) and not os.path.exists(facets_in_config):
        shutil.copyfile(facets_in_webapp, facets_in_config)

    for shard_type, shard_port in solr.read_shard_config():
        solr.start_solr(shard_type, shard_port)

def stop_search_services():
    """Shut down the Solr instances backing the search services."""
    print("Stopping search services...")
    solr.stop_solr()

#---------------------------------------------------------
# Solr Search Service Setup and Configuration
#---------------------------------------------------------

def check_for_existing_solr_install():
    print "Checking for search service {}".format(config["esg_search_version"])
    try:
        installed_esg_search_version = esg_version_manager.get_current_webapp_version("esg-search")
    except IOError,error:
        if error.errno == errno.ENOENT:
            logger.info("No existing version of esg-search found.")

    if os.path.isdir("/usr/local/tomcat/webapps/esg-search"):
        try:
            esg_search_install = esg_property_manager.get_property("update.esg.search")
        except ConfigParser.NoOptionError:
            esg_search_install = raw_input("Existing esg-search installation found.  Do you want to continue with the esg-search installation [y/N]: ") or "no"
        if esg_search_install.lower() in ["no", "n"]:
            print "Using existing esg-search installation. Skipping setup."
            return True
        else:
            try:
                backup_esg_search = esg_property_manager.get_property("backup.esg.search")
            except ConfigParser.NoOptionError:
                backup_esg_search = raw_input("Do you want to make a back up of the existing distribution?? [Y/n] ") or "y"
            if backup_esg_search.lower in ["y", "yes"]:
                esg_functions.backup("/usr/local/tomcat/webapps/esg-search")

def download_esg_search(esg_search_version, esg_dist_url):
    """Download the esg-search distribution tarball into workdir and unpack it.

    Raises tarfile.TarError (from extract_tarball) when the downloaded
    archive is corrupt or unreadable.
    """
    pybash.mkdir_p(config["workdir"])
    with pybash.pushd(config["workdir"]):
        search_service_dist_url = "{}/esg-search/esg-search-{}.tar.gz".format(esg_dist_url, esg_search_version)
        search_service_dist_file = "esg-search-{}.tar.gz".format(esg_search_version)
        esg_functions.download_update(search_service_dist_file, search_service_dist_url)

        # Extract in workdir to get the esg-search.war file.  The original
        # wrapped this in `except tarfile.TarError: raise`, a no-op handler;
        # the exception propagates identically without it.
        esg_functions.extract_tarball(search_service_dist_file)

def copy_esg_search_war_to_tomcat(esg_search_version, search_web_service_dir, search_service_war_file):
    """Stop Tomcat and stage the esg-search war into the webapp directory."""
    dist_dir = "esg-search-{}".format(esg_search_version)
    with pybash.pushd(os.path.join(config["workdir"], dist_dir)):
        # Tomcat must be down before the webapp directory is (re)populated.
        esg_tomcat_manager.stop_tomcat()

        pybash.mkdir_p(search_web_service_dir)
        destination = os.path.join(search_web_service_dir, search_service_war_file)
        shutil.copyfile(search_service_war_file, destination)


def extract_esg_search_war(search_web_service_dir, search_service_war_file):
    """Unpack the esg-search war inside the webapp directory, then remove it.

    FIX: the original opened the hard-coded path
    "/usr/local/tomcat/webapps/esg-search/esg-search.war" even though both
    the directory and the war name are parameters (and the cwd is already
    search_web_service_dir via pushd).  Use the parameters so the function
    works for any target directory; behavior is unchanged for the one
    caller, which passes exactly those values.
    """
    with pybash.pushd(search_web_service_dir):
        print("Expanding war {war} in {pwd}".format(war=search_service_war_file, pwd=os.getcwd()))
        with zipfile.ZipFile(search_service_war_file, 'r') as war_archive:
            war_archive.extractall()
        os.remove(search_service_war_file)

def update_solr_cores_schema(search_web_service_dir):
    """Propagate the webapp's Solr schema.xml to each existing local core,
    skipping cores whose schema is already identical."""
    print("Checking for Solr schema update")
    new_schema = "{}/WEB-INF/solr-home/mycore/conf/schema.xml".format(search_web_service_dir)

    # Shard name -> the Solr cores hosted on that shard.
    shard_cores = {
        "master-8984": ["datasets", "files", "aggregations"],
        "localhost-8982": ["datasets", "files", "aggregations"],
    }

    for shard_name, core_names in shard_cores.items():
        for core_name in core_names:
            current_schema = "/usr/local/solr-home/{shard}/{core}/conf/schema.xml".format(shard=shard_name, core=core_name)
            if not os.path.exists(current_schema):
                continue
            if filecmp.cmp(current_schema, new_schema):
                print("Files: {old_solr_xml}, {new_solr_xml} are identical, not upgrading".format(old_solr_xml=current_schema, new_solr_xml=new_schema))
            else:
                print("Copying {new_solr_xml} -> {old_solr_xml}".format(old_solr_xml=current_schema, new_solr_xml=new_schema))
                shutil.copyfile(new_schema, current_schema)

def setup_search_service():
    '''
    Install The Search Service...
    - Takes boolean arg:  0 = setup / install mode (default)
                          1 = updated mode
    In setup mode it is an idempotent install (default)
    In update mode it will always pull down latest after archiving old
    '''
    # FIX: stripped the Sphinx "[docs]" link residue that made this line a
    # syntax error, and restored conventional formatting.
    if check_for_existing_solr_install():
        return

    print("*******************************")
    print("Setting up The ESGF Search Service...")
    print("*******************************")

    search_web_service_dir = "/usr/local/tomcat/webapps/esg-search"
    search_service_war_file = "esg-search.war"
    esg_dist_url = esg_property_manager.get_property("esg.dist.url")

    download_esg_search(config["esg_search_version"], esg_dist_url)
    copy_esg_search_war_to_tomcat(config["esg_search_version"], search_web_service_dir, search_service_war_file)
    extract_esg_search_war(search_web_service_dir, search_service_war_file)
    update_solr_cores_schema(search_web_service_dir)

    # Hand the deployed webapp over to the tomcat user/group.
    TOMCAT_USER_ID = esg_functions.get_user_id("tomcat")
    TOMCAT_GROUP_ID = esg_functions.get_group_id("tomcat")
    esg_functions.change_ownership_recursive(search_web_service_dir, TOMCAT_USER_ID, TOMCAT_GROUP_ID)

    write_search_service_install_log(search_web_service_dir, config["esg_search_version"])
    write_search_rss_properties()
    setup_publisher_resources()

    # Get utility scripts for crawling thredds sites and index maintenance.
    fetch_crawl_launcher()
    fetch_index_optimization_launcher()
    fetch_static_shards_file()
def write_search_rss_properties():
    """Publish the RSS feed properties (title/description/link) for this node."""
    node_short_name = esg_property_manager.get_property("node.short.name")
    esgf_feed_datasets_title = node_short_name + " RSS"
    esgf_feed_datasets_desc = "Datasets Accessible from node: {}".format(node_short_name)
    esgf_feed_datasets_link = "http://{}/thredds/catalog.html".format(esg_functions.get_esgf_host())

    esg_property_manager.set_property("esgf_feed_datasets_title", esgf_feed_datasets_title)
    esg_property_manager.set_property("esgf_feed_datasets_desc", esgf_feed_datasets_desc)
    esg_property_manager.set_property("esgf_feed_datasets_link", esgf_feed_datasets_link)


def fetch_crawl_launcher():
    """Install the esgf-crawl helper script (thredds crawler) into scripts_dir."""
    esgf_crawl_launcher = os.path.join(config["scripts_dir"], "esgf-crawl")
    current_directory = os.path.join(os.path.dirname(__file__))
    shutil.copyfile(os.path.join(current_directory, "search_scripts/esgf-crawl"), esgf_crawl_launcher)
    # 0o755: rwxr-xr-x (0o prefix works on Python 2.6+ and 3; bare 0755 is
    # Python-2-only syntax)
    os.chmod(esgf_crawl_launcher, 0o755)


def fetch_index_optimization_launcher():
    """Install the esgf-optimize-index helper script into scripts_dir."""
    esgf_index_optimization_launcher = os.path.join(config["scripts_dir"], "esgf-optimize-index")
    current_directory = os.path.join(os.path.dirname(__file__))
    shutil.copyfile(os.path.join(current_directory, "search_scripts/esgf-optimize-index"), esgf_index_optimization_launcher)
    os.chmod(esgf_index_optimization_launcher, 0o755)


def fetch_static_shards_file():
    """Download the static shards list into the ESGF config directory."""
    with pybash.pushd(config["esg_config_dir"]):
        static_file = "esgf_shards_static.xml"
        url = "{}/xml/{}".format(config["esgf_config_repo"], static_file)
        esg_functions.download_update(static_file, url)


def download_esg_search_war(esg_search_war_url):
    """Stream-download the esg-search war into the Tomcat webapps directory."""
    print("\n*******************************")
    print("Downloading ESG Search war file")
    print("******************************* \n")
    r = requests.get(esg_search_war_url, stream=True)
    path = '/usr/local/tomcat/webapps/esg-search/esg-search.war'
    with open(path, 'wb') as f:
        # NOTE(review): assumes the server always sends Content-Length;
        # int(None) would raise TypeError otherwise -- confirm before hardening.
        total_length = int(r.headers.get('content-length'))
        for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1):
            if chunk:
                f.write(chunk)
                f.flush()


def main():
    print("*******************************")
    print("Setting up The ESGF Search Sub-Project...")
    print("*******************************")
    setup_search_service()


if __name__ == '__main__':
    main()