#!/usr/bin/env python
"""
Fix the missing dataset parentage by dates specified.

1. Look for datasets which have parent datasets.
2. Check whether any of their blocks have missing parents.
3. Search for file parents by comparing lumis from the parent datasets on those blocks.
4. Insert the parentage to DBS

options

set the dbs instance (default is testbed)

-p --production: look for the DBS production instance,
-t --testbed: look for the testbed instance.

dataset look up options,

(you can search for the missing parentage by giving the start date and end date)
It will look at all the datasets created in the given date period and try to find the missing parentage for those datasets

-s --start: start date of dataset creation (day/month/year e.g. 1/1/2017)
-e --end: end date of dataset creation (day/month/year e.g. 1/2/2017)

or
-d --dataset: specific child dataset you need to fix.

insert options (when missing parentage is found you can specify to insert to DBS, default is --no-insert)

--insert: insert to the specified dbs instance (production or testbed)
--no-insert: do not insert the data - for testing purposes only

Usage:

Uses block parentage to fix the problem (faster: it checks block parentage first to find the missing parentage).
If some files have missing parents but other files in the same block have parents, this call doesn't fix those files' parentage.
In that case use the -f option (slow). NOTE: the argument parser in this script does not currently define a -f option.

$manage execute-agent fix-dbs-parentage -s 1/1/2017 -e 1/2/2017 -p --insert
$manage execute-agent fix-dbs-parentage -p --insert -d /SingleMuon/Run2016D-03Feb2017-v1/MINIAOD

"""

from builtins import input
import time
import datetime
import argparse
import logging
import sys

from WMCore.Services.DBS.DBS3Reader import DBS3Reader
from dbs.apis.dbsClient import DbsApi

def updateParentageInfo(datasetList, insertFlag=False, logger=None):
    """
    Run the DBS missing-parentage fix for every dataset in datasetList.

    When insertFlag is set, the user is asked for an interactive confirmation
    first and the process exits with status 1 unless the answer is 'y'.

    NOTE: this function relies on the module-level `dbsService` object that
    the script entry point creates before calling it.

    :param datasetList: iterable of dataset names, or of dicts carrying the
        dataset name under the 'dataset' key
    :param insertFlag: passed through to fixMissingParentageDatasets; when
        False only a message about running without insert is logged
    :param logger: logger used for the messages; defaults to the root logger
    :return: dict mapping each dataset name to the value returned by
        dbsService.fixMissingParentageDatasets for it (the caller reports
        these values as the blocks which failed to update)
    """
    # The default cannot be the module-level `logger`: that name is not bound
    # yet when this `def` statement is executed, so resolve it lazily instead.
    if logger is None:
        logger = logging.getLogger()

    if insertFlag:
        answer = input("fixing DBS parentage: Are you sure?('y'/'n') - needs ' single quote: ")
        # input() returns the raw text typed by the user, so accept the answer
        # both with and without the surrounding quotes the prompt asks for.
        answer = answer.strip().strip("'\"").lower()
        if answer != 'y':
            logger.info("Aborting the procedure, Nothing updated")
            sys.exit(1)
    else:
        logger.info("Only testing perfomance: No insert")

    missingParentsFailed = {}
    for dataset in datasetList:
        dsName = dataset['dataset'] if isinstance(dataset, dict) else dataset
        missingParentsFailed[dsName] = dbsService.fixMissingParentageDatasets(dsName, insertFlag=insertFlag)
    return missingParentsFailed


def getCreationTimeRange(startDate, endDate=None):
    """
    Convert DD/MM/YYYY date strings into a (start, end) pair of epoch timestamps.

    :param startDate: start date string in DD/MM/YYYY format
    :param endDate: end date string in DD/MM/YYYY format; when it is None or
        empty the start date is used for both ends of the range
    :return: tuple (start, end) of floats as returned by time.mktime, i.e. the
        dates are interpreted at midnight in the local time zone
    :raises ValueError: when a date string does not match DD/MM/YYYY
    """
    dateFormat = "%d/%m/%Y"
    if not endDate:
        endDate = startDate
    start = time.mktime(datetime.datetime.strptime(startDate, dateFormat).timetuple())
    end = time.mktime(datetime.datetime.strptime(endDate, dateFormat).timetuple())
    return start, end


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description="Fix DBS file parentage using run/lumi relations")
    # either a single dataset or a creation-date range has to be given
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-d', '--dataset', dest='dataset')
    group.add_argument('-s', '--start', help='start date for dataset search DD/MM/YYYY format')
    parser.add_argument('-e', '--end', help='end date for dataset search DD/MM/YYYY format')

    # DBS instance selection, testbed unless -p is given
    group2 = parser.add_mutually_exclusive_group()
    group2.add_argument('-p', '--production', dest='prod', action='store_true')
    group2.add_argument('-t','--testbed', dest='prod', action='store_false')
    parser.set_defaults(prod=False)

    # nothing is inserted unless --insert is given
    group3 = parser.add_mutually_exclusive_group()
    group3.add_argument('--insert', dest='insert', action='store_true')
    group3.add_argument('--no-insert', dest='insert', action='store_false')
    parser.set_defaults(insert=False)

    args = parser.parse_args()

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logger = logging.getLogger()

    if args.start:
        # -e is optional: fall back to the start date rather than handing
        # None to strptime, which raises TypeError
        start, end = getCreationTimeRange(args.start, args.end)

    if args.prod:
        dbsURL = 'https://cmsweb-prod.cern.ch/dbs/prod/global/DBSWriter'
        logger.info("Using production: %s", dbsURL)
    else:
        dbsURL = 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSWriter'
        logger.info("Using testbed: %s", dbsURL)

    dbs = DbsApi(dbsURL)
    # module-level name on purpose: updateParentageInfo reads `dbsService`
    dbsService = DBS3Reader(dbsURL, logger=logger)

    if args.dataset:
        datasetList = [args.dataset]
    else:
        datasetList = dbs.listDatasets(min_cdate=int(start), max_cdate=int(end))

    logger.info("Number of datasets to check: %s", len(datasetList))

    startTime = int(time.time())
    failedBlocks = updateParentageInfo(datasetList, insertFlag=args.insert, logger=logger)
    endTime = int(time.time())

    # only report the datasets for which something failed to update
    for dataset in failedBlocks:
        if failedBlocks[dataset]:
            logger.info("Update failed blocks: dataset: %s, blocks %s", dataset, failedBlocks[dataset])
    logger.info("Total Time: %s", endTime - startTime)