# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_pandas.ipynb.

# %% auto 0
# Names exported by a star-import of this module.
__all__ = ['simple_dataframe_from_stata', 'better_dataframe_from_stata', 'better_pdataframe_from_data',
           'better_pdataframe_from_frame']

# %% ../nbs/03_pandas.ipynb 3
from .config import launch_stata
from .helpers import *
from .utils import *
import pandas as pd
import numpy as np

# %% ../nbs/03_pandas.ipynb 5
def simple_dataframe_from_stata(stfr, var, valuelabel, missingval):
    """Export Stata data through pystata and add 1 to every index label.

    With `stfr` set to `None` the current dataset is exported via
    `stata.pdataframe_from_data`; otherwise the frame named by `stfr` is
    exported via `stata.pdataframe_from_frame`. `var`, `valuelabel` and
    `missingval` are forwarded unchanged as keyword arguments.
    """
    from pystata import stata
    export_options = dict(var=var, valuelabel=valuelabel, missingval=missingval)
    if stfr is None:
        frame = stata.pdataframe_from_data(**export_options)
    else:
        frame = stata.pdataframe_from_frame(stfr, **export_options)
    # Presumably aligns the labels with Stata's 1-based observation numbers.
    frame.index += 1
    return frame

# %% ../nbs/03_pandas.ipynb 7
def _better_dataframe(hdl, var, obs, selectvar, valuelabel, missingval):
    """Build a DataFrame from `hdl.getAsDict`, indexed by the `IndexVar` column.

    The data is fetched while an `IndexVar` context is active. The column named
    by that context supplies the index (as int64) and is not kept as a data
    column. An empty `getAsDict` result yields an empty DataFrame.
    """
    with IndexVar() as index_name:
        columns = hdl.getAsDict(var, obs, selectvar, valuelabel, missingval)
        if not columns:
            return pd.DataFrame()

        if index_name not in columns:
            # The index column was not part of the requested variables, so
            # fetch it separately with the same observation selection.
            index_request = [index_name, selectvar] if selectvar else index_name
            index_source = hdl.getAsDict(index_request, obs, selectvar, valuelabel, missingval)
        else:
            index_source = columns
        row_labels = pd.array(index_source.pop(index_name), dtype='int64')

        return pd.DataFrame(data=columns, index=row_labels)

# %% ../nbs/03_pandas.ipynb 16
def _var_from_varlist(varlist, stfr):
    """Turn a varlist string into a list of names, or `None` if it is empty.

    When `stfr` is truthy the varlist is simply split on whitespace. Otherwise
    a temporary Stata program that parses the varlist with `syntax` is run and
    its displayed output is split instead (presumably so that Stata expands
    abbreviations and wildcards -- confirm against Stata's `syntax` docs).

    On `SystemError` the temporary program is dropped before the error is
    re-raised.
    """
    from pystata import stata
    if stfr:
        names = varlist.strip()
    else:
        _program_name = "temp_nbstata_varlist_name"
        stata_code = f"""\
                program define {_program_name}
                    syntax [varlist(default=none)]
                    disp "`varlist'"
                end
                return add
                {_program_name} {varlist}
                program drop {_program_name}
                """
        try:
            names = diverted_stata_output(stata_code).strip()
        except SystemError:
            # Do not leave the helper program defined after a failed run.
            stata.run(f"capture program drop {_program_name}", quietly=True)
            raise
    # split() on whitespace never yields empty or padded tokens; an empty
    # string gives [], which is mapped to None.
    return names.split() or None

# %% ../nbs/03_pandas.ipynb 20
def better_dataframe_from_stata(stfr, varlist, obs, selectvar, valuelabel, missingval, sformat):
    """Build a pandas DataFrame from the current Stata dataset or a Stata frame.

    Parameters
    ----------
    stfr : str or None
        Frame name passed to `sfi.Frame.connect`; `None` selects `sfi.Data`.
    varlist : str
        Varlist string, resolved to a list of names by `_var_from_varlist`.
    obs, selectvar
        Observation selection. When `obs` is `None` and `selectvar` is falsy,
        the data comes from `simple_dataframe_from_stata`; otherwise from
        `_better_dataframe` (or an empty DataFrame if the handle reports no
        observations).
    valuelabel, missingval
        Forwarded unchanged to the export helper that is used.
    sformat : bool
        If truthy, every variable that is not a Stata string variable has its
        values replaced by `sfi.SFIToolkit.formatValue` output, using the
        format from `getVarFormat`.

    Returns
    -------
    pandas.DataFrame
    """
    import sfi
    hdl = sfi.Data if stfr is None else sfi.Frame.connect(stfr)
    var = _var_from_varlist(varlist, stfr)
    if obs is None and not selectvar:
        df = simple_dataframe_from_stata(stfr, var, valuelabel, missingval)
    else:
        if hdl.getObsTotal() <= 0:
            return pd.DataFrame()
        df = _better_dataframe(hdl, var, obs, selectvar, valuelabel, missingval)
    if sformat:
        for v in list(df.columns):
            if hdl.isVarTypeString(v):
                continue
            v_format = hdl.getVarFormat(v)

            # Earlier code chose between a guarded and an unguarded formatter
            # by comparing `missingval` with NaN, but NaN never compares equal
            # to anything, so that comparison carried no information. Always
            # leaving str entries (value labels, a string `missingval`) as
            # they are covers every case, including object columns that mix
            # strings with numbers.
            def format_value(x, v_format=v_format):
                if isinstance(x, str):
                    return x
                return sfi.SFIToolkit.formatValue(x, v_format)

            df[v] = df[v].apply(format_value)
    return df

# %% ../nbs/03_pandas.ipynb 21
def better_pdataframe_from_data(varlist="", obs=None, selectvar=None, valuelabel=False, missingval=np.nan, sformat=False):
    """Export the current Stata dataset to a pandas DataFrame.

    Verifies that pystata is initialized, then delegates to
    `better_dataframe_from_stata` with no frame name; all arguments are
    forwarded unchanged.
    """
    # `np.nan` is used for the default because the `np.NaN` alias no longer
    # exists in NumPy 2.0; on older NumPy both names are the same object.
    import pystata
    pystata.config.check_initialized()
    return better_dataframe_from_stata(None, varlist, obs, selectvar, valuelabel, missingval, sformat)

# %% ../nbs/03_pandas.ipynb 22
def better_pdataframe_from_frame(stfr, varlist="", obs=None, selectvar=None, valuelabel=False, missingval=np.nan, sformat=False):
    """Export the Stata frame named `stfr` to a pandas DataFrame.

    Verifies that pystata is initialized, then delegates to
    `better_dataframe_from_stata`; all arguments are forwarded unchanged.
    """
    # `np.nan` is used for the default because the `np.NaN` alias no longer
    # exists in NumPy 2.0; on older NumPy both names are the same object.
    import pystata
    pystata.config.check_initialized()
    return better_dataframe_from_stata(stfr, varlist, obs, selectvar, valuelabel, missingval, sformat)
