Generating Opacity Tables from Abundance Patterns

This script automates the process of generating opacity tables from specified abundance patterns. It queries the Los Alamos database to get ATOMIC opacities generated with the TOPS code based on the given abundance pattern. The script parses and formats the resulting data into tables that are compatible with the Dartmouth Stellar Evolution Program (DSEP). It involves various operations such as fetching data from the web, parsing files, and complex mathematical computations to optimize and convert data into the required format.
# generateTOPSTables.py
# Febuary 2021
# Thomas M. Boudreaux
#
#
# Given an abundance pattern generate opacity tables in a form that DSEP can 
#  undersntand. These will be automatically queried from the Los Alamos
#  cite, using the most recent ATOMIC opacities generated with the TOPS code.
#
# Los Alamos Cite: https://aphysics2.lanl.gov/apps/
# Reference: Colgan, James, et al. "A new generation of Los Alamos opacity tables." The Astrophysical Journal 817.2 (2016): 116.
 
import argparse
import shutil
import os
import subprocess
import numpy as np
import mechanize
from bs4 import BeautifulSoup
import re
from scipy import interpolate
from tqdm import tqdm
from scipy.optimize import minimize, newton
from scipy.interpolate import RegularGridInterpolator, interp2d
from scipy.interpolate import RectBivariateSpline
import pickle
from datetime import date
from typing import Callable, Iterator
from collections.abc import Iterable
 
TOPS_URL = "https://aphysics2.lanl.gov/apps/"
GN = r"\d+(?:\.\d+)?(?:[E|e][\+|-]\d+)?"
CHEMSYM = "[A-Z](?:[a-z])?(?:\s+)?=\s+"
 
FRACP = re.compile(r"\n(?:\s+)?{a}{b}(?:\s+)?\+/-\s+{b}".format(a=CHEMSYM, b=GN))
ZP = re.compile(r"sumZ\s+=\s+{a}".format(a=GN))
ZXP = re.compile(r"Z/X\s+=\s+{a}".format(a=GN))
 
ELEMIDENT = np.dtype([('ChemSym', '<U11'), ('Val', 'f')])
COMPLDTYPE = np.dtype([("ChemSym", "<U11"), ('massFrac', 'f'),
                       ('numFrac', 'f'), ('ZmassFrac', 'f'),
                       ('massFracErr', 'f'), ('numFracErr', 'f'),
                       ('ZMassFracErr', 'f')])
 
# Bounds of the OPAL formated tables, format is 
# {'filler': [(row, number to be filled)]}
OUTOFBOUNDS = {np.nan:
                [(57, -1), (58, -2), (59, -2), (60, -3), (61, -3), (62, -3),
                 (63, -3), (64, -4), (65, -4), (66, -4), (67, -4), (68, -4),
                 (69, -5)]}
 
 
def abundance_optimization_target(
        I0: Iterator,
        f: Callable[[float, float, float], np.ndarray],
        X: float,
        Z: float
        ) -> float:
    """abundance_optimization_target
 
    Given a function, f, which converts from Fe/H, alpha/H, and a(He) to
        X, Y, and Z, find the similarity between the output of that function
        and a target X, Y, and Z. This is done by finding the magnitude
        of the cross product of the two vectors in X, Y, and Z space.
 
    Positional Arguments:
        I0 -- (list) list containing intial guesses for [Fe/H, alpha/H, a(He)]
        f  -- (function) function which maps Fe/H, alpha/H, and a(He) to X, Y, and Z
        X  -- (float) Hydrogen Mass Fraction
        Z  -- (float) Metal Mass Fraction
 
    Returns:
        (float) |<X,Y,Z> cross f(Fe/H, alpha/H, a(He))|
    """
    return np.linalg.norm(np.cross(np.array([X, 1-X-Z, Z]), f((I0[0], 0.0, I0[1]))))
 
def call_num_frac(
        prog: str,
        name: str,
        feh: float,
        alpha: float,
        Y: float,
        force: bool=False,
        abunTable: str=None
        ) -> int:
    """call_num_frac
 
    Given a executable which takes four arguments through standard input and
        writes a file to disk containing the number fraction and mass fractions
        of elements given Fe/H, alpha/H, a(He), run that program and pass those
        arguments to that program. Note that program must take arguments in
        order of filename, [Fe/H], [alpha/H], a(He).
 
    Positional Arguments:
        prog  -- (string) path to executable program to run
        name  -- (string) file name to save executable output as
        feh   -- (float) [Fe/H] value to pass to program. If abunTable is
                         set this is a tuple of intial, final, number.
        alpha -- (float) [alpha/H] value to pass to program. If abunTable is
                         this is a tuple of initial, final, number.
        Y     -- (float) a(He) value to pass to program. If abunTable is set
                         this is a tuple of initial, final, number.
    Keyword Arguments:
        force     -- (bool) overwride already existing output from exetctable
                            with name = name
        abunTable -- (string) table to parse abundances from. If this is set
                              then the numFrac executable is a little different
                              It takes 12 command line arguments which are
                              tablePath, outputPath, [Fe/H]_0, [Fe/H]_f,
                              [Fe/H]_num, [Alpha/H]_0, [Alpha/H]_f,
                              [Alpha/H]_num, a(He)_0, a(He)_f, and a(He)_num.
                              This program takes NO input from stdin.
    Returns:
        (int) UNIX status code from executable
    """
    if os.path.exists(name):
        if force:
            os.remove(name)
        else:
            raise OSError(f"File {name} exists, you can force this file to be"
                           "overwritten with the -f/--force flag")
 
    if not abunTable:
        # spin the procees up as a subprocess and connect to its input. We are
        #  going to ignore its output and just let that write to the standard
        #  output of the user terminal.
        numFrac = subprocess.Popen(prog, stdin=subprocess.PIPE)
        stdin = numFrac.stdin
 
        # Send the four arguments through.
        stdin.write(f"{name}\n".encode('utf-8'))
        stdin.write(f"{feh}\n".encode('utf-8'))
        stdin.write(f"{alpha}\n".encode('utf-8'))
        stdin.write(f"{Y}\n".encode('utf-8'))
 
        stdin.close()
    else:
        if not isinstance(feh, tuple):
            feh = (feh, feh, 1)
        if not isinstance(alpha, tuple):
            alpha = (alpha, alpha, 1)
        if not isinstance(Y, tuple):
            Y = (Y, Y, 1)
        numFrac = subprocess.Popen([prog, abunTable,  name, str(feh[0]),
                                    str(feh[1]), str(feh[2]), str(alpha[0]),
                                    str(alpha[1]),str(alpha[2]), str(Y[0]),
                                    str(Y[1]), str(Y[2])])
 
    # Grab the status code.
    status = numFrac.wait()
 
    return status
 
 
def comp_list_2_dict(
        compList: Iterator
        ) -> dict:
    """comp_list_2_dict
 
    Take a list containing compsoition information for a star in the form of
        [('Element Symbol', massFraction, numberFraction),...] and convert that
        into a dictionary of the form:
        {'Element Symbol': (massFraction, numberFraction),..}.
 
    Positional Arguments:
        compList -- (Iterator) list of the form:
                               [('Element', massFrac, numberFrac),...]
 
    Returns:
        (dict) Dictionary of the form {'Element':(massFrac, numberFrac),...}
    """
    compDict = dict()
    for element, *info in compList:
        compDict[element] = (info[0],
                             info[1],
                             info[2],
                             info[3],
                             info[4],
                             info[5])
 
    # Set Potassium to zero because the programs I am using do not
    #  save a potasium mass fraction but the header needs it so 
    #  I'm just setting it to zero so that there is an entry in the dict
    #  for it. 
    compDict['K'] = (0.0,0.0,0.0,0.0,0.0,0.0)
    return compDict
 
def parse_abundance_file(
        filePath: str,
        big: bool=False
        ) -> (list, float, float, float):
    """parse_abundance_file
 
    Given a file generated by the executable used in call_num_frac parse that
        file into a usable form. This includes the hydrogen, helium, and
        metall mass fractions. And a list in the form of
        [('Element', massFrac, numberFrac),...]
 
    Positional Arguments:
        filePath -- (string) path to file generated by executable called in
                             call_num_frac
    Keyword Arguments:
         big -- (bool) single composition file or one file with many
                       composition
 
    Returns:
        (list,float,float,float) tuple containg a list of the composition in
        the form [('Element',massFrac,numberFrac),...] along with the Hydrogen,
        Helium, and Metal mass fractions.
 
    """
    with open(filePath) as f:
        contents = f.read()
    # All extractions are done with regex. If There are multiple contents in
    #  the same file that is okay. Lines should be formaed like:
    # Metadata line (one per abundance)
    #  sumZ =  1.09103E-02+/- 8.112E-04    Z/X =  1.10307E-02+/- 8.201E-04
    # Mass/Number Fraction Line (one each per abundance per element)
    #  Cr =    0.00117221 +/-   0.00E+0.0
    fracs = re.findall(FRACP, contents)
    sumZ_raw = re.findall(ZP, contents)
    ZX_raw = re.findall(ZXP, contents)
 
    gridDim = int(np.ceil(len(sumZ_raw)**(1/3)))
 
    Z = np.array([float(re.findall(f"{GN}", x)[0]) for x in sumZ_raw])
    ZX = np.array([float(re.findall(f"{GN}", x)[0]) for x in ZX_raw])
    X = Z/ZX
    Y = 1 - Z - X
 
    # Split the strings so that they are formated as three numbers
    #  seperated by spaces (remove = and +/-)
    # I have hard coded the positions of the different sections in the
    #  file for now
    # TODO: Programatically determine postion of sections of the file
    #       to that they are not hardcoded and can be more easily adapted
    rawNumFrac = [re.sub('(?:=|\+\/-)', ' ', x) for x in fracs[:23]]
    rawMassFrac = [re.sub('(?:=|\+\/-)', ' ', x) for x in fracs[23:2*23]]
    rawZMassFrac = [re.sub('(?:=|\+\/-)', ' ', x) for x in fracs[2*23:2*23+21]]
 
 
    # Format these as (ElementSymbol, Value, ValueUncertantiy)
    numFrac = [(element.split()[0],
                float(element.split()[1]),
                float(element.split()[2]))
                for element in rawNumFrac]
    massFrac = [(element.split()[0],
                float(element.split()[1]),
                float(element.split()[2]))
                for element in rawMassFrac]
    ZMassFrac = [(element.split()[0],
                float(element.split()[1]),
                float(element.split()[2]))
                for element in rawZMassFrac]
 
    # These arrays need to be the same length
    #  so add H and He into the metal one so that 
    #  the length is the same
    ZMassFrac.insert(0, ('H', 0.00, 0.00))
    ZMassFrac.insert(0, ('He', 0.00, 0.00))
 
    compList = np.array([(x[0], x[1], y[1], z[1], x[2], y[2], z[2])
                for x, y, z in
                zip(massFrac, numFrac, ZMassFrac)],
                dtype=COMPLDTYPE)
    compList = compList.reshape(gridDim, gridDim, gridDim, 23)
    X = X.reshape(gridDim, gridDim, gridDim)
    Y = Y.reshape(gridDim, gridDim, gridDim)
    Z = Z.reshape(gridDim, gridDim, gridDim)
 
    if not big:
        return compList[0, 0, 0], X[0, 0, 0], Y[0, 0, 0], Z[0, 0, 0]
    else:
        return compList, X, Y, Z
 
 
def format_TOPS_string(
        compList: list
        ) -> str:
    """format_TOPS_string
 
    Format the composition list from pasrse_abundance_file into a string in the
        form that the TOPS web form expects for a mass fraction input.
 
    Positional Arguments:
        compList -- (list) composition list in the form of:
                           [('Element', massFrac, numFrac),...]
 
    Returns:
        (string) string in the form of:
                 "massFrac0 Element0 massFrac1 Element1 ..."
 
    """
    TOPS_abundance_string = ' '.join([f"{x[1]:0.10f} {str(x[0])}"
                                      for x in compList])
    return TOPS_abundance_string
 
def submit_TOPS_form(
        mixString: str,
        mixName:str,
        massFrac: bool=True
        ) -> bytes:
    """submit_TOPS_form
 
    Open the Los Alamos opacity website, submit a given composition and then
        return the resultant table.
 
    Positional Arguments:
        mixString -- (string) string in the form of:
                              "massFrac0 Element0 massFrac1 Element1 ..." which
                              will be sumbittedi in the webform for mixture
        mixName   -- (string) name to be used in the webform
    Keyword Arguments:
        massFrac  -- (bool)[True] Submit as massFrac instead of numberFrac
 
    Returns:
        (bytes) Table quired from TOPS cite.
 
    """
    br = mechanize.Browser()
    br.open(TOPS_URL)
    br.select_form(nr=0)
    if massFrac:
        br.form.find_control(name="fractype").value = ['mass']
    br.form['mixture'] = mixString
    br.form['mixname'] = mixName
 
    tlow = br.find_control(name="tlow", type="select").get("0.0005")
    tlow.selected = True
    tup = br.find_control(name="tup", type="select").get("60")
    tup.selected = True
 
 
    # These are the lowest and highest densities DSEP should need based
    #  on R = rho/T_6^3
    br.form['rlow'] = "1.77827941e-15"
    br.form['rup'] = "1.25892541e+09"
    br.form['nr'] = '100'
 
 
    # get to the first submission page
    r1 = br.submit()
 
    # get to the second submission page
    br.select_form(nr=0)
    r2 = br.submit()
 
    tableHTML = r2.read()
    br.close()
    return tableHTML
 
def parse_table(
        html: bytes
        ) -> str:
    """parse_table
 
    Parse the bytes table returned from mechanize into a string
 
    Positional Arguments:
        html -- (bytes) bytes table retuend from mechanize bowser at second
                        TOPS submission form
 
    Returns:
        (string) parsed html soruce in the form of a string
    """
    soup = BeautifulSoup(html, 'html.parser')
 
    # deal with line breaks
    table = soup.find('code').prettify().replace('<br/>', '')
    table = table.split('\n')
 
    # cut out the top and bottom lines of the table which dont matter
    table = [x for x in table[1:-2] if x.lstrip().rstrip() != '']
 
    # recombine the table into one string
    table = '\n'.join(table)
    return table
 
def validate_extant_tables(
        path: str,
        prefix: str
        ) -> bool:
    """validate_extant_tables
 
    Check if there is a quiried table from TOPS for every number frac file
        generated by the program passed to call_num_frac.
 
    Positional Arguments:
        path   -- (string) Path to where the results of the number frac and TOPS
                           query files are stored
        prefix -- (string) start prefix given to all abundance / number frac
                           files
 
    Returns:
        (bool) Whether or not all number frac files have a corresponding TOPS
               opacity table
    """
    files = os.listdir(path)
    abundanceFiles = list(filter(lambda x: prefix in x, files))
    opacityTables = list(filter(lambda x: 'OP:' in x, files))
 
    # parse the file name in the right way
    # Its easier to read if this is outside the list comprehension
    parser = lambda x: f"OP:{x.split('_')[-1].split('.')[0]}"
 
    validated = all(map(lambda x: any([parser(x) in y for y in opacityTables]),
                        abundanceFiles))
    return validated
 
def get_target_log_T():
    """get_target_log_T
 
    Get the ndarray for the LogT values that DSEP expects
 
    Returns:
        (ndarray) Array of LogT values expected by DSEP in opacity table
 
    """
    # The spacing on these is non-uniform so make the 4 sections then merge
    #  them together
    targetLogR = np.arange(-8.0, 1.5, 0.5)
    targetLogT_A = np.arange(3.75, 6.0, 0.05)
    targetLogT_B = np.arange(6.0, 8.1, 0.1)
    targetLogT_C = np.arange(8.1, 8.8, 0.2)
    targetLogT = np.concatenate((
        targetLogT_A,
        targetLogT_B,
        targetLogT_C
        ), axis=None)
    return targetLogT
 
def parse_RMO_TOPS_table_file(
        TOPSTable: str,
        n: int=100
        ) -> (np.ndarray, np.ndarray, np.ndarray):
    """parse_RMO_TOPS_table_file
 
    Given the path to a file queried from the TOPS webform put it into a
        computer usable form of 3 arrays. One array of mass density, one of
        LogT and one of log Rossland Mean Opacity
 
    Positional Arguments:
        TOPSTable -- (string) Path to file queried from TOPS webform
 
    Keyword Arguments:
        n -- (int)[100] size of density grid used in TOPS query form
 
    Returns:
        (ndarray[n], ndarray[m], ndarray[m,n]) tuple of ndarrays. First is mass
                                               density, second is Log T, and
                                               third is the Log of the Rossland
                                               Mean Opacity.
    """
    with open(TOPSTable) as f:
        # TOPS returns a table with non breaking spaces (\u00A0). These break
        #  pythons regex so replace all of these with tabs at read in time
        contents = f.read().replace("\u00A0", "\t")
 
    # This regular expression locates n tables in the TOPS return file. If 
    #  the header format of those tables chanegs this regex will also have
    #  to change.
    tgroups = re.findall(r"(Rosseland\s+and\s+Planck\s+opacities\s+and\s+"
                          "free\s+electrons\s+Density\s+Ross\s+opa\s+Planck\s+"
                          "opa\s+No\.\s+Free\s+Av\s+Sq\s+Free\s+(T=\s+\d+\.?\d+"
                          "E[-|+]\d+)\s+((\d\.\d+E[-|+]\d+"
                          "(\s+)?){{5}}\n?){{{}}})".format(n), contents)
 
    # convert from keV in TOPS to K in DSEP
    temperatures = np.array([float(x[1].split()[1])*11604525.0061657
                            for x in tgroups])
    subTables = [x[0].split('\n')[2:] for x in tgroups]
    subTables = [[[float(z) for z in y.split()]
                  for y in x
                  if y.rstrip() != '']
                 for x in subTables]
    subTables = np.array(subTables)
    rho = np.array([x for x in subTables[:, :, 0]])[0]
    RMO = subTables[:, :, 1]
    LogT = np.log10(temperatures)
    logRMO = np.log10(RMO)
 
    OPALTableInit = np.zeros(shape=(rho.shape[0], LogT.shape[0]))
 
    # This transposes the table basically
    for i, T in enumerate(LogT):
        OPALTableInit[:, i] = logRMO[i]
 
    return rho, LogT, OPALTableInit
 
def convert_rho_2_LogR(
        rho: np.ndarray,
        LogT: np.ndarray,
        RMO: np.ndarray
        ) -> (np.ndarray, np.ndarray, np.ndarray):
    """convert_rho_2_LogR
 
    Maps a given kappa(rho,logT) parameter space onto a kappa(LogR, LogT) field
        through interpolation. The final field is the field that DSEP needs.
 
    Positional Arguments:
        rho  -- (ndarray) mass density array of size n
        LogT -- (ndarray) LogT array of size m
        RMO  -- (ndarray) Opacity Array of size m x n
 
 
    Returns:
        (ndarray[19], ndarray[70], ndarray[70,19]) Tuple of LogR array, LogT
                                                   array and opacity array.
                                                   these are in the sizes that
                                                   DSEP expects.
    """
    # build a function that can interpolate log opacity on a mass density log
    #  temperature grid
    kappaFunc = interp2d(LogT, rho, RMO, kind='cubic')
 
    # mapping from T, R to rho
    rhof = lambda T, R: R*((T*1e-6)**3)
 
    # The target LogR an LogT grids which we would like to use
    targetLogT = get_target_log_T()
    targetLogR = np.arange(-8.0, 1.5, 0.5)
 
    # The array to be filled with the converted opaicities
    opacity = np.zeros(shape=(targetLogT.shape[0], targetLogR.shape[0]))
 
    # This function maps the vector function kappaFunc to a scaler function
    #  so that for a given target R and temperature array you get out an 
    #  1D array not a 2D array. There is probably a more elegant way of doing
    #  this. However, theis seems to work well enough and is not slow persay.
    opacityF = lambda x, RTarg: kappaFunc(x, rhof(10**x, 10**RTarg))
 
    for i, logR in enumerate(targetLogR):
        # vectorize it so I can call it. However, this can only take a function
        #  of one argument so use an anoymous function to hide the second
        #  argument
        f = np.vectorize(lambda x: opacityF(x, logR))
        opacity[:, i] = f(targetLogT)
 
    return targetLogR, targetLogT, opacity
 
 
def extract_composition_path(
        path: str
        ) -> (float, float, float):
    """extract_composition_path
 
    Given the name of a TOPS return file (named in the format OP:n_X_Y_Z.dat)
        extract X, Y, and Z
 
    Positional Arguments:
        path -- (string) path to TOPS return file
 
    Returns:
        (float, float, float) tuple of Hydrogen, Helium, and Metal mass fraction
    """
    filename = os.path.basename(path)
    filename = filename.strip('.dat')
    filenameParts = filename.split('_')
    X = float(filenameParts[1])
    Y = float(filenameParts[2])
    Z = float(filenameParts[3])
    return X, Y, Z
 
def format_TOPS_to_OPAL(
        TOPSTable: str,
        comp: tuple,
        tnum: int,
        upperNonRect: np.ndarray=None
        ) -> (str, float, float, float, str):
    """format_TOPS_to_OPAL
 
    Take the path to a table queried from the TOPS web form and fully convert it
        into a table which can be directly read by DSEP. (Note this function
        does not write anything to disk; however, the return products can be
        written to disk)
 
    Positional Arguments:
        TOPSTable -- (string) path to table queried from TOPS web form
        comp      -- (dict) composition dictionary
        tnum      -- (int) table number
 
    Returns:
        (string, float, float, float, string) -- tuple of line metadata for
                                                   table
                                                 Hydrogen metallicity for table
                                                 Helium metallicity for table
                                                 Opacity table
    """
    # Parse the raw TOPS data into a usable form
    rhoInit, logInit, OPALTableInit = parse_RMO_TOPS_table_file(TOPSTable)
 
    # Interpolate raw TOPS data onto the LogR LogT grid DSEP expects
    LogR, LogT, LogRMO = convert_rho_2_LogR(rhoInit, logInit, OPALTableInit)
 
    X, Y, Z = extract_composition_path(TOPSTable)
 
 
    metaLine, fullTable = format_opal_comp_table(LogR,
                                                 LogT,
                                                 LogRMO,
                                                 tnum,
                                                 upperNonRect=upperNonRect)
 
    return metaLine, X, Y, Z, fullTable, LogR, LogT, LogRMO
 
def format_opal_comp_table(
        LogR: np.ndarray,
        LogT: np.ndarray,
        LogRMO: np.ndarray,
        TNUM: int,
        comp: dict = None,
        upperNonRect: np.ndarray =None
        ) -> (str, str):
    """format_opal_comp_table
 
    Take in all the information from a given TOPS tables and format it
        to the proper format for DSEP to undersand. Leave in some placeholders
        so that in future table can be labeld as the proper number.
 
    Positional Argumnets:
        LogR -- (ndarray) The Log R value array (horizontal axis of table)
        LogT -- (ndarray) The Log Temperature value array (vertical axis)
        LogRMO -- (ndarray) all of the RMO values associated with R and T
        TNUM -- (int) Table number
 
    Keyword Argument:
        comp -- (dict) composition dictionary. If not provided placeholders
                        are left in place so that it may be filled later on
        upperNonRect -- (ndarray) array describing how to fill the top of the
                                  table non rectangurally
 
    Returns:
        metaLine -- (str) header line for each table, may or may not have
                          placeholders in it
        fullTable -- (str) full table to be places in opacity file
 
    """
    # place holder empty string
    ph = ''
    if not isinstance(comp, Iterable):
        metaLine = (f" TABLE #TNUMNSPACES{date.today().strftime('%Y%m%d')}0001"
                    f"TSPACESX=TARGX Y=TARGY Z=TARGZ"
                     " dX1=0.0000 dX2=0.0000")
    else:
        metaLine = (f" TABLE #TNUMNSPACES{date.today().strftime('%Y%m%d')}0001"
                    f"TSPACESX={abs(comp[0]):0.4f} Y={abs(comp[1]):0.4f}"
                    f" Z={abs(comp[2]):0.4f} dX1=0.0000 dX2=0.0000")
    logRHeader = ' '.join(["{:>6.1f}".format(round(x, 1)) for x in LogR])
 
    # Build the metadataline for the table, including the date this code was run
    titleLine = f"{ph:>50}log R"
    headerLine = f"logT{logRHeader}"
 
    dataLines = list()
    LogRMONonRect = LogRMO.copy()
    # only make the table non rectangular if asked
    #  the reason for not always doing this is that 
    #  this function is called twice, the first time
    #  to transfer the tables from TOPS diretly
    #  those transfered tables are then used to interpolate
    #  we want rectangulat tables for that interpolation.
    if hasattr(upperNonRect, "__iter__"):
        # cut data out from the bottom right of the table per DSEP
        for filler in OUTOFBOUNDS:
            for row in OUTOFBOUNDS[filler]:
                start = row[1]
                replacement = np.repeat(filler, abs(row[1]))
                LogRMONonRect[row[0]][start:] = replacement
        # cut data out from the top right of the table per DSEP
        subNRE = upperNonRect[upperNonRect[:, 0] == TNUM]
        for nre in subNRE:
            if nre[2] != 0.0:
                replacement = np.repeat(9.999, int(nre[2]))
                LogRMONonRect[int(nre[1])][:int(nre[2])] = replacement
 
    for line, temperature in zip(LogRMONonRect, LogT):
        # Cut max value to the same as DSEP's max value (9.99) any values bigger
        #  would cause errors
        adjustedLine = line[~np.isnan(line)]
        # add opacity values in line rounded off to the nearest thousands
        joinedLine = ' '.join(["{:>6.3f}".format(round(x, 3))
                                for x in adjustedLine])
        dataLines.append(f"{temperature:0.2f} {joinedLine}")
    dataTable = '\n'.join(dataLines)
    fullTable = '\n'.join([metaLine,
                           '',
                           titleLine,
                           '',
                           headerLine,
                           '',
                           dataTable,
                           ''])
    return metaLine, fullTable
 
def format_OPAL_header(
        compDict: dict
        ) -> str:
    """format_OPAL_header
 
    Writes the header of the opacity table that DSEP expects. This is written
        to be the same length (and basically the same contents) of the header
        from the OPACITY project. Not sure if that is required; however, if so
        I am matching it.
 
    Positional Arguments:
        compDict -- (dict) dictionary in the form:
                           {'Element': (massFrac, numFrac), ...} used to fill
                           up the header with composition information. This is
                           meant to be the "solar" compositon of whatever mix
                           you are using so... [Fe/H] = 0.0, [alpha/H] = 0.0,
                           a(He) = 10.93
 
    Returns:
        (string) The Header to be prepended to the opacity table file
 
    """
    headerBaseString = f""" Updated TOPS Rosseland mean opacity tables   (date {date.today().strftime('%Y%m%d')})
 Description of modifications given in Iglesias & Rogers, ApJ464,943(1996)
 
  lines 1-60 contain brief table description
  lines 61-240 contain table summaries
  lines 241-end contain tables. Each table is 77 lines long
 
 NOTE: File is compatible with interpolation routines.
 
 Definitions:
 
 The logarithm of the Rosseland mean opacity [cm**2/g] as a function
 of log(T) for columns of constant log(R), where
 
    R=density[g/cm**3]/T6**3, T6=1.e-6*T[degrees]
    log(T) range: 70 values from 3.75 to 8.70
    log(R) range: 19 values from -8.0 to +1.0
 
 NOTE: Tables are NOT rectangular
       values=9.999 or blanks are out of table range
 
 Composition parameters:
   X   = Hydrogen mass fraction
   Y   = Helium mass fraction
   Z   = Metal mass fraction
   dX1 is not relevant for Type 1 tables
   dX2 is not relevant for Type 1 tables
 
 Abundances for {date.today().strftime('%Y%m%d')}0001     composition: GS98 mix (N.Grevesse & A.J.Sauval 1998, Space Sci. Rev. 85, 161)"""
 
    # Note that the num frac programs I'm using don't contain potassium so I manually set that to 0 abouve
    #  in the function which builds compDict.
    # I think I need to propery add the mass fractions into this
    #  I thught this was just a header but it seems that data is actually
    #  being read from here
    AbuncenceSummary = f""" Element   Abundance - relative metal: number fraction   mass fraction   atomic mass
   H     log(A)=                                                           1.00790
   He    log(A)=                                                           4.00260
   Li    log(A)=
   Be    log(A)=
   B     log(A)=
   C     log(A)=        -----------------  {compDict['C'][1]:0.6f}        {compDict['C'][2]:0.6f}       12.01100
   N     log(A)=        -----------------  {compDict['N'][1]:0.6f}        {compDict['N'][2]:0.6f}       14.00670
   O     log(A)=        -----------------  {compDict['O'][1]:0.6f}        {compDict['O'][2]:0.6f}       15.99940
   F     log(A)=
   Ne    log(A)=        -----------------  {compDict['Ne'][1]:0.6f}        {compDict['Ne'][2]:0.6f}       20.17900
   Na    log(A)=        -----------------  {compDict['Na'][1]:0.6f}        {compDict['Na'][2]:0.6f}       22.98977
   Mg    log(A)=        -----------------  {compDict['Mg'][1]:0.6f}        {compDict['Mg'][2]:0.6f}       24.30500
   Al    log(A)=        -----------------  {compDict['Al'][1]:0.6f}        {compDict['Al'][2]:0.6f}       26.98154
   Si    log(A)=        -----------------  {compDict['Si'][1]:0.6f}        {compDict['Si'][2]:0.6f}       28.08550
   P     log(A)=        -----------------  {compDict['P'][1]:0.6f}        {compDict['P'][2]:0.6f}       30.97376
   S     log(A)=        -----------------  {compDict['S'][1]:0.6f}        {compDict['S'][2]:0.6f}       32.06000
   Cl    log(A)=        -----------------  {compDict['Cl'][1]:0.6f}        {compDict['Cl'][2]:0.6f}       35.45300
   Ar    log(A)=        -----------------  {compDict['Ar'][1]:0.6f}        {compDict['Ar'][2]:0.6f}       39.94800
   K     log(A)=        -----------------  {compDict['K'][1]:0.6f}        {compDict['K'][2]:0.6f}       39.09830
   Ca    log(A)=        -----------------  {compDict['Ca'][1]:0.6f}        {compDict['Ca'][2]:0.6f}       40.08000
   Sc    log(A)=
   Ti    log(A)=        -----------------  {compDict['Ti'][1]:0.6f}        {compDict['Ti'][2]:0.6f}       47.90000
   V     log(A)=
   Cr    log(A)=        -----------------  {compDict['Cr'][1]:0.6f}        {compDict['Cr'][2]:0.6f}       51.99600
   Mn    log(A)=        -----------------  {compDict['Mn'][1]:0.6f}        {compDict['Mn'][2]:0.6f}       54.93800
   Fe    log(A)=        -----------------  {compDict['Fe'][1]:0.6f}        {compDict['Fe'][2]:0.6f}       55.84700
   Co    log(A)=
   Ni    log(A)=        -----------------  {compDict['Ni'][1]:0.6f}        {compDict['Ni'][2]:0.6f}       58.70000"""
    return '\n'.join([headerBaseString, '', AbuncenceSummary, ''])
 
 
 
def format_OPAL_table(
        tableDict: dict,
        compDict: dict
        ) -> str:
    """format_OPAL_table
 
    Given a dictionary of tables and a composition Dictionary for solar
        composition in a given mixture (AGSS08, GS98, etc...) merge all the
        information together into a string which can be written to disk and
        would be the format of an opacoty project table (what DSEP expects)
 
    Positional Arguments:
        tableDict -- (dict) dictionary of table elements, containing a "Summary"
                            entry (metadata) and a "Table" entry. All
                            occcurences of the the string "TNUM" will be
                            replaced with the index+1 of where that table
                            occurs in the file.
        compDict -- (dict) dictionary in the form:
                           {'Element': (massFrac, numFrac), ...} used to fill
                           up the header with composition information. This is
                           meant to be the "solar" compositon of whatever mix
                           you are using so... [Fe/H] = 0.0, [alpha/H] = 0.0,
                           a(He) = 10.93
 
    Returns:
        (string) Opacity Project formated table as a string which can be written
                 to disk
 
    """
    ph = '' # place holder empty string to be used when adding arbitrary spaces
    header = format_OPAL_header(compDict)
    # The space at the start of this (and the table summary lines) is
    #  important as that seems to be how opal95.f determines comment lines
    summaryHeader = f" Table Summaries- There are {len(tableDict)} tables"
 
    # Put together the summary list of table number and corresponding 
    #  compesition at the top of the file
    summaries = map(lambda x: x[1]['Summary'].replace('TNUM', f"{x[0]+1:>3}"),
                    enumerate(tableDict))
    summaries = map(lambda x: x.replace('NSPACES', f"{ph:<2}"), summaries)
    summaries = map(lambda x: x.replace('TSPACES', f"{ph:<6}"), summaries)
    summary = '\n'.join(list(summaries))
 
    # In the current opacity files there are 50 lines of blank space, emulate
    #  that
    interStage = '\n'.join([' ']*51)
    tableStart = ("************************************ Tables ****************"
                  "********************")
    # merge the actual tables together (note these contain metadata and table
    #  information, but that has already been put in the right order on a
    #  table-by-table basis.
    # The [1:] is to remove the space at the start of the line which is used
    #  to mark the summary lines as comments.
    tables = map(lambda x: x[1]['Table'][1:].replace('TNUM', f"{x[0]+1:>3}"),
                 enumerate(tableDict))
    tables = map(lambda x: x.replace('NSPACES', f"{ph:<5}"), tables)
    tables = map(lambda x: x.replace('TSPACES', f"{ph:<7}"), tables)
    tablesFull = '\n'.join(list(tables))
    # Connect all theses sections together seperated by new lines
    OPALFormatted = '\n'.join([header,
                      summaryHeader,
                      '',
                      summary,
                      interStage,
                      tableStart,
                      '',
                      tablesFull])
 
 
    return OPALFormatted
 
 
def validate_composition(
        numberFractions: Iterator,
        tolerence: float=2.0001
        ) -> bool:
    """validate_composition
 
    Confirm that the total number fractions of elements all add up to 1 within
        some tolernce
 
    Positional Arguments:
        numberFractions -- (Iterator) list of number fractions of all elements
                                      in the form of (anyValue, numFrac)
    Keyword Arguments:
        tolerence       -- (float)[0.0001] tolerence to confirm too
 
    Returns:
        (bool) True if the number fractions add up to 1 within the tolerance,
               false otherwise
    """
    total = 0
    for e in numberFractions:
        total += e[1]
    return (not abs(total-1) > tolerence, total)
 
def convert_XYZ_FeHAlphaHaHe(
        f: Callable[[float, float, float], np.ndarray],
        X: float,
        Z: float
        ) -> np.ndarray:
    """convert_XYZ_FeAlphaAaHe
 
    Given some X, Z and a function which maps that onto [Fe/H], [alpha/H], a(He)
        find the corresponding [Fe/H], [alpha/H], a(He) by minizing the
        magnitude of the cross product of those two vectors
 
    Positional Argument:
        f -- (function) function which maps Fe/H, alpha/H, and a(He) to X, Y,
                        and Z
        X -- (float) Hydrogren mass fraction target
        Z -- (float) Metal mass fraction target
 
    Returns:
        (ndarray) ndarray of optimal parameters given f to match the target mass
                 fractions, formated like [[Fe/H], [alpha/H], a(He)]
    """
    # Initial guess is solar composition
    # Bounds are set 0.05 units in from the grid size used below when finding
    #  the mapping funciton f to prevent out of bounds errors when exploring
    #  the parameter space.
    res = minimize(abundance_optimization_target,
            [0.0, 10.93],
            args=(f, X, Z),
            bounds=[(-2.95, 2.95), (5.0, 16.75)]
            )
    return res.x
 
def interp_table_to_composition(formatedTables, pContents):
    Xcomps = np.array(list(map(lambda x: x['X'], formatedTables)))
    Zcomps = np.array(list(map(lambda x: x['Z'], formatedTables)))
    KappaT = np.array(list(map(lambda x: x['LogRMO'], formatedTables)))
 
    interpolatedRMO = np.zeros_like(KappaT)
 
    Xtarg = pContents[:, 0]
    Ztarg = pContents[:, 2]
 
    # interpolate a function for each grid point in LogR and LogT (1330
    #  functions on the DSEP grids) as a function of X and Z. Then use
    #  the target X and Z that dsep requires to find the correct opacities.
    for row in range(KappaT.shape[1]):
        for column in range(KappaT.shape[2]):
            data = KappaT[:, row, column]
 
            # need to make this an image because of a bug in 
            #  scipy interpolate 2D where for strict coordinate
            #  interpolation the number of free parameters is not
            #  properly constrained. This is equivilent mathematically
            #  and just takes a little longer to run (but works)
            dataImg = np.tile(data, (Xcomps.shape[0], 1))
 
            interpFunc = interp2d(Xcomps, Zcomps, dataImg)
 
            # the diagonals are the points in f(Xtarg[i], Ztarg[j]) where i=j
            #  this is what we want because we really are doing a strict 
            #  coordinate interpolation
            interpolatedRMO[:, row, column] = np.diag(interpFunc(Xtarg, Ztarg))
 
    return interpolatedRMO
 
def parse_abundance_map(path):
    with open(path, 'r') as f:
        contents = f.read().split('\n')
        pContents = np.array([[float(y) for y in x.split(',')]
                               for x in contents
                              if x != '' and x.lstrip()[0] != '#'])
    return pContents
 
def rebuild_formated_tables(formatedTables, interpRMO, pContents, upperNonRect):
    for tnum, (table, RMO, comp) in enumerate(zip(formatedTables, interpRMO, pContents)):
        table["X"] = comp[0]
        table["Y"] = comp[1]
        table["Z"] = comp[2]
 
        metaLine, fullTable = format_opal_comp_table(table['LogR'],
                                                     table['LogT'],
                                                     RMO,
                                                     tnum,
                                                     comp=comp,
                                                     upperNonRect=upperNonRect)
        table["Summary"] = metaLine
        table["Table"] = fullTable
        table["LogRMO"] = RMO
    return formatedTables
 
 
if __name__ == "__main__":
    # Command line arguments are extensive, but pretty well documented by
    #  themselves. Run the code with the --help flag to see this documentation
    #  from the command line
    parser = argparse.ArgumentParser(description="Generate opacity table in a"
                                    " format dsep can work with from TOPS")
    parser.add_argument("numFracProgram", help="path to program to generate"
                        " Number and Mass fractions", type=str)
    parser.add_argument("abundanceMap", help="Path to CSV list of X,Y,Z"
                        " required by DSEP", type=str)
    parser.add_argument("-p", "--prefix", help="Abundance file prefix",
                        default="TOPSA", type=str)
    parser.add_argument("-f", "--force", help="force the generation of new"
                        " abunance tables", action="store_true")
    parser.add_argument("-d", "--outputDirectory", help="directory to save"
                        " abundance files too", default=".", type=str)
    parser.add_argument("--opal", help="Run the code to convert TOPS table to"
                        "OPAL compatible tables", action="store_true")
    parser.add_argument("--nofetch", help="do not fetch opacity tables from"
                        " TOPS", action='store_true')
    parser.add_argument("-o", "--output", help="file to write OPAL formated"
                        " table to", default="TOPAL.dat", type=str)
    parser.add_argument("--interpF", help="Pickeled Interpolation File to go"
                        " between [Fe/H], [alpha/H], a(H) and X, Y, Z",
                        type=str)
    parser.add_argument("--abunTable", type=str, help="Table to pull abundances"
                        " from. If set numFracProgram must point to executable"
                        " expedciting table input")
    parser.add_argument("--gridDim", type=int, help="Grid dimension to use"
                        " when generating the enhancmnet -> abundence table.",
                        default=50)
    parser.add_argument("--inputDir", type=str, help="Directory to save input"
                        " file to (such as interpolation function)",
                        default=".")
    parser.add_argument("--nonRectMap", type=str, help="Map of appropriate non"
                        " rectangular file", default=None)
 
    args = parser.parse_args()
 
    # An composition to massFrac function is needed to run the code; however,
    #  they can take a really long time to make. Therefore, you can provide a 
    #  filepath to a pickled function from an earlier run and it will use that.
    #  if you don't provide that path it will first generate a function, and
    #  then save it automatically so that any future run may make use of that
    #  function
    if args.interpF:
        MetalAbunMap = pickle.load(open(args.interpF, 'rb'))
    else:
        # This run numfrac gridDim^3 times for a variety of compositions,
        #  at each it finds the X, Y, and Z values and uses these to build
        #  a 3D interpolated vector field function which can take X, Y, and Z, 
        #  and return [Fe/H], [alpha/H], and a(He) 
        A = np.empty(shape=(args.gridDim, args.gridDim, args.gridDim, 3))
        FeH = np.linspace(-3.0, 3.0, args.gridDim)
        AlphaH = np.linspace(-3.0, 3.0, args.gridDim)
        aHe = np.linspace(5.0, 17.0, args.gridDim)
        if not args.abunTable:
            saveName = "{}2XYZ.p".format(args.numFracProgram.split('/')[-1])
            for i, feh in enumerate(FeH):
                for j, alphah in enumerate(AlphaH):
                    for k, ahe in enumerate(aHe):
                        # this callibrate.tmp file is to store the output of one
                        #  run, it will be overwritten each time and eventually
                        #  deleted to clean up
                        status = call_num_frac(args.numFracProgram,
                                "calibrate.tmp", feh, alphah, ahe, force=True)
                        _, oX, oY, oZ = parse_abundance_file("calibrate.tmp")
                        A[i, j, k] = np.array([oX, oY, oZ])
        else:
            saveName = f"{args.abunTable.split('/')[-1].split('.')[0]}2XYZ.p"
            FeH_tup = (FeH.min(), FeH.max(), FeH.shape[0])
            AlphaH_tup = (AlphaH.min(), AlphaH.max(), AlphaH.shape[0])
            aHe_tup = (aHe.min(), aHe.max(), aHe.shape[0])
 
            status = call_num_frac(args.numFracProgram, "calibrate.tmp",
                                   FeH_tup, AlphaH_tup, aHe_tup, force=True,
                                   abunTable=args.abunTable)
            _, oX, oY, oZ = parse_abundance_file("calibrate.tmp", big=True)
            A[:, :, :, 0] = oX
            A[:, :, :, 1] = oY
            A[:, :, :, 2] = oZ
 
        os.remove("calibrate.tmp")
        MetalAbunMap = RegularGridInterpolator((FeH, AlphaH, aHe), A)
 
        # force this to save to disk so we don't have to do this every time
        pickle.dump(MetalAbunMap,
                    open(os.path.join(args.inputDir, saveName), 'wb'))
 
 
    # you can set nofetch so you avoid the TOPS web query part if that was
    #  already done and you just want to reparse into a DSEP formated table
    #  already queried files.
    if not args.nofetch:
        # Some defined excetions
        # if the directory exists, and the force flag is set, but the TOPS
        #  files have not been fully quiered go aheead and allow the code to
        #  continue buy removing the direcotory and starting over
        if (
                os.path.exists(args.outputDirectory)
                and args.force
                and not validate_extant_tables(args.outputDirectory,
                                               args.prefix)
           ):
            shutil.rmtree(args.outputDirectory)
        # If the directory exists an the force flag is set and all the TOPS
        #  files have been quired prevent the code from running. This is to
        #  keep the traffic on the tops site down. If you need to run again
        #  manually delete the folder. i.e. no way to programtically
        #  overwride this
        if (
                os.path.exists(args.outputDirectory)
                and args.force
                and validate_extant_tables(args.outputDirectory, args.prefix)
           ):
            raise OSError(f"Cannot force override of {args.outputDirectory} as"
                            " it has been validated as complete")
        # If the directory exists, and is not validated, but the force flag is
        #  not set alert the user the directory will not be overwritten if the
        #  force flag is not set
        elif (
                not args.force
                and os.path.exists(args.outputDirectory)
                and not validate_extant_tables(args.outputDirectory,
                                               args.prefix)
             ):
            raise OSError(f"Output dir {args.outputDirectory} exists, you can"
                           " force this path to be overwritten with the"
                           " -f/--force flag")
        # if the path exists, the path is validate, and the force flag is not
        #  set, let the user know to run with nofetch and opal to skip this
        #  stage entirly
        elif (
                not args.force
                and os.path.exists(args.outputDirectory)
                and validate_extant_tables(args.outputDirectory, args.prefix)
             ):
            raise OSError(f"Output dir {args.outputDirectory} already exists"
                           " and is validated as complete, set the --opal flag"
                           " to generate OPAL compatible tables and set the"
                           " --nofetch flag to skip the TOPS fetch portion of"
                           " the program.")
 
        os.mkdir(args.outputDirectory)
 
        # Read the list of X,Y, and Zs that DSEP expects in its input opacity
        #  files
        pContents = parse_abundance_map(args.abundanceMap)
 
        # Save the required [Fe/H], a(He) to get the required X, Y, Z from the
        #  abundance map at the moment we always let [alpha/H] = 0 as we are
        #  not considering alpha enhancment however, in future with some light
        #  modification to this seciton and the convert_XYZ_FeHAlpahHaHe
        #  function that would be easy to include.
        numFracInputs =  np.empty(shape=(len(pContents), 3))
        for i, abund in tqdm(enumerate(pContents), total=len(pContents)):
            res = convert_XYZ_FeHAlphaHaHe(MetalAbunMap, abund[0], abund[2])
            numFracInputs[i] = np.array([res[0], 0.0, res[1]])
 
 
        # Generate the number fractions and mass fractions for each composition
        #  DSEP requires and save the file paths to those files
        afiles = list()
        for i, rA in enumerate(numFracInputs):
            fileName = f"{args.outputDirectory}/{args.prefix}_{int(i)}.dat"
            numFracStatus = call_num_frac(args.numFracProgram,
                    fileName, rA[0], rA[1], rA[2], force=args.force,
                    abunTable=args.abunTable)
            if int(numFracStatus) == 0:
                afiles.append(fileName)
 
        # Using the number and mass fractions DSEP needs query the TOPS form
        #  for each and save its output
        for i, file in tqdm(enumerate(afiles), total=len(afiles)):
            compList, X, Y, Z = parse_abundance_file(file)
            mixString = format_TOPS_string(compList)
            mixName = f"X{int(X*1000)} Y{int(Y*1000)} Z{int(Z*1000)}"
            # Does the actually querying
            tableHTML = submit_TOPS_form(mixString, mixName)
            table = parse_table(tableHTML)
 
            filePath = f"{args.outputDirectory}/OP:{i}_{X}_{Y}_{Z}.dat"
            with open(filePath, 'w') as f: # save to disk
                f.write(table)
 
 
    # Convert the many files saved from the TOPS query to one file in the form
    #   DSEP expects
    if args.opal:
        # A defined execption
        # If the directory exists but the TOPS files cannot be validated halt
        if not validate_extant_tables(args.outputDirectory, args.prefix):
            raise OSError(f"Unable to validate quality of TOPS opacity tables"
                           " in {args.outputDirectory}, run again without"
                           " --nofetch flag to refetch tables")
        # select all the files matchiong the TOPS file name format from the
        #  save directory
        TOPSTables = filter(lambda x: re.match("OP:\d+", os.path.basename(x)),
                            os.listdir(args.outputDirectory))
        tableSummaries = list()
        OPALTables = list()
        formatedTables = list()
 
        pContents = parse_abundance_map(args.abundanceMap)
 
        # For each file put together the relevant information in a dictionary
        #  which will latter be merged and written to disk
        paths = map(lambda x: os.path.join(args.outputDirectory, x), TOPSTables)
        upperNonRect = np.load(args.nonRectMap)
        for i, tablePath in tqdm(enumerate(paths)):
            comp = pContents[i]
            summary, X, Y, Z, table, *raw = format_TOPS_to_OPAL(tablePath,
                                                                comp,
                                                                i)
            tableSummaries.append(summary)
            OPALTables.append(table)
            formatedTables.append({
                "X":X,
                "Y":Y,
                "Z":Z,
                "Summary":summary,
                "Table":table,
                "LogRMO": raw[2],
                "LogR": raw[0],
                "LogT": raw[1]
                })
 
        interpRMO = interp_table_to_composition(formatedTables, pContents)
        interpFormatedTables = rebuild_formated_tables(formatedTables,
                                                       interpRMO,
                                                       pContents,
                                                       upperNonRect)
 
        # Get the solar composition for use in the header
        solarCompFileName = f"{args.outputDirectory}/{args.prefix}_0.dat"
        template_compList, X, Y, Z = parse_abundance_file(solarCompFileName)
        compDict = comp_list_2_dict(template_compList)
 
        # Write all the tables to one file in the proper format
        with open(args.output, 'w') as f:
            f.write(format_OPAL_table(interpFormatedTables, compDict))