Astrophysical Opacity Table Fetching and Formatting Tool

This script is designed to automate the process of fetching, processing, and formatting opacity tables from the TOPS database for use in astrophysical simulations. It includes functionality to call external programs for calculating number and mass fractions of elements based on input metallicity, to parse and validate these fractions, to fetch opacity tables from a web resource, and to format the retrieved data into a structure compatible with the OPAL database format for use in simulations like DSEP. Additional features include command-line arguments for specifying input parameters and output paths, error handling for file operations, and options for force-overwriting existing data.
import argparse
import shutil
import os
import subprocess
import numpy as np
from bs4 import BeautifulSoup
import re
from scipy import interpolate
from tqdm import tqdm
 
TOPS_URL = "https://aphysics2.lanl.gov/apps/"
 
def call_num_frac(prog, name, feh, alpha, Y, force=False):
    if os.path.exists(name):
        if force:
            os.remove(name)
        else:
            raise OSError(f"File {name} exists, you can force this file to be overwritten with the -f/--force flag")
    numFrac = subprocess.Popen(prog, stdin=subprocess.PIPE)
    stdin = numFrac.stdin
 
    stdin.write(f"{name}\n".encode('utf-8'))
    stdin.write(f"{feh}\n".encode('utf-8'))
    stdin.write(f"{alpha}\n".encode('utf-8'))
    stdin.write(f"{Y}\n".encode('utf-8'))
 
    stdin.close()
    status = numFrac.wait()
 
    return status
 
def parse_abundence_file(filePath):
    with open(filePath) as f:
        contents = f.read().split('\n')
    metaLine = contents[49].replace('+/-', '').replace('=', '').split()
    Z = float(metaLine[1])
    ZX = float(metaLine[4])
    X = Z/ZX
    Y = 1 - Z - X
    NumberFractions = [x.replace('=', '').split()  for x in contents[2:25]]
    NumberFractions = [(x[0], float(x[1])*Z) for x in NumberFractions]
 
    massFractions = [x.replace('=', '').split()  for x in contents[26:49]]
    massFractions = [(x[0], float(x[1])) for x in massFractions]
 
    return massFractions, X, Y, Z
 
def format_TOPS_string(fracs):
    TOPS_abundence_string = ' '.join([f"{x[1]:0.10f} {x[0]}" for x in fracs])
    return TOPS_abundence_string
 
def submit_TOPS_form(br, mixString, mixName, massFrac=True):
    mixString = "0.7473676929 H 0.2526151498 He 0.0000029492 C 0.0000008639 N 0.0000080209 O 0.0000017989 Ne 0.0000000356 Na 0.0000006852 Mg 0.0000000618 Al 0.0000007561 Si 0.0000000073 P 0.0000003945 S 0.0000000050 Cl 0.0000000744 Ar 0.0000000665 Ca 0.0000000034 Ti 0.0000000189 Cr 0.0000000132 Mn 0.0000013095 Fe 0.0000000774 Ni 0.0000000105 Li 0.0000000002 Be 0.0000000049 B"
    br.open(TOPS_URL)
    br.select_form(nr=0)
    if massFrac:
        br.form.find_control(name="fractype").value = ['atomic']
    br.form['mixture'] = mixString
    br.form['mixname'] = mixName
 
    tlow = br.find_control(name="tlow", type="select").get("0.0005")
    tlow.selected = True
    tup = br.find_control(name="tup", type="select").get("60")
    tup.selected = True
 
    br.form['rlow'] = "1.77827941e-15"
    br.form['rup'] = "1.25892541e+09"
    br.form['nr'] = '19'
 
 
    r1 = br.submit()
 
    br.select_form(nr=0)
    r2 = br.submit()
 
    tableHTML = r2.read()
    return tableHTML
 
def parse_table(html):
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('code').prettify().replace('<br/>', '')
    table = table.split('\n')
    table = [x for x in table[1:-2] if x.lstrip().rstrip() != '']
    table = '\n'.join(table)
    return table
 
def validate_extant_tables(path, prefix):
    files = os.listdir(path)
    abundenceFiles = list(filter(lambda x: prefix in x, files))
    opacityTables = list(filter(lambda x: 'OP:' in x, files))
 
    validated = all(map(lambda x: any([f"OP:{x.split('_')[-1].split('.')[0]}" in y for y in opacityTables]), abundenceFiles))
    return validated
 
def parse_RMO_TOPS_table_file(TOPSTable):
    with open(TOPSTable) as f:
        contents = f.read().replace("\u00A0", "\t")
    tgroups = re.findall(r"(Rosseland\s+and\s+Planck\s+opacities\s+and\s+free\s+electrons\s+Density\s+Ross\s+opa\s+Planck\s+opa\s+No\.\s+Free\s+Av\s+Sq\s+Free\s+(T=\s+\d+\.?\d+E[-|+]\d+)\s+((\d\.\d+E[-|+]\d+(\s+)?){5}\n?){19})", contents)
    temperatures = np.array([float(x[1].split()[1])*11604525.0061657 for x in tgroups])
    subTables = [x[0].split('\n')[2:] for x in tgroups]
    subTables = [[[float(z) for z in y.split()] for y in x if y.rstrip() != ''] for x in subTables]
    subTables = np.array(subTables)
    R = np.array([x/((y*1e-6)**3) for x, y in zip(subTables[:, :, 0], temperatures)])
    RMO = subTables[:, :, 1]
    logT = np.log10(temperatures)
    logR = np.log10(R[0])
    logRMO = np.log10(RMO)
 
    OPALTableInit = np.zeros(shape=(logR.shape[0], logT.shape[0]))
    for i, T in enumerate(logT):
        OPALTableInit[:, i] = logRMO[i]
    return logR, logT, OPALTableInit
 
def interpolate_TOPS_to_dsep(logR, logT, TOPSTable):
    interpFunc = interpolate.interp2d(logT, logR, TOPSTable)
    targetLogR = np.arange(-8.0, 1.5, 0.5)
    targetLogT_A = np.arange(3.75, 6.0, 0.05)
    targetLogT_B = np.arange(6.0, 8.1, 0.1)
    targetLogT_C = np.arange(8.1, 8.8, 0.2)
    targetLogT = np.concatenate((targetLogT_A, targetLogT_B, targetLogT_C), axis=None)
    targetTable = interpFunc(targetLogT, targetLogR)
    return targetLogR, targetLogT, targetTable
 
 
def extract_composition_path(path):
    filename = os.path.basename(path)
    filename = filename.strip('.dat')
    filenameParts = filename.split('_')
    X = float(filenameParts[1])
    Y = float(filenameParts[2])
    Z = float(filenameParts[3])
    return X, Y, Z
 
def format_TOPS_to_OPAL(TOPSTable, N):
    logRInit, logTInit, OPALTableInit = parse_RMO_TOPS_table_file(TOPSTable)
    logR, logT, OPALTable = interpolate_TOPS_to_dsep(logRInit, logTInit, OPALTableInit)
    logRHeader = '  '.join(["{: 0.2f}".format(round(x, 2)) for x in logR])
    X, Y, Z = extract_composition_path(TOPSTable)
    metaLine = f"TABLE #  TNUM     202012090001       X={abs(X):0.4f} Y={abs(Y):0.4f} Z={abs(Z):0.4f} dX1=0.0000 dX2=0.0000"
    titleLine = "                                                  log R"
    headerLine = f"logT  {logRHeader}"
 
    dataLines = list()
    for line, temperature in zip(OPALTable.T, logT):
        adjustedLine = [x if x < 9.99 else 9.99 for x in line]
        joinedLine = '  '.join(["{: 0.3f}".format(round(x, 3)) for x in adjustedLine])
        dataLines.append(f"{temperature:0.2f}  {joinedLine}")
    dataTable = '\n'.join(dataLines)
    fullTable = '\n'.join([metaLine, '', titleLine, '', headerLine, '', dataTable, ''])
    return metaLine, X, Y, Z, fullTable
 
def format_OPAL_header():
    headerBaseString = """
  Updated TOPS Rosseland mean opacity tables   (date 20201209)
  Description of modifications given in Iglesias & Rogers, ApJ464,943(1996)
 
   lines 1-60 contain brief table description
   lines 61-240 contain table summaries
   lines 241-end contain tables. Each table is 77 lines long
 
  NOTE: File is compatible with interpolation routines.
 
  Definitions:
 
  The logarithm of the Rosseland mean opacity [cm**2/g] as a function
  of log(T) for columns of constant log(R), where
 
     R=density[g/cm**3]/T6**3, T6=1.e-6*T[degrees]
     log(T) range: 70 values from 3.75 to 8.70
     log(R) range: 19 values from -8.0 to +1.0
 
  NOTE: Tables are NOT rectangular
        values=9.999 or blanks are out of table range
 
  Composition parameters:
    X   = Hydrogen mass fraction
    Y   = Helium mass fraction
    Z   = Metal mass fraction
    dX1 is not relevant for Type 1 tables
    dX2 is not relevant for Type 1 tables
 
  Abundances for 202012090001     composition: GS98 mix (N.Grevesse & A.J.Sauval 1998, Space Sci. Rev. 85, 161)
 
  Element   Abundance - relative metal: number fraction   mass fraction   atomic mass
    """
 
    AbuncenceSummary = f"""
 Element   Abundance - relative metal: number fraction   mass fraction   atomic mass
   H     log(A)=                                                           1.00790
   He    log(A)=                                                           4.00260
   Li    log(A)=
   Be    log(A)=
   B     log(A)=
   C     log(A)=        -----------------  0.245825        0.171836       12.01100
   N     log(A)=        -----------------  0.061748        0.050335       14.00670
   O     log(A)=        -----------------  0.501922        0.467356       15.99940
   F     log(A)=
   Ne    log(A)=        -----------------  0.089265        0.104831       20.17900
   Na    log(A)=        -----------------  0.001562        0.002090       22.98977
   Mg    log(A)=        -----------------  0.028224        0.039924       24.30500
   Al    log(A)=        -----------------  0.002294        0.003603       26.98154
   Si    log(A)=        -----------------  0.026954        0.044057       28.08550
   P     log(A)=        -----------------  0.000235        0.000423       30.97376
   S     log(A)=        -----------------  0.012602        0.023513       32.06000
   Cl    log(A)=        -----------------  0.000141        0.000292       35.45300
   Ar    log(A)=        -----------------  0.001865        0.004335       39.94800
   K     log(A)=        -----------------  0.000100        0.000228       39.09830
   Ca    log(A)=        -----------------  0.001670        0.003896       40.08000
   Sc    log(A)=
   Ti    log(A)=        -----------------  0.000070        0.000195       47.90000
   V     log(A)=
   Cr    log(A)=        -----------------  0.000369        0.001117       51.99600
   Mn    log(A)=        -----------------  0.000244        0.000779       54.93800
   Fe    log(A)=        -----------------  0.023517        0.076433       55.84700
   Co    log(A)=
   Ni    log(A)=        -----------------  0.001393        0.004757       58.70000
    """
    return '\n'.join([headerBaseString, '', AbuncenceSummary, ''])
 
 
 
def format_OPAL_table(tableDict):
    header = format_OPAL_header()
    summaryHeader = f"Table Summaries- There are {len(tableDict)} tables"
    summaries = map(lambda x: x[1]['Summary'].replace('TNUM', f"{x[0]+1: d}"), enumerate(tableDict))
    summary = '\n'.join(list(summaries))
    interStage = '\n'.join(['']*50)
    tableStart = "************************************ Tables ************************************"
    tables = map(lambda x: x[1]['Table'].replace('TNUM', f"{x[0]+1: d}"), enumerate(tableDict))
    tablesFull = '\n'.join(list(tables))
    return '\n'.join([header, summaryHeader, '', summary, interStage, tableStart, '', tablesFull])
 
 
def validate_composition(NumberFractions, tolerence=0.0001):
        total = 0
        for e in NumberFractions:
            total += e[1]
        return (not abs(total-1) > tolerence, total)
 
def update_mass_fraction(massFraction, target, new):
    for i, (element, old) in enumerate(massFraction):
        if element == target:
            key = i
    massFraction[key] = (target, new)
    return massFraction
 
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate opacity table in a format dsep can work with from TOPS")
    parser.add_argument("numFracProgram", help="path to program to generate Number and Mass fractions", type=str)
    parser.add_argument("-p", "--prefix", help="Abundance file prefix", default="TOPSA", type=str)
    parser.add_argument("-f", "--force", help="force the generation of new abunance tables", action="store_true")
    parser.add_argument("--feh", help="[Fe/H] in solar units. Format as <min max number> for range", type=float, nargs=3)
    parser.add_argument("--alpha",help="[alpha/H] in solar units. Format as <min max number> for range", type=float, nargs=3)
    parser.add_argument("-d", "--outputDirectory", help="directory to save abundence files too", default=".", type=str)
    parser.add_argument("--opal", help="Run the code to convert TOPS table to OPAL compativle tables", action="store_true")
    parser.add_argument("--nofetch", help="do not fetch opacity tables from TOPS", action='store_true')
    parser.add_argument("-o", "--output", help="file to write OPAL formated table to", default="TOPAL.dat", type=str)
 
    args = parser.parse_args()
 
 
 
    if not args.nofetch:
        if os.path.exists(args.outputDirectory) and args.force and not validate_extant_tables(args.outputDirectory, args.prefix):
            shutil.rmtree(args.outputDirectory)
        if os.path.exists(args.outputDirectory) and args.force and validate_extant_tables(args.outputDirectory, args.prefix):
            raise OSError(f"Cannot force override of {args.outputDirectory} as it has been validated as complete")
        elif not args.force and os.path.exists(args.outputDirectory) and not validate_extant_tables(args.outputDirectory, args.prefix):
            raise OSError(f"Output dir {args.outputDirectory} exists, you can force this path to be overwritten with the -f/--force flag")
        elif not args.force and os.path.exists(args.outputDirectory) and validate_extant_tables(args.outputDirectory, args.prefix):
            raise OSError(f"Output dir {args.outputDirectory} already exists and is validated as complete, set the --opal flag to generate OPAL compatible tables")
        os.mkdir(args.outputDirectory)
 
 
        FeH = np.linspace(args.feh[0], args.feh[1], int(args.feh[2]))
        AlphaH = np.linspace(args.alpha[0], args.alpha[1], int(args.alpha[2]))
 
        afiles = list()
        for i, feh in enumerate(FeH):
            for j, alphah in enumerate(AlphaH):
                fileName = f"{args.outputDirectory}/{args.prefix}_{int(i*args.alpha[2] + j)}.dat"
                numFracStatus = call_num_frac(args.numFracProgram, fileName, feh, alphah, 0.20, force=args.force)
                print("status: ", numFracStatus)
                if int(numFracStatus) == 0:
                    afiles.append(fileName)
 
        template_massFractions, X, Y, Z = parse_abundence_file(f"{args.outputDirectory}/{args.prefix}_0.dat")
        with open('AGSS09AbundenceMap', 'r') as f:
            AbundencesToMatch = f.read().split('\n')
        AbundencesToMatch = filter(lambda x: x != '', AbundencesToMatch)
        AbundencesToMatch = [[float(y) for y in x.split(',')] for x in AbundencesToMatch]
        for i, file in tqdm(enumerate(afiles), total=len(afiles)):
            massFractions, X, Y, Z = parse_abundence_file(file)
            mixString = format_TOPS_string(massFractions)
            mixName = f"X{int(X*1000)} Y{int(Y*1000)} Z{int(Z*1000)}"
            br = mechanize.Browser()
            tableHTML = submit_TOPS_form(br, mixString, mixName)
            table = parse_table(tableHTML)
            with open(f"{args.outputDirectory}/OP:{i}_{X}_{Y}_{Z}.dat", 'w') as f:
                f.write(table)
            br.close()
            continue
 
 
    if args.opal:
        if not validate_extant_tables(args.outputDirectory, args.prefix):
            raise OSError(f"Unable to validate quality of TOPS opacity tables in {args.outputDirectory}, run again without --nofetch flag to refetch tables")
        TOPSTables = filter(lambda x: re.match("OP:\d+", os.path.basename(x)), os.listdir(args.outputDirectory))
        tableSummaries = list()
        OPALTables = list()
        formatedTables = list()
        for i, tablePath in enumerate(map(lambda x: os.path.join(args.outputDirectory, x), TOPSTables)):
            print(tablePath)
            summary, X, Y, Z, table = format_TOPS_to_OPAL(tablePath, i+1)
            tableSummaries.append(summary)
            OPALTables.append(table)
            formatedTables.append({"X":X, "Y":Y, "Z":Z, "Summary":summary, "Table":table})
 
        sortedFormatedTables = sorted(formatedTables, key=lambda x: (x['X'], x['Y'], x['Z']))
        with open(args.output, 'w') as f:
            f.write(format_OPAL_table(sortedFormatedTables))