Analyzing Fiducial Points in Stellar Photometry

fiducial.py

The file 'fiducial.py' contains a Python script designed for the analysis and manipulation of stellar photometric data, focusing on the identification and extraction of fiducial points across various stages of stellar evolution (main sequence, sub-giant, and red giant branches). It employs a combination of statistical and machine learning techniques, including clustering, curve fitting, and smoothing operations to process and distinguish between different stellar populations based on their color and magnitude. The resulting fiducial points and functions are crucial for understanding stellar distribution and evolution in a given cluster or field.

import matplotlib.pyplot as plt
import pickle as pkl
import numpy as np
from scipy.optimize import minimize
from scipy.interpolate import interp1d
from functools import lru_cache, wraps
from scipy.spatial.distance import cdist
import hdbscan
from scipy.optimize import curve_fit
from sklearn.cluster import KMeans
from scipy.signal import savgol_filter
from scipy.interpolate import splrep, BSpline
 
 
def two_point(r0, ri, n=50):
    distance = cdist(r0.reshape(1,2), ri)[0]
    nSmallestDistances = distance[np.argpartition(distance, n)[:n]]
    return np.median(nSmallestDistances)
 
 
 
def iter_cut(color, mag, n=100, m=1.5):
    # presort points based on their angle off the horizontal
    # this is to try to push the distance partitionaing to a better
    # case senario
    theta = np.arctan(mag/color)
    sortedTheta = theta.argsort()
    color = color[sortedTheta]
    mag = mag[sortedTheta]
 
    ri = np.vstack((color, mag)).T
    c = np.array([two_point(x,ri,n=n) for x in ri])
    stdC = np.std(c)
    meanC = np.median(c)
    c2 = cdist(c.reshape(c.shape[0],1), np.array([meanC]).reshape(1,1)).flatten()/stdC
 
    color = color[c2 != 0]
    mag = mag[c2 != 0]
    c2b = c2.copy()
    c2 = c2[c2 != 0]
 
    condition = np.log10(c2) < m*np.median(np.log10(c2))#-1.5
    colorCut = color[condition]
    magCut = mag[condition]
    c2Cut = c2[condition]
    return {
        "color": colorCut,
        "mag": magCut,
        "sigDistCut": c2Cut,
        "corr": c,
        "log10_sigDist": np.log10(c2b)
    }
 
def smooth_sequence(color, mag, s=1):
    sortedMag = mag.argsort()
    sC = color[sortedMag]
    sM = mag[sortedMag]
    tck = splrep(sM, sC, s=s)
    f = BSpline(*tck)
 
    return f(mag), f
 
def bifricated_MS_fiducials(color, mag, binSize=0.1):
    magBins = np.arange(mag.min(), mag.max()+binSize, binSize)
    fiducialPoints = np.empty(shape=(2,magBins.shape[0]-1,2))
    for idx, (bb,bt) in enumerate(zip(magBins[:-1], magBins[1:])):
        cMag = mag[(mag >= bb) & (mag < bt)]
        cCol = color[(mag >= bb) & (mag < bt)]
        points = np.vstack((cCol, cMag)).T
        clusters = KMeans(n_clusters=2,random_state=0,n_init='auto').fit(points)
        LRdet = 0 if np.mean(points[clusters.labels_ == 0][:,0]) < np.mean(points[clusters.labels_ == 1][:,0]) else 1
        group1Points = points[clusters.labels_ == LRdet]
        group2Points = points[clusters.labels_ == int(not LRdet)]
 
        fiducialPoints[0,idx,0] = np.mean(group1Points[:,0])
        fiducialPoints[0,idx,1] = np.mean(group1Points[:,1])
        fiducialPoints[1,idx,0] = np.mean(group2Points[:,0])
        fiducialPoints[1,idx,1] = np.mean(group2Points[:,1])
    return fiducialPoints
 
def sub_MSTO_fiducial(color, mag, MSFiducials, binSize=0.1):
    gaus = lambda x, a, b, c: a*np.exp(-(x-b)**2/(2*c**2))
    belowMSColor = color[mag > MSFiducials[:,:,1].max()]
    belowMSMag = mag[mag > MSFiducials[:,:,1].max()]
 
    magRange = (np.percentile(belowMSMag,5), np.percentile(belowMSMag,95))
    belowMSColor = belowMSColor[(belowMSMag >= magRange[0]) & (belowMSMag < magRange[1])]
    belowMSMag = belowMSMag[(belowMSMag >= magRange[0]) & (belowMSMag < magRange[1])]
 
    subMSMags = np.arange(belowMSMag.min(), belowMSMag.max()+binSize, binSize)
    fiducialPoints = np.empty(shape=(subMSMags.shape[0]-1,2))
    for idx, (bb, bt) in enumerate(zip(subMSMags[:-1], subMSMags[1:])):
        cC = belowMSColor[(belowMSMag >= bb) * (belowMSMag < bt)]
        hist,bins = np.histogram(cC, density=True,bins='fd')
        centers = (bins[1:] + bins[:-1])/2
        try:
            fit, covar = curve_fit(gaus, centers, hist)
            fiducialColor = fit[1]
        except RuntimeError:
            print("FAIL")
            fiducialColor = np.median(cC)
        fiducialPoints[idx, 0] = fiducialColor
        fiducialPoints[idx, 1] = (bb+bt)/2
    return fiducialPoints
 
def bifricated_super_MSTO_fiducial(color, mag, MSFiducials, binSize=0.1):
    aboveMSColor = color[mag < MSFiducials[:,:,1].min()]
    aboveMSMag = mag[mag < MSFiducials[:,:,1].min()]
 
    magRange = (np.percentile(aboveMSMag,5), np.percentile(aboveMSMag,95))
    colorRange = (np.percentile(aboveMSColor,5), np.percentile(aboveMSColor,95))
 
    aboveMSColor = aboveMSColor[(aboveMSMag >= magRange[0]) & (aboveMSMag < magRange[1])]
    aboveMSMag = aboveMSMag[(aboveMSMag >= magRange[0]) & (aboveMSMag < magRange[1])]
 
    aboveMSMag = aboveMSMag[(aboveMSColor >= colorRange[0]) & (aboveMSColor < colorRange[1])]
    aboveMSColor = aboveMSColor[(aboveMSColor >= colorRange[0]) & (aboveMSColor < colorRange[1])]
 
    r = iter_cut(aboveMSColor.values, aboveMSMag.values, n=50,m=0)
    superMSMagBins = np.arange(r["mag"].min(), r["mag"].max()+binSize, binSize)
    fiducialPoints = np.empty(shape=(2,superMSMagBins.shape[0]-1,2))
    for idx, (bb, bt) in enumerate(zip(superMSMagBins[:-1], superMSMagBins[1:])):
        cC = r["color"][(r["mag"] >= bb) & (r["mag"] < bt)]
        cM = r["mag"][(r["mag"] >= bb) & (r["mag"]< bt)]
        colorRange = (np.percentile(cC,5), np.percentile(cC,95))
        cM = cM[(cC >= colorRange[0]) & (cC < colorRange[1])]
        cC = cC[(cC >= colorRange[0]) & (cC < colorRange[1])]
 
        points = np.vstack((cC, cM)).T
        clusters = KMeans(n_clusters=2, random_state=0,n_init='auto').fit(points)
        LRdet = 0 if np.mean(points[clusters.labels_ == 0][:,0]) < np.mean(points[clusters.labels_ == 1][:,0]) else 1
        group1Points = points[clusters.labels_ == LRdet]
        group2Points = points[clusters.labels_ == int(not LRdet)]
 
        fiducialPoints[0,idx,0] = np.mean(group1Points[:,0])
        fiducialPoints[0,idx,1] = np.mean(group1Points[:,1])
        fiducialPoints[1,idx,0] = np.mean(group2Points[:,0])
        fiducialPoints[1,idx,1] = np.mean(group2Points[:,1])
 
    return fiducialPoints
 
def sequence_mid_point(A, B, n=3,s=0.5):
    AltB = True if A[:,1].min() > B[:,1].max() else False
    sortedA = np.sort(A.copy())
    sortedB = np.sort(B.copy())
 
    if AltB:
        Ap = A[:n]
        Bp = B[-n:]
    else:
        Ap = A[-n:]
        Bp = B[:n]
 
    fiducialWindow = np.vstack((Ap,Bp))
    fiducialWindow = fiducialWindow[fiducialWindow[:,0].argsort()]
 
    _, fSmooth = smooth_sequence(fiducialWindow[:,0], fiducialWindow[:,1],s=s)
 
    if AltB:
        A[:n, 0] = fSmooth(A[:n, 1])
        B[-n:, 0] = fSmooth(B[-n:, 1])
    else:
        A[-n:, 0] = fSmooth(A[-n:, 1])
        B[:n, 0] = fSmooth(B[:n, 1])
    return A, B
 
def fiducial_points(color, mag):
    MSTOR = iter_cut(color, mag, m=1.5)
    mainSequences = np.vstack((MSTOR["color"], MSTOR["mag"])).T
 
    MSFiducials = bifricated_MS_fiducials(mainSequences[:,0], mainSequences[:,1])
 
    subMSFiducialsRoot = sub_MSTO_fiducial(color,mag,MSFiducials,binSize=0.1)
    subMSFiducials = np.empty(shape=(2,subMSFiducialsRoot.shape[0],2))
 
    subMSFiducials[0] = subMSFiducialsRoot
    subMSFiducials[1] = subMSFiducialsRoot
 
    superMSFiducials = bifricated_super_MSTO_fiducial(color, mag, MSFiducials)
 
    for sequenceID, (MS, RGB) in enumerate(zip(MSFiducials, superMSFiducials)):
        MSsmooth, RGBsmooth = sequence_mid_point(MS, RGB, n=4,s=1)
        MSFiducials[sequenceID] = MSsmooth
        superMSFiducials[sequenceID] = RGBsmooth
 
    for sequenceID, sequence in enumerate(superMSFiducials):
        superMSSmoothColor, _ = smooth_sequence(sequence[:, 0], sequence[:, 1], s=0.1)
        superMSFiducials[sequenceID, :, 0] = superMSSmoothColor
 
    for sequenceID, (PMS, MS) in enumerate(zip(subMSFiducials, MSFiducials)):
        PMSsmooth, MSsmooth = sequence_mid_point(PMS, MS,n=4,s=1)
        MSFiducials[sequenceID] = MS
        subMSFiducials[sequenceID] = PMS
 
    for sequenceID, sequence in enumerate(MSFiducials):
        MSSmoothColor, _ = smooth_sequence(sequence[:, 0], sequence[:, 1], s=0)
        MSFiducials[sequenceID, :, 0] = MSSmoothColor
 
    tfp = MSFiducials.shape[1] + subMSFiducials.shape[1] + superMSFiducials.shape[1]
    fiducialPoints = np.empty(
        shape=(
            MSFiducials.shape[0],
            tfp,
            2
        )
    )
 
 
    for sequenceID, _ in enumerate(fiducialPoints):
        fiducialPoints[sequenceID] = np.vstack(
            (subMSFiducials[sequenceID],MSFiducials[sequenceID],superMSFiducials[sequenceID])
        )
 
    return fiducialPoints
 
def fiducial_functions(color, mag, s=0):
    fiducialPoints = fiducial_points(color, mag)
    functions = list()
    for sequenceID, sequence in enumerate(fiducialPoints):
        functions.append(interp1d(sequence[:,1], sequence[:,0], bounds_error=False, fill_value="extrapolate"))
    return {i+1: f for i, f in enumerate(functions)}