Astronomical Data Analysis with Advanced Clustering and Fiducial Points Identification
This Python script is designed for the analysis of astronomical data, focusing on the identification of fiducial points across main sequences and sub-giant branches in stellar populations. It employs various scientific libraries to process and analyze color-magnitude data of stars, using methods such as hierarchical clustering (HDBSCAN), KMeans clustering, curve fitting, and smoothing techniques (Savitzky-Golay filter, B-splines) to identify and refine fiducial sequences. The script is intended for advanced users with a background in astrophysics or astronomy, conducting research that requires precise characterization of stellar populations.
import matplotlib.pyplot as plt import pickle as pkl import numpy as np from scipy.optimize import minimize from scipy.interpolate import interp1d from functools import lru_cache, wraps from scipy.spatial.distance import cdist import hdbscan from scipy.optimize import curve_fit from sklearn.cluster import KMeans from scipy.signal import savgol_filter from scipy.interpolate import splrep, BSpline def two_point(r0, ri, n=50): distance = cdist(r0.reshape(1,2), ri)[0] nSmallestDistances = distance[np.argpartition(distance, n)[:n]] return np.median(nSmallestDistances) def iter_cut(color, mag, n=100, m=1.5): # presort points based on their angle off the horizontal # this is to try to push the distance partitionaing to a better # case senario theta = np.arctan(mag/color) sortedTheta = theta.argsort() color = color[sortedTheta] mag = mag[sortedTheta] ri = np.vstack((color, mag)).T c = np.array([two_point(x,ri,n=n) for x in ri]) stdC = np.std(c) meanC = np.median(c) c2 = cdist(c.reshape(c.shape[0],1), np.array([meanC]).reshape(1,1)).flatten()/stdC color = color[c2 != 0] mag = mag[c2 != 0] c2b = c2.copy() c2 = c2[c2 != 0] condition = np.log10(c2) < m*np.median(np.log10(c2))#-1.5 colorCut = color[condition] magCut = mag[condition] c2Cut = c2[condition] return { "color": colorCut, "mag": magCut, "sigDistCut": c2Cut, "corr": c, "log10_sigDist": np.log10(c2b) } def smooth_sequence(color, mag, s=1): sortedMag = mag.argsort() sC = color[sortedMag] sM = mag[sortedMag] tck = splrep(sM, sC, s=s) f = BSpline(*tck) return f(mag), f def bifricated_MS_fiducials(color, mag, binSize=0.1): magBins = np.arange(mag.min(), mag.max()+binSize, binSize) fiducialPoints = np.empty(shape=(2,magBins.shape[0]-1,2)) for idx, (bb,bt) in enumerate(zip(magBins[:-1], magBins[1:])): cMag = mag[(mag >= bb) & (mag < bt)] cCol = color[(mag >= bb) & (mag < bt)] points = np.vstack((cCol, cMag)).T clusters = KMeans(n_clusters=2,random_state=0,n_init='auto').fit(points) LRdet = 0 if np.mean(points[clusters.labels_ == 0][:,0]) < np.mean(points[clusters.labels_ == 1][:,0]) else 1 group1Points = points[clusters.labels_ == LRdet] group2Points = points[clusters.labels_ == int(not LRdet)] fiducialPoints[0,idx,0] = np.mean(group1Points[:,0]) fiducialPoints[0,idx,1] = np.mean(group1Points[:,1]) fiducialPoints[1,idx,0] = np.mean(group2Points[:,0]) fiducialPoints[1,idx,1] = np.mean(group2Points[:,1]) return fiducialPoints def sub_MSTO_fiducial(color, mag, MSFiducials, binSize=0.1): gaus = lambda x, a, b, c: a*np.exp(-(x-b)**2/(2*c**2)) belowMSColor = color[mag > MSFiducials[:,:,1].max()] belowMSMag = mag[mag > MSFiducials[:,:,1].max()] magRange = (np.percentile(belowMSMag,5), np.percentile(belowMSMag,95)) belowMSColor = belowMSColor[(belowMSMag >= magRange[0]) & (belowMSMag < magRange[1])] belowMSMag = belowMSMag[(belowMSMag >= magRange[0]) & (belowMSMag < magRange[1])] subMSMags = np.arange(belowMSMag.min(), belowMSMag.max()+binSize, binSize) fiducialPoints = np.empty(shape=(subMSMags.shape[0]-1,2)) for idx, (bb, bt) in enumerate(zip(subMSMags[:-1], subMSMags[1:])): cC = belowMSColor[(belowMSMag >= bb) * (belowMSMag < bt)] hist,bins = np.histogram(cC, density=True,bins='fd') centers = (bins[1:] + bins[:-1])/2 try: fit, covar = curve_fit(gaus, centers, hist) fiducialColor = fit[1] except RuntimeError: print("FAIL") fiducialColor = np.median(cC) fiducialPoints[idx, 0] = fiducialColor fiducialPoints[idx, 1] = (bb+bt)/2 return fiducialPoints def bifricated_super_MSTO_fiducial(color, mag, MSFiducials, binSize=0.1): aboveMSColor = color[mag < MSFiducials[:,:,1].min()] aboveMSMag = mag[mag < MSFiducials[:,:,1].min()] magRange = (np.percentile(aboveMSMag,5), np.percentile(aboveMSMag,95)) colorRange = (np.percentile(aboveMSColor,5), np.percentile(aboveMSColor,95)) aboveMSColor = aboveMSColor[(aboveMSMag >= magRange[0]) & (aboveMSMag < magRange[1])] aboveMSMag = aboveMSMag[(aboveMSMag >= magRange[0]) & (aboveMSMag < magRange[1])] aboveMSMag = aboveMSMag[(aboveMSColor >= colorRange[0]) & (aboveMSColor < colorRange[1])] aboveMSColor = aboveMSColor[(aboveMSColor >= colorRange[0]) & (aboveMSColor < colorRange[1])] r = iter_cut(aboveMSColor.values, aboveMSMag.values, n=50,m=0) superMSMagBins = np.arange(r["mag"].min(), r["mag"].max()+binSize, binSize) fiducialPoints = np.empty(shape=(2,superMSMagBins.shape[0]-1,2)) for idx, (bb, bt) in enumerate(zip(superMSMagBins[:-1], superMSMagBins[1:])): cC = r["color"][(r["mag"] >= bb) & (r["mag"] < bt)] cM = r["mag"][(r["mag"] >= bb) & (r["mag"]< bt)] colorRange = (np.percentile(cC,5), np.percentile(cC,95)) cM = cM[(cC >= colorRange[0]) & (cC < colorRange[1])] cC = cC[(cC >= colorRange[0]) & (cC < colorRange[1])] points = np.vstack((cC, cM)).T clusters = KMeans(n_clusters=2, random_state=0,n_init='auto').fit(points) LRdet = 0 if np.mean(points[clusters.labels_ == 0][:,0]) < np.mean(points[clusters.labels_ == 1][:,0]) else 1 group1Points = points[clusters.labels_ == LRdet] group2Points = points[clusters.labels_ == int(not LRdet)] fiducialPoints[0,idx,0] = np.mean(group1Points[:,0]) fiducialPoints[0,idx,1] = np.mean(group1Points[:,1]) fiducialPoints[1,idx,0] = np.mean(group2Points[:,0]) fiducialPoints[1,idx,1] = np.mean(group2Points[:,1]) return fiducialPoints def sequence_mid_point(A, B, n=3,s=0.5): AltB = True if A[:,1].min() > B[:,1].max() else False sortedA = np.sort(A.copy()) sortedB = np.sort(B.copy()) if AltB: Ap = A[:n] Bp = B[-n:] else: Ap = A[-n:] Bp = B[:n] fiducialWindow = np.vstack((Ap,Bp)) fiducialWindow = fiducialWindow[fiducialWindow[:,0].argsort()] _, fSmooth = smooth_sequence(fiducialWindow[:,0], fiducialWindow[:,1],s=s) if AltB: A[:n, 0] = fSmooth(A[:n, 1]) B[-n:, 0] = fSmooth(B[-n:, 1]) else: A[-n:, 0] = fSmooth(A[-n:, 1]) B[:n, 0] = fSmooth(B[:n, 1]) return A, B def fiducial_points(color, mag): MSTOR = iter_cut(color, mag, m=1.5) mainSequences = np.vstack((MSTOR["color"], MSTOR["mag"])).T MSFiducials = bifricated_MS_fiducials(mainSequences[:,0], mainSequences[:,1]) subMSFiducialsRoot = sub_MSTO_fiducial(color,mag,MSFiducials,binSize=0.1) subMSFiducials = np.empty(shape=(2,subMSFiducialsRoot.shape[0],2)) subMSFiducials[0] = subMSFiducialsRoot subMSFiducials[1] = subMSFiducialsRoot superMSFiducials = bifricated_super_MSTO_fiducial(color, mag, MSFiducials) for sequenceID, (MS, RGB) in enumerate(zip(MSFiducials, superMSFiducials)): MSsmooth, RGBsmooth = sequence_mid_point(MS, RGB, n=4,s=1) MSFiducials[sequenceID] = MSsmooth superMSFiducials[sequenceID] = RGBsmooth for sequenceID, sequence in enumerate(superMSFiducials): superMSSmoothColor, _ = smooth_sequence(sequence[:, 0], sequence[:, 1], s=0.1) superMSFiducials[sequenceID, :, 0] = superMSSmoothColor for sequenceID, (PMS, MS) in enumerate(zip(subMSFiducials, MSFiducials)): PMSsmooth, MSsmooth = sequence_mid_point(PMS, MS,n=4,s=1) MSFiducials[sequenceID] = MS subMSFiducials[sequenceID] = PMS for sequenceID, sequence in enumerate(MSFiducials): MSSmoothColor, _ = smooth_sequence(sequence[:, 0], sequence[:, 1], s=0) MSFiducials[sequenceID, :, 0] = MSSmoothColor tfp = MSFiducials.shape[1] + subMSFiducials.shape[1] + superMSFiducials.shape[1] fiducialPoints = np.empty( shape=( MSFiducials.shape[0], tfp, 2 ) ) for sequenceID, _ in enumerate(fiducialPoints): fiducialPoints[sequenceID] = np.vstack( (subMSFiducials[sequenceID],MSFiducials[sequenceID],superMSFiducials[sequenceID]) ) return fiducialPoints def fiducial_functions(color, mag, s=0): fiducialPoints = fiducial_points(color, mag) functions = list() for sequenceID, sequence in enumerate(fiducialPoints): functions.append(interp1d(sequence[:,1], sequence[:,0], bounds_error=False, fill_value="extrapolate")) return {i+1: f for i, f in enumerate(functions)}