"""Visualizes the FreeSolv data set using tmap."""
import pandas as pd
import tmap as tm
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit.Chem import AllChem
def main():
    """Run the visualization on script start.

    Reads ``SAMPL.csv`` from the current working directory, encodes the
    molecule in each row's ``smiles`` column as an MHFP fingerprint, indexes
    the fingerprints in a tmap LSH forest, computes a layout from that
    forest, and plots it with Faerun as a scatter layer named ``FreeSolv``
    plus a tree layer named ``FreeSolv_tree``.

    The CSV is expected to provide the columns ``smiles``, ``iupac``,
    ``expt`` and ``calc``, which are the ones read below.

    Raises:
        ValueError: If RDKit cannot parse one of the SMILES strings.
    """
    df = pd.read_csv("SAMPL.csv")

    enc = MHFPEncoder()
    lf = tm.LSHForest(2048, 128)

    fps = []
    labels = []
    total = len(df)

    for i, row in df.iterrows():
        # Progress report; relies on the frame's index being the row number.
        if i % 1000 == 0 and i > 0:
            print(f"{round(100 * (i / total))}% done ...")

        smiles = row["smiles"]
        mol = AllChem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None. Fail
            # loudly rather than skipping the row: the colour series below
            # are taken straight from df and must stay aligned with fps.
            raise ValueError(f"Could not parse SMILES in row {i}: {smiles!r}")

        fps.append(tm.VectorUint(enc.encode_mol(mol, min_radius=0)))
        # Label format is "<smiles>__<name>". Apostrophes in the IUPAC name
        # are swapped for an acute accent (U+00B4) -- presumably so they
        # cannot break quoting in the generated page; confirm against faerun.
        labels.append(smiles + "__" + row["iupac"].replace("'", "\u00b4"))

    lf.batch_add(fps)
    lf.index()

    cfg = tm.LayoutConfiguration()
    cfg.k = 100
    cfg.sl_extra_scaling_steps = 6
    cfg.sl_repeats = 2
    cfg.mmm_repeats = 2
    cfg.node_size = 2
    # x/y are the point coordinates; s/t are the tree edge endpoints that are
    # handed to add_tree below. The fifth return value is not used.
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=cfg)

    f = Faerun(
        clear_color="#222222",
        coords=False,
        view="front",
        # NOTE(review): the source had a raw line break inside this literal;
        # it is restored as an HTML line break on the assumption that the
        # impress text is rendered as HTML -- confirm.
        impress="made with tmap<br />and faerun",
    )

    f.add_scatter(
        "FreeSolv",
        {
            "x": x,
            "y": y,
            # Three colour series: experimental value, calculated value and
            # their absolute difference (titles in series_title below).
            "c": [df["expt"], df["calc"], abs(df["expt"] - df["calc"])],
            "labels": labels,
        },
        title_index=1,
        categorical=[False, False, False],
        # NOTE(review): two colormaps are given for three series; presumably
        # faerun reuses the last entry for the remainder -- confirm.
        colormap=["rainbow", "rainbow"],
        has_legend=True,
        series_title=["Experimental", "Calculated", "Diff Expt Calc"],
        point_scale=5,
        shader="smoothCircle",
    )

    f.add_tree("FreeSolv_tree", {"from": s, "to": t}, point_helper="FreeSolv")
    f.plot(template="smiles")
# Only build the visualization when executed as a script, not on import.
if __name__ == "__main__":
    main()