"""Visualizes the ESOL data set using tmap."""
import pandas as pd
import tmap as tm
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit.Chem import AllChem
def main():
    """Build and plot a TMAP of the ESOL (Delaney) solubility data set.

    Reads ``delaney-processed.csv`` from the current working directory,
    encodes every SMILES string as an MHFP fingerprint, indexes the
    fingerprints in an LSH forest, computes a layout with tmap and passes
    the coordinates, tree edges and per-compound properties to Faerun,
    which is asked to plot them with the ``smiles`` template.

    Raises:
        ValueError: If a SMILES string in the CSV cannot be parsed by RDKit.
    """
    df = pd.read_csv("delaney-processed.csv")

    enc = MHFPEncoder()
    # NOTE(review): 2048 presumably has to match the fingerprint length
    # produced by the default MHFPEncoder - confirm against the tmap docs.
    lf = tm.LSHForest(2048, 128)

    fps = []
    labels = []
    total = len(df)

    # Walk the two needed columns in lockstep; the positional counter keeps
    # the progress output correct regardless of the DataFrame's index labels.
    for i, (smiles, compound_id) in enumerate(
        zip(df["smiles"], df["Compound ID"])
    ):
        if i % 1000 == 0 and i > 0:
            print(f"{round(100 * (i / total))}% done ...")

        mol = AllChem.MolFromSmiles(smiles)
        if mol is None:
            # Skipping the row is not an option: the fingerprints must stay
            # aligned one-to-one with the DataFrame columns plotted below.
            raise ValueError(f"Could not parse SMILES in row {i}: {smiles!r}")

        fps.append(tm.VectorUint(enc.encode_mol(mol, min_radius=0)))
        # Label format is "<smiles>__<compound id>". Apostrophes in the name
        # are swapped for an acute accent (U+00B4), written as an escape so
        # the source file's encoding cannot garble it.
        labels.append(smiles + "__" + compound_id.replace("'", "\u00b4"))

    lf.batch_add(fps)
    lf.index()

    cfg = tm.LayoutConfiguration()
    cfg.k = 100
    cfg.sl_repeats = 2
    cfg.mmm_repeats = 2
    cfg.node_size = 2
    # x, y: point coordinates; s, t: endpoints of the tree edges.
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=cfg)

    predicted = df["ESOL predicted log solubility in mols per litre"]
    measured = df["measured log solubility in mols per litre"]

    f = Faerun(
        clear_color="#222222",
        coords=False,
        view="front",
        # NOTE(review): the original literal was split over two physical
        # lines (a syntax error). The break is written as <br /> on the
        # assumption that Faerun inserts the impress text as HTML - confirm.
        impress="made with tmap<br />and faerun",
    )
    # The "c" series and the categorical / colormap / series_title lists are
    # positional and must stay in the same order (nine entries each).
    f.add_scatter(
        "ESOL",
        {
            "x": x,
            "y": y,
            "c": [
                predicted,
                measured,
                (measured - predicted).abs(),
                df["Minimum Degree"],
                df["Molecular Weight"],
                df["Number of H-Bond Donors"],
                df["Number of Rings"],
                df["Number of Rotatable Bonds"],
                df["Polar Surface Area"],
            ],
            "labels": labels,
        },
        # NOTE(review): presumably selects the compound ID (second
        # "__"-separated label field) as the point title - confirm.
        title_index=1,
        categorical=[False, False, False, True, False, False, True, False, False],
        colormap=[
            "viridis",
            "viridis",
            "viridis",
            "Set1",
            "viridis",
            "viridis",
            "Set1",
            "viridis",
            "viridis",
        ],
        has_legend=True,
        series_title=[
            "ESOL [mols/L]",
            "Measured [mols/L]",
            "Diff ESOL Measured",
            "Minimum Degree",
            "Molecular Weight",
            "H-Bond Donors",
            "Ring Count",
            "Rotatable Bonds Count",
            "Polar Surface Area",
        ],
        point_scale=5,
        shader="smoothCircle",
    )
    # The tree reuses the points of the "ESOL" scatter layer added above.
    f.add_tree("ESOL_tree", {"from": s, "to": t}, point_helper="ESOL")
    f.plot(template="smiles")
# Run the visualization only when executed as a script, not when imported.
if __name__ == "__main__":
    main()