如何使用rdkit正确产生高斯坐标 - 计算化学公社

def generate_molecule_hash(smiles):
"""
生成分子SMILES的哈希值，用于避免重复生成相同分子的XYZ文件
"""
return hashlib.md5(smiles.encode('utf-8')).hexdigest()[:8]
def mol_to_xyz_with_smiles(mol, file_path, smiles=None, molecule_type=None):
"""
将rdkit分子对象写为xyz文件，并在文件头部添加SMILES注释和分子类型信息。
参数:
mol: RDKit Mol对象
file_path: 输出文件路径
smiles: SMILES字符串（用于备注）
molecule_type: 分子类型（Lipid, Probe或Product）
"""
try:
# 检查输入分子是否有效
if mol is None:
raise ValueError("输入分子对象为空")
# 确保分子有氢原子
mol = Chem.AddHs(mol)
# 检查并生成3D坐标
conf = None
conformer_error = None
# 步骤1: 尝试获取现有构象
try:
conf = mol.GetConformer()
if not conf.Is3D():
print(f"警告: 分子已有构象但不是3D构象，将尝试生成3D坐标")
conformer_error = "现有构象不是3D"
conf = None
except Exception as e:
print(f"信息: 分子没有现有构象 ({str(e)})，将尝试生成3D坐标")
conformer_error = str(e)
conf = None
# 步骤2: 如果没有有效构象，尝试ETKDG方法生成（更高级的构象生成方法）
if conf is None:
try:
print(f"尝试使用ETKDG方法为分子生成3D构象...")
AllChem.EmbedMolecule(mol, AllChem.ETKDG())
AllChem.UFFOptimizeMolecule(mol)
conf = mol.GetConformer()
if not conf.Is3D():
raise ValueError("ETKDG方法生成的构象不是3D")
print(f"ETKDG方法成功生成3D构象")
except Exception as e:
print(f"ETKDG方法失败: {str(e)}，尝试基本嵌入方法...")
conformer_error = f"{conformer_error} -> ETKDG失败: {str(e)}"
# 步骤3: 如果ETKDG失败，尝试基本的嵌入方法
if conf is None or not conf.Is3D():
try:
print(f"尝试使用基本嵌入方法生成3D构象...")
# 先清除所有构象
while mol.GetNumConformers() > 0:
mol.RemoveConformer(0)
# 尝试多次随机种子
for seed in [42, 123, 987, 555]:
ps = AllChem.ETKDGv3()
ps.randomSeed = seed
cid = AllChem.EmbedMolecule(mol, ps)
if cid >= 0: # 成功嵌入
conf = mol.GetConformer(cid)
if conf.Is3D():
print(f"基本嵌入方法在种子{seed}下成功")
# 优化构象
AllChem.UFFOptimizeMolecule(mol, confId=cid)
break
if conf is None or not conf.Is3D():
raise ValueError("多次尝试后仍无法生成有效的3D构象")
except Exception as e:
print(f"所有嵌入方法均失败: {str(e)}")
conformer_error = f"{conformer_error} -> 基本嵌入失败: {str(e)}"
raise ValueError(f"无法生成3D构象: {conformer_error}")
# 确认我们有有效的3D构象
if conf is None or not conf.Is3D():
raise ValueError("尝试所有方法后仍无法获得有效的3D构象")
# 写入XYZ文件
with open(file_path, 'w') as f:
f.write(f"{mol.GetNumAtoms()}\n")
comments = []
if molecule_type:
comments.append(f"Type: {molecule_type}")
if smiles:
comments.append(f"SMILES: {smiles}")
f.write(f"{' | '.join(comments)}\n")
for atom in mol.GetAtoms():
pos = conf.GetAtomPosition(atom.GetIdx())
f.write(f"{atom.GetSymbol()} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}\n")
return True
except Exception as e:
print(f"警告: 无法将分子转换为XYZ格式: {e}")
return False
def smiles_to_xyz(smiles, file_path, molecule_type=None):
"""
从SMILES生成XYZ文件（备用方法）
"""
if not smiles:
return False
try:
mol = Chem.MolFromSmiles(smiles)
if mol is None:
raise ValueError("无效的SMILES字符串")
return mol_to_xyz_with_smiles(mol, file_path, smiles, molecule_type)
except Exception as e:
print(f"警告: 无法从SMILES生成XYZ文件: {e}")
return False
def safe_write_xyz(mol, smiles, filename, output_dir, molecule_type):
"""
安全地生成XYZ文件，优先使用RDKit Mol对象，失败时尝试使用SMILES
返回:
tuple: (文件名, 成功状态, 使用的方法)
"""
file_path = os.path.join(output_dir, filename)
# 检查文件是否已存在
if os.path.exists(file_path):
return filename, True, "existing"
# 尝试使用RDKit Mol对象生成XYZ
if mol is not None:
success = mol_to_xyz_with_smiles(mol, file_path, smiles, molecule_type)
if success:
return filename, True, "mol"
# 如果Mol对象失败或不存在，尝试使用SMILES
if smiles:
success = smiles_to_xyz(smiles, file_path, molecule_type)
if success:
return filename, True, "smiles"
# 如果都失败了，返回失败信息
return filename, False, "failed"

复制代码

'''
This script generates xyz files from a SMILES string. This script requires
RDKit which should be at least version 2020.03. Cite the following paper
if you use this script in any published work.
S. Wang, J. Witek, G. A. Landrum, S. Riniker, J. Chem. Inf. Model. 2020, 60(4), 2044–2058
Generate one conformer for a molecule with the following command:
python smiles2xyz.py -s "CC(CC1=CC=CC=C1)[N+]2=NOC(=C2)/N=C(/NC3=CC=CC=C3)\[O-]" -c mesocarb
Generate ten conformers for a molecule with the following command:
python smiles2xyz.py -s "CC(CC1=CC=CC=C1)[N+]2=NOC(=C2)/N=C(/NC3=CC=CC=C3)\[O-]" -c mesocarb -n 10
'''
import sys
import argparse
from rdkit import Chem
from rdkit.Chem import AllChem
def write_xyz_file(fragment, fragment_name):
number_of_atoms = fragment.GetNumAtoms()
symbols = [a.GetSymbol() for a in fragment.GetAtoms()]
fNames = []
for i,conf in enumerate(fragment.GetConformers()):
file_name = fragment_name+"_"+str(i)+".xyz"
fNames.append(file_name)
with open(file_name, "w") as file:
file.write(str(number_of_atoms)+"\n")
file.write("\n")
for atom,symbol in enumerate(symbols):
p = conf.GetAtomPosition(atom)
line = " ".join((symbol,str(p.x),str(p.y),str(p.z),"\n"))
file.write(line)
return fNames
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument('--string', '-s', required=True, type=str, help='SMILES string')
parser.add_argument('--compound', '-c', required=True, type=str, help='Compound name, which will be used as the prefix for xyz file')
parser.add_argument('--number', '-n', default=argparse.SUPPRESS, type=int, help='Number of conformers for a compound')
args = parser.parse_args()
mol = Chem.MolFromSmiles(args.string)
mol_h = Chem.AddHs(mol)
params = Chem.rdDistGeom.srETKDGv3()
params.randomSeed = 0xf00d
params.clearConfs = True
if 'number' in args:
if args.number > 0:
number_of_conformers = args.number
else:
print("The argument for number of conformers in the input cannot be used.")
sys.exit(1)
else:
number_of_conformers = 1
cids = AllChem.EmbedMultipleConfs(mol_h, number_of_conformers, params)
fn = write_xyz_file(mol_h, args.compound)

复制代码