|
- def generate_molecule_hash(smiles):
- """
- 生成分子SMILES的哈希值,用于避免重复生成相同分子的XYZ文件
- """
- return hashlib.md5(smiles.encode('utf-8')).hexdigest()[:8]
- def mol_to_xyz_with_smiles(mol, file_path, smiles=None, molecule_type=None):
- """
- 将rdkit分子对象写为xyz文件,并在文件头部添加SMILES注释和分子类型信息。
-
- 参数:
- mol: RDKit Mol对象
- file_path: 输出文件路径
- smiles: SMILES字符串(用于备注)
- molecule_type: 分子类型(Lipid, Probe或Product)
- """
- try:
- # 检查输入分子是否有效
- if mol is None:
- raise ValueError("输入分子对象为空")
-
- # 确保分子有氢原子
- mol = Chem.AddHs(mol)
-
- # 检查并生成3D坐标
- conf = None
- conformer_error = None
-
- # 步骤1: 尝试获取现有构象
- try:
- conf = mol.GetConformer()
- if not conf.Is3D():
- print(f"警告: 分子已有构象但不是3D构象,将尝试生成3D坐标")
- conformer_error = "现有构象不是3D"
- conf = None
- except Exception as e:
- print(f"信息: 分子没有现有构象 ({str(e)}),将尝试生成3D坐标")
- conformer_error = str(e)
- conf = None
-
- # 步骤2: 如果没有有效构象,尝试ETKDG方法生成(更高级的构象生成方法)
- if conf is None:
- try:
- print(f"尝试使用ETKDG方法为分子生成3D构象...")
- AllChem.EmbedMolecule(mol, AllChem.ETKDG())
- AllChem.UFFOptimizeMolecule(mol)
- conf = mol.GetConformer()
- if not conf.Is3D():
- raise ValueError("ETKDG方法生成的构象不是3D")
- print(f"ETKDG方法成功生成3D构象")
- except Exception as e:
- print(f"ETKDG方法失败: {str(e)},尝试基本嵌入方法...")
- conformer_error = f"{conformer_error} -> ETKDG失败: {str(e)}"
-
- # 步骤3: 如果ETKDG失败,尝试基本的嵌入方法
- if conf is None or not conf.Is3D():
- try:
- print(f"尝试使用基本嵌入方法生成3D构象...")
- # 先清除所有构象
- while mol.GetNumConformers() > 0:
- mol.RemoveConformer(0)
-
- # 尝试多次随机种子
- for seed in [42, 123, 987, 555]:
- ps = AllChem.ETKDGv3()
- ps.randomSeed = seed
- cid = AllChem.EmbedMolecule(mol, ps)
- if cid >= 0: # 成功嵌入
- conf = mol.GetConformer(cid)
- if conf.Is3D():
- print(f"基本嵌入方法在种子{seed}下成功")
- # 优化构象
- AllChem.UFFOptimizeMolecule(mol, confId=cid)
- break
-
- if conf is None or not conf.Is3D():
- raise ValueError("多次尝试后仍无法生成有效的3D构象")
- except Exception as e:
- print(f"所有嵌入方法均失败: {str(e)}")
- conformer_error = f"{conformer_error} -> 基本嵌入失败: {str(e)}"
- raise ValueError(f"无法生成3D构象: {conformer_error}")
-
- # 确认我们有有效的3D构象
- if conf is None or not conf.Is3D():
- raise ValueError("尝试所有方法后仍无法获得有效的3D构象")
-
- # 写入XYZ文件
- with open(file_path, 'w') as f:
- f.write(f"{mol.GetNumAtoms()}\n")
- comments = []
- if molecule_type:
- comments.append(f"Type: {molecule_type}")
- if smiles:
- comments.append(f"SMILES: {smiles}")
- f.write(f"{' | '.join(comments)}\n")
-
- for atom in mol.GetAtoms():
- pos = conf.GetAtomPosition(atom.GetIdx())
- f.write(f"{atom.GetSymbol()} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}\n")
-
- return True
- except Exception as e:
- print(f"警告: 无法将分子转换为XYZ格式: {e}")
- return False
- def smiles_to_xyz(smiles, file_path, molecule_type=None):
- """
- 从SMILES生成XYZ文件(备用方法)
- """
- if not smiles:
- return False
-
- try:
- mol = Chem.MolFromSmiles(smiles)
- if mol is None:
- raise ValueError("无效的SMILES字符串")
-
- return mol_to_xyz_with_smiles(mol, file_path, smiles, molecule_type)
- except Exception as e:
- print(f"警告: 无法从SMILES生成XYZ文件: {e}")
- return False
- def safe_write_xyz(mol, smiles, filename, output_dir, molecule_type):
- """
- 安全地生成XYZ文件,优先使用RDKit Mol对象,失败时尝试使用SMILES
-
- 返回:
- tuple: (文件名, 成功状态, 使用的方法)
- """
- file_path = os.path.join(output_dir, filename)
-
- # 检查文件是否已存在
- if os.path.exists(file_path):
- return filename, True, "existing"
-
- # 尝试使用RDKit Mol对象生成XYZ
- if mol is not None:
- success = mol_to_xyz_with_smiles(mol, file_path, smiles, molecule_type)
- if success:
- return filename, True, "mol"
-
- # 如果Mol对象失败或不存在,尝试使用SMILES
- if smiles:
- success = smiles_to_xyz(smiles, file_path, molecule_type)
- if success:
- return filename, True, "smiles"
-
- # 如果都失败了,返回失败信息
- return filename, False, "failed"
复制代码 这个是我原先写的,利用您的第三方的代码,适合放在我的代码的哪部分呢?出现失败的smile是带这TCO的结构,如CCCOCOC1CC/C=C\CCC1 |
|