import os
from math import sqrt
import numpy
import torch
from Bio.PDB import PDBParser
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
# Device used for the model, the loss and every batch. Fall back to the CPU
# when CUDA is not available so the script still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class P450Dataset(Dataset):
    """In-memory dataset of zero-padded inter-atomic distance matrices.

    Every regular file found in the ``testp450`` directory is parsed with
    ``Bio.PDB.PDBParser``. The pairwise Euclidean distances between all atoms
    of the structure are written into the top-left corner of a
    ``pad_size`` x ``pad_size`` matrix (zeros elsewhere), and the result is
    stored as a float32 tensor of shape ``(1, pad_size, pad_size)``.

    Args:
        testp450: Path of the directory that contains the structure files.
        transform: Optional callable applied to a sample each time it is
            fetched through ``__getitem__``.
        target_transform: Accepted for signature compatibility. Samples have
            no separate target, so it is stored but never applied.
        pad_size: Side length every distance matrix is padded to. Defaults to
            4996, the value that used to be hard-coded.

    Raises:
        ValueError: If a structure contains more than ``pad_size`` atoms.
    """

    def __init__(self, testp450, transform=None, target_transform=None,
                 pad_size=4996):
        super().__init__()
        self.testp450 = testp450
        self.transform = transform
        self.target_transform = target_transform
        self.pad_size = pad_size
        self.data = []

        # One parser is enough; it keeps no per-file state we rely on.
        parser = PDBParser()
        # Sort the listing so the sample order does not depend on the OS.
        for filename in sorted(os.listdir(testp450)):
            filepath = os.path.join(testp450, filename)
            if not os.path.isfile(filepath):
                # Sub-directories cannot be parsed as structures.
                continue
            # The structure id is only a label; use the file stem.
            structure_id = os.path.splitext(filename)[0]
            structure = parser.get_structure(structure_id, filepath)
            coords = [atom.get_coord() for atom in structure.get_atoms()]
            # Number of atoms in this structure (progress output).
            print(len(coords))
            distances = self._distance_matrix(coords)
            sample = self._pad_to_tensor(distances, pad_size)
            print(sample.shape)
            self.data.append(sample)

    @staticmethod
    def _distance_matrix(coords):
        """Return the (n, n) float64 matrix of pairwise Euclidean distances.

        ``coords`` is a sequence of n 3-D points; an empty sequence yields a
        ``(0, 0)`` matrix.
        """
        points = numpy.asarray(coords, dtype=numpy.float64).reshape(-1, 3)
        count = points.shape[0]
        squared = numpy.zeros((count, count), dtype=numpy.float64)
        # Accumulate one axis at a time: this keeps the temporaries at
        # (n, n) instead of materialising an (n, n, 3) difference array.
        for axis in range(3):
            delta = points[:, axis, None] - points[None, :, axis]
            squared += delta * delta
        return numpy.sqrt(squared)

    @staticmethod
    def _pad_to_tensor(distances, pad_size):
        """Zero-pad a square matrix to ``pad_size`` and add a channel axis.

        Returns a float32 tensor of shape ``(1, pad_size, pad_size)``.
        """
        count = distances.shape[0]
        if count > pad_size:
            raise ValueError(
                "structure has {} atoms, which exceeds pad_size={}".format(
                    count, pad_size
                )
            )
        padded = numpy.zeros((pad_size, pad_size), dtype=numpy.float32)
        padded[:count, :count] = distances
        return torch.from_numpy(padded).unsqueeze(0)

    def __len__(self):
        """Return the number of structures that were loaded."""
        return len(self.data)

    def __getitem__(self, item):
        """Return sample ``item``, passed through ``transform`` when one is set."""
        sample = self.data[item]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample
# ---------------------------------------------------------------------------
# Training script: fit an autoencoder to reconstruct the padded distance
# matrices produced by P450Dataset and log the loss to TensorBoard.
# ---------------------------------------------------------------------------

# Hyper-parameters.
num_epochs = 1000
batch_size = 2
learning_rate = 1e-3
# Number of optimizer steps taken so far, counted across all epochs.
total_train_step = 0

# NOTE(review): `autoencoder` is not defined or imported anywhere in the
# visible file; it has to be provided before this script can run.
model = autoencoder()
model.to(device)

# Mean-squared error between the model output and its own input.
# `torch.nn` is used explicitly because the file never imports a bare `nn`.
criterion = torch.nn.MSELoss()
criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

dataset = P450Dataset(testp450="testp450")
dataloader = DataLoader(dataset, batch_size, shuffle=True)
writer = SummaryWriter("./logs_testp450")

try:
    for epoch in range(num_epochs):
        for data in dataloader:
            img = data.to(device)

            # Forward pass: the input is also the reconstruction target.
            output = model(img)
            loss = criterion(output, img)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            if total_train_step % 100 == 0:
                # Read the scalar once; each .item() call copies the value
                # back to the host.
                loss_value = loss.item()
                print("训练次数:{},Loss:{}".format(total_train_step, loss_value))
                writer.add_scalar("train_loss1000", loss_value, total_train_step)
finally:
    # Flush and close the event file even if training is interrupted.
    writer.close()
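# Original: https://blog.csdn.net/qq_40223341/article/details/121577188
# Author: 小新壹鸢
# Title: 处理pdb文件 (Processing PDB files)
# Original articles are protected by copyright. When reposting, please cite the source: https://www.johngo689.com/761448/
# Reposted articles are protected by the original author's copyright. When reposting, please credit the original author and source!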