diff --git a/utils/data_utils.py b/utils/data_utils.py
index 1854ef0..4acdd98 100755
--- a/utils/data_utils.py
+++ b/utils/data_utils.py
@@ -6,7 +6,7 @@ import os
 import h5py
 import plyfile
 import numpy as np
-from matplotlib import cm
+#from matplotlib import cm
 import scipy.spatial.distance as distance
 
 
diff --git a/utils/model.py b/utils/model.py
index 4f66ad8..404f118 100755
--- a/utils/model.py
+++ b/utils/model.py
@@ -66,7 +66,7 @@ class XConv(nn.Module):
             out_channels = C_out,
             kernel_size = (1, K),
             depth_multiplier = depth_multiplier
-        )).cuda()
+        ))
         
     def forward(self, x : Tuple[UFloatTensor,            # (N, P, dims)
                                 UFloatTensor,            # (N, P, K, dims)
@@ -197,7 +197,7 @@ class PointCNN(nn.Module):
         fts = self.dense(fts) if fts is not None else fts
 
         # This step takes ~97% of the time. Prime target for optimization: KNN on GPU.
-        pts_idx = self.r_indices_func(rep_pts.cpu(), pts.cpu()).cuda()
+        pts_idx = self.r_indices_func(rep_pts.cpu(), pts.cpu())
         # -------------------------------------------------------------------------- #
 
         pts_regional = self.select_region(pts, pts_idx)
diff --git a/utils/util_funcs.py b/utils/util_funcs.py
index 2183670..972c31f 100755
--- a/utils/util_funcs.py
+++ b/utils/util_funcs.py
@@ -56,8 +56,13 @@ def knn_indices_func_gpu(rep_pts : cuda.FloatTensor,  # (N, pts, dim)
         ref = pts[n]
         n, d = ref.size()
         m, d = qry.size()
+        d=3
         mref = ref.expand(m, n, d)
-        mqry = qry.expand(n, m, d).transpose(0, 1)
+        #mqry = qry.expand(n, m, d).transpose(0, 1)
+        if m==n:
+            mqry = qry.reshape(n,1,d).repeat((1,m,1))
+        else:
+            mqry = qry.expand(n, m, d).transpose(0, 1)
         dist2 = torch.sum((mqry - mref)**2, 2).squeeze()
         _, inds = torch.topk(dist2, k*d + 1, dim = 1, largest = False)
         region_idx.append(inds[:,1::d])