import numpy as np
import torch
import matplotlib.pyplot as plt
import pytransform3d.transformations as pt


# Build a pixel-coordinate grid out of tensors.
W = 6
H = 4
# torch.linspace(0, W-1, W) yields the W evenly spaced values 0, 1, ..., W-1
# as a 1D tensor (e.g. W = 5 gives [0, 1, 2, 3, 4]).
# torch.meshgrid takes the two 1D tensors and produces 2D coordinate grids
# holding every (first, second) combination. The results are received as
# (i, j) rather than (x, y).
# indexing='ij' is spelled out because calling meshgrid without it is
# deprecated; 'ij' is the historical default, so both grids come out with
# shape (W, H): the first argument runs along dim 0.
i, j = torch.meshgrid(
    torch.linspace(0, W-1, W),
    torch.linspace(0, H-1, H),
    indexing='ij',
)

print(i)
print(j)

# Transpose both grids to (H, W) so that they are indexed as [y, x],
# i.e. rows follow the image's vertical axis.
i = i.t()
j = j.t()

print(i)  # x coordinate of every pixel
print(j)  # y coordinate of every pixel

# Stack i and j along a new last axis: every pixel now carries its own
# [i, j] pair, giving a tensor of shape (H, W, 2) with grid[y, x] = [x, y].
grid = torch.stack([i, j], -1)
print(grid[0])  # pixel coordinates of the row y = 0
print(grid[1])  # pixel coordinates of the row y = 1
print(grid[0][1])  # row y = 0, column x = 1 -> (x, y) = (1, 0)

tensor([[0., 0., 0., 0.],
        [1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.]])
tensor([[0., 1., 2., 3.],
        [0., 1., 2., 3.],
        [0., 1., 2., 3.],
        [0., 1., 2., 3.],
        [0., 1., 2., 3.],
        [0., 1., 2., 3.]])
tensor([[0., 1., 2., 3., 4., 5.],
        [0., 1., 2., 3., 4., 5.],
        [0., 1., 2., 3., 4., 5.],
        [0., 1., 2., 3., 4., 5.]])
tensor([[0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1.],
        [2., 2., 2., 2., 2., 2.],
        [3., 3., 3., 3., 3., 3.]])
tensor([[0., 0.],
        [1., 0.],
        [2., 0.],
        [3., 0.],
        [4., 0.],
        [5., 0.]])
tensor([[0., 1.],
        [1., 1.],
        [2., 1.],
        [3., 1.],
        [4., 1.],
        [5., 1.]])
tensor([1., 0.])


# Ray helpers
def get_rays(H, W, K, c2w):
    """Compute one world-space ray (origin, direction) per pixel.

    Args:
        H: image height in pixels.
        W: image width in pixels.
        K: 3x3 camera intrinsics indexed as K[row][col]; fx = K[0][0],
            fy = K[1][1], cx = K[0][2], cy = K[1][2].
        c2w: camera-to-world matrix as a torch tensor with at least 3 rows
            and 4 columns; c2w[:3, :3] is used as the rotation and
            c2w[:3, -1] as the camera origin in world coordinates.

    Returns:
        (rays_o, rays_d), both of shape (H, W, 3):
        rays_o[y, x] is the camera origin (the same for every pixel, returned
        as an expanded view) and rays_d[y, x] is the un-normalized ray
        direction through pixel (x, y).
    """
    # Build the pixel grid on the same device as c2w so that the multiply
    # below never mixes devices. indexing='xy' yields (H, W) grids indexed as
    # [y, x] directly, the same convention get_rays_np uses.
    device = c2w.device
    i, j = torch.meshgrid(
        torch.linspace(0, W-1, W, device=device),
        torch.linspace(0, H-1, H, device=device),
        indexing='xy',
    )

    # Image coordinates -> normalized camera coordinates.
    # In NeRF's convention the normalized plane sits at z = -1 (the camera
    # looks down -z), which is why the y component is negated and z is -1.
    dirs = torch.stack(
        [(i-K[0][2])/K[0][0], -(j-K[1][2])/K[1][1], -torch.ones_like(i)],
        -1,
    )  # (H, W, 3)

    # Rotate ray directions from the camera frame to the world frame.
    # dirs[..., np.newaxis, :] has shape (H, W, 1, 3) and broadcasts against
    # the (3, 3) rotation; summing the element-wise product over the last
    # axis is the matrix-vector product c2w[:3, :3] @ dir for every pixel,
    # giving shape (H, W, 3).
    rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w[:3, :3], -1)

    # The camera origin expressed in world coordinates is the origin of all
    # rays; expand the 3-vector to (H, W, 3) so it lines up with rays_d.
    rays_o = c2w[:3, -1].expand(rays_d.shape)

    return rays_o, rays_d

def get_rays_np(H, W, K, c2w):
    """NumPy version of the per-pixel ray computation.

    Args:
        H: image height in pixels.
        W: image width in pixels.
        K: 3x3 camera intrinsics indexed as K[row][col].
        c2w: camera-to-world matrix (array with at least 3 rows, 4 columns).

    Returns:
        (rays_o, rays_d), both of shape (H, W, 3). rays_o is a broadcast
        view of the camera origin c2w[:3, -1]; rays_d holds the un-normalized
        world-space direction for every pixel.
    """
    cols = np.arange(W, dtype=np.float32)
    rows = np.arange(H, dtype=np.float32)
    # 'xy' indexing gives (H, W) grids: px[y, x] = x and py[y, x] = y.
    px, py = np.meshgrid(cols, rows, indexing='xy')

    # Camera-frame directions on the z = -1 plane (y flipped, looking down -z).
    cam_dirs = np.stack(
        [
            (px-K[0][2])/K[0][0],
            -(py-K[1][2])/K[1][1],
            -np.ones_like(px),
        ],
        -1,
    )

    # Rotate into the world frame: multiply-and-sum over the last axis is
    # the matrix-vector product rotation @ direction for each pixel.
    rotation = c2w[:3, :3]
    rays_d = np.sum(cam_dirs[..., np.newaxis, :] * rotation, -1)

    # Every ray starts at the camera origin expressed in world coordinates.
    origin = c2w[:3, -1]
    rays_o = np.broadcast_to(origin, np.shape(rays_d))
    return rays_o, rays_d


def visualize_image_plane_3d(H, W, K, sample_points=None):
    """
    Plot the pixel grid on the image plane and on the normalized plane in 3D.

    Both planes are drawn in the local camera frame only (no c2w is applied):
    - image plane:      x = i, y = j, z = f (f is taken to be fx)
    - normalized plane: x = (i - cx) / fx, y = -(j - cy) / fy, z = -1

    The intrinsics fx, fy, cx, cy are read from K inside this function, so
    the plot always reflects the K that was passed in and the function does
    not depend on any module-level variables.

    Args:
        H: image height in pixels.
        W: image width in pixels.
        K: 3x3 camera intrinsics indexed as K[row][col]; fx = K[0][0],
            fy = K[1][1], cx = K[0][2], cy = K[1][2].
        sample_points: optional iterable of (i, j) pixel coordinates, e.g.
            [(i1, j1), (i2, j2), ...]. For each one, the point on the image
            plane is connected by a line to its counterpart on the normalized
            plane. Defaults to the four corner pixels plus the centre pixel.

    Side effects:
        Prints the shape of the direction tensor and opens a matplotlib
        window via plt.show().
    """
    # Pull the intrinsics out of K once, as plain Python floats.
    fx, fy = float(K[0][0]), float(K[1][1])
    cx, cy = float(K[0][2]), float(K[1][2])

    if sample_points is None:
        # By default, show only the four corner pixels and the centre pixel.
        sample_points = [
            (0, 0),
            (W-1, 0),
            (0, H-1),
            (W-1, H-1),
            (int(W//2), int(H//2))
        ]

    # Pixel coordinates of the image plane; 'xy' indexing gives (H, W) grids
    # indexed as [y, x] (same result as 'ij' indexing followed by .t()).
    i, j = torch.meshgrid(
        torch.linspace(0, W-1, W),
        torch.linspace(0, H-1, H),
        indexing='xy',
    )

    # Image plane: z is set to the focal length fx (fx and fy are assumed to
    # be equal here).
    img_plane = torch.stack([i, j, torch.full_like(i, fx)], dim=-1)  # (H, W, 3)
    img_plane_flat = img_plane.view(-1, 3).cpu().numpy()

    # Normalized plane at z = -1 (y flipped, camera looking down -z).
    dirs = torch.stack([(i-cx)/fx, -(j-cy)/fy, -torch.ones_like(i)], -1)
    dirs_flat = dirs.view(-1, 3).cpu().numpy()

    print(dirs.shape)

    # --- Visualization ---
    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111, projection='3d')

    # (A) Camera coordinate axes
    pt.plot_transform(ax=ax, A2B=np.eye(4), s=0.3, name="CameraCoord")

    # (B) Image-plane scatter (red)
    ax.scatter(img_plane_flat[:, 0],
               img_plane_flat[:, 1],
               img_plane_flat[:, 2],
               c='red', s=5, alpha=0.6, label="Image plane (z=f)")

    # (C) Normalized-plane scatter (blue)
    ax.scatter(dirs_flat[:, 0],
               dirs_flat[:, 1],
               dirs_flat[:, 2],
               c='blue', s=5, alpha=0.6, label="Normal plane (z=-1)")

    # (D) Mark corresponding points and connect them with a line
    for (i_p, j_p) in sample_points:
        # 3D position on the image plane: x = i, y = j, z = f
        img_x = i_p
        img_y = j_p
        img_z = fx
        # 3D position on the normalized plane
        nor_x = (i_p - cx)/fx
        nor_y = -(j_p - cy)/fy
        nor_z = -1

        # Straight line between the two corresponding points
        ax.plot([img_x, nor_x], [img_y, nor_y], [img_z, nor_z],
                c='black', alpha=0.8)

        # Text labels: pixel coordinate in red, normalized coordinate in blue
        ax.text(img_x, img_y, img_z,
                f"({i_p},{j_p})",
                color='red')
        ax.text(nor_x, nor_y, nor_z,
                f"[{nor_x:.2f},{nor_y:.2f},{nor_z:.2f}]",
                color='blue')

    # Axis limits are not set explicitly; matplotlib's autoscaling is used.
    ax.view_init(elev=0, azim=0)
    ax.legend()
    plt.title("Image plane vs. Normal plane with sample correspondences")
    plt.show()


# --------------------------
# 사용 예시
# --------------------------
if __name__ == "__main__":
    import torch

    # Tiny 6x4 image with a pinhole camera: equal focal lengths and the
    # principal point placed at the image centre.
    H = 4
    W = 6
    fx = fy = 2.0
    cx, cy = W/2, H/2
    K = torch.tensor(
        [[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
        dtype=torch.float32,
    )

    # Pixels (i, j) to connect between the two planes: the left-most column
    # (i = 0) and the right-most column (i = 5), for every row j = 0..3.
    sample_pts = [(col, row) for col in (0, 5) for row in range(4)]

    visualize_image_plane_3d(H, W, K, sample_points=sample_pts)

torch.Size([4, 6, 3])


# Normalize X, Y, Z coordinates that lie in [-n, ~) into the range [-1, 1].
def ndc_rays(H, W, focal, near, rays_o, rays_d):
    """Re-express rays in normalized device coordinates (NDC).

    Args:
        H: image height in pixels.
        W: image width in pixels.
        focal: camera focal length.
        near: distance of the near plane; origins are moved onto z = -near.
        rays_o: ray origins, tensor whose last axis holds (x, y, z).
        rays_d: ray directions, same shape as rays_o.

    Returns:
        (rays_o, rays_d): the NDC origins and directions, stacked along the
        last axis so they keep the leading shape of the inputs.
    """
    # Slide each origin along its ray until it lies on the plane z = -near.
    shift = -(near + rays_o[..., 2]) / rays_d[..., 2]
    rays_o = rays_o + shift[..., None] * rays_d

    ox, oy, oz = rays_o[..., 0], rays_o[..., 1], rays_o[..., 2]
    dx, dy, dz = rays_d[..., 0], rays_d[..., 1], rays_d[..., 2]

    # Per-axis projection scale factors shared by origins and directions.
    scale_x = -1./(W/(2.*focal))
    scale_y = -1./(H/(2.*focal))

    # Projected origin
    ndc_origin = torch.stack(
        [
            scale_x * ox / oz,
            scale_y * oy / oz,
            1. + 2. * near / oz,
        ],
        -1,
    )

    # Projected direction
    ndc_direction = torch.stack(
        [
            scale_x * (dx/dz - ox/oz),
            scale_y * (dy/dz - oy/oz),
            -2. * near / oz,
        ],
        -1,
    )

    return ndc_origin, ndc_direction

3. NeRF Network (0)	2025.03.24
1. Camera Matrix (0)	2025.03.24

일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

Fluffy Shark Studio

티스토리 뷰

2. Calculating Rays

[2] Calculating Rays¶

1. 픽셀 좌표와 월드 좌표 대응하기¶

1) 픽셀 좌표 $(x,y,1)$ → 정규 좌표 $(u,v,1)$¶

(a) 수식¶

(b) 역변환¶

2) 정규 좌표 $(u,v,1)$ → 카메라 좌표 $(x_c,y_c,z_c)$¶

(a) 스칼라 $z_c$ 곱¶

3) 카메라 좌표 $X_c = (x_c,y_c,z_c)$ → 월드 좌표 $X_w = (x_w,y_w,z_w)$¶

(a) 카메라 외부행렬¶

(b) 카메라 외부 행렬의 역행렬¶

4) 전체 과정 수식 요약¶

5) 픽셀 좌표계와 정규 좌표계 가시화¶

2. Normalized Device Coordinates(NDC) 개념¶

1) 배경: Forward-facing 카메라와 NDC¶

2) 수식 유도 개괄¶

3) 항목별 해석¶

(a) $-\dfrac{f_\mathrm{cam}}{W/2}\,\dfrac{o_x}{o_z}$¶

(b) $1 + \dfrac{2\,n}{o_z}$¶

(c) $\dfrac{d_x}{d_z} - \dfrac{o_x}{o_z}$¶

4) 해석: 왜 이런 변환을 쓰는가?¶

5) 그림으로 요약¶

'코드 분석 > NeRF' 카테고리의 다른 글

티스토리툴바