-
Notifications
You must be signed in to change notification settings - Fork 0
/
cur_replace.py
102 lines (75 loc) · 2.53 KB
/
cur_replace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
Assignment #2
Project: To implement both SVD and CUR Matrix decomposition algorithms and
compare their efficency (in terms of space, time etc.)
Instructor: Dr. Aruna Malapati
Contributors: G V Sandeep 2014A7PS106H
Kushagra Agrawal 2014AAPS334H
Snehal Wadhwani 2014A7PS430H
Course: CS F469 Information Retrieval
"""
import numpy as np
import math
import time
from numpy import linalg as LA
from svd import svd
from common import handle_input, calc_error
def calc_length(matrix):
"""
The Square sum of the values of the matrix
"""
total = 0
for i in xrange(len(matrix)):
for j in xrange(len(matrix[i])):
total = total + matrix[i][j]**2
return total
def selection(matrix):
"""
Input : The matrix of which we have to select random rows
Output : Randomly selected rows based on the calculated probability distribution. These rows are scaled so as to compensate the fact that they sometimes are selected multiple times. It also returns the indexes of the selected rows
No of rows selected = rank of the matrix
Seed for random number generator : 117
"""
matrix = matrix.tolist()
rows_selected = []
prob = [0]*len(matrix)
total = calc_length(matrix)
compact_matrix = []
np.random.seed(117)
for i in xrange(len(matrix)):
row = 0.0
for j in xrange(len(matrix[i])):
row = row + matrix[i][j]**2
prob[i] = row/total
rank = LA.matrix_rank(matrix)
rows_selected = np.random.choice(len(matrix), rank, p=prob)
for j in xrange(len(rows_selected)):
compact_matrix.append(matrix[rows_selected[j]])
factor = (rank*prob[rows_selected[j]])**0.5
for k in matrix[j]:
k = k / factor
return compact_matrix,rows_selected
ratings = handle_input("ratings.txt")
start_time = time.time()
R,rows_selected = selection(ratings)
C,columns_selected = selection(ratings.T)
intersection = np.zeros((len(rows_selected),len(columns_selected)))
for i in xrange(len(rows_selected)):
for j in xrange(len(columns_selected)):
intersection[i][j] = ratings[rows_selected[i]][columns_selected[j]]
u,sigma,v = svd(intersection)
u = u.T
v = v.T
for j in xrange(len(sigma)):
if abs(sigma[j][j])!=0:
sigma[j][j] = 1/ sigma[j][j]
intersection = np.dot(v,np.dot(sigma,u))
C = np.matrix(C)
C = C.T
R = np.matrix(R)
final_matrix = np.dot(C,np.dot(intersection,R))
print final_matrix
final_error = calc_error(ratings, final_matrix.tolist())
print "\nFrobenius Error Of the CUR Decomposition is : ",final_error
print " \n************* Execution Time : ************* "
print "--- %s seconds ---" %(time.time() - start_time)