Skip to content

Commit

Permalink
Fixed loader problems
Browse files Browse the repository at this point in the history
  • Loading branch information
robertlayton committed Aug 23, 2011
1 parent 8811af0 commit 1bc5e57
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 19 deletions.
3 changes: 1 addition & 2 deletions MANIFEST.in
Expand Up @@ -4,5 +4,4 @@ include scikits/__init__.py
recursive-include doc *
recursive-include examples *
recursive-include scikits *.c *.h *.pyx
recursive-include scikits/learn/datasets *.csv *.csv.gz *.TXT *.rst
recursive-include scikits/learn/datasets/images *.jpg *.txt
recursive-include scikits/learn/datasets *.csv *.csv.gz *.TXT *.rst *.jpg *.txt
30 changes: 14 additions & 16 deletions examples/cluster/vq_china.py
Expand Up @@ -8,25 +8,22 @@
number of colors required to show the image.
"""
print __doc__

import os
import numpy as np
import pylab as pl
from scikits.learn.cluster import KMeans
from scikits.learn.datasets import load_sample_images
# Try to import Image from PIL (through scipy.misc). We do this here to prevent
# this module from depending on PIL.
try:
try:
from scipy.misc import Image
except ImportError:
from scipy.misc.pilutil import Image
except ImportError:
raise ImportError("The Python Imaging Library (PIL)"
"is required to load data from jpeg files")

# Get all sample images and obtain just china.jpg
sample_image_name = "china.jpg"
sample_images = load_sample_images()
index = sample_images.filenames.index("china.jpg")
index = None
for i, filename in enumerate(sample_images.filenames):
if filename.endswith(sample_image_name):
index = i
break
if index is None:
raise AttributeError("Cannot find sample image: %s" % sample_image_name)
image_data = sample_images.images[index]

# Load Image and transform to a 2D numpy array.
Expand All @@ -36,13 +33,13 @@
# Take a sample of the data.
sample_indices = range(len(image_array))
np.random.shuffle(sample_indices)
sample_indices = sample_indices[:int(len(image_array) * 0.5)]
sample_indices = sample_indices[:int(len(image_array) * 0.2)]
sample_data = image_array[sample_indices]

# Perform Vector Quantisation with 256 clusters.
k = 256
kmeans = KMeans(k=k)
kmeans.fit(image_array)
kmeans.fit(sample_data)
# Get labels for all points
labels = kmeans.predict(image_array)
# Save the reduced dataset. Only the centroids and labels need to be saved.
Expand All @@ -56,8 +53,9 @@ def recreate_image(centroids, labels, w, h):
for i in range(w):
for j in range(h):
image[i][j] = centroids[labels[label_num]]
print labels[label_num], label_num
label_num += 1
print np.histogram(labels)
print set(labels)
return image

# Display all results, alongside original image
Expand All @@ -67,5 +65,5 @@ def recreate_image(centroids, labels, w, h):
centroids, labels = reduced_image
im = pl.imshow(recreate_image(centroids, labels, w, h))

show()
pl.show()

14 changes: 13 additions & 1 deletion scikits/learn/datasets/base.py
Expand Up @@ -7,6 +7,7 @@
# 2010 Olivier Grisel <olivier.grisel@ensta.org>
# License: Simplified BSD

import os
import csv
import shutil
import textwrap
Expand Down Expand Up @@ -372,9 +373,20 @@ def load_sample_images():
>>> # pl.matshow(images.images[0]) # Visualize the first image
>>> # pl.show()
"""
# Try to import Image from PIL (through scipy.misc). We do this here, inside
# the function, to prevent this module from depending on PIL.
try:
try:
from scipy.misc import Image
except ImportError:
from scipy.misc.pilutil import Image
except ImportError:
raise ImportError("The Python Imaging Library (PIL)"
"is required to load data from jpeg files")
module_path = join(dirname(__file__), "images")
descr = open(join(module_path, 'README.txt')).read()
filenames = [filename for filename in os.listdir(module_path)
filenames = [join(module_path, filename)
for filename in os.listdir(module_path)
if filename.endswith(".jpg")]
# Load image data for each image in the source folder.
images = [np.asarray(Image.open(filename))
Expand Down
1 change: 1 addition & 0 deletions scikits/learn/datasets/setup.py
Expand Up @@ -7,6 +7,7 @@ def configuration(parent_package='', top_path=None):
config = Configuration('datasets', parent_package, top_path)
config.add_data_dir('data')
config.add_data_dir('descr')
config.add_data_dir('images')


config.add_extension('_svmlight_format',
Expand Down

0 comments on commit 1bc5e57

Please sign in to comment.