python的wordcloud函数_Python:wordcloud.wordcloud()函数的参数解析及其说明

本文详细解析了 Python 中 `wordcloud.WordCloud` 类构造函数(`__init__`)的参数及其用法,包括字体路径、宽度、高度、颜色映射等设置,以及如何通过 `generate_from_frequencies` 从频率字典生成词云。通过这个类,可以快速创建自定义的词云图。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

def __init__(self, font_path=None, width=400, height=200, margin=2,
             ranks_only=None, prefer_horizontal=.9, mask=None, scale=1,
             color_func=None, max_words=200, min_font_size=4,
             stopwords=None, random_state=None, background_color='black',
             max_font_size=None, font_step=1, mode="RGB",
             relative_scaling=.5, regexp=None, collocations=True,
             colormap=None, normalize_plurals=True):
    """Store the word-cloud configuration on the instance.

    Parameters
    ----------
    font_path : string or None
        Font file used for rendering. Falls back to ``FONT_PATH``.
    width, height : int
        Canvas size used when no ``mask`` is supplied.
    margin : int
        Extra space reserved around each word's bounding box.
    ranks_only : deprecated
        Has no effect; any value other than None triggers a
        ``DeprecationWarning``.
    prefer_horizontal : float
        A word is laid out horizontally when a random draw in [0, 1)
        is below this value, otherwise it is rotated by 90 degrees.
    mask : array or None
        When given, its shape defines the canvas and pixels equal to 255
        are excluded from word placement.
    scale : number
        Factor applied to canvas size, font sizes and positions when the
        final image is rendered.
    color_func : callable or None
        Called with ``word``, ``font_size``, ``position``,
        ``orientation``, ``random_state`` and ``font_path`` to pick a
        color. Built from ``colormap`` when not supplied.
    max_words : int
        Only this many of the most frequent words are placed.
    min_font_size : int
        Placement stops once the font size drops below this value.
    stopwords : iterable of string or None
        Words removed by ``process_text``. Falls back to ``STOPWORDS``.
    random_state : random.Random, int or None
        An int is used as the seed of a new ``random.Random``.
    background_color : color value
        Background passed to the image constructor.
    max_font_size : int or None
        Starting font size; derived from the data when None.
    font_step : int
        Amount the font size is reduced by when a word does not fit.
    mode : string
        Image mode of the rendered picture.
    relative_scaling : float
        Weight of the frequency ratio when shrinking the font from one
        word to the next. Must lie in [0, 1].
    regexp : string or None
        Tokenization pattern; ``r"\\w[\\w']+"`` is used when None.
    collocations : bool
        Selects the unigram-and-bigram counter instead of the plain token
        counter in ``process_text``.
    colormap : string, matplotlib colormap or None
        Used to build ``color_func`` when that is None. Defaults to
        "viridis", or "hsv" on matplotlib older than 1.5.
    normalize_plurals : bool
        Forwarded to the token counting helpers.

    Raises
    ------
    ValueError
        If ``relative_scaling`` is outside [0, 1].
    """
    if font_path is None:
        font_path = FONT_PATH
    if color_func is None and colormap is None:
        # we need a color map
        import matplotlib
        version = matplotlib.__version__
        # Compare the release numerically. The previous character-wise test
        # (version[0] < "2" and version[2] < "5") misjudged releases such
        # as "0.99" or any two-digit major version.
        try:
            major, minor = [int(part) for part in version.split(".")[:2]]
        except ValueError:
            # unparsable or too short version string: assume a modern
            # matplotlib rather than failing in the constructor
            use_hsv = False
        else:
            use_hsv = (major, minor) < (1, 5)
        colormap = "hsv" if use_hsv else "viridis"
    self.colormap = colormap
    self.collocations = collocations
    self.font_path = font_path
    self.width = width
    self.height = height
    self.margin = margin
    self.prefer_horizontal = prefer_horizontal
    self.mask = mask
    self.scale = scale
    self.color_func = color_func or colormap_color_func(colormap)
    self.max_words = max_words
    self.stopwords = stopwords if stopwords is not None else STOPWORDS
    self.min_font_size = min_font_size
    self.font_step = font_step
    self.regexp = regexp
    if isinstance(random_state, int):
        random_state = Random(random_state)
    self.random_state = random_state
    self.background_color = background_color
    self.max_font_size = max_font_size
    self.mode = mode
    if relative_scaling < 0 or relative_scaling > 1:
        raise ValueError("relative_scaling needs to be "
                         "between 0 and 1, got %f." % relative_scaling)
    self.relative_scaling = relative_scaling
    if ranks_only is not None:
        warnings.warn("ranks_only is deprecated and will be removed as"
                      " it had no effect. Look into relative_scaling.",
                      DeprecationWarning)
    self.normalize_plurals = normalize_plurals

def fit_words(self, frequencies):
    """Create a word cloud from words and frequencies.

    Alias to generate_from_frequencies.

    Parameters
    ----------
    frequencies : dict from string to float
        Maps each word to its associated frequency.

    Returns
    -------
    self
    """
    return self.generate_from_frequencies(frequencies)

def generate_from_frequencies(self, frequencies, max_font_size=None):
    """Create a word cloud from words and frequencies.

    Words are placed from most to least frequent on a grey-scale scratch
    image; the resulting placement is stored in ``self.layout_`` and the
    normalized frequencies in ``self.words_``.

    Parameters
    ----------
    frequencies : dict from string to float
        Maps each word to its associated frequency.

    max_font_size : int
        Use this font-size instead of self.max_font_size

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``frequencies`` is empty, if the mask is neither 2- nor
        3-dimensional, or if the canvas is too small to place any word
        while estimating the initial font size.
    """
    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies.items(), key=itemgetter(1),
                         reverse=True)
    if not frequencies:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))
    frequencies = frequencies[:self.max_words]

    # largest entry will be 1
    max_frequency = float(frequencies[0][1])

    frequencies = [(word, freq / max_frequency)
                   for word, freq in frequencies]

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if self.mask is not None:
        mask = self.mask
        width = mask.shape[1]
        height = mask.shape[0]
        if mask.dtype.kind == 'f':
            warnings.warn("mask image should be unsigned byte between 0"
                          " and 255. Got a float array")
        if mask.ndim == 2:
            boolean_mask = mask == 255
        elif mask.ndim == 3:
            # if all channels are white, mask out
            boolean_mask = np.all(mask[:, :, :3] == 255, axis=-1)
        else:
            raise ValueError("Got mask of invalid shape: %s"
                             % str(mask.shape))
    else:
        boolean_mask = None
        height, width = self.height, self.width
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    font_sizes, positions, orientations, colors = [], [], [], []

    last_freq = 1.

    if max_font_size is None:
        # if not provided use default font_size
        max_font_size = self.max_font_size

    if max_font_size is None:
        # figure out a good font size by trying to draw with
        # just the first two words
        if len(frequencies) == 1:
            # we only have one word. We make it big!
            font_size = self.height
        else:
            self.generate_from_frequencies(dict(frequencies[:2]),
                                           max_font_size=self.height)
            # find font sizes
            sizes = [x[1] for x in self.layout_]
            try:
                font_size = int(2 * sizes[0] * sizes[1]
                                / (sizes[0] + sizes[1]))
            # quick fix for if self.layout_ contains less than 2 values
            # on very small images it can be empty
            except IndexError:
                try:
                    font_size = sizes[0]
                except IndexError:
                    raise ValueError('canvas size is too small')
    else:
        font_size = max_font_size

    # we set self.words_ here because we called generate_from_frequencies
    # above... hurray for good design?
    self.words_ = dict(frequencies)

    # start drawing grey image
    for word, freq in frequencies:
        # select the font size
        rs = self.relative_scaling
        if rs != 0:
            font_size = int(round((rs * (freq / float(last_freq))
                                   + (1 - rs)) * font_size))
        if random_state.random() < self.prefer_horizontal:
            orientation = None
        else:
            orientation = Image.ROTATE_90
        tried_other_orientation = False
        while True:
            # try to find a position
            font = ImageFont.truetype(self.font_path, font_size)
            # transpose font optionally
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            # NOTE(review): ImageDraw.textsize is no longer available in
            # recent Pillow releases -- confirm the Pillow version this
            # file is meant to run against.
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            if result is not None or font_size < self.min_font_size:
                # either we found a place or font-size went too small
                break
            # if we didn't find a place, make font smaller
            # but first try to rotate!
            if not tried_other_orientation and self.prefer_horizontal < 1:
                # Flip to the orientation not tried yet. Both branches of
                # this expression used to yield ROTATE_90, so a vertical
                # word was never retried horizontally at the same size.
                orientation = (Image.ROTATE_90 if orientation is None
                               else None)
                tried_other_orientation = True
            else:
                font_size -= self.font_step
                orientation = None

        if font_size < self.min_font_size:
            # we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        colors.append(self.color_func(word, font_size=font_size,
                                      position=(x, y),
                                      orientation=orientation,
                                      random_state=random_state,
                                      font_path=self.font_path))
        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        last_freq = freq

    self.layout_ = list(zip(frequencies, font_sizes, positions,
                            orientations, colors))
    return self

def process_text(self, text):
    """Split a long text into words and eliminate the stopwords.

    Parameters
    ----------
    text : string
        The text to be processed.

    Returns
    -------
    words : dict (string, int)
        Word tokens with associated frequency.

    .. versionchanged:: 1.2.2
        Changed return type from list of tuples to dict.

    Notes
    -----
    There are better ways to do word tokenization, but I don't want to
    include all those things.
    """
    stopwords = set(i.lower() for i in self.stopwords)

    # re.UNICODE is only requested for Python 2 ``unicode`` input. The
    # interpreter check comes first so the ``unicode`` name is never
    # looked up on Python 3, where it does not exist.
    py2_unicode = (sys.version_info[0] < 3
                   and type(text) is unicode)  # noqa: F821
    flags = re.UNICODE if py2_unicode else 0
    regexp = self.regexp if self.regexp is not None else r"\w[\w']+"

    words = re.findall(regexp, text, flags)
    # remove stopwords
    words = [word for word in words if word.lower() not in stopwords]
    # remove 's
    words = [word[:-2] if word.lower().endswith("'s") else word
             for word in words]
    # remove numbers
    words = [word for word in words if not word.isdigit()]

    if self.collocations:
        word_counts = unigrams_and_bigrams(words, self.normalize_plurals)
    else:
        word_counts, _ = process_tokens(words, self.normalize_plurals)

    return word_counts

def generate_from_text(self, text):
    """Generate wordcloud from text.

    The input "text" is expected to be a natural text. If you pass a
    sorted list of words, words will appear in your output twice. To
    remove this duplication, set ``collocations=False``.

    Calls process_text and generate_from_frequencies.

    .. versionchanged:: 1.2.2
        Argument of generate_from_frequencies() is not return of
        process_text() any more.

    Returns
    -------
    self
    """
    words = self.process_text(text)
    self.generate_from_frequencies(words)
    return self

def generate(self, text):
    """Generate wordcloud from text.

    The input "text" is expected to be a natural text. If you pass a
    sorted list of words, words will appear in your output twice. To
    remove this duplication, set ``collocations=False``.

    Alias to generate_from_text.

    Calls process_text and generate_from_frequencies.

    Returns
    -------
    self
    """
    return self.generate_from_text(text)

def _check_generated(self):
    """Check if ``layout_`` was computed, otherwise raise error.

    Raises
    ------
    ValueError
        If the instance has no ``layout_`` attribute yet.
    """
    if not hasattr(self, "layout_"):
        raise ValueError("WordCloud has not been calculated, call generate"
                         " first.")

def to_image(self):
    """Render the computed layout into an image.

    The canvas takes its size from the mask when one is set, otherwise
    from ``self.width`` and ``self.height``; canvas size, font sizes and
    positions are all multiplied by ``self.scale``.

    Returns
    -------
    img : image object created with ``Image.new``

    Raises
    ------
    ValueError
        If no layout has been generated yet.
    """
    self._check_generated()
    if self.mask is not None:
        width = self.mask.shape[1]
        height = self.mask.shape[0]
    else:
        height, width = self.height, self.width

    img = Image.new(self.mode, (int(width * self.scale),
                                int(height * self.scale)),
                    self.background_color)
    draw = ImageDraw.Draw(img)
    for (word, _), font_size, position, orientation, color in self.layout_:
        font = ImageFont.truetype(self.font_path,
                                  int(font_size * self.scale))
        transposed_font = ImageFont.TransposedFont(
            font, orientation=orientation)
        # the layout stores each position in the opposite order from the
        # one draw.text is given, so the two components are swapped here
        pos = (int(position[1] * self.scale),
               int(position[0] * self.scale))
        draw.text(pos, word, fill=color, font=transposed_font)
    return img

def recolor(self, random_state=None, color_func=None, colormap=None):
    """Recolor existing layout.

    Applying a new coloring is much faster than generating the whole
    wordcloud.

    Parameters
    ----------
    random_state : RandomState, int, or None, default=None
        If not None, a fixed random state is used. If an int is given,
        this is used as seed for a random.Random state.

    color_func : function or None, default=None
        Function to generate new color from word, font size, position,
        orientation, random state and font path. If None, a function
        built from ``colormap`` is used, or self.color_func when
        ``colormap`` is None as well.

    colormap : string or matplotlib colormap, default=None
        Use this colormap to generate new colors. Ignored if color_func
        is specified. If None, self.color_func is used.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If no layout has been generated yet.
    """
    if isinstance(random_state, int):
        random_state = Random(random_state)
    self._check_generated()

    if color_func is None:
        if colormap is None:
            color_func = self.color_func
        else:
            color_func = colormap_color_func(colormap)

    # keep every layout entry as is and replace only its color
    self.layout_ = [(word_freq, font_size, position, orientation,
                     color_func(word=word_freq[0], font_size=font_size,
                                position=position,
                                orientation=orientation,
                                random_state=random_state,
                                font_path=self.font_path))
                    for word_freq, font_size, position, orientation, _
                    in self.layout_]
    return self

def to_file(self, filename):
    """Export to image file.

    Renders the image with ``to_image`` and saves it with
    ``optimize=True``.

    Parameters
    ----------
    filename : string
        Location to write to.

    Returns
    -------
    self
    """
    img = self.to_image()
    img.save(filename, optimize=True)
    return self

def to_array(self):
    """Convert to numpy array.

    Returns
    -------
    image : nd-array
        Word cloud image as numpy matrix, built by passing the result of
        ``to_image`` to ``np.array``.
        NOTE(review): the shape was previously documented as
        (width, height, 3); presumably it is (height, width, channels)
        with the channel count depending on ``mode`` -- confirm.
    """
    return np.array(self.to_image())

def __array__(self, dtype=None, copy=None):
    """Convert to numpy array.

    Parameters
    ----------
    dtype : data-type or None, default=None
        When given, the rendered array is converted to this type.
    copy : bool or None, default=None
        Accepted so the method can be called with the arguments NumPy
        passes to ``__array__``. It is otherwise ignored: every call
        builds a new array through ``to_array``.

    Returns
    -------
    image : nd-array
        Word cloud image as numpy matrix, as returned by ``to_array``.
    """
    image = self.to_array()
    if dtype is not None:
        image = image.astype(dtype, copy=False)
    return image

def to_html(self):
    """HTML export; not implemented.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("FIXME!!!")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值