python贪心算法_[Python算法实现]贪心算法

该博客介绍了如何生成包含性别、年龄和薪水的随机数据集,并定义了平方误差函数。接着,它实现了一个搜索算法,目标是找到一组行,使得选定行的平均年龄在指定范围内,总薪水也在特定区间内,最小化均方误差。最后,展示了算法的运行结果和所选数据的平均年龄及总薪水。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# -*- coding: utf-8 -*-

"""
Created on Tue Oct 31 20:02:11 2017

@author: liuzimu
"""

import pandas as pd

import random, time

import numpy as np

"""
1. Establish a random dataset

The dataset contains the following columns:
Gender: randomly generate 'male' or 'female'
Age: random integers between 22 and 65
Salary: random integers between 3000 and 10000
"""

# Size of the synthetic table; the stdlib RNG is seeded so that every run
# regenerates exactly the same rows.
n_row = 1000
random.seed(50)

# Inclusive bounds handed to random.randint for the two numeric columns.
age_low = 22
age_high = 65
salary_low = 3000
salary_high = 10000

# The columns are drawn one after another (all genders, then all ages, then
# all salaries); this draw order determines the values produced by the seed.
gender = pd.Series([random.choice(['male', 'female']) for _ in range(n_row)])
age = pd.Series([random.randint(age_low, age_high) for _ in range(n_row)])
salary = pd.Series([random.randint(salary_low, salary_high) for _ in range(n_row)])

# Assemble the three columns into a single frame: gender, age, salary.
df = pd.DataFrame({"gender": gender, "age": age, "salary": salary})

# Preview of the first rows (only displayed in an interactive session).
df.head()

"""
2. Create a dictionary of strings and their corresponding functions

"average": numpy.mean function
"sum": numpy.sum function
"""

def str2func(x):
    """Return the NumPy aggregation function registered under the name *x*.

    Args:
        x: Name of the aggregation; "average" maps to ``numpy.mean`` and
            "sum" maps to ``numpy.sum``.

    Returns:
        The NumPy callable associated with *x*.

    Raises:
        KeyError: If *x* is not one of the supported names. The message
            lists the names that are accepted.
    """
    func_dict = {"average": np.mean, "sum": np.sum}
    try:
        return func_dict[x]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message
        # that tells the caller which names are valid.
        raise KeyError(
            "unknown aggregation %r; expected one of %s" % (x, sorted(func_dict))
        ) from None

"""
3. Calculate square error

Suppose we want the variable x to fall on the [a, b] interval,
then the calculation of the squared error is as follows:
if x in [a, b], then SE = 0
if x > b, then SE = (x/b - 1)^2
if x < a, then SE = (1 - x/a)^2
"""

def get_se(x, rng):
    """Return the normalized squared error of *x* relative to an interval.

    The error is zero when *x* lies inside the closed interval ``[a, b]``.
    Outside it, the distance is expressed relative to the violated bound
    before squaring, so targets of very different magnitude (e.g. an age
    and a salary total) contribute on a comparable scale.

    Args:
        x: The value to score.
        rng: A two-element sequence ``(a, b)`` giving the lower and upper
            bound of the target interval.

    Returns:
        ``0.0`` if ``a <= x <= b``; ``(x / b - 1) ** 2`` if ``x > b``;
        otherwise ``(1 - x / a) ** 2``.

    Note:
        The violated bound is used as a divisor, so a bound equal to zero
        is not supported.
    """
    a, b = rng
    if a <= x <= b:
        return 0.0
    if x > b:
        # Overshoot, normalized by the upper bound.
        return (x / b - 1) ** 2
    # Everything else (undershoot, or a value such as NaN that compares
    # false against both bounds) is normalized by the lower bound.
    return (1 - x / a) ** 2

"""4. Calculate mean-square error"""

def get_mse(data, rows, cols, funcs, rngs, n_cond):
    """Return the mean squared error of the selected rows over all targets.

    For each ``(col, func, rng)`` triple, *func* is applied to column *col*
    of the selected rows, the result is scored with ``get_se`` against
    *rng*, and the scores are averaged over *n_cond*.

    Args:
        data: DataFrame holding the candidate rows.
        rows: Selection mask; rows of *data* where ``rows == 1`` are the
            selected ones.
        cols: Column names to aggregate, one per condition.
        funcs: Aggregation callables, paired positionally with *cols*.
        rngs: Target intervals ``(a, b)``, paired positionally with *cols*.
        n_cond: Divisor applied to each squared error (the caller passes
            the number of conditions).

    Returns:
        The accumulated ``get_se(...) / n_cond`` over all conditions, as a
        float.
    """
    # The selection does not depend on the column, so build it once instead
    # of re-evaluating the boolean mask for every condition.
    selected = data.loc[rows == 1]

    mse = 0.0
    for col, func, rng in zip(cols, funcs, rngs):
        stat = func(selected[col])
        mse += get_se(stat, rng) / n_cond
    return mse

"""
5. Search function

The variable rows is something like [1, 1, 0, 1, 0, 0, 0..., 0, 1, 0, 0, 0],
in which 1 means the row number is selected.
Set the mse and min_mse as "infinite" initially to make the code more elegant.

a. Create an index array with n zeros.
b. Calculate the mse of indexes which are zeros.
c. Record the minimum mse as min_mse during step b,
   and set the corresponding index as one.
d. Compare the mse and min_mse then update the value of mse.
e. Break the iteration if the mse cannot be lower anymore.
"""

def search(data, cols, funcs, rngs, threshold=10e-6):
    """Greedily select rows of *data* until the aggregate targets are met.

    Starting from an empty selection, each iteration tentatively adds every
    not-yet-selected row in turn, scores the resulting selection with
    ``get_mse``, and permanently adds the row that gives the lowest score.
    The loop ends when the score is no longer above *threshold*, when the
    best candidate would make the score worse, or when no candidate could
    be scored at all.

    Args:
        data: DataFrame holding the candidate rows.
        cols: Column names to constrain, one per condition.
        funcs: Aggregation names understood by ``str2func`` ("average" or
            "sum"), paired positionally with *cols*.
        rngs: Target intervals ``(a, b)``, paired positionally with *cols*.
        threshold: Score at or below which the search stops. The default
            ``10e-6`` equals ``1e-5``.

    Returns:
        An int32 Series indexed like *data* in which 1 marks a selected row
        and 0 an unselected one.

    Note:
        One progress line is printed per accepted row. Rows are addressed
        by index label, so the index of *data* is presumably expected to be
        unique -- TODO confirm with callers.
    """
    n_cond = len(cols)

    # 0/1 selection mask that shares the index of ``data``.
    rows = pd.Series(np.zeros(data.shape[0], dtype=np.int32), index=data.index)

    # Resolve aggregation names to callables once, up front.
    funcs = [str2func(name) for name in funcs]

    i = 0
    mse = float('inf')
    while mse > threshold:
        min_mse = float('inf')
        min_mse_idx = None
        for idx in data.loc[rows == 0].index:
            # Tentatively select the row, score it, then undo the selection.
            rows.loc[idx] = 1
            tmp_mse = get_mse(data, rows, cols, funcs, rngs, n_cond)
            if tmp_mse < min_mse:
                min_mse = tmp_mse
                min_mse_idx = idx
            rows.loc[idx] = 0

        # No candidate produced a usable score (nothing left to add, or every
        # score was NaN): stop instead of indexing with an unset label.
        if min_mse_idx is None:
            break
        # The best candidate would make things worse: keep what we have.
        if min_mse > mse:
            break

        mse = min_mse
        rows.loc[min_mse_idx] = 1
        print("%d times iteration, mse %.3f" % (i + 1, mse))
        i += 1
    return rows

"""6. Test the search function and show results."""

# Demo run: select rows of ``df`` whose average age falls in [35, 40] and
# whose total salary falls in [100000, 120000], then report the outcome.
print("\n" * 3)
print("Test search:")

# Wall-clock start; the elapsed time is reported on the last line.
run_time = time.time()

idxs = search(
    data=df,
    cols=["age", "salary"],
    funcs=["average", "sum"],
    rngs=[[35, 40], [100000, 120000]],
)

# ``idxs`` is a 0/1 mask aligned with ``df``; keep only the selected rows.
search_result = df.loc[idxs == 1]
average_age = search_result.age.mean()
total_salary = search_result.salary.sum()

print()
print("Target average age is 35 to 40 and target total salary is 100000 to 120000")
# Spaces around the format specifiers restored so the values are readable.
print("Average age is %.2f and total salary is %d" % (average_age, total_salary))
print("Run time is %.2fs" % (time.time() - run_time))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值