- 博客(11)
- 收藏
- 关注
原创 爬虫抓取加入网页中
# -*- encoding:utf-8 -*-import urllib.requestfrom bs4 import BeautifulSoupimport osimport lxml #文档解析器 #os模块就是对操作系统进行操作import numpy as npurls=[]titles=[]import webbrowserlist=['Aid', 'Aim', 'Air', 'Aisle', 'A
2022-01-11 10:35:35
10985
1
原创 sigmoid来实现分类
def plot_decision_boundary(pred_func, X, y): #输入:边界函数, 数据集, 类别标签 #描述:绘制决策边界(画图用) # 设置最小最大值, 加上一点外边界 x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 h = 0.01 # 根据最小最
2022-01-11 09:47:17
278
原创 爬虫抓取单词网页
# -*- encoding:utf-8 -*-import urllib.requestfrom bs4 import BeautifulSoupimport osimport lxml #文档解析器 #os模块就是对操作系统进行操作import numpy as npimport pymysqlfor line in open("D:/json/words.txt"): url = r'https:/
2022-01-11 09:43:02
428
原创 数据导论实验一
from lenskit.datasets import ML100Kfrom lenskit import batch, topn, utilfrom lenskit import crossfold as xffrom lenskit.algorithms import Recommender, als, item_knn as knnfrom lenskit import topnimport pandas as pdimport matplotlib.pyplot as pltimpo
2022-01-11 09:36:42
539
原创 手写数字的降维
import numpy as npimport matplotlib.pyplot as pltfrom sklearn import manifold, datasetsdigits = datasets.load_digits(n_class=6)X, y = digits.data, digits.targetn_samples, n_features = X.shape'''显示原始数据'''n = 20 # 每行20个数字,每列20个数字img = np.zeros((10
2022-01-10 21:18:21
1239
原创 t-SNE对手写数字的降维可视化
#t-SNE对手写数字的降维可视化from time import timeimport numpy as npimport matplotlib.pyplot as pltfrom sklearn import datasetsfrom sklearn.manifold import TSNEdigits = datasets.load_digits(n_class=6)# print(digits)data = digits.datalabel = digits.target#
2022-01-10 21:16:28
414
原创 hbase连接数据库
#coding=utf-8from thrift.transport import TSocketfrom hbase import Hbasefrom hbase.ttypes import *import pymysql# 打开hbase数据库连接transport = TSocket.TSocket('ip', 9090)protocol = TBinaryProtocol.TBinaryProtocol(transport)client = Hbase.Client(protoc
2022-01-10 21:14:12
1548
原创 单个页面爬虫
from urllib import requestfrom bs4 import BeautifulSoupimport sslssl._create_default_https_context=ssl._create_unverified_context#一、网络请求页面base_url = "http://www.shanbay.com/wordlist/110521/232414/?page=1"response = request.urlopen(base_url)html =
2022-01-10 21:09:46
303
原创 k-means聚类算法
k-means聚类算法# K-means Algorithm is a clustering algorithmimport numpy as npimport matplotlib.pyplot as pltimport random def get_distance(p1, p2): diff = [x-y for x, y in zip(p1, p2)] distance = np.sqrt(sum(map(lambda x: x**2, diff))) retur
2022-01-10 21:05:59
98
原创 k-means聚类算法
k-means的相关算法k-means的相关算法随机生成2000个点,判断算法性能。# K-means Algorithm is a clustering algorithmimport numpy as npimport matplotlib.pyplot as pltimport random def get_distance(p1, p2): diff = [x-y for x, y in zip(p1, p2)] distance = np.sqrt(sum(map(
2022-01-10 21:02:54
521
原创 爬虫(爬取单词)
爬取5500个考研单词# -*- encoding:utf-8 -*-import urllib.requestfrom bs4 import BeautifulSoupimport osimport lxml #文档解析器 #os模块就是对操作系统进行操作import numpy as npurls=[]titles=[]#url = r'http://sousuo.gov.cn/s.htm?t=paper&a
2022-01-10 20:36:08
280
空空如也
空空如也
TA创建的收藏夹 TA关注的收藏夹
TA关注的人