Get the max of two numbers in Bash

#!/bin/bash
# Get the max of two numbers.
echo
echo "------ test: get the max of two nums ------"
EQUAL=0             # echoed when the two numbers are equal
E_PARAM_ERR=-9999   # echoed when the second parameter is missing
max2()
{
    if [ -z "$2" ]
    then
        echo $E_PARAM_ERR
        return
    fi
    if [ "$1" -eq "$2" ]
    then
        echo $EQUAL
        return
    fi
    if [ "$1" -gt "$2" ]
    then
        ret=$1
    else
        ret=$2
    fi
    # Can't use `return` to pass the result back here:
    # a function's return status can't exceed 255, so we
    # echo the value and capture it with command substitution.
    echo $ret
}
num=$(max2 200 500)
echo "$num"
echo
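The comment about `return` deserves a concrete demonstration. Here is a minimal sketch (not part of the original script) showing how Bash truncates a function's return status to 8 bits:

#!/bin/bash
demo()
{
    return 500   # the status is stored as an 8-bit value
}
demo
echo $?          # prints 244 (500 mod 256), not 500

This truncation is why the script above echoes the result and captures it with command substitution, num=$(max2 200 500), instead of returning it.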