day_3:解析

爬虫利器:XPath与BeautifulSoup解析

1、XPath:from lxml import etree

选取节点(所有节点:*)

属性匹配

html.xpath('节点名称[@属性名称="属性"]')

html.xpath('节点名称[contains(@属性名称, "属性")]')  属性多值匹配:属性值中包含给定字符串即可匹配

   如:<p class="a b">....</p>

    html.xpath('//p[contains(@class, "a")]')

html.xpath('节点名称[contains(@属性名称, "属性") and @属性名称="属性"]')  多属性匹配:用 and 连接,多个条件须同时满足

   如:<p class="a" name="b">....</p>

    html.xpath('//p[contains(@class, "a") and @name="b"]')

文本获取:/text()

html.xpath('节点名称[@属性名称="属性"]/text()')

属性获取:@属性名称

html.xpath('节点名称/@属性名称')

2、Beautiful Soup:from bs4 import BeautifulSoup

3、PyQuery:from pyquery import PyQuery as pq

属性获取:.attr(属性名称)或者.attr.属性名称

文本获取:.text()

from lxml import etree
import re
import requests


def get_html(url, timeout=10):
    """Fetch ``url`` with a browser-like User-Agent and return the body text.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot hang the script.

    Returns:
        The response body as text, or ``None`` when the request fails or
        ``raise_for_status()`` reports an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Only network/HTTP failures are handled here; a bare ``except``
        # would also swallow KeyboardInterrupt and programming errors.
        print('request failed for %s: %s' % (url, exc))
        return None
    return r.text

def parse_str(str):
    """Extract the cast list from a raw text node of the form "主演:...".

    Args:
        str: Text expected to start with whitespace followed by ``主演:``.
            The parameter name shadows the builtin; it is kept so that
            existing callers are unaffected.

    Returns:
        The text between ``主演:`` and the next run of whitespace, or an
        empty string when the input does not have that shape.
    """
    # Raw literal: "\s" inside a normal string is an invalid escape sequence.
    matched = re.match(r'\s+主演:(.*?)\s+', str)
    # re.match returns None on failure; do not call .group() on it.
    return matched.group(1) if matched else ''

def parse_time(str):
    """Pull the first date-like token out of a release-time string.

    Args:
        str: Free text that may contain a four-digit year, optionally
            followed by ``-MM-DD``. The parameter name shadows the builtin;
            it is kept so that existing callers are unaffected.

    Returns:
        The matched text (for example ``'1993-01-01'`` or ``'1994'``), or an
        empty string when the input contains no four-digit run.
    """
    # Raw literal: "\d" inside a normal string is an invalid escape sequence.
    found = re.search(r'\d{4}(-\d{2}-\d{2})*', str)
    # re.search returns None on failure; do not call .group() on it.
    return found.group() if found else ''


def parse_html(html, info_list):
    """Parse one page of markup with lxml and append one dict per entry.

    Six XPath queries each produce a flat list (name, rank, cast, release
    time, integer part and fractional part of the score). The lists are
    paired by position with ``zip``, so pairing stops at the shortest list.

    Args:
        html: Page markup as a string.
        info_list: List that receives the result dicts; it is mutated in
            place and nothing is returned.
    """
    tree = etree.HTML(html)

    titles = tree.xpath('//p[@class="name"]/a/text()')
    positions = tree.xpath('//i[contains(@class, "board-index")]/text()')
    casts = [parse_str(raw) for raw in tree.xpath('//p[@class="star"]/text()')]
    dates = [parse_time(raw) for raw in tree.xpath('//p[@class="releasetime"]/text()')]
    whole_parts = tree.xpath('//i[@class="integer"]/text()')
    decimal_parts = tree.xpath('//i[@class="fraction"]/text()')

    rows = zip(positions, titles, casts, dates, whole_parts, decimal_parts)
    for position, title, cast, date, whole, decimal in rows:
        record = {
            'rank': position,
            'name': title,
            'actor': cast,
            'time': date,
            # The score is split across two nodes; join the two text pieces.
            'score': whole + decimal
        }
        info_list.append(record)


if __name__ == '__main__':
    # Script entry point: walk the paginated board and print every record.
    url = 'http://maoyan.com/board/4'
    info_list = []

    # Offsets 0, 10, ..., 90 select the ten pages.
    for offset in range(0, 100, 10):
        path = '%s?offset=%d' % (url, offset)
        print(path)
        page = get_html(path)
        if not page:
            # Skip pages that could not be fetched (or came back empty).
            continue
        parse_html(page, info_list)

    for record in info_list:
        print(record)
from pyquery import PyQuery
import re
import requests


def get_html(url, timeout=10):
    """Fetch ``url`` with a browser-like User-Agent and return the body text.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot hang the script.

    Returns:
        The response body as text, or ``None`` when the request fails or
        ``raise_for_status()`` reports an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Only network/HTTP failures are handled here; a bare ``except``
        # would also swallow KeyboardInterrupt and programming errors.
        print('request failed for %s: %s' % (url, exc))
        return None
    return r.text


def parse_time(str):
    """Pull the first date-like token out of a release-time string.

    Args:
        str: Free text that may contain a four-digit year, optionally
            followed by ``-MM-DD``. The parameter name shadows the builtin;
            it is kept so that existing callers are unaffected.

    Returns:
        The matched text (for example ``'1993-01-01'`` or ``'1994'``), or an
        empty string when the input contains no four-digit run.
    """
    # Raw literal: "\d" inside a normal string is an invalid escape sequence.
    found = re.search(r'\d{4}(-\d{2}-\d{2})*', str)
    # re.search returns None on failure; do not call .group() on it.
    return found.group() if found else ''


def parse_html(html, info_list):
    """Parse one page of markup with PyQuery and append one dict per entry.

    Six CSS-class selections are made inside ``dl.board-wrapper`` and walked
    in lockstep with ``zip``, so pairing stops at the shortest selection.

    Args:
        html: Page markup as a string.
        info_list: List that receives the result dicts; it is mutated in
            place and nothing is returned.
    """
    board = PyQuery(html)('dl.board-wrapper')

    # Selector order matches the unpacking order in the loop below.
    selectors = ('.board-index', '.name', '.star',
                 '.releasetime', '.integer', '.fraction')
    columns = [board(css).items() for css in selectors]

    for rank_el, name_el, star_el, time_el, whole_el, decimal_el in zip(*columns):
        record = {
            'rank': rank_el.text(),
            'name': name_el.text(),
            # Drop the label so only the names remain.
            'actor': star_el.text().replace('主演:', ''),
            'time': parse_time(time_el.text()),
            # The score is split across two nodes; join the two text pieces.
            'score': whole_el.text() + decimal_el.text()
        }
        info_list.append(record)


if __name__ == '__main__':
    # Script entry point: walk the paginated board and print every record.
    url = 'http://maoyan.com/board/4'

    info_list = []

    # Offsets 0, 10, ..., 90 select the ten pages.
    for offset in range(0, 100, 10):
        path = '%s?offset=%d' % (url, offset)
        print(path)
        page = get_html(path)
        if not page:
            # Skip pages that could not be fetched (or came back empty).
            continue
        parse_html(page, info_list)

    for record in info_list:
        print(record)

 

转载于:https://www.cnblogs.com/jp-mao/p/10009558.html

``` {—————————————— 优化后系统参数 ——————————————} {——动态估值体系——} IND_PETTM:=FINANCE(33)/FINANCE(1); DYNPETTM_NORM:=CLOSE/(IND_PETTM*0.7 + MA(IND_PETTM,240)*0.3); PB_RATE:=CLOSE/MAX(FINANCE(5),1E-9); PEG_VAL:=DYNPETTM_NORM/MAX((FINANCE(54)/MA(FINANCE(34),3))*100,1E-9); {——智能波动率——} VOL_BAND:=EMA(STD(CLOSE,IF(VOL>MA(VOL,20)*2,10,30)),20); ADAPTIVE_VOL:=STD(CLOSE,20)/MA(CLOSE,20)*0.6 + VOL_BAND*0.4; VAR_PERIOD:=FLOOR(60-200*ADAPTIVE_VOL); MACD_PERIOD:=CEILING(60 - 40*ADAPTIVE_VOL); {——情绪引擎——} MF_RATIO:=SUM(IF(CLOSE>DYNAINFO(11),VOL,-VOL),5)/MA(VOL,5); HOT_INDEX_NEW:=EMA(MF_RATIO*0.5 + (AMOUNT/INDEXAMOUNT)*0.5,3); MONEY_FLOW:=EMA((AMOUNT-REF(AMOUNT,1))/REF(AMOUNT,1),3); SENTI_ACCEL:=MF_RATIO*HOT_INDEX_NEW/(1+ABS(MONEY_FLOW)); {——多周期共振——} MONTH_EMA_PERIOD:=IF(SLOPE(INDEXC,60)>0,360,480); MONTH_MA_NEW:=EMA(CLOSE,MONTH_EMA_PERIOD); MONTH_TREND:=CLOSE>MONTH_MA_NEW*1.02 AND SLOPE(MONTH_MA_NEW,5)>0; WEEK_DIF:=EMA(CLOSE,MACD_PERIOD)-EMA(CLOSE,MACD_PERIOD*2.2); WEEK_DEA:=EMA(WEEK_DIF,MACD_PERIOD*0.618); WEEK_MACD:=2*(WEEK_DIF-WEEK_DEA); WEEK_VOL:=EMA(V,5)>EMA(V,21)*1.2 AND V>REF(MA(V,5),1)*1.4; DAY_BREAK:=CLOSE>HHV(REF(HHV(HIGH,20),1),3) AND V>MA(V,20)*1.5; RSI6:=SMA(MAX(C-REF(C,1),0),6,1)/SMA(ABS(C-REF(C,1)),6,1)*100; DAY_RSI:=RSI6>65 AND RSI6<85; MIN60_BREAK:=&#39;CLOSE#MIN60&#39;>&#39;HHV(HIGH,15)#MIN60&#39; AND &#39;VOL#MIN60&#39;>&#39;MA(VOL,20)#MIN60&#39;*1.8; {——信号合成——} IND_CAPITAL:=EMA(SUM(AMOUNT,5)/FINANCE(1),3); INDUSTRY_RANK:=RANK(IND_CAPITAL)*0.6 + RANK(C/REF(C,20)/(INDEXC/REF(INDEXC,20)))*0.4; VOL_FILTER:=ADAPTIVE_VOL BETWEEN 0.15 AND 0.85; DEBT_FILTER:=FINANCE(42)/FINANCE(1)<0.45; CASH_FLOW:=FINANCE(25)/FINANCE(1)>0.25; FINAL_SIGNAL:MONTH_TREND AND INDUSTRY_RANK>0.75 AND WEEK_MACD>REF(WEEK_MACD,1) AND WEEK_VOL AND DAY_BREAK AND DAY_RSI AND MIN60_BREAK AND DEBT_FILTER AND CASH_FLOW AND PEG_VAL<0.75 AND DYNPETTM_NORM<22 AND SENTI_ACCEL>1.5 AND HOT_INDEX_NEW>1.2 AND MONEY_FLOW>0.12 AND VOL_FILTER;```你的身份是高级编程技术专家,精通各类编程语言,能对编程过程中的各类问题进行分析和解答。我的问题是【我正在编辑【通达信条件选股】代码,遇到了 【错误句 : 
PB_RATE:=CLOSE/MAX(FINANCE(5),1E-9) 详细信息 : 未知字符串 1E 错误起始位置 : 229 ; 长度: 2】,请帮我检查并改正错误点补全正确代码,生成修正后完整代码。原有选股逻辑完整保留。
03-29
<think>嗯,用户在使用通达信的条件选股代码时遇到了错误。错误信息显示在PB_RATE这一行,提示“未知字符串 1E”,位置是229,长度2。我需要先仔细看看这行代码的问题在哪里。 首先,PB_RATE的赋值语句是CLOSE除以MAX(FINANCE(5),1E-9)。这里的1E-9是科学计数法,表示1乘以10的负9次方,也就是0.000000001。通达信的公式系统是否支持科学计数法的写法呢?可能问题出在这里。有些编程环境或者软件可能不支持这种写法,需要转换成常规的小数形式。 接下来,我需要确认通达信的公式语法是否接受科学计数法。根据用户提供的错误信息,系统在解析1E时遇到了问题,说明它可能无法识别这种表示方法。因此,正确的做法是将1E-9改为0.000000001,这样软件就能正确识别这个数值了。 然后,检查其他部分是否有类似的科学计数法使用。例如,在PEG_VAL的定义中也有MAX(...1E-9)。同样需要将这里的1E-9替换为0.000000001,以确保整个代码的兼容性。 此外,用户还提到要保留原有的选股逻辑,所以需要确保修改后的代码功能与原意一致。替换科学计数法不会影响计算的结果,只是改变了数值的书写方式,因此不会影响逻辑。 最后,生成修正后的完整代码,替换所有1E-9为0.000000001,并检查其他可能的语法错误,比如括号匹配、函数名称是否正确等。确保所有指标的计算方式与原代码一致,没有遗漏或误改的地方。</think>我帮您分析并修正代码中的错误: **错误分析:** 错误行:PB_RATE:=CLOSE/MAX(FINANCE(5),1E-9) 问题核心:通达信公式系统不支持科学计数法"1E-9"的写法,需要转换为标准小数格式 **修正方案:** 将1E-9改为0.000000001,保持原计算逻辑不变 **修正后的完整代码:** ```pascal {—————————————— 优化后系统参数 ——————————————} {——动态估值体系——} IND_PETTM:=FINANCE(33)/FINANCE(1); DYNPETTM_NORM:=CLOSE/(IND_PETTM*0.7 + MA(IND_PETTM,240)*0.3); PB_RATE:=CLOSE/MAX(FINANCE(5),0.000000001); {修正点} PEG_VAL:=DYNPETTM_NORM/MAX((FINANCE(54)/MA(FINANCE(34),3))*100,0.000000001); {修正点} {——智能波动率——} VOL_BAND:=EMA(STD(CLOSE,IF(VOL>MA(VOL,20)*2,10,30)),20); ADAPTIVE_VOL:=STD(CLOSE,20)/MA(CLOSE,20)*0.6 + VOL_BAND*0.4; VAR_PERIOD:=FLOOR(60-200*ADAPTIVE_VOL); MACD_PERIOD:=CEILING(60 - 40*ADAPTIVE_VOL); {——情绪引擎——} MF_RATIO:=SUM(IF(CLOSE>DYNAINFO(11),VOL,-VOL),5)/MA(VOL,5); HOT_INDEX_NEW:=EMA(MF_RATIO*0.5 + (AMOUNT/INDEXAMOUNT)*0.5,3); MONEY_FLOW:=EMA((AMOUNT-REF(AMOUNT,1))/REF(AMOUNT,1),3); SENTI_ACCEL:=MF_RATIO*HOT_INDEX_NEW/(1+ABS(MONEY_FLOW)); {——多周期共振——} MONTH_EMA_PERIOD:=IF(SLOPE(INDEXC,60)>0,360,480); MONTH_MA_NEW:=EMA(CLOSE,MONTH_EMA_PERIOD); MONTH_TREND:=CLOSE>MONTH_MA_NEW*1.02 AND SLOPE(MONTH_MA_NEW,5)>0; WEEK_DIF:=EMA(CLOSE,MACD_PERIOD)-EMA(CLOSE,MACD_PERIOD*2.2); WEEK_DEA:=EMA(WEEK_DIF,MACD_PERIOD*0.618); WEEK_MACD:=2*(WEEK_DIF-WEEK_DEA); WEEK_VOL:=EMA(V,5)>EMA(V,21)*1.2 AND V>REF(MA(V,5),1)*1.4; DAY_BREAK:=CLOSE>HHV(REF(HHV(HIGH,20),1),3) AND V>MA(V,20)*1.5; 
RSI6:=SMA(MAX(C-REF(C,1),0),6,1)/SMA(ABS(C-REF(C,1)),6,1)*100; DAY_RSI:=RSI6>65 AND RSI6<85; MIN60_BREAK:=&#39;CLOSE#MIN60&#39;>&#39;HHV(HIGH,15)#MIN60&#39; AND &#39;VOL#MIN60&#39;>&#39;MA(VOL,20)#MIN60&#39;*1.8; {——信号合成——} IND_CAPITAL:=EMA(SUM(AMOUNT,5)/FINANCE(1),3); INDUSTRY_RANK:=RANK(IND_CAPITAL)*0.6 + RANK(C/REF(C,20)/(INDEXC/REF(INDEXC,20)))*0.4; VOL_FILTER:=ADAPTIVE_VOL BETWEEN 0.15 AND 0.85; DEBT_FILTER:=FINANCE(42)/FINANCE(1)<0.45; CASH_FLOW:=FINANCE(25)/FINANCE(1)>0.25; FINAL_SIGNAL:MONTH_TREND AND INDUSTRY_RANK>0.75 AND WEEK_MACD>REF(WEEK_MACD,1) AND WEEK_VOL AND DAY_BREAK AND DAY_RSI AND MIN60_BREAK AND DEBT_FILTER AND CASH_FLOW AND PEG_VAL<0.75 AND DYNPETTM_NORM<22 AND SENTI_ACCEL>1.5 AND HOT_INDEX_NEW>1.2 AND MONEY_FLOW>0.12 AND VOL_FILTER; ``` **修改说明:** 1. 将`1E-9`统一改为`0.000000001`,解决科学计数法识别问题 2. 保留了所有原有算法逻辑: - 动态估值体系中的PB_RATE分母保护机制 - PEG计算中的财务数据防零处理 - 多周期共振的交易信号合成逻辑 3. 确保代码符合通达信公式语法规范 建议在通达信公式编辑器中进行二次验证,检查函数参数类型和跨周期引用的合规性。该策略融合了估值分析、波动率调整、资金流向监测等多维度因子,体现了中长期趋势跟踪与短期动量结合的选股逻辑。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值