<!-- workflow -->

<!DOCTYPE workflow-resource PUBLIC "Spider-Core workflow dtd"
        "http://rc.91yong.com/spider.dtd"
        >
<workflow-resource prefix="ganji" >
    <workflow id="_validateLogin">

        <exe type="SessionVar" name="getInputPara">
            <output>
                <var id="inputPara"/>
            </output>
        </exe>


        <exe id="inputPara" name="getUsername">
            <output>
                <var id="username"/>
            </output>
        </exe>

<!--        <exe type="ZhangzhouUtil" name="keyEcoder">
            <param><var id="username"/></param>
            <output>
                <var id="username"/>
            </output>
        </exe>-->


        <var id="sb" type="StringBuilder">
             <field name="append">http://www.ganji.com/user/login_success.php?username=</field>
             <field name="append"><var id="username"/></field>
        </var>

        <exe type="logger" name="info">
            <param>sb========</param>
            <param><var id="sb"/></param>
        </exe>

        <var id="httpClientParam" type="HttpClientUtilParam">
            <field name="url"><var id="sb"/></field>
            <field name="method">GET</field>
            <field name="encoder">utf-8</field>
        </var>


        <exe id="httpClientParam" name="getHtml">
            <output><var id="html"/></output>
        </exe>

        <exe type="RegularNodeParserUtil" name="buildParser">
            <param><var id="html"/></param>

            <param>validateLogin</param>
            <output >
                <var id="parser"></var>
            </output>
        </exe>

        <exe type="ParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

    </workflow>

    <workflow id="_login">


        <exe type="SessionVar" name="setValue">
            <param>doNot</param>
            <output>
                <var id="_retry" global="true"/>
            </output>
        </exe>

        <exe type="FlowControlUtil" name="if_exception">
            <param>ganji:checkCode</param>

            <param>ganji:checkCode_submit</param>

            <param></param>
        </exe>

        <exe type="ExceptionUtil" name="throwRetryIfEmpty">

            <param><var id="_retry" global="true"/> </param>
        </exe>

        <exe type="SessionVar" name="getInputPara">
            <output>
                <var id="inputPara"/>
            </output>
        </exe>


        <exe id="inputPara" name="getUsername">
            <output>
                <var id="username"/>
            </output>
        </exe>
        <exe id="inputPara" name="getPassword">
            <output>
                <var id="password"/>
            </output>
        </exe>

        <var id="loginForm" type="FormUtil">

            <field name="name">checkCode</field>
            <field name="value">undefined</field>

            <field name="name">username</field>
            <field name="value">
                <var id="username"/>
            </field>

            <field name="name">password</field>
            <field name="value">
                <var id="password"/>
            </field>

            <field name="name">expireDays</field>
            <field name="value">90</field>

            <field name="name">next</field>
            <field name="value">http://www.ganji.com/user/login_success.php?username=</field>

            <field name="name">setcookies</field>
            <field name="value">90</field>

            <field name="name">source</field>
            <field name="value">passport</field>

        </var>

        <exe id="loginForm" name="getPostData">
            <output>
                <var id="postData"></var>
            </output>
        </exe>

        <var id="_loginParam" global="true" type="HttpClientUtilParam">
            <field name="url">http://www.ganji.com/user/login.php</field>
            <field name="method">POST</field>
            <field name="data"><var id="postData"/></field>
        </var>

        <exe id="_loginParam" global="true" name="getHtml">
            <output>
                <var id="_html" global="true"></var>
            </output>
        </exe>

    </workflow>

    <workflow id="_searchResume">

        <var id="sb" type="StringBuilder">
            <field name="append"><![CDATA[http://CITY.ganji.com/qiuzhi/s/_KEYWORD/]]></field>
        </var>

        <exe type="SessionVar" name="getInputPara">
            <output>
                <var id="inputPara"/>
            </output>
        </exe>


        <exe type="SessionVar" name="getParam">
            <param>currentPlace</param>
            <output><var id="currentPlace"/></output>
        </exe>

        <exe type="logger" name="info">
            <param>currentPlace===</param>
            <param><var id="currentPlace"/></param>
        </exe>

        <exe type="SessionVar" name="setValue">
            <param><var id="currentPlace"/></param>
            <output>
                <var id="_city" global="true"/>
            </output>
        </exe>


        <exe type="GanjiUtils" name="currentPlacereplace">
            <param><var id="currentPlace"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>


        <exe type="SessionVar" name="getParam">
            <param>keyword</param>
            <output><var id="keyword"/></output>
        </exe>

 


        <exe type="GanjiUtils" name="encoderKeyword">
            <param><var id="keyword"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>

        <exe type="logger" name="info">
            <param>sb===</param>
            <param><var id="sb"/></param>
        </exe>


<!--        <exe type="logger" name="info">
            <param>sb1==========</param>
            <param><var id="sb"/></param>

        </exe>-->


        <exe type="SessionVar" name="getParam">
            <param>pageIndex</param>
            <output>
                <var id="pageIndex"></var>
            </output>
        </exe>


        <exe type="logger" name="info">
            <param>pageIndex===</param>
            <param><var id="pageIndex"/></param>
        </exe>


        <exe type="GanjiUtils" name="pageIndexChange">
            <param><var id="pageIndex"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>


<!--        <exe type="logger" name="info">
            <param>sb2==========</param>
            <param><var id="sb"/></param>

        </exe>-->


        <exe type="SessionVar" name="getParam">
            <param>gender</param>
            <output>
                <var id="gender"/>
            </output>
        </exe>

        <exe type="logger" name="info">
            <param>gender----</param>
            <param><var id="gender"/></param>
        </exe>

        <exe type="GanjiUtils" name="genderChange">
            <param><var id="gender"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>


        <exe type="SessionVar" name="getParam">
            <param>lastUpdate</param>
            <output>
                <var id="lastUpdate"/>
            </output>
        </exe>

        <exe type="GanjiUtils" name="lastUpdateChange">
            <param><var id="lastUpdate"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>

        <exe type="SessionVar" name="getParam">
            <param>degree</param>
            <output>
                <var id="degree"/>
            </output>
        </exe>

        <exe type="GanjiUtils" name="degreeChange">
            <param><var id="degree"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>

        <exe type="SessionVar" name="getParam">
            <param>age</param>
            <output>
                <var id="age"/>
            </output>
        </exe>

        <exe type="GanjiUtils" name="ageChange">
            <param><var id="age"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>


        <exe type="SessionVar" name="getParam">
            <param>serviceYear</param>
            <output>
                <var id="serviceYear"/>
            </output>
        </exe>

        <exe type="GanjiUtils" name="serviceYearChange">
            <param><var id="serviceYear"/></param>
            <param><var id="sb"/></param>
            <output>
                <var id="sb"/>
            </output>
        </exe>

 

<!--        <exe type="logger" name="info">
            <param>serviceYearsb==========</param>
            <param><var id="sb"/></param>

        </exe>-->


<!--
        <var id="sb" type="StringBuilder">
            <field name="append"><![CDATA[&work_years_higher_check=en]]></field>
        </var>
-->

 

        <exe type="logger" name="info">
            <param>sb123==========</param>
            <param><var id="sb"/></param>

        </exe>

        <var id="httpClientParam" type="HttpClientUtilParam">
            <field name="url"><var id="sb"/></field>
            <field name="method">GET</field>
            <field name="encoder">utf-8</field>
        </var>


        <exe id="httpClientParam" name="getHtml">
            <output><var id="_html" /></output>
        </exe>

        <exe type="logger" name="info">
            <param>html123==========</param>
            <param><var id="_html" global="true"/></param>

        </exe>


        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="_html" />
            </param>
            <param>searchResumePage</param>
            <output>
                <var id="pagePaser"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param>

            </param>
            <param>
                <var id="_html" />
            </param>
        </exe>-->

 

        <exe type="RegularNodeParserUtil" name="getMultiValue">
            <param>
                <var id="pagePaser"/>
            </param>
            <param>resumes</param>
            <output>
                <var id="_resumeList" global="true"/>
            </output>
        </exe>

        <exe type="SessionVar" name="getResult">
            <output>
                <var id="result"/>
            </output>
        </exe>
        <exe type="EntityBuilder" name="build">
            <param>
                <var id="result"></var>
            </param>
            <param>
                <var id="pagePaser"/>
            </param>
        </exe>


    </workflow>


    <workflow id="_resumeInSearchResumeList">


        <var type="ResumeSpiderResume" id="resume"></var>

        <exe type="EntityBuilder" name="build">
            <param>
                <var id="resume"></var>
            </param>
            <param>
                <var id="_resume" global="true"/>
            </param>
        </exe>

        <exe id="resume" name="getDateOfBirth">
            <output>
                <var id="getDateOfBirth"/>
            </output>
        </exe>

 


<!--处理年龄30转成19xx-MM-dd形式,在放入resumeEntity-->
        <exe id="resume" name="getDateOfBirth">
            <output>
                <var id="getDateOfBirth"/>
            </output>
        </exe>

        <exe type="DateUtil" name="yearBack">
            <param><var id="getDateOfBirth"/></param>
            <param>yyyy-MM-dd</param>
            <output>
                <var id="dateOfBirth"/>
            </output>
        </exe>

        <exe id="resume" name="setDateOfBirth">
            <param>
                <var id="dateOfBirth"/>
            </param>
        </exe>


        <exe id="resume" name="getOutterId">
            <output>
                <var id="getOutterId"/>
            </output>
        </exe>

        <var id="outterId" type="StringBuilder">
            <field name="append">http://</field>
            <field name="append"><var id="_city" global="true"/></field>
            <field name="append">.ganji.com/</field>
        </var>

        <var id="outterId" type="StringBuilder">
            <field name="append"><var id="getOutterId"/></field>
        </var>


        <exe id="resume" name="setOutterId">
            <param>
                <var id="outterId"/>
            </param>
        </exe>


        <!--处理搜索简历列表解析出来的更新时间的格式-->
        <exe id="resume" name="getUpdateTime">
            <output>
                <var id="updateTime"/>
            </output>
        </exe>

        <exe type="DateUtil" name="getYearChange">

            <param>
                <var id="updateTime"/>
            </param>
            <output>
                <var id="updateTime"/>
            </output>
        </exe>


        <exe id="resume" name="setUpdateTime">
            <param>
                <var id="updateTime"/>
            </param>
        </exe>

 


        <exe type="AddResumeToResult" name="add">
            <param>
                <var id="resume"></var>
            </param>
            <param>search</param>
        </exe>

    </workflow>


    <workflow id="_getResumePoint">

 

        <var id="httpClientParam" type="HttpClientUtilParam">
            <field name="url">http://www.ganji.com/vip/wanted/resume_points.php?source=uc</field>
            <field name="method">GET</field>
            <field name="encoder">utf-8</field>
        </var>

        <exe id="httpClientParam" name="getHtml">
            <output><var id="resumeStatHtml" global="true"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param>resumeStatHtml===</param>
            <param><var id="resumeStatHtml" global="true"/></param>
        </exe>-->

        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="resumeStatHtml" global="true"/>
            </param>
            <param>getResumePoint</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>reumePoint</param>
            <output>
                <var id="_resumePoint" global="true"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param >_resumePoint==</param>
            <param><var id="_resumePoint" global="true"/></param>
        </exe>-->


    </workflow>


    <workflow id="_searchResumeBuy">

        <exe type="SessionVar" name="getInputPara">
            <output>
                <var id="inputPara"/>
            </output>
        </exe>
        <exe id="inputPara" name="getOutterId">
            <output>
                <var id="outterId"></var>
            </output>
        </exe>

        <exe id="inputPara" name="getOutterId">
            <output>
                <var id="outterIdchange" global="true"></var>
            </output>
        </exe>


        <var id="httpClientParam" type="HttpClientUtilParam">
            <field name="url"><var id="outterId"/></field>
            <field name="method">GET</field>
            <field name="encoder">utf-8</field>
        </var>

        <exe id="httpClientParam" name="getHtml">
            <output><var id="resumeSearchHtml1" global="true"/>
            </output>
        </exe>


<!--        <exe type="logger" name="info">

            <param>resumeSearchHtml1=====</param>
            <param><var id="resumeSearchHtml1" global="true"/></param>
        </exe>-->

        <exe type="SessionVar" name="setValue">
            <param>doNot</param>
            <output>
                <var id="_retry" global="true"/>
            </output>
        </exe>

        <exe type="FlowControlUtil" name="if_exception">
            <param>ganji:checkCode</param>

            <param>ganji:checkCode_submit</param>

            <param></param>
        </exe>

        <exe type="ExceptionUtil" name="throwRetryIfEmpty">

            <param><var id="_retry" global="true"/> </param>
        </exe>


        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="resumeSearchHtml1" global="true"/>
            </param>
            <param>validate1</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>group1</param>
            <output>
                <var id="group1" global="true"/>
            </output>
        </exe>

        <exe type="logger" name="info">
            <param >group1==</param>
            <param><var id="group1" global="true"/></param>
        </exe>

        <var id="httpClientParam" type="HttpClientUtilParam">
            <field name="url"><var id="group1" global="true"/></field>
            <field name="method">GET</field>
            <field name="encoder">utf-8</field>
        </var>

        <exe id="httpClientParam" name="getHtml">
            <output><var id="resumeSearchHtml2" global="true"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">

            <param>resumeSearchHtml2=====</param>
            <param><var id="resumeSearchHtml2" global="true"/></param>
        </exe>-->

        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="resumeSearchHtml2" global="true"/>
            </param>
            <param>checkFreeCount</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>freecheck</param>
            <output>
                <var id="freecheck"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param>freecheck====</param>
            <param><var id="freecheck"/></param>
        </exe>-->

        <exe type="ExceptionUtil" name="throwExceptionIfEmpty">
            <param><var id="freecheck" /></param>
            <param>下载失败</param>
            <param>validateFalse</param>
        </exe>


        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="group1" global="true"/>
            </param>
            <param>getCityCode</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>getCityCode</param>
            <output>
                <var id="getCityCode" />
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param >getCityCode==</param>
            <param><var id="getCityCode"/></param>
        </exe>-->

 


        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="group1" global="true"/>
            </param>
            <param>getPostId</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>getPostId</param>
            <output>
                <var id="getPostId"  />
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param >getPostId==</param>
            <param><var id="getPostId"/></param>
        </exe>-->


        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="group1" global="true"/>
            </param>
            <param>getJob_postion</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>getJob_postion</param>
            <output>
                <var id="getJob_postion"  />
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param >getJob_postion==</param>
            <param><var id="getJob_postion"/></param>
        </exe>-->


        <var id="buyForm" type="FormUtil">

            <field name="name"><![CDATA[_rdm]]></field>
            <field name="value">guid_22_13456178203020_8098342504343473</field>

            <field name="name">callback</field>
            <field name="value">
                <var id="show_contact"/>
            </field>

            <field name="name">cityCode</field>
            <field name="value">
                <var id="getCityCode"/>
            </field>

            <field name="name">postId</field>
            <field name="value"><var id="getPostId"/></field>

            <field name="name">job_postion</field>
            <field name="value"><var id="getJob_postion"/></field>

            <field name="name">resume_type</field>
            <field name="value">0</field>

        </var>

        <exe id="buyForm" name="getPostData">
            <output>
                <var id="postData"></var>
            </output>
        </exe>

        <var id="_buyParam" global="true" type="HttpClientUtilParam">
            <field name="url"><var id="group1" global="true"/></field>
            <field name="method">POST</field>
            <field name="data"><var id="postData"/></field>
        </var>

        <exe id="_buyParam" global="true" name="getHtml">
            <output>
                <var id="_html" global="true"></var>
            </output>
        </exe>

 

<!--        <exe type="logger" name="info">
            <param>_html===</param>
            <param><var id="_html" global="true"/></param>
        </exe>-->

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="_html" global="true"/>
            </param>
            <param><![CDATA[src="/tel_img/]]></param>
            <param>
                <![CDATA[src="http://www.ganji.com/tel_img/]]>
            </param>
            <output>
                <var id="resumeBuyHtml" global="true"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param>_html123===</param>
            <param><var id="resumeBuyHtml" global="true"/></param>
        </exe>-->


        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="resumeBuyHtml" global="true"/>
            </param>
            <param>getPhone</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="RegularNodeParserUtil" name="parse">
            <param>
                <var id="parser"/>
            </param>
        </exe>

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>phoneAndEmail</param>
            <output>
                <var id="phoneAndEmail"  global="true"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">

            <param>phoneAndEmail==</param>
             <param><var id="phoneAndEmail" global="true"/></param>
        </exe>-->

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="phoneAndEmail" global="true"/>
            </param>
            <param><![CDATA[phone]]></param>
            <param>
                <![CDATA[电话]]>
            </param>
            <output>
                <var id="phoneAndEmail" global="true"/>
            </output>
        </exe>

 

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="phoneAndEmail" global="true"/>
            </param>
            <param><![CDATA[email]]></param>
            <param>
                <![CDATA[电子邮箱]]>
            </param>
            <output>
                <var id="phoneAndEmail" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="phoneAndEmail" global="true"/>
            </param>
            <param><![CDATA[,]]></param>
            <param>
                <![CDATA[</br>]]>
            </param>
            <output>
                <var id="phoneAndEmail" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="phoneAndEmail" global="true"/>
            </param>
            <param><![CDATA[']]></param>
            <param>

            </param>
            <output>
                <var id="phoneAndEmail" global="true"/>
            </output>
        </exe>

 

 

        <exe type="WorkflowExecutor" name="executeWorkflowByName">

            <param>ganji:_viewSearchResume</param>
        </exe>


        <exe type="WorkflowExecutor" name="executeWorkflowByName">
            <param>ganji:byteResourceDownload</param>

        </exe>

        <exe type="SessionVar" name="getResult">
            <output>
                <var id="resumeResult"/>
            </output>
        </exe>
        <exe id="resumeResult" name="getResume">
            <output>
                <var id="resumeEntity"/>
            </output>
        </exe>

 

        <var id="resumeEntity">
            <field name="mobile">#IMG0#</field>
        </var>

    </workflow>

    <workflow id="checkCode">
        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="_html" global="true"/>
            </param>
            <param>checkCodeValidate</param>
            <output>
                <var id="loginParser"/>
            </output>
        </exe>
        <exe type="ParserUtil" name="parse">
            <param>
                <var id="loginParser"/>
            </param>
        </exe>
    </workflow>


    <workflow id="_viewSearchResume">

        <exe type="SessionVar" name="getInputPara">
            <output>
                <var id="inputPara"/>
            </output>
        </exe>
        <exe id="inputPara" name="getOutterId">
            <output>
                <var id="outterId"></var>
            </output>
        </exe>

        <var id="sb" type="StringBuilder">
            <field name="append"><var id="outterId"/></field>
        </var>

<!--        <exe type="logger" name="info">
            <param>outterId====</param>
            <param><var id="outterId"/></param>
        </exe>-->

        <var id="httpClientParam" type="HttpClientUtilParam">
            <field name="url"><var id="sb"/></field>
            <field name="method">GET</field>
            <field name="encoder">utf-8</field>
        </var>

        <exe id="httpClientParam" name="getHtml">
            <output><var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

<!--        <exe type="logger" name="info">
            <param>resumeNoBuyHtml13====</param>
            <param><var id="resumeNoBuyHtml" global="true"/></param>
        </exe>-->

 

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<em>查看联系方式</em>]]></param>
            <param>
                <var id="phoneAndEmail" global="true"/>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

 

        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param>detailResume</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <exe type="SessionVar" name="getResult">
            <output>
                <var id="result"/>
            </output>
        </exe>

        <var type="ResumeSpiderResume" id="resumeEntity"/>
        <exe type="EntityBuilder" name="build">
            <param>
                <var id="resumeEntity"/>
            </param>
            <param>
                <var id="parser"/>
            </param>
        </exe>


        <!--将outterid设入resumeEntity-->
        <var id="resumeEntity">
            <field name="outterId"><var id="outterId"/></field>
        </var>

 

        <exe type="RegularNodeParserUtil" name="getSingleValue">
            <param>
                <var id="parser"/>
            </param>
            <param>dateOfBirth</param>
            <output>
                <var id="dateOfBirth"  />
            </output>
        </exe>

        <exe type="DateUtil" name="yearBack">
            <param><var id="dateOfBirth"/></param>
            <param>yyyy-MM-dd hh:mm:ss</param>
            <output>
                <var id="dateOfBirth"/>
            </output>
        </exe>

        <var id="resumeEntity">
            <field name="dateOfBirth"><var id="dateOfBirth"/></field>
        </var>


         <!--详细简历查看的更新时间格式处理-->
        <exe id="resumeEntity" name="getUpdateTime">
            <output>
                <var id="updateTime"/>
            </output>
        </exe>

        <exe type="DateUtil" name="getYearCompletion">

            <param>
                <var id="updateTime"/>
            </param>
            <output>
                <var id="updateTime"/>
            </output>
        </exe>

        <var id="resumeEntity">
            <field name="updateTime">
                <var id="updateTime"/>
            </field>
        </var>

 


        <exe type="WorkflowExecutor" name="executeWorkflowByName">
            <param>ganji:resumeHtmlDeal</param>
        </exe>

        <var id="resumeEntity" type="ResumeSpiderResume">
            <field name="html">
                <var id="resumeNoBuyHtml" global="true"/>
            </field>
        </var>

<!--        <exe type="logger" name="info">
            <param>resumeNoBuyHtml2====</param>
            <param><var id="resumeNoBuyHtml" global="true"/></param>
        </exe>-->

<!--
        <exe type="WorkflowExecutor" name="executeWorkflowByName">
            <param>ganji:byteResourceDownload</param>
        </exe>

        <var id="resumeEntity">
            <field name="mobile">#IMG0#</field>
        </var>
        -->

        <var id="result" type="ResumeSpiderResume">
            <field name="resume">
                <var id="resumeEntity"/>
            </field>
        </var>

 


    </workflow>

    <workflow id="byteResourceDownload">

        <exe type="SessionVar" name="getResult">
            <output>
                <var id="result"/>
            </output>
        </exe>
        <exe  id="result" name="getResume">
            <output>
                <var id="resumeEntity"/>
            </output>
        </exe>
        <exe id="resumeEntity" name="getHtml">
            <output>
                <var id="html"/>
            </output>
        </exe>

        <!--workflow id="_viewSearchResume" 拿到的resumeNoBuyHtml-->
        <exe type="RegularNodeParserUtil" name="buildParser">
            <param>
                <var id="html"/>
            </param>
            <!--<param>imgUrl</param>-->
            <param>mobile_pic</param>
            <output>
                <var id="parser"/>
            </output>
        </exe>

        <var type="BytesResourceBuilder" id="brb">
            <field name="parser"><var id="parser"/></field>
        </var>

        <exe id="brb" name="getResourceList">
            <output><var id="brs"/></output>
        </exe>

        <var id="resumeEntity">
            <field name="bytesResources"><var id="brs"/></field>
        </var>

 

    </workflow>

    <workflow id="resumeHtmlDeal" >
        <!--ResumeHtmlParse-->
        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[[\r\n]]]></param>
            <param></param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[(?iu)<a\s[^>]+>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[(?iu)</a[^>]*>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

 

      <!--  <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[(?iu)<script.*?</script>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>
-->

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<input[^>]+>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<div class=\"head\">\\s*<div class=\"tbd\">\\s*<div id=\"where\">\\s*<div class=\"tophelp\">[^录]+录.*?[^务]+务.*?</div>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[电话:<img\\s*src=\""]]></param>
            <param>
                <![CDATA[电话:<img src=\"http://www.ganji.com]]>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[电子邮件:<img\\s*src=\"]]></param>
            <param>
                <![CDATA[电子邮件:<img src=\"http://www.ganji.com]]>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<div class=\"nav clr\">\\s*<ul class=\"nav-cont clr\">\\s*<li>.*?</li>.*?</div>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[(?is)<div[^>]+class=['\"]infor-bot[\"'].*</body]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
        <param>
            <var id="resumeNoBuyHtml" global="true"/>
        </param>
        <param><![CDATA[<div class=\"nav clr\">\\s*<ul class=\"nav-cont clr\">\\s*<li>.*?</li>.*?</div>]]></param>
        <param>
            <![CDATA[</body]]>
        </param>
        <output>
            <var id="resumeNoBuyHtml" global="true"/>
        </output>
    </exe>


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<span class=\"city\">.*?</span>]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<div class=\"detail-where\">.*?</div>]]></param>
            <param>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <!--去掉头-->
        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<ul class="nav-cont clr".*?[^<]+.*?[^<]+</li>]]></param>
            <param>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<span><em id="top_banner">.*?导航.*?[^<]]]></param>
            <param>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<span class="fa-new"></span>]]></param>
            <param>
                <![CDATA[<span class=""></span>]]>
            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

         <!--除去页面的头尾-->


        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<div.*?id="hea.*?<div.*?<!--wrapper s-->]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>

        <exe type="StringUtil" name="regexReplace">
            <param>
                <var id="resumeNoBuyHtml" global="true"/>
            </param>
            <param><![CDATA[<div\s+class="GcnW980">.*?<div\s+id="footBox">.*?<!-- footer_js s-->]]></param>
            <param>

            </param>
            <output>
                <var id="resumeNoBuyHtml" global="true"/>
            </output>
        </exe>


    </workflow>

 

</workflow-resource>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值