Programmer VS DBA

 

...

 

 

class LabDataLine {
    static col_names = ['LABDT','LABTM','REQUISITIONNO','SUBJECT','SUBJECTINITIALS','SEX','SUBJECTDOB','SPONSOR','PROTOCOL','PHYSICIANNM','SITENO','VISIT','VISITNO','GROUPPNL','TESTNM','RANGEN','RESULTTXT','RESULTREAL','RANGELOW','RANGEHI','FLAG','UNIT','FOOTNOTE','LOCCODE','LOCADDR']
    static pks = ['PHYSICIANNM','SITENO','SUBJECT','VISIT','VISITNO','LABDT','LABTM','GROUPPNL','TESTNM'], cols = [:]
    static{col_names.eachWithIndex{o,i->
        cols[o]=i
    }}
    static REGEXP = ['SUBJECT':/00\d{1}\-\d{4}/,'VISITNO':/^NA$|^[1-9]{1}\d*/,'LABDT':/(([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})(((0[13578]|1[02])(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)(0[1-9]|[12][0-9]|30))|(02(0[1-9]|[1][0-9]|2[0-8]))))|((([0-9]{2})(0[48]|[2468][048]|[13579][26])|((0[48]|[2468][048]|[3579][26])00))0229)/,'LABTM':/^$|((0[0-9])|(1[0-9])|(2[0-3]))([0-5][0-9])([0-5][0-9])/,'RESULTREAL':/^$|^[-+]?\d+(\.\d+)?$/,'RANGELOW':/^$|^[-+]?\d+(\.\d+)?$/,'RANGEHI':/^$|^[-+]?\d+(\.\d+)?$/]

    def location, line, arr
    public LabDataLine(m){
      location = m.location
      line = m.line
      arr = this.line.split('\\|')
    }
    public static String genHd() {
        def s = []
        col_names.each{s << it}
        "<th>${s.join('</th><th>')}</th>"
    }
    public String toString(){
        def s = []
        col_names.each{
            def data = arr[cols[it]]
            s << (data==''?'&nbsp;':data)
        }
        "<td>${s.join('</td><td>')}</td>"
    }
    public String toString(highlightedProps){
        def s = []
        col_names.each{
            def data = arr[cols[it]]
            def hl = highlightedProps.contains(it)
            s << (data==''?'&nbsp;':"${hl?'<b>':''}$data${hl?'</b>':''}")
        }
        "<td>${s.join('</td><td>')}</td>"
    }
    public int hashCode(){
        def s = []
        pks.each{s << arr[cols[it]]}
        s.join('|').hashCode()
    }
    public boolean equals(Object o){
        this.hashCode() == o.hashCode()
    }
}

def startAt = new Date().time
def files = new File('../data').listFiles().findAll{it.name=~/(?i)\.txt$/}
files.sort{f1,f2->
	f1.lastModified()<=>f2.lastModified()
}

def dataFiles = [files[-1], files[-2]].sort{f1,f2->f1.name<=>f2.name}
def dataset = [:], h2dl = [[:],[:],[:]]

// preparation
(-2..-1).each{i->
    dataFiles[i].eachLine{l,n->
        if(n>1){
            def location = "$n@file${i+3}"
            def dl = new LabDataLine(location:location,line:l)
            dataset[location] = dl

            def hc = l.hashCode()
            h2dl[i+2][hc] = h2dl[i+2][hc]?:[]
            h2dl[i+2][hc] << location

            def hc2 = dl.hashCode()
            h2dl[-1][hc2] = h2dl[-1][hc2]?:[:]
            h2dl[-1][hc2][hc] = h2dl[-1][hc2][hc] ?: []
            h2dl[-1][hc2][hc] << dl
        }
    }
}
def report = []
report << "<html><head><title>Lab Data Validation Report</title><style type=\"text/css\">table, tr, td {border-width: 1px;border-spacing: 2px;border-style: outset;border-color: blue;border-collapse: separate;}th{background-color:red;}.even-row{background-color:#CECEF6;}.odd-row{background-color:#A9D0F5;}</style></head><body>file1: ${dataFiles[0].name}<br>file2: ${dataFiles[1].name}<hr>"

//#1. unit validation
report << "---unit validation---<br>"
report << "<table><tr><th>Location</th><th>Field</th><th>Value</th></tr>"
def j=0
dataset.each{dlkv->
    def dl = dlkv.value
    dl.pks.each{pk->
        if(dl.arr[dl.cols[pk]].trim() == ''){
            report << "<tr class=\"${j++%2==0?'even-row':'odd-row'}\"><td>${dl.location}</td><td>$pk</td><td>NULL</td></tr>"
        }
    }
    dl.REGEXP.each{kv->
        if(!(dl.arr[dl.cols[kv.key]] =~ kv.value)){
            report << "<tr class=\"${j++%2==0?'even-row':'odd-row'}\"><td>${dl.location}</td><td>${kv.key}</td><td>${dl.arr[dl.cols[kv.key]]}</td><tr>"
        }
    }
}
report << "</table><hr>"

//#2. integration validation :: duplicated lines in each file
report << "---duplicated lines in each file---<br>"
report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"
(0..1).each{i->
    h2dl[i].each{kv->
        if(kv.value.size()>1){
            def tmp = []
            kv.value.each{e->
                tmp << "<tr><td>$e</td>${dataset[e]}</tr>"
            }
            report << tmp
        }
    }
}
report << "</table><hr>"

//#3. integration validation :: unequal lines with equal PKs in the two files
report << "---unequal lines with equal PKs in the two files---<br>"
report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"

def seq1 = 1, colors = ['Aqua', 'Orange'], ci = 0
h2dl[-1].each{
    if(it.value.size()>1){
         def seq2 = 1
         it.value.each{dl->
            def d = dl.value[0]
            def pkgrp = it.value
            def highlightedProps = []
            d.col_names.each{col->
                if(pkgrp.findAll{kv->kv.value[0].arr[d.cols[col]]==d.arr[d.cols[col]]}.size()==1){
                    highlightedProps << col
                }
            }

            report << "<tr style=\"background:${colors[ci]};\"><td>${seq1}.${seq2}<br>${dl.value.location.join('<br>')}</td>${dl.value[0].toString(highlightedProps)}</tr>"
            seq2++
         }
         ci = seq1++%2
    }
}
report << "</table><hr>"
//#4. integration validation :: lines in file1 but not in file2(by PKs)
report << "---lines in file1 but not in file2(by PKs)---<br>"
report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"
def hss = [new HashSet(), new HashSet()]
dataset.each{
    hss[it.value.location=~/@file1/?0:1] << it.value
}
hss[0].minus(hss[1]).eachWithIndex{o,i->
    report << "<tr class=\"${i%2==0?'even-row':'odd-row'}\"><td>${o.location}</td>${o}</tr>"
}
report << "</table><hr>"

def msg = "Validation script by S.C. Global Research Services, LLC.<br>Time elapsed: ${new Date().time - startAt} ms"
report << "<p>$msg</p>"

new File("../output/report.htm").write(report.join('\n'))

 

...

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值