Scala解析Nginx日志为对象

最新推荐文章于 2025-04-22 15:22:41 发布

原创最新推荐文章于 2025-04-22 15:22:41 发布 · 698 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#scala #nginx数据转换

Scala 专栏收录该内容

26 篇文章

订阅专栏

/**
  * Parser that converts raw Nginx access-log lines into [[AccessLog]] records.
  */
object NginxLogParser{
  /**
    * Regular expression describing one access-log line; `.r` turns the string literal
    * into a `Regex`.
    *
    * It captures nine groups, in order: client IP, client identity, user id, timestamp
    * (including its square brackets), request (including its quotes), three-digit
    * response code, content size, referrer (quoted) and user agent (quoted).
    */
  val PATTERN: scala.util.matching.Regex =
    """(\S+) (\S+) (\S+) (\[.*\]) (\".*\") (\d{3}) (\d+) (\".*?\") (\".*?\")""".r

  /**
    * Parses a single access-log line into an [[AccessLog]].
    *
    * A `null` or empty line yields a placeholder record made of blank strings and
    * zeros. Any other line is searched for the first occurrence of [[PATTERN]]; the
    * match does not have to span the whole line, so extra trailing fields are ignored.
    *
    * @param log raw log line
    * @return the parsed record, or the blank placeholder for a `null`/empty line
    * @throws RuntimeException if a non-empty line contains no match for [[PATTERN]]
    * @throws NumberFormatException if the content-size digits do not fit in a `Long`
    */
  def parseLog2Line(log: String): AccessLog = {
    // Only a non-empty line can match PATTERN, so empty input short-circuits to the
    // blank placeholder and everything else goes through the regex.
    if (log == null || log.isEmpty) {
      AccessLog("", "", "", "", "", 0, 0L, "", "")
    } else {
      // findFirstMatchIn is unanchored; a `case PATTERN(...)` extractor would demand a
      // whole-line match and reject lines that carry additional trailing fields.
      PATTERN.findFirstMatchIn(log) match {
        case Some(m) =>
          AccessLog(
            m.group(1),
            m.group(2),
            m.group(3),
            m.group(4),
            m.group(5),
            m.group(6).toInt, // exactly three digits by construction of PATTERN
            m.group(7).toLong,
            m.group(8),
            m.group(9)
          )
        case None =>
          throw new RuntimeException("Cannot parse log line: " + log)
      }
    }
  }

  /**
    * Demo entry point: parses a sample log line and prints the parsed fields, one per
    * line (every field except the user agent).
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val line = """111.128.69.30 - - [11/Sep/2018:00:01:00 +0800] "GET /api/getinfo.php?tid=1e35c7b357cd&rid=059e15c97800&gw=gaoke.com&did=2424826&sn=GAOKE_Q330&action=run HTTP/1.1" 200 315 "http://gaoke.com:8848/wx.html?tid=1e35c7b357cd&rid=059e15c97800&gw=gaoke.com&did=2424826&sn=GAOKE_Q330" "Mozilla/5.0 (Linux; Android 6.0; S9 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.91 Mobile Safari/537.36" "-""""

    val log = NginxLogParser.parseLog2Line(line)
    println(log.ip)
    println(log.clientIdent)
    println(log.userId)
    println(log.timestamp)
    println(log.request)
    println(log.responseCode)
    println(log.contentSize)
    println(log.referrer)
  }
}

/**
  * One parsed access-log entry.
  *
  * @param ip           real IP address of the device/user
  * @param clientIdent  client identifier
  * @param userId       user
  * @param timestamp    date and time of the access
  * @param request      request information (GET/POST, MAC value, etc.)
  * @param responseCode response status, e.g. 200 success, 304 static resource load
  * @param contentSize  size of the returned file
  * @param referrer     source the request was referred from
  * @param ua           user-agent information
  */
case class AccessLog(
  ip: String,
  clientIdent: String,
  userId: String,
  timestamp: String,
  request: String,
  responseCode: Int,
  contentSize: Long,
  referrer: String,
  ua: String
//  forward: String // redirect target page (field currently disabled)

) extends Serializable {
  /** Renders every field, in declaration order, as one comma-separated line. */
  override def toString: String = productIterator.mkString(",")
}