Scala处理文件

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import com.alibaba.fastjson.JSONObject

import scala.collection.{JavaConversions, mutable}
import scala.io.Source
import scala.reflect.io.{Directory, Path}
import scala.util.matching.Regex



/**
 * Collects Kafka partition-leader information from `*.log` checkpoint files.
 *
 * Every file is scanned for lines of the form
 * `Topic: <name> Partition: <n> Leader: <id> ...` and for a timestamp line
 * (`2017-MM-dd HH:mm:ss ...`). For each `<topic>-<partition>` key the leader
 * found in the file is stored in [[cmaps]] under that file's timestamp.
 *
 * @param dir directory scanned for log files; defaults to the path the
 *            original script was written against
 */
class KafkaAnalysis(val dir: String = "D:\\securedownload") {

  /** Matches file names that end in `.log`. */
  val fileregex: Regex = ".*\\.log$".r

  /**
   * Result table: key is `<topic>-<partition>`, value is a JSON object that
   * maps a checkpoint timestamp to the leader id seen at that time.
   */
  val cmaps: ConcurrentHashMap[String, JSONObject] = new ConcurrentHashMap()

  /**
   * Lists the log files under [[dir]].
   *
   * NOTE(review): the predicate is also applied to directories and rejects
   * them, so sub-directories are presumably not descended into — confirm
   * whether a recursive scan was intended.
   *
   * @return the files whose name matches [[fileregex]]
   */
  def listFiles(): Iterator[File] = {
    val root = Directory(Path.jfile2path(new File(dir)))
    root
      .walkFilter(path => path.isFile && fileregex.findFirstIn(path.name).isDefined)
      .map(_.jfile)
  }

  /**
   * Parses every log file in parallel and merges the results into [[cmaps]].
   */
  def compact(): Unit = {
    listFiles().toList.par.foreach { file =>
      val source = Source.fromFile(file)
      // Close the handle even when parsing throws; the original leaked it.
      try fetchParams(source.getLines())
      finally source.close()
    }
  }

  /**
   * Extracts the timestamp and the partition leaders from the lines of one
   * file and records them in [[cmaps]].
   *
   * Nothing is recorded when no timestamp line is found.
   *
   * NOTE(review): when a file holds several timestamp lines only the last
   * one is used for all partitions of that file — confirm each file holds a
   * single checkpoint.
   *
   * @param iter the lines of a single checkpoint file
   */
  def fetchParams(iter: Iterator[String]): Unit = {
    // `(\\d+)` for the partition: the former `(\\d)` captured only the first
    // digit, so partitions 10..19 were all stored under the key of partition 1.
    val lineRegex = ".*Topic:\\s*(\\S+)\\s*Partition:\\s*(\\d+).*Leader:\\s*(\\d+)\\s*.*".r
    val timeRegex = "\\s*(2017-\\d+-\\d+\\s\\d+:\\d+:\\d+)\\s+.*".r
    var time: Option[String] = None
    val leaders = new mutable.HashMap[String, String]()

    iter.foreach { line =>
      lineRegex.findFirstMatchIn(line) match {
        case Some(m) =>
          leaders.update(s"${m.group(1)}-${m.group(2)}", m.group(3))
        case None =>
          // Only lines that are not partition lines are tried as timestamps.
          timeRegex.findFirstMatchIn(line).foreach(m => time = Some(m.group(1)))
      }
    }

    time.foreach { timestamp =>
      leaders.foreach { case (key, leader) =>
        // putIfAbsent is atomic, unlike the former containsKey/get/put
        // sequence, which could lose an entry when two files raced on a key.
        val fresh = new JSONObject()
        val history = Option(cmaps.putIfAbsent(key, fresh)).getOrElse(fresh)
        // compact() calls this method from several threads, so writes to the
        // shared JSON object are serialized on the object itself.
        history.synchronized {
          history.put(timestamp, leader)
        }
      }
    }
  }
}

/** Command-line entry point for [[KafkaAnalysis]]. */
object KafkaAnalysis {

  /**
   * Runs the analysis over the default directory and prints one line per
   * `<topic>-<partition>` key, in descending key order. Each line lists
   * every leader id recorded for that key followed by the number of
   * checkpoints in which it was the leader.
   *
   * @param args ignored
   */
  def main(args: Array[String]): Unit = {
    // Explicit decorators instead of the deprecated JavaConversions helpers.
    import scala.collection.JavaConverters._

    val leo = new KafkaAnalysis()
    leo.compact()
    leo.cmaps.asScala.toList.sortWith(_._1 > _._1).foreach { case (k, v) =>
      print(k + "    ")
      // Group the recorded leader ids by value to count how often each one led.
      v.values().asScala.toList.groupBy(identity).foreach { case (leader, occurrences) =>
        print(s"$leader:")
        print(occurrences.size)
        print("  ")
      }
      println()
    }
  }
}

文件数据为:

2017-06-01 22:00:01 checkpoint the topics information
Topic:A PartitionCount:20 ReplicationFactor:3 Configs:retention.ms=604800000
Topic: A Partition: 0 Leader: 11250 Replicas: 11250,11251,11252 Isr: 11251,11250,11252
Topic: A Partition: 1 Leader: 11251 Replicas: 11251,11252,11254 Isr: 11251,11252,11254
Topic: A Partition: 2 Leader: 11252 Replicas: 11252,11254,11255 Isr: 11255,11252,11254
Topic: A Partition: 3 Leader: 11254 Replicas: 11254,11255,11256 Isr: 11255,11256,11254
Topic: A Partition: 4 Leader: 11255 Replicas: 11255,11256,11257 Isr: 11257,11255,11256
Topic: A Partition: 5 Leader: 11256 Replicas: 11256,11257,11258 Isr: 11256,11258,11257
Topic: A Partition: 6 Leader: 11257 Replicas: 11257,11258,11247 Isr: 11257,11258,11247
Topic: A Partition: 7 Leader: 11258 Replicas: 11258,11247,11249 Isr: 11258,11247,11249
Topic: A Partition: 8 Leader: 11247 Replicas: 11247,11249,11250 Isr: 11250,11247,11249
Topic: A Partition: 9 Leader: 11249 Replicas: 11249,11250,11251 Isr: 11251,11250,11249
Topic: A Partition: 10 Leader: 11250 Replicas: 11250,11252,11254 Isr: 11250,11252,11254
Topic: A Partition: 11 Leader: 11251 Replicas: 11251,11254,11255 Isr: 11251,11255,11254
Topic: A Partition: 12 Leader: 11252 Replicas: 11252,11255,11256 Isr: 11255,11256,11252
Topic: A Partition: 13 Leader: 11254 Replicas: 11254,11256,11257 Isr: 11257,11256,11254
Topic: A Partition: 14 Leader: 11255 Replicas: 11255,11257,11258 Isr: 11257,11255,11258
Topic: A Partition: 15 Leader: 11256 Replicas: 11256,11258,11247 Isr: 11256,11258,11247
Topic: A Partition: 16 Leader: 11257 Replicas: 11257,11247,11249 Isr: 11257,11247,11249
Topic: A Partition: 17 Leader: 11258 Replicas: 11258,11249,11250 Isr: 11250,11258,11249
Topic: A Partition: 18 Leader: 11247 Replicas: 11247,11250,11251 Isr: 11251,11250,11247
Topic: A Partition: 19 Leader: 11249 Replicas: 11249,11251,11252 Isr: 11251,11252,11249
Topic:B PartitionCount:20 ReplicationFactor:3 Configs:retention.ms=604800000
Topic: B Partition: 0 Leader: 11249 Replicas: 11249,11247,11250 Isr: 11250,11247,11249
Topic: B Partition: 1 Leader: 11250 Replicas: 11250,11249,11251 Isr: 11251,11250,11249
Topic: B Partition: 2 Leader: 11251 Replicas: 11251,11250,11252 Isr: 11251,11250,11252
Topic: B Partition: 3 Leader: 11252 Replicas: 11252,11251,11254 Isr: 11251,11252,11254
Topic: B Partition: 4 Leader: 11254 Replicas: 11254,11252,11255 Isr: 11255,11252,11254
Topic: B Partition: 5 Leader: 11255 Replicas: 11255,11254,11256 Isr: 11255,11256,11254
Topic: B Partition: 6 Leader: 11256 Replicas: 11256,11255,11257 Isr: 11255,11256,11257
Topic: B Partition: 7 Leader: 11257 Replicas: 11257,11256,11258 Isr: 11257,11256,11258
Topic: B Partition: 8 Leader: 11258 Replicas: 11258,11257,11247 Isr: 11257,11258,11247
Topic: B Partition: 9 Leader: 11247 Replicas: 11247,11258,11249 Isr: 11258,11247,11249
Topic: B Partition: 10 Leader: 11249 Replicas: 11249,11250,11251 Isr: 11251,11250,11249
Topic: B Partition: 11 Leader: 11250 Replicas: 11250,11251,11252 Isr: 11251,11250,11252
Topic: B Partition: 12 Leader: 11251 Replicas: 11251,11252,11254 Isr: 11251,11252,11254
Topic: B Partition: 13 Leader: 11252 Replicas: 11252,11254,11255 Isr: 11255,11252,11254
Topic: B Partition: 14 Leader: 11254 Replicas: 11254,11255,11256 Isr: 11255,11256,11254
Topic: B Partition: 15 Leader: 11255 Replicas: 11255,11256,11257 Isr: 11257,11255,11256
Topic: B Partition: 16 Leader: 11256 Replicas: 11256,11257,11258 Isr: 11256,11258,11257
Topic: B Partition: 17 Leader: 11257 Replicas: 11257,11258,11247 Isr: 11257,11258,11247
Topic: B Partition: 18 Leader: 11258 Replicas: 11258,11247,11249 Isr: 11258,11247,11249
Topic: B Partition: 19 Leader: 11247 Replicas: 11247,11249,11250 Isr: 11250,11247,11249

### Scala 文件创建、读取与写入示例

以下是关于如何在 Scala 中实现文件的创建、读取和写入的具体方法。这些功能可以通过调用 Java 的 `java.io` 类库来完成。

#### 1. 创建并写入文件

Scala 并未提供内置的文件写入支持,因此可以利用 Java 提供的 `PrintWriter` 或者 `BufferedWriter` 来实现文件写入操作:

```scala
import java.io.{File, PrintWriter}

def writeFileExample(): Unit = {
  val writer = new PrintWriter(new File("example.txt"))
  writer.write("这是第一个例子。\n")
  writer.write("我们正在学习如何使用 Scala 进行文件写入。")
  writer.close()
}
```

此代码片段展示了如何通过 `PrintWriter` 将字符串写入到名为 `example.txt` 的文件中[^2]。

#### 2. 使用 Source 对象读取文件内容

Scala 提供了一个便捷的方式用于读取文件的内容——即使用 `Source.fromFile()` 方法。下面是一个简单的例子展示如何逐行读取文件中的数据:

```scala
import scala.io.Source

def readFileExample(filePath: String): Unit = {
  try {
    val source = Source.fromFile(filePath)
    for (line <- source.getLines()) println(line)
    source.close()
  } catch {
    case e: Exception => println(s"Error reading the file $filePath : ${e.getMessage}")
  }
}
```

在这个函数里,如果指定路径下的文件存在,则会打印每一行的内容;否则捕获异常并显示错误消息[^3]。

#### 3. 结合正则表达式处理文件内容

除了基本的文件读写外,在实际应用过程中可能还需要对文件内的特定模式的数据进行匹配或者替换等更复杂的操作。这时就可以引入正则表达式的概念了:

```scala
import scala.io.Source

val regexPattern = """(\d+)""".r // 定义一个简单数字匹配的正则表达式

// 假设已经有一个包含若干整数的文本文件
val source = Source.fromFile("numbers.txt")
try {
  source.getLines().foreach { line =>
    regexPattern.findAllIn(line).foreach(println)
  }
} finally {
  source.close()
}
```

这里定义了一个用来寻找任意长度连续数字串的正则表达式,并将其应用于通过 `Source.fromFile` 逐行读取到的每一行内容上。注意:前面的 `readFileExample` 函数返回的是 `Unit`,不能直接在它的返回值上调用 `foreach`,因此这里改为直接遍历 `getLines()` 的结果[^4]。

---

### 总结

以上分别介绍了三种常见的场景:一是怎样新建文件并把信息写入本地磁盘上的文档;二是怎样从已存在的文件中读取其内容;最后还探讨了当面对更加复杂的需求时,如何灵活运用正则表达式等额外工具对内容进行进一步加工。
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值