F#match的强大和List.tail方法的真实含义

本文介绍F#中match表达式的强大功能及应用实例,展示如何使用match表达式进行模式匹配,实现简洁高效的代码编写。

这几天在学习F#,感觉F#在很多方面确实比较简洁而强大,其match表达式就是其中之一,match with 跟C#的Switch类似,但功能上要强大很多,下面是例子:

    let print_any x = printfn "%A" x
    let rec findSequence l =
        match l with
            | [a; b; c; d] -> //a,b,c,d其实只是占位符号,表示4个元素的数组,当然,如果匹配到,你可以通过abcd访问到这4个元素
                printfn "Last 4 numbers in the list were %i %i %i %i" a b c d
            | 1 :: 2 :: 3 :: tail -> //形如1,2,3,任意元素
                printfn "Found sequence 1, 2, 3 within the list"
                findSequence tail
            | head :: mid ::tail -> //匹配3个元素的元组,因为每个数组最后都隐含一个空,所以只要是数组元素大于2,都可匹配到.
                printfn "xxxx:%A %A %A" head mid tail
                findSequence tail
            | head :: tail -> //同上,匹配一个二元组。
                printfn "xxxx:%A %A" head tail
            | [] -> ()
    let testSequence = [1; 2; 3;4; 5; 6; 7; 8; 9; 8; 7; 6; 5; 4; 3; 2; 1]
    let b1 = findSequence testSequence
    print_any b1

match with确实很强大,特别是在集合做分支的时候.但这里面有个误导,就是上面的head,tail并不是我们所理解的链表中的头和位,在这里其实只是一个占位符号.当然这里还是标示符号命名造成的,而List的head和tail就有误导人的之嫌:

    let listOfList = [[2; 3; 5]; [7; 11; 13]; [17; 19; 23; 29]]
    let rec concatList l =
        match l with
        | head :: tail -> head @ (concatList tail)
        | [] -> []
    let rec concatListOrg l =
        if List.isEmpty l = false then
            let head = List.head l in
            let tail = List.tail l in
                head @ (concatListOrg tail)
        else
        []
    let print_any x = printfn "%A" x
    let primes = concatList listOfList
    print_any primes

head指向的是序列的头元素,而tail指向的是除了head元素之外的剩下序列.当然,这个也没有对何错,只是容易让人误解.我们在学链表的时候,head,tail一般都指首尾元素.如果用remain剩下来表达tail所指也有不好.当然大家弄明白了也就无所谓了.

帮我优化检查脚本”#!/bin/bash #SBATCH -p amd_256 # 集群分区(根据实际调整) #SBATCH -N 1 # 单节点运行 #SBATCH -n 32 # 线程数(与集群资源匹配) #SBATCH -t 24:00:00 # 运行时间(可调整) #SBATCH --mem=64G # 内存(可调整) #SBATCH -J juicebox_hic # 作业名 #SBATCH -o /public1/home/scb3201/rawdata/Hic/logs/%j_hic_out.log #SBATCH -e /public1/home/scb3201/rawdata/Hic/logs/%j_hic_err.log ########################################################## # 1. 环境激活与关键工具检查 ########################################################## source ~/.bashrc && conda activate Hic || { echo "[ERROR] Hic环境激活失败!"; exit 1; } # 检查核心工具是否存在 which samtools >/dev/null 2>&1 || { echo "[ERROR] samtools未安装在Hic环境中"; exit 1; } which bedtools >/dev/null 2>&1 || { echo "[ERROR] bedtools未安装在Hic环境中"; exit 1; } which juicer_tools >/dev/null 2>&1 || { echo "[ERROR] juicer_tools未安装在Hic环境中"; exit 1; } ########################################################## # 2. 路径定义与必要目录创建 ########################################################## # 固定路径(根据用户实际路径保持不变) REF_GENOME="/public1/home/scb3201/results/genome/Medaka/genome.nextpolish.medaka.fasta" ALIGN_DIR="/public1/home/scb3201/results/genome/Hic/alignment" SCAFFOLD_DIR="/public1/home/scb3201/results/genome/Hic/scaffold" JUICEBOX_DIR="/public1/home/scb3201/results/genome/Hic/juicebox" LOG_DIR="/public1/home/scb3201/rawdata/Hic/logs" TMP_DIR="/public1/home/scb3201/rawdata/Hic/tmp" # 临时目录(避免磁盘空间不足) # 创建所有依赖目录(确保不存在时自动创建) mkdir -p ${LOG_DIR} ${JUICEBOX_DIR} ${ALIGN_DIR} ${SCAFFOLD_DIR} ${TMP_DIR} || { echo "[ERROR] 目录创建失败"; exit 1; } ########################################################## # 3. 生成染色体大小文件(chrom.sizes) ########################################################## echo "[INFO] 生成染色体大小文件..." if [ ! -f "${REF_GENOME}.fai" ]; then samtools faidx ${REF_GENOME} 2>> ${LOG_DIR}/chrom_sizes.log || { echo "[ERROR] 参考基因组索引生成失败!"; exit 1; } fi cut -f1,2 ${REF_GENOME}.fai > ${JUICEBOX_DIR}/chrom.sizes 2>> ${LOG_DIR}/chrom_sizes.log || { echo "[ERROR] chrom.sizes生成失败!"; exit 1; } #重命名为medaka_genome.chrom.sizes(匹配基因组ID) cp ${JUICEBOX_DIR}/chrom.sizes ${JUICEBOX_DIR}/medaka_genome.chrom.sizes || { echo "[ERROR] 染色体大小文件重命名失败"; exit 1; } ######################################################### # 4. 检查输入BAM文件有效性 ########################################################## echo "[INFO] 检查输入BAM文件..." INPUT_BAM="${ALIGN_DIR}/aligned_sorted.bam" if [ ! -f "${INPUT_BAM}" ]; then echo "[ERROR] 输入BAM文件不存在:${INPUT_BAM}"; exit 1; fi ########################################################## # 5. BAM转bedpe格式(juicer_tools输入要求) ########################################################## echo "[INFO] BAM转换为bedpe格式..." BEDPE_FILE="${JUICEBOX_DIR}/aligned.bedpe" bedtools bamtobed -bedpe -i ${INPUT_BAM} > ${BEDPE_FILE} 2>> ${LOG_DIR}/bam_to_bedpe.log || { echo "[ERROR] BAM转bedpe失败!"; exit 1; } ########################################################## # 6. 生成DPNII限制酶位点文件(适配用户实验酶) ########################################################## echo "[INFO] 生成DPNII酶位点文件(识别位点:GATC)..." ENZYME_FILE="${JUICEBOX_DIR}/dpnII_sites.txt" echo -e "chrom\tsite\tstrand" > ${ENZYME_FILE} # 符合juicer_tools格式要求 # 提取参考基因组中所有GATC位点的字节偏移量(作为位点位置) samtools faidx ${REF_GENOME} | cut -f1 | while read chr; do grep -b -o "GATC" ${REF_GENOME} | awk -v chr=${chr} '{print chr"\t"$1"\t+"}' >> ${ENZYME_FILE} done 2>> ${LOG_DIR}/dpnII_sites.log || { echo "[WARNING] DPNII位点文件生成失败(非必需,可跳过)"; } echo "[INFO] 生成hic文件(DPNII酶适配+染色体文件修正)..." # 切换到JUICEBOX_DIR目录,确保染色体大小文件被找到 cd ${JUICEBOX_DIR} || { echo "[ERROR] 无法切换到JUICEBOX_DIR"; exit 1; } if [ ! -f "medaka_genome.chrom.sizes" ]; then echo "[ERROR] 染色体文件缺失"; exit1; fi if [ ! -r "medaka_genome.chrom.sizes" ]; then echo "[ERROR] 文件无读权限"; exit1; fi ########################################################## # 7. juicer_tools生成hic文件(核心参数修正) ########################################################## echo "[INFO] 生成hic文件(DPNII酶适配版)..." juicer_tools pre \ --threads ${SLURM_NTASKS} \ -t ${TMP_DIR} \ -f dpnII_sites.txt \ aligned.bedpe \ medaka_hic_final.hic \ medaka_genome \ 2>> ${LOG_DIR}/juicer_tools_pre.log || { echo "[ERROR] hic文件生成失败!"; exit 1; } # 正确指定线程数(利用集群资源) # 临时目录(避免磁盘溢出) # DPNII酶位点文件(提升结果分辨率) # 输入bedpe文件 # 输出hic文件 # 自定义基因组ID(唯一即可) ########################################################## # 8. 清理临时文件(释放磁盘空间) ########################################################## echo "[INFO] 清理临时文件..." rm -f ${BEDPE_FILE} 2>> ${LOG_DIR}/cleanup.log || { echo "[WARNING] 临时文件清理失败,不影响结果"; } ########################################################## # 9. MultiQC报告生成(含BAM统计信息) ########################################################## echo "[INFO] 生成MultiQC分析报告..." MULTIQC_DIR="${JUICEBOX_DIR}/multiqc_report" mkdir -p ${MULTIQC_DIR} || { echo "[ERROR] MultiQC目录创建失败"; exit 1; } # 添加BAM统计信息(让报告有实际内容) samtools stats ${INPUT_BAM} > ${LOG_DIR}/bam_stats.txt 2>&1 # 生成报告 multiqc ${LOG_DIR} -o ${MULTIQC_DIR} --title "HI-C Analysis Report" --filename "hic_report.html" 2>> ${LOG_DIR}/multiqc.log || { echo "[WARNING] MultiQC报告生成失败,结果保留"; } ########################################################## # 运行完成提示 ########################################################## echo "[SUCCESS] 全流程运行完成!" echo "结果文件路径:" echo "1. hic文件:${JUICEBOX_DIR}/medaka_hic_final.hic" echo "2. MultiQC报告:${MULTIQC_DIR}/hic_report.html" echo "3. 染色体大小文件:${JUICEBOX_DIR}/chrom.sizes" “我提供参考内容”juicebox_assembly_converter.py usage: juicebox_assembly_converter.py [-h] -a ASSEMBLY -f FASTA [-p PREFIX] [-c] [-s] [-v] juicebox_assembly_converter.py: error: the following arguments are required: -a/--assembly, -f/--fasta (Hic) [scb3201@ln134%bscc-a6 Hic]$ python degap_assembly.py python: can't open file '/public1/home/scb3201/rawdata/Hic/degap_assembly.py': [Errno 2] No such file or directory (Hic) [scb3201@ln134%bscc-a6 Hic]$ degap_assembly.py Traceback (most recent call last): File "/public1/home/scb3201/anaconda3/envs/Hic/bin/degap_assembly.py", line 6, in <module> with open(sys.argv[1]) as file: IndexError: list index out of range (Hic) [scb3201@ln134%bscc-a6 Hic]$ makeAgpFromFasta.py makeAgpFromFasta.py usage: makeAgpFromFasta.py <fasta_file> <agp_out_file> (Hic) [scb3201@ln134%bscc-a6 Hic]$ juicebox_assembly_converter.py -h usage: juicebox_assembly_converter.py [-h] -a ASSEMBLY -f FASTA [-p PREFIX] [-c] [-s] [-v] optional arguments: -h, --help show this help message and exit -a ASSEMBLY, --assembly ASSEMBLY juicebox assembly file -f FASTA, --fasta FASTA the fasta file -p PREFIX, --prefix PREFIX the prefix to use for writing outputs. Default: the assembly file, minus the file extension -c, --contig_mode ignore scaffold specification and just output contigs. useful when only trying to obtain a fasta reflecting juicebox breaks. Default: False -s, --simple_chr_names use simple chromosome names ("ChromosomeX") for scaffolds instead of detailed chromosome names ("PGA_scaffold_X__Y_contigs__length_Z"). Has no effect in contig_mode. -v, --verbose print summary of processing steps to stdout, otherwise silent. Default: True (Hic) [scb3201@ln134%bscc-a6 Hic]$ bwa -h [main] unrecognized command '-h' (Hic) [scb3201@ln134%bscc-a6 Hic]$ bwa Program: bwa (alignment via Burrows-Wheeler transformation) Version: 0.7.19-r1273 Contact: Heng Li <hli@ds.dfci.harvard.edu> Usage: bwa <command> [options] Command: index index sequences in the FASTA format mem BWA-MEM algorithm fastmap identify super-maximal exact matches pemerge merge overlapping paired ends (EXPERIMENTAL) aln gapped/ungapped alignment samse generate alignment (single ended) sampe generate alignment (paired ended) bwasw BWA-SW for long queries (DEPRECATED) shm manage indices in shared memory fa2pac convert FASTA to PAC format pac2bwt generate BWT from PAC pac2bwtgen alternative algorithm for generating BWT bwtupdate update .bwt to the new format bwt2sa generate SA from BWT and Occ Note: To use BWA, you need to first index the genome with `bwa index'. There are three alignment algorithms in BWA: `mem', `bwasw', and `aln/samse/sampe'. If you are not sure which to use, try `bwa mem' first. Please `man ./bwa.1' for the manual. (Hic) [scb3201@ln134%bscc-a6 Hic]$ samtools Program: samtools (Tools for alignments in the SAM format) Version: 1.22.1 (using htslib 1.22.1) Usage: samtools <command> [options] Commands: -- Indexing dict create a sequence dictionary file faidx index/extract FASTA fqidx index/extract FASTQ index index alignment -- Editing calmd recalculate MD/NM tags and '=' bases fixmate fix mate information reheader replace BAM header targetcut cut fosmid regions (for fosmid pool only) addreplacerg adds or replaces RG tags markdup mark duplicates ampliconclip clip oligos from the end of reads -- File operations collate shuffle and group alignments by name cat concatenate BAMs consensus produce a consensus Pileup/FASTA/FASTQ merge merge sorted alignments mpileup multi-way pileup sort sort alignment file split splits a file by read group quickcheck quickly check if SAM/BAM/CRAM file appears intact fastq converts a BAM to a FASTQ fasta converts a BAM to a FASTA import Converts FASTA or FASTQ files to SAM/BAM/CRAM reference Generates a reference from aligned data reset Reverts aligner changes in reads -- Statistics bedcov read depth per BED region coverage alignment depth and percent coverage depth compute the depth flagstat simple stats idxstats BAM index stats cram-size list CRAM Content-ID and Data-Series sizes phase phase heterozygotes stats generate stats (former bamcheck) ampliconstats generate amplicon specific stats checksum produce order-agnostic checksums of sequence content -- Viewing flags explain BAM flags head header viewer tview text alignment viewer view SAM<->BAM<->CRAM conversion depad convert padded BAM to unpadded BAM samples list the samples in a set of SAM/BAM/CRAM files -- Misc help [cmd] display this help message or help for [cmd] version detailed version information (Hic) [scb3201@ln134%bscc-a6 Hic]$ bedtools bedtools is a powerful toolset for genome arithmetic. Version: v2.31.1 About: developed in the quinlanlab.org and by many contributors worldwide. Docs: http://bedtools.readthedocs.io/ Code: https://github.com/arq5x/bedtools2 Mail: https://groups.google.com/forum/#!forum/bedtools-discuss Usage: bedtools <subcommand> [options] The bedtools sub-commands include: [ Genome arithmetic ] intersect Find overlapping intervals in various ways. window Find overlapping intervals within a window around an interval. closest Find the closest, potentially non-overlapping interval. coverage Compute the coverage over defined intervals. map Apply a function to a column for each overlapping interval. genomecov Compute the coverage over an entire genome. merge Combine overlapping/nearby intervals into a single interval. cluster Cluster (but don't merge) overlapping/nearby intervals. complement Extract intervals _not_ represented by an interval file. shift Adjust the position of intervals. subtract Remove intervals based on overlaps b/w two files. slop Adjust the size of intervals. flank Create new intervals from the flanks of existing intervals. sort Order the intervals in a file. random Generate random intervals in a genome. shuffle Randomly redistribute intervals in a genome. sample Sample random records from file using reservoir sampling. spacing Report the gap lengths between intervals in a file. annotate Annotate coverage of features from multiple files. [ Multi-way file comparisons ] multiinter Identifies common intervals among multiple interval files. unionbedg Combines coverage intervals from multiple BEDGRAPH files. [ Paired-end manipulation ] pairtobed Find pairs that overlap intervals in various ways. pairtopair Find pairs that overlap other pairs in various ways. [ Format conversion ] bamtobed Convert BAM alignments to BED (& other) formats. bedtobam Convert intervals to BAM records. bamtofastq Convert BAM records to FASTQ records. bedpetobam Convert BEDPE intervals to BAM records. bed12tobed6 Breaks BED12 intervals into discrete BED6 intervals. [ Fasta manipulation ] getfasta Use intervals to extract sequences from a FASTA file. maskfasta Use intervals to mask sequences from a FASTA file. nuc Profile the nucleotide content of intervals in a FASTA file. [ BAM focused tools ] multicov Counts coverage from multiple BAMs at specific intervals. tag Tag BAM alignments based on overlaps with interval files. [ Statistical relationships ] jaccard Calculate the Jaccard statistic b/w two sets of intervals. reldist Calculate the distribution of relative distances b/w two files. fisher Calculate Fisher statistic b/w two feature files. [ Miscellaneous tools ] overlap Computes the amount of overlap from two intervals. igv Create an IGV snapshot batch script. links Create a HTML page of links to UCSC locations. makewindows Make interval "windows" across a genome. groupby Group by common cols. & summarize oth. cols. (~ SQL "groupBy") expand Replicate lines based on lists of values in columns. split Split a file into multiple files with equal records or base pairs. summary Statistical summary of intervals in a file. [ General Parameters ] --cram-ref Reference used by a CRAM input [ General help ] --help Print this help menu. --version What version of bedtools are you using?. --contact Feature requests, bugs, mailing lists, etc. (Hic) [scb3201@ln134%bscc-a6 Hic]$ assembly-stats usage: stats [options] <list of fasta/q files> Reports sequence length statistics from fasta and/or fastq files options: -l <int> Minimum length cutoff for each sequence. Sequences shorter than the cutoff will be ignored [1] -s Print 'grep friendly' output -t Print tab-delimited output -u Print tab-delimited output with no header line -v Print version and exit (Hic) [scb3201@ln134%bscc-a6 Hic]$ juicer Usage: juicer <command> <arguments> Version: 1.2.2 Command: pre generate files compatible with Juicebox toolset post generate assembly files after Juicebox curation (Hic) [scb3201@ln134%bscc-a6 Hic]$ juicertools bash: juicertools: command not found... (Hic) [scb3201@ln134%bscc-a6 Hic]$ juicer_tools WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance. WARN [2025-12-06T22:43:06,303] [Globals.java:138] [main] Development mode is enabled Juicer Tools Version 1.22.01 Usage: dump <observed/oe> <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize> [outfile] dump <norm/expected> <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr> <BP/FRAG> <binsize> [outfile] dump <loops/domains> <hicFile URL> [outfile] pre [options] <infile> <outfile> <genomeID> addNorm <input_HiC_file> [input_vector_file] pearsons [-p] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr> <BP/FRAG> <binsize> [outfile] eigenvector -p <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr> <BP/FRAG> <binsize> [outfile] apa <hicFile(s)> <PeaksFile> <SaveFolder> arrowhead <hicFile(s)> <output_file> hiccups <hicFile> <outputDirectory> hiccupsdiff <firstHicFile> <secondHicFile> <firstLoopList> <secondLoopList> <outputDirectory> validate <hicFile> -h, --help print help -v, --verbose verbose mode -V, --version print version Type juicer_tools <commandName> for more detailed usage instructions (Hic) [scb3201@ln134%bscc-a6 Hic]$ juicer_tools pre WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance. WARN [2025-12-06T22:43:17,904] [Globals.java:138] [main] Development mode is enabled Usage: juicer_tools pre [options] <infile> <outfile> <genomeID> : -d only calculate intra chromosome (diagonal) [false] : -f <restriction site file> calculate fragment map : -m <int> only write cells with count above threshold m [0] : -q <int> filter by MAPQ score greater than or equal to q [not set] : -c <chromosome ID> only calculate map on specific chromosome [not set] : -r <comma-separated list of resolutions> Only calculate specific resolutions [not set] : -t <tmpDir> Set a temporary directory for writing : -s <statistics file> Add the text statistics file to the Hi-C file header : -g <graphs file> Add the text graphs file to the Hi-C file header : -n Don't normalize the matrices : -z <double> scale factor for hic file : -a <1, 2, 3, 4, 5> filter based on inner, outer, left-left, right-right, tandem pairs respectively : --randomize_position randomize positions between fragment sites : --random_seed <long> for seeding random number generator : --frag_site_maps <fragment site files> for randomization : -k normalizations to include : -j number of CPU threads to use : --threads <int> number of threads : --mndindex <filepath> to mnd chr block indices (Hic) [scb3201@ln134%bscc-a6 Hic]$ juicer_tools dump WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance. WARN [2025-12-06T22:44:15,812] [Globals.java:138] [main] Development mode is enabled Usage: juicer_tools dump <observed/oe> <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize> [outfile] dump <norm/expected> <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr> <BP/FRAG> <binsize> [outfile] dump <loops/domains> <hicFile URL> [outfile]“
最新发布
12-07
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值