::::::::::::::
freq
::::::::::::::
#!/bin/sh
## Output words in order of frequency (with counts) in merge of given files.
## Usage: freq [file ...]        (with no file, cat reads standard input)
##
## A "word" is a maximal run of the letters a-z after folding A-Z to lower
## case; every other character acts as a separator.
##
## Historical note: SYS V /usr/bin/tr and BSD /usr/ucb/tr treated strings
## differently.  In Sys V, string 2 was not automatically padded out unless
## the [x*] notation was used, and ranges were written as [a-z] (although
## a-z also worked), e.g.:   /usr/bin/tr -cs '[a-z]' '[\n*]'
## The forms used below -- the classes [:upper:] and [:lower:] and the [x*]
## repeat notation -- are the ones specified by POSIX, so plain "tr" from
## PATH is used instead of a hard-coded /usr/ucb/tr.
## LC_ALL=C keeps the classes to the ASCII letters A-Z / a-z.
cat "$@" |                               ## concatenate all given input files
LC_ALL=C tr '[:upper:]' '[:lower:]' |    ## lower case words
LC_ALL=C tr -cs '[:lower:]' '[\n*]' |    ## -c: complement string 1
                                         ## -s: suppress repeated characters of string 2
                                         ## [\n*]: pad string 2 with newlines
sed '/^$/d' |                            ## drop the empty "word" that appears when
                                         ## the input starts with a non-letter
sort |
uniq -c |                                ## replace n copies of 'line' with: n line
sort -nr                                 ## -n : numerical sort on 1st column  -r: reverse order
::::::::::::::
freqnocount
::::::::::::::
#!/bin/sh
# Output frequent words (WITHOUT counts) in merge of given files.
# Usage: freqnocount [file ...]
freq "$@" |
awk '{print $2}'    # just print second field (the words, not the counts)
## or:    freq "$@" | tr -d '0-9 '
## or:    freq "$@" | while read -r COUNT WORD ; do echo "$WORD" ; done
::::::::::::::
freqseg
::::::::::::::
#!/bin/bash
# Output $1 to $2 most frequent words (with counts) in merge of given files
# so "freqseg 3 7 ... " gives 3, 4, 5, 6, 7th most freq words.
# Usage: freqseg first last [file ...]
if (( $# < 2 )); then                  ## both rank arguments are required
  printf 'usage: %s first last [file ...]\n' "${0##*/}" >&2
  exit 2
fi
first=$1; last=$2; shift 2             ## copy first two arguments and shift away
freq "$@" |
head -n "$last" |                      ## initial 7 lines
tail -n "+$first"                      ## from line 3 onward of those first 7 lines
## "tail -n +N" starts output at line N, so a word list shorter than $last
## lines still yields the requested ranks.  Counting back from the end with
##     tail -n "$(( last - first + 1 ))"       (in sh: expr "$last" - "$first" + 1)
## returns higher-ranked lines instead whenever fewer than $last lines exist.
## Alternate: "sed" could be used instead of "head" and "tail", as follows:
# freq "$@" | sed -n "${first},${last}p"
::::::::::::::
freqeach
::::::::::::::
#!/bin/sh
# Output top ten most freq words (with counts) separately for each file.
# freqeach_main FILE...
#   For each FILE that is readable: print "FILE:" followed by the first ten
#   lines of `freq FILE` (head's default line count).
#   For any other argument: print a diagnostic on stderr and carry on with
#   the next argument.
freqeach_main() {
  for i                   ## With no "in" clause this means: for i in "$@"
  do
    if [ -r "$i" ] ; then ## check to see if file readable
      printf '%s:\n' "$i" ## print name of file (exactly as given)
      freq "$i" | head    ## give top ten; quoted so a name containing
                          ## blanks or glob characters stays one argument
    else                  ## file missing or unreadable, print message
      printf '%s: bad file argument: %s\n' "${0##*/}" "$i" >&2
      # exit 1            ## uncomment if missing files should be fatal
    fi
  done
}

freqeach_main "$@"