#!/bin/sh
# sub_prefix - subtract prefixes
# usage
#     sub_prefix < in > out
#
# search for lines that start with 
# anything except
#      a letter
#      a number
#      a space or tab
# which may be repeated up to three times
# followed by 0 or more spaces or tabs
# the above is called the prefix
# followed by a word - the word is kept in every case
# some explanation of the sed script
# We use the \(...\) to "remember" parts of the pattern
# and use \1, \2 to "recall" the remembered pattern
# this is how we keep the first word, or look for
# duplicate patterns
#
# the pattern
#      [a-zA-Z0-9      ]
# matches upper/lower case letters, numbers space, tab
#
# the pattern
#      [^a-zA-Z0-9     ]
# matches everything EXCEPT upper/lower case letters, numbers space, tab
#
# portability warning:
# The comment lines within the sed command may have to be 
# removed for non-SunOS Systems
#######################################
# Strip a leading "prefix" from every line read on stdin.
# A prefix is one, two or three copies of the same character that is not
# a letter, digit, space or tab, optionally followed by blanks, and then
# a word. The word (and the rest of the line) is kept. A leading run of
# closing brackets (], }, >) plus trailing blanks is removed as well.
# Arguments: none
# Outputs:   the filtered lines on stdout
# Returns:   the exit status of sed
#######################################
sub_prefix() {
  # [[:blank:]] is the POSIX class for space and tab. It is spelled out
  # because a literal tab inside the brackets is invisible and is easily
  # lost when the file is copied or re-indented.
  sed '
# remove leading blanks (spaces and tabs)
s/^[[:blank:]]*//
# prefix = one non-alphanumeric followed by 0 or more blanks
# (this also covers the case with no blanks at all)
s/^[^a-zA-Z0-9[:blank:]][[:blank:]]*\([a-zA-Z0-9]\)/\1/
# prefix = the same non-alphanumeric twice, then 0 or more blanks
# \1 inside the pattern recalls the first remembered character
s/^\([^a-zA-Z0-9[:blank:]]\)\1[[:blank:]]*\([a-zA-Z0-9]\)/\2/
# prefix = the same non-alphanumeric three times, then 0 or more blanks
s/^\([^a-zA-Z0-9[:blank:]]\)\1\1[[:blank:]]*\([a-zA-Z0-9]\)/\2/
# prefix = one or more closing brackets, then 0 or more blanks
# a ] placed first inside a bracket expression is taken literally
s/^[]}>][]}>]*[[:blank:]]*//
'
}

# Act as a filter: stdin -> stdout, exit status is that of sed.
sub_prefix

# AskApache Web Development