1#!/usr/bin/qsh
2# Copyright (C) 2016 and later: Unicode, Inc. and others.
3# License & terms of use: http://www.unicode.org/copyright.html
4#   Copyright (C) 2000-2011, International Business Machines
5#   Corporation and others.  All Rights Reserved.
6#
7# Authors:
8# Ami Fixler
9# Barry Novinger
10# Steven R. Loomis
11# George Rhoten
12# Jason Spieth
13#
14# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
15# After extracting to EBCDIC, binary files are re-extracted without the
16# EBCDIC conversion, thus restoring them to original codepage.
17
# Detect whether we are running under Qshell.  QSH_VERSION is non-empty when
# Qshell is detected; otherwise we announce that this is only a test run.
# QSH is exported as 1 (Qshell detected) or 0 (not detected).
if [ -n "$QSH_VERSION" ]; then
  QSH=1
  #echo "QSH version $QSH_VERSION"
else
  QSH=0
  echo "QSH not detected (QSH_VERSION not set) - just testing."
fi
export QSH
27
# Unpack verbosity: "v" lists each file as it is unpacked (default); the
# value is spliced into the pax option string.
VERBOSE_UNPACK="v"

#****************************************************************************
# binary_suffixes: file suffixes (extensions) that mark a file as binary.
# A generic alternative would be:
#   binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
# The list below is the ICU specific set, in lower and upper case.
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT'
binary_suffixes="$binary_suffixes icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE"

# data_files: space separated patterns naming the ICU data directories.
# The patterns are only stored here; assignments do not glob-expand them.
data_files='icu/source/data/brkitr/*'
data_files="$data_files icu/source/data/locales/*"
data_files="$data_files icu/source/data/coll/*"
data_files="$data_files icu/source/data/rbnf/*"
data_files="$data_files icu/source/data/mappings/*"
data_files="$data_files icu/source/data/misc/*"
data_files="$data_files icu/source/data/translit/*"
data_files="$data_files icu/source/data/unidata/*"
data_files="$data_files icu/source/test/testdata/*"
40
#****************************************************************************
# Function:     usage
# Description:  Writes a one-line hint to stdout showing how to invoke this
#               script (script name followed by the archive file name)
# Input:        None
# Output:       None
#****************************************************************************
usage()
{
  printf 'Enter archive filename as a parameter: %s icu-archive.tar\n' "$0"
}
51
52#****************************************************************************
# first make sure we have at least one arg and it's a file we can read
54#****************************************************************************
55
# Validate the command line: exactly what we need is one readable archive.
# Both failure paths print the usage hint and exit with a non-zero status so
# that callers can tell the script did not run.

# check for no arguments
if [ $# -eq 0 ]; then
  usage
  exit 1
fi

# tar file is argument 1
tar_file=$1

# check that the file is valid.  The expansion is quoted so that an empty
# argument or a path containing blanks is tested as a single operand; an
# unquoted empty value would reduce the test to "[ ! -r ]", which is false
# and would let the empty name through.
if [ ! -r "$tar_file" ]; then
  echo "$tar_file does not exist or cannot be read."
  usage
  exit 1
fi
71
# treat all data files as ebcdic
ebcdic_data=$data_files

#****************************************************************************
# Extract files.  We do this in two passes.  One pass for 819 files and a
# second pass for 37 files
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""

# Pass 1: extract everything as iso-8859-1 except the data directories
# (the "c" option inverts the pattern match).
# "$tar_file" is quoted so an archive path containing blanks stays a single
# operand.  $ebcdic_data is deliberately left unquoted: it holds several
# space separated patterns and each one must become its own operand.
pax -C 819 -rc${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

# Pass 2: extract the data directories with CCSID 37 (EBCDIC)
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -r${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data
91
#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819.  Files that match are collected in
# binary_files and re-extracted as 819 in batches.
#
# Reads:   tar_file (archive to re-extract from)
# Writes:  bin_count (number of files found by BOM), binary_files (the
#          batch that was restored last)
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Start from an empty list: a binary_files value inherited from the
# environment must never reach the rm/pax commands below.
binary_files=""
# Process BOMs
if [ -f icu/as_is/bomlist.txt ]; then
    # A list of the files was shipped in the archive; use it instead of
    # scanning.  The list is split on whitespace into one operand per entry.
    echo "Using icu/as_is/bomlist.txt"
    pax -C 819 -rvf "$tar_file" $(cat icu/as_is/bomlist.txt)
else
  # No list available: inspect the first bytes of every *.txt file.
  # The find output is word-split, so paths are assumed to contain no blanks.
  for file in $(find ./icu \( -name \*.txt -print \)); do
    # Hex dump of the first line, reduced to its first three byte values
    # (fields 2-4 of the first od line; field 1 is the offset), lower case.
    bom8=$(head -n 1 "$file" |
           od -t x1 |
           head -n 1 |
           sed 's/  */ /g' |
           cut -f2-4 -d ' ' |
           tr 'A-Z' 'a-z')
    # Find a converted UTF-8 BOM.  Two spellings are accepted, with and
    # without a leading zero on each byte value.
    if [ "$bom8" = "057 08b 0ab" ] || [ "$bom8" = "57 8b ab" ]; then
        # Drop the leading "./" so the name has the same form as the other
        # icu/... paths used with pax in this script.
        file=$(echo $file | cut -d / -f2-)

        if [ $(echo $binary_files | wc -w) -lt 200 ]; then
            bin_count=$((bin_count + 1))
            binary_files="$binary_files $file"
        else
            # The batch already holds 200 names: restore it now and start a
            # new batch with the current file.
            echo "Restoring binary files by BOM ($bin_count)..."
            rm $binary_files
            pax -C 819 -rvf "$tar_file" $binary_files
            echo "Determining binary files by BOM ($bin_count)..."
            binary_files="$file"
            bin_count=$((bin_count + 1))
        fi
    fi
  done
  # now see if a re-extract of the remaining batch is necessary
  if [ $(echo $binary_files | wc -w) -gt 0 ]; then
      echo "Restoring binary files ($bin_count) ..."
      rm $binary_files
      pax -C 819 -rvf "$tar_file" $binary_files
  fi
fi
142
echo "# Processing special paths."
# Process special paths: every file under icu/ whose suffix is listed in
# binary_suffixes is deleted and re-extracted as CCSID 819.

# Turn each suffix into a "-o -name *.suffix" term for find.
# NOTE(review): the generated *.suffix patterns are expanded unquoted below,
# so the shell would glob them against the current directory if matching
# files exist there - confirm the script is always run from a clean
# directory.
more_bin_opts=$(echo $binary_suffixes | sed -e 's%[a-zA-Z0-9]*%-o -name \*.&%g')
# echo "Looking for additional files: find ... $more_bin_opts"
# '*.zzz' is only a leading term so that every generated term can start
# with -o.
more_bin_files=$(find icu -type f \( -name '*.zzz' $more_bin_opts \)  -print)
echo "Restoring binary files by special paths ($bin_count) ..."
# Only restore when something was found: rm with no operands fails, and pax
# with no patterns would select every member of the archive and re-extract
# all of it as 819.
if [ -n "$more_bin_files" ]; then
  rm $more_bin_files
  pax -C 819 -rvf "$tar_file" $more_bin_files
else
  echo "No binary files matched by suffix; nothing to restore."
fi
151
#****************************************************************************
# Generate a qsh compatible configure script by running the shipped
# configure through icu/as_is/os400/convertConfigure.sed.
#****************************************************************************

echo ""
echo "Generating qsh compatible configure ..."
echo ""

# Write the converted script to a temporary name first and only replace the
# real configure when sed succeeded; otherwise a failed conversion would
# leave an empty or partial configure behind.
if sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp
then
  # NOTE(review): del is not a POSIX utility; presumably the Qshell delete
  # command - confirm.
  del -f icu/source/configure
  mv icu/source/configureTemp icu/source/configure
  chmod 755 icu/source/configure
else
  echo "$0: could not convert icu/source/configure; original left in place." >&2
  rm -f icu/source/configureTemp
  exit 1
fi

echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."
167
168