sample_scripts/rek2hs at main · hammer-space/sample_scripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#! /bin/sh
# AWS Rekognition to Hammerspace Metadata
version="0.02"
# 3/9/2024
script="rek2hs"

# Peter Learmonth, Hammerspace
# peter@hammerspace.com

# Script to get the AWS Rekognition result for a file on an OSV set up for
# Rekognition and insert the top-level tags into HS metadata.

# Prerequisites
# - AWS CLI installed on system running the script
#     "aws configure" run to set up creds or access+secret keys, and default region
# - AWS S3 bucket with Rekognition configured and that bucket a volume (OSV) on Hammerspace
#     User may need permissions to use Rekognition
# - HSTK installed on system running the script
# - Hammerspace share mounted and files to be processed are in this share
# - Place-on or other objective used to place an instance in the bucket with Rekognition configured

# Setup notes on HS internal Confluence.  Some of these notes are stale.
# For example, you don't need to turn off doEncryption and useCompression in
# cloud-mover-conf.yaml.  If you add the OSV with --native, it takes care of that.
# https://confluence.it.hammer.space/pages/viewpage.action?spaceKey=DEV&title=Steps+to+manually+setup+the+AWS+Lambda+function+for+metadata+harvesting

# TODO
#  Check files are on HS share

# BUGS
# Script only works with one bucket configured for Rek

# No way to know the bucket for sure from the OSV since admin can rename it.
# (If this ran on Anvil or we had admin ssh or API access, we could...)

# Name of the Hammerspace object storage volume (OSV) to look for; it is
# matched against the STORAGE_VOLUME('...') entries printed by
# `hs eval -e instances`.
hsosv='rekbucket'
# S3 bucket name handed to `aws rekognition detect-labels`.
bucket=$rekognitionbucket #From env variable set by wrapper
#echo "bucket = $bucket"

# Scratch file that receives the detect-labels JSON for the file currently
# being processed.  mktemp creates it atomically under an unpredictable name,
# so another local user cannot pre-create or symlink a guessable
# /tmp/rek2hs<pid> path.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/rek2hs.XXXXXX") || {
  echo "rek2hs: cannot create temporary file" >&2
  exit 1
}

# Remove the scratch file on every exit path.  Plain sh does not run the EXIT
# trap when killed by a signal, so turn the usual signals into a normal exit.
trap 'rm -f -- "$tmpfile"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# NOTE: plain sh has no portable `local`, so the helpers below use
# rek_-prefixed variable names to avoid clobbering the script's globals
# (hsosv, bucket, tmpfile).

#######################################
# Insert the top-level Rekognition labels found in a detect-labels JSON file
# as Hammerspace tags named rek_<Label> with the confidence as the value.
# Globals:   none
# Arguments: $1 - file on the Hammerspace share to tag
#            $2 - file holding the detect-labels JSON output
# Outputs:   running count of inserted tags on stdout (no newline)
#######################################
rek2hs_insert_labels() {
  rek_target=$1
  rek_json=$2

  # Assumes properly formatted (pretty-printed) JSON; jq would be more robust.
  # The indentation in front of the first "Name" key is taken as the
  # indentation of the top-level labels, so more deeply nested "Name" /
  # "Confidence" keys are filtered out by the anchored grep below.
  rek_indent=$(grep '"Name"' "$rek_json" | head -n 1 | sed 's/.Name.*//')

  rek_count=0
  rek_key=
  # The loop runs in a pipeline subshell; rek_count is only needed inside it
  # for the progress display.
  grep -E "^$rek_indent\"(Name|Confidence)\"" "$rek_json" 2>/dev/null |
  while read -r rek_line
  do
    # read has already stripped the leading indentation from rek_line.
    case $rek_line in
      '"Name"'*)
        # 4th "-delimited field is the label; blanks become underscores.
        rek_key=$(printf '%s\n' "$rek_line" | cut -f4 -d'"' | sed 's/ /_/g')
        ;;
      '"Confidence"'*)
        # Keep only the number: drop everything up to ": " and any comma.
        rek_conf=$(printf '%s\n' "$rek_line" | sed -e 's/.*: //' -e 's/,.*//')
        hs tag add "rek_$rek_key" -e "$rek_conf" "$rek_target"
        rek_count=$((rek_count + 1))
        # Redraw the counter in place on the current line.
        tput hpa 1
        printf '%s' "$rek_count"
        ;;
    esac
  done
}

#######################################
# Process one file: skip it if already stamped, find its object path on the
# Rekognition OSV, run detect-labels and store the result as tags.
# Globals:   hsosv (read), bucket (read), tmpfile (read; file is overwritten)
# Arguments: $1 - file on the Hammerspace share
# Outputs:   '#' = already tagged, '.' = no instance on the OSV, otherwise
#            a tag count and summary line; errors go to stderr
# Returns:   0 if the file was handled or skipped, 1 if detect-labels failed
#######################################
rek2hs_process_file() {
  rek_file=$1

  # rek2hs_date is the "already processed" stamp written at the end of a
  # successful run.  Quoting keeps the test valid when hs prints nothing.
  if [ "$(hs tag has rek2hs_date "$rek_file")" = "TRUE" ]
  then
    printf '#'
    return 0
  fi

  # Find the instance on the Rekognition OSV and get the object ID / path.
  if rek_objpath=$(hs eval -e instances "$rek_file" |
       grep -A10 "STORAGE_VOLUME('$hsosv')" | grep '|PATH = ')
  then
    # The path is the first double-quoted string of the first matching line.
    rek_objpath=$(printf '%s\n' "$rek_objpath" | head -n 1 | cut -f2 -d'"')
    if ! aws rekognition detect-labels \
           --image "S3Object={Bucket=$bucket,Name=Hammerspace_v2/$rek_objpath}" \
           >"$tmpfile"
    then
      # Do NOT stamp the file: a stamped file is skipped by every later run,
      # so stamping after a failed call would lose its labels for good.
      echo "${script:-rek2hs}: detect-labels failed for $rek_file ... not tagged" >&2
      return 1
    fi
  else
    # File has no instance on an OSV with Rekognition configured.
    printf '.'
    return 0
  fi

  echo
  rek2hs_insert_labels "$rek_file" "$tmpfile"

  # Stamp the file so later runs skip it.
  rek_stamp=$(date +%Y%m%d_%H%M%S)
  hs tag add rek2hs_date -e "'$rek_stamp'" "$rek_file"

  echo " rekognition Tags inserted for file $rek_file."
}

# Without a bucket name every detect-labels call is bound to fail, so stop
# before touching any file.  (No files given: nothing to do, as before.)
if [ $# -gt 0 ] && [ -z "$bucket" ]
then
  echo "${script:-rek2hs}: bucket name is empty (rekognitionbucket not set?)" >&2
  exit 1
fi

# "$@" keeps each file name intact even if it contains blanks or wildcards.
for file in "$@"
do
  rek2hs_process_file "$file"
done

if [ -n "$tmpfile" ]
then
  rm -f -- "$tmpfile"
fi