From 0dc62ea51454a75294837be04b839b8073e4710a Mon Sep 17 00:00:00 2001 From: nl_0 Date: Wed, 22 May 2019 23:41:37 +0300 Subject: [PATCH] Bucket: package tree: limit entries (temp workaround) --- catalog/app/containers/Bucket/PackageTree.js | 65 ++++++-------- catalog/app/containers/Bucket/requests.js | 93 ++++++++++++++------ 2 files changed, 93 insertions(+), 65 deletions(-) diff --git a/catalog/app/containers/Bucket/PackageTree.js b/catalog/app/containers/Bucket/PackageTree.js index ffbe1875..076459ae 100644 --- a/catalog/app/containers/Bucket/PackageTree.js +++ b/catalog/app/containers/Bucket/PackageTree.js @@ -27,51 +27,40 @@ import * as requests from './requests' const TreeDisplay = tagged([ 'File', // S3Handle - 'Dir', // { files, dirs } + 'Dir', // { files, dirs, truncated } 'NotFound', ]) -const mkHandle = ({ logical_key: logicalKey, physical_keys: [key], size }) => ({ - ...parseS3Url(key), +const mkHandle = ({ logicalKey, physicalKey, size }) => ({ + ...parseS3Url(physicalKey), size, logicalKey, }) const getParents = (path) => (path ? [...getParents(up(path)), path] : []) -const computeTree = ({ bucket, name, revision, path }) => - R.pipe( - R.prop('keys'), - R.ifElse( - () => isDir(path), - R.pipe( - R.applySpec({ - dirs: R.pipe( - // eslint-disable-next-line camelcase - R.map((info) => getPrefix(info.logical_key)), - R.uniq, - R.chain(getParents), - R.uniq, - R.filter((dir) => up(dir) === path), - ), - files: R.pipe( - // eslint-disable-next-line camelcase - R.filter((info) => getPrefix(info.logical_key) === path), - R.map(mkHandle), - ), - bucket: () => bucket, - name: () => name, - revision: () => revision, - path: () => path, - }), - TreeDisplay.Dir, - ), - (keys) => { - const key = keys.find(R.propEq('logical_key', path)) - return key ? TreeDisplay.File(mkHandle(key)) : TreeDisplay.NotFound() - }, - ), - ) +const computeTree = ({ bucket, name, revision, path }) => ({ keys, truncated }) => { + if (isDir(path)) { + return TreeDisplay.Dir({ + dirs: R.pipe( + R.map((info) => getPrefix(info.logicalKey)), + R.uniq, + R.chain(getParents), + R.uniq, + R.filter((dir) => up(dir) === path), + )(keys), + files: keys.filter((info) => getPrefix(info.logicalKey) === path).map(mkHandle), + bucket, + name, + revision, + path, + truncated, + }) + } + + const key = keys.find(R.propEq('logicalKey', path)) + return key ? TreeDisplay.File(mkHandle(key)) : TreeDisplay.NotFound() +} const formatListing = ({ urls }, r) => { const dirs = r.dirs.map((dir) => @@ -219,9 +208,9 @@ export default ({ ), - Dir: (dir) => ( + Dir: ({ truncated, ...dir }) => ( - + {/* TODO: use proper versions */} diff --git a/catalog/app/containers/Bucket/requests.js b/catalog/app/containers/Bucket/requests.js index 7ec11666..b332aaaa 100644 --- a/catalog/app/containers/Bucket/requests.js +++ b/catalog/app/containers/Bucket/requests.js @@ -184,12 +184,6 @@ const loadRevisionHash = ({ s3, bucket, key }) => .promise() .then((res) => res.Body.toString('utf-8')) -const parseJSONL = R.pipe( - R.split('\n'), - R.map(R.tryCatch(JSON.parse, () => null)), - R.reject(R.isNil), -) - const getRevisionIdFromKey = (key) => key.substring(key.lastIndexOf('/') + 1) const getRevisionKeyFromId = (name, id) => `${PACKAGES_PREFIX}${name}/${id}` @@ -238,15 +232,70 @@ export const getPackageRevisions = withErrorHandling( }, ) +const s3Select = ({ + s3, + ExpressionType = 'SQL', + InputSerialization = { JSON: { Type: 'LINES' } }, + ...rest +}) => + s3 + .selectObjectContent({ + ExpressionType, + InputSerialization, + OutputSerialization: { JSON: {} }, + ...rest, + }) + .promise() + .then( + R.pipe( + R.prop('Payload'), + R.reduce((acc, evt) => { + if (!evt.Records) return acc + const s = evt.Records.Payload.toString() + return acc + s + }, ''), + R.trim, + R.split('\n'), + R.map(JSON.parse), + ), + ) + export const fetchPackageTree = withErrorHandling( async ({ s3, bucket, name, revision }) => { const hashKey = getRevisionKeyFromId(name, revision) const hash = await loadRevisionHash({ s3, bucket, key: hashKey }) const manifestKey = `${MANIFESTS_PREFIX}${hash}` - const r = await s3.getObject({ Bucket: bucket, Key: manifestKey }).promise() - const [info, ...keys] = parseJSONL(r.Body.toString('utf-8')) - const modified = r.LastModified - return { id: revision, hash, info, keys, modified } + + const [[{ total }], keys] = await Promise.all([ + s3Select({ + s3, + Bucket: bucket, + Key: manifestKey, + Expression: ` + SELECT COUNT(*) AS total + FROM S3Object[*] o + WHERE o.logical_key IS NOT MISSING + `, + }), + s3Select({ + s3, + Bucket: bucket, + Key: manifestKey, + Expression: ` + SELECT + o.logical_key AS logicalKey, + o.physical_keys[0] AS physicalKey, + o."size" AS "size" + FROM S3Object[*] o + WHERE o.logical_key IS NOT MISSING + LIMIT 1000 + `, + }), + ]) + + const truncated = total > keys.length + + return { id: revision, hash, keys, truncated } }, ) @@ -334,23 +383,13 @@ const queryAccessCounts = async ({ window = 365, }) => { try { - const { Payload } = await s3 - .selectObjectContent({ - Bucket: analyticsBucket, - Key: `${ACCESS_COUNTS_PREFIX}/${type}.csv`, - Expression: query, - ExpressionType: 'SQL', - InputSerialization: { CSV: { FileHeaderInfo: 'Use' } }, - OutputSerialization: { JSON: {} }, - }) - .promise() - - const recordedCounts = Payload.reduce((acc, i) => { - if (!i.Records) return acc - const [json] = i.Records.Payload.toString().split('\n') - const data = JSON.parse(json) - return JSON.parse(data.counts) - }, {}) + const [{ counts: recordedCounts }] = await s3Select({ + s3, + Bucket: analyticsBucket, + Key: `${ACCESS_COUNTS_PREFIX}/${type}.csv`, + Expression: query, + InputSerialization: { CSV: { FileHeaderInfo: 'Use' } }, + }) const counts = R.times((i) => { const date = dateFns.subDays(today, window - i - 1)