html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,issue,performed_via_github_app https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248621072,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248621072,IC_kwDOCGYnMM5KbHIQ,9599,2022-09-15T20:56:09Z,2022-09-15T20:56:09Z,OWNER,"Prototype so far: ```diff diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 767b170..d96c507 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -1762,6 +1762,17 @@ def query( is_flag=True, help=""Analyze resulting tables and output results"", ) +@click.option(""--key"", help=""read data from this key of the root object"") +@click.option( + ""--auto-key"", + is_flag=True, + help=""Find a key in the root object that is a list of objects"", +) +@click.option( + ""--analyze"", + is_flag=True, + help=""Analyze resulting tables and output results"", +) @load_extension_option def memory( paths, @@ -1784,6 +1795,8 @@ def memory( schema, dump, save, + key, + auto_key, analyze, load_extension, ): @@ -1838,7 +1851,9 @@ def memory( csv_table = stem stem_counts[stem] = stem_counts.get(stem, 1) + 1 csv_fp = csv_path.open(""rb"") - rows, format_used = rows_from_file(csv_fp, format=format, encoding=encoding) + rows, format_used = rows_from_file( + csv_fp, format=format, encoding=encoding, key=key, auto_key=auto_key + ) tracker = None if format_used in (Format.CSV, Format.TSV) and not no_detect_types: tracker = TypeTracker() diff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py index 8754554..2e69c26 100644 --- a/sqlite_utils/utils.py +++ b/sqlite_utils/utils.py @@ -231,6 +231,8 @@ def rows_from_file( encoding: Optional[str] = None, ignore_extras: Optional[bool] = False, extras_key: Optional[str] = None, + key: Optional[str] = None, + auto_key: Optional[bool] = False, ) -> Tuple[Iterable[dict], Format]: """""" Load a sequence of dictionaries from a file-like object containing one of four different formats. @@ -271,13 +273,31 @@ def rows_from_file( :param encoding: the character encoding to use when reading CSV/TSV data :param ignore_extras: ignore any extra fields on rows :param extras_key: put any extra fields in a list with this key + :param key: read data from this key of the root object + :param auto_key: find a key in the root object that is a list of objects """""" if ignore_extras and extras_key: raise ValueError(""Cannot use ignore_extras= and extras_key= together"") + if key and auto_key: + raise ValueError(""Cannot use key= and auto_key= together"") if format == Format.JSON: decoded = json.load(fp) if isinstance(decoded, dict): - decoded = [decoded] + if auto_key: + list_keys = [ + k + for k in decoded + if isinstance(decoded[k], list) + and decoded[k] + and all(isinstance(o, dict) for o in decoded[k]) + ] + if len(list_keys) == 1: + decoded = decoded[list_keys[0]] + elif key: + # Raises KeyError, I think that's OK + decoded = decoded[key] + if not isinstance(decoded, list): + decoded = [decoded] if not isinstance(decoded, list): raise RowsFromFileBadJSON(""JSON must be a list or a dictionary"") return decoded, Format.JSON @@ -305,7 +325,9 @@ def rows_from_file( first_bytes = buffered.peek(2048).strip() if first_bytes.startswith(b""["") or first_bytes.startswith(b""{""): # TODO: Detect newline-JSON - return rows_from_file(buffered, format=Format.JSON) + return rows_from_file( + buffered, format=Format.JSON, key=key, auto_key=auto_key + ) else: dialect = csv.Sniffer().sniff( first_bytes.decode(encoding or ""utf-8-sig"", ""ignore"") ```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248522618,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248522618,IC_kwDOCGYnMM5KavF6,9599,2022-09-15T19:29:20Z,2022-09-15T19:29:20Z,OWNER,I think refactoring `sqlite-utils insert` to use `rows_from_file` needs to happen as part of this work.,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248512739,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248512739,IC_kwDOCGYnMM5Kasrj,9599,2022-09-15T19:18:24Z,2022-09-15T19:21:01Z,OWNER,"Why doesn't `sqlite-utils insert` use the `rows_from_file` function I wonder? https://github.com/simonw/sqlite-utils/issues/279#issuecomment-864207841 says: > I can refactor `sqlite-utils insert` to use this new code too. Maybe I forgot to do that?","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248501824,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248501824,IC_kwDOCGYnMM5KaqBA,9599,2022-09-15T19:10:48Z,2022-09-15T19:10:48Z,OWNER,"This feels pretty good: ``` % sqlite-utils memory ~/Downloads/CVR_Export_20220908084311/*.json --schema --auto-key CREATE TABLE [BallotTypeContestManifest] ( [BallotTypeId] INTEGER, [ContestId] INTEGER ); CREATE VIEW t1 AS select * from [BallotTypeContestManifest]; CREATE VIEW t AS select * from [BallotTypeContestManifest]; CREATE TABLE [BallotTypeManifest] ( [Description] TEXT, [Id] INTEGER, [ExternalId] TEXT ); ```","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248484094,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248484094,IC_kwDOCGYnMM5Kalr-,9599,2022-09-15T18:56:31Z,2022-09-15T18:56:31Z,OWNER,"Actually I quite like `--key X` - it could work for single nested objects too. You could insert a single record like this: ```json { ""record"" { ""id"": 1 } } ``` ``` sqlite-utils insert db.db records record.json --key record ``` ","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248481303,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248481303,IC_kwDOCGYnMM5KalAX,9599,2022-09-15T18:54:30Z,2022-09-15T18:55:14Z,OWNER,"Maybe this would make more sense as a mechanism where you can say ""Use the data in the key called X"" - but there's a special option for ""figure out that key automatically"". The syntax then could be: `--list-key List` Or for automatic detection: `--list-key-auto` Could also go with `--key List` and `--key-auto` - but would that be as obvious as `--list-key`?","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248479485,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248479485,IC_kwDOCGYnMM5Kakj9,9599,2022-09-15T18:52:52Z,2022-09-15T18:53:45Z,OWNER,"The most similar option I have at the moment is probably `--flatten`. What would good names for this option be? - `--auto-list` - `--auto-key` - `--inner-key` - `--auto-json` - `--find-list` - `--find-key` Those are all bad. Another option: introduce a new explicit format for it. Right now the explicit formats you can use are: https://github.com/simonw/sqlite-utils/blob/d9b9e075f07a20f1137cd2e34ed5d3f1a3db4ad8/docs/cli-reference.rst#L153-L158 So I could add a `:autojson` format.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248475718,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248475718,IC_kwDOCGYnMM5KajpG,9599,2022-09-15T18:49:05Z,2022-09-15T18:49:53Z,OWNER,"Here's how I used my prototype to build [that Gist](https://gist.github.com/simonw/0e6901974a14ab7d56c2746a04d72c8c): sqlite-utils memory ~/Downloads/CVR_Export_20220908084311/*.json --schema > database.sql ","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463, https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248474806,https://api.github.com/repos/simonw/sqlite-utils/issues/489,1248474806,IC_kwDOCGYnMM5Kaja2,9599,2022-09-15T18:48:09Z,2022-09-15T18:48:09Z,OWNER,"Built a prototype of this that works really well: ```diff diff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py index c0b7bf1..f9a482c 100644 --- a/sqlite_utils/utils.py +++ b/sqlite_utils/utils.py @@ -272,7 +272,19 @@ def rows_from_file( if format == Format.JSON: decoded = json.load(fp) if isinstance(decoded, dict): - decoded = [decoded] + # TODO: Solve for if this isn't what people want + # Does it have just one key that is a list of dicts? + list_keys = [ + k + for k in decoded + if isinstance(decoded[k], list) + and decoded[k] + and all(isinstance(o, dict) for o in decoded[k]) + ] + if len(list_keys) == 1: + decoded = decoded[list_keys[0]] + else: + decoded = [decoded] if not isinstance(decoded, list): raise RowsFromFileBadJSON(""JSON must be a list or a dictionary"") return decoded, Format.JSON ``` I used that to build this: https://gist.github.com/simonw/0e6901974a14ab7d56c2746a04d72c8c One problem though: right now, if you do this `sqlite-utils` treats it as a single object and adds a `tags` column with JSON in it: ``` echo '{""title"": ""Hi"", ""tags"": [{""t"": ""one""}]}` | sqlite-utils insert db.db t - ``` If I implement this new mechanism the above line would behave differently - which would be a backwards incompatible change. So I probably need some kind of opt-in mechanism for this. And I need a good name for it.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",1374939463,