3 datasets found
  1. Landmark Recognition 2021 - test data

    • kaggle.com
    zip
    Updated Sep 24, 2021
    Cite
    Mark Babayev (2021). Landmark Recognition 2021 - test data [Dataset]. https://www.kaggle.com/markbquant/landmark-recognition-2021-test-dataset
    Available download formats: zip (99208796 bytes)
    Dataset updated
    Sep 24, 2021
    Authors
    Mark Babayev
    License

    http://www.gnu.org/licenses/lgpl-3.0.html

    Description

    Test dataset for "Landmark Recognition 2021".

    Create dataset

    import os
    import tensorflow as tf
    from tqdm import tqdm

    # BASE_DIR is assumed to point at the extracted competition data
    global_test_dataset = tf.keras.preprocessing.image_dataset_from_directory(BASE_DIR+'/test', label_mode=None, shuffle=False, batch_size=1, image_size=(224, 224))
    # strip the 4-character file extension to recover each image id
    filepath = [x[:-4] for x in map(os.path.basename, global_test_dataset.file_paths)]
    filepath_ds = tf.data.Dataset.from_tensor_slices(filepath)
    dev_test_dataset = tf.data.Dataset.zip((global_test_dataset.unbatch(), filepath_ds))
    global_test_dataset_size = len(filepath)
    print('test images: ', global_test_dataset_size)

    # write one serialized tf.train.Example per image: the JPEG bytes plus its id
    with tf.io.TFRecordWriter('landmark-recognition-2021-test.tfrec') as file_writer:
      for img, path in tqdm(dev_test_dataset.as_numpy_iterator(), total=global_test_dataset_size):
        img = tf.cast(tf.image.resize(img, [224, 224], method='nearest'), 'uint8')
        img_jpeg = tf.io.encode_jpeg(img, quality=70, optimize_size=True).numpy()
        record_bytes = tf.train.Example(features=tf.train.Features(feature={
          'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_jpeg])),
          'id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[path])),
        })).SerializeToString()
        file_writer.write(record_bytes)
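
    As a quick sanity check (a sketch, not part of the original snippet), the number of serialized examples can be compared against the image count printed above:

    n_records = sum(1 for _ in tf.data.TFRecordDataset('landmark-recognition-2021-test.tfrec'))
    assert n_records == global_test_dataset_size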
    

    Load dataset

    import numpy as np

    AUTO = tf.data.AUTOTUNE

    def decode_tfrecord(record_bytes):
      features = tf.io.parse_single_example(record_bytes, {
        'image': tf.io.FixedLenFeature([], tf.string),
        'id': tf.io.FixedLenFeature([], tf.string)
      })
      img = tf.io.decode_jpeg(features['image'])
      img = tf.reshape(img, [224, 224, 3])
      return {'image': img, 'id': features['id']}


    # despite the "train" names, this loads the single test TFRecord written above
    FNAMES_TRAIN_TFRECORDS = np.sort(tf.io.gfile.glob(BASE_DIR+'/landmark-recognition-2021-test.tfrec'))
    global_train_ds = tf.data.TFRecordDataset(FNAMES_TRAIN_TFRECORDS, num_parallel_reads=None)
    global_train_ds = global_train_ds.map(decode_tfrecord, num_parallel_calls=AUTO)
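
    A short sketch of consuming the decoded records for inference; `model` here is a hypothetical trained classifier and is not part of the dataset description:

    # batch the decoded records and keep the string ids aligned with the predictions
    test_ds = global_train_ds.batch(64).prefetch(tf.data.AUTOTUNE)
    all_ids, all_preds = [], []
    for batch in test_ds:
      preds = model(tf.cast(batch['image'], tf.float32) / 255.0, training=False)
      all_ids.extend(i.decode() for i in batch['id'].numpy())
      all_preds.append(preds.numpy())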
    
  2. tiny_shakespeare

    • tensorflow.org
    • huggingface.co
    Updated Feb 11, 2023
    Cite
    (2023). tiny_shakespeare [Dataset]. https://www.tensorflow.org/datasets/catalog/tiny_shakespeare
    Dataset updated
    Feb 11, 2023
    Description

    40,000 lines of Shakespeare from a variety of Shakespeare's plays. Featured in Andrej Karpathy's blog post 'The Unreasonable Effectiveness of Recurrent Neural Networks': http://karpathy.github.io/2015/05/21/rnn-effectiveness/.

    To use for e.g. character modelling:

    d = tfds.load(name='tiny_shakespeare')['train']
    d = d.map(lambda x: tf.strings.unicode_split(x['text'], 'UTF-8'))
    # train split includes vocabulary for other splits
    vocabulary = sorted(set(next(iter(d)).numpy()))
    d = d.map(lambda x: {'cur_char': x[:-1], 'next_char': x[1:]})
    d = d.unbatch()
    seq_len = 100
    batch_size = 2
    d = d.batch(seq_len)
    d = d.batch(batch_size)
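
    One possible continuation (a sketch, not part of the catalog example): map the characters to integer ids with a lookup table built from the vocabulary above, so the batches can feed an embedding layer.

    keys = tf.constant(vocabulary)  # sorted single-character byte strings
    values = tf.range(len(vocabulary), dtype=tf.int64)
    char_to_id = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(keys, values), default_value=0)
    d = d.map(lambda x: {'cur_char': char_to_id.lookup(x['cur_char']),
                         'next_char': char_to_id.lookup(x['next_char'])})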
    

    To use this dataset:

    import tensorflow_datasets as tfds
    
    ds = tfds.load('tiny_shakespeare', split='train')
    for ex in ds.take(4):
      print(ex)
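
    The dataset metadata (splits and sizes) can also be inspected with the standard `with_info=True` option, shown here as a supplementary sketch:

    ds, info = tfds.load('tiny_shakespeare', with_info=True)
    print(info.splits)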
    

    See the guide for more information on tensorflow_datasets.

  3. Landmark Recognition 2021 (16 tfrecs)

    • kaggle.com
    zip
    Updated Oct 1, 2021
    Cite
    Mark Babayev (2021). Landmark Recognition 2021 (16 tfrecs) [Dataset]. https://www.kaggle.com/markbquant/landmark-recognition-2021-16-tfrecs
    Available download formats: zip (15797316844 bytes)
    Dataset updated
    Oct 1, 2021
    Authors
    Mark Babayev
    License

    http://www.gnu.org/licenses/lgpl-3.0.html

    Description

    Create the dataset

    # each hex-named subdirectory of train/ (0..f) is written to its own TFRecord shard
    for i in range(16):
      idx = hex(i)[2:]
      record_ds = tf.keras.preprocessing.image_dataset_from_directory(BASE_DIR+'/train/'+idx, label_mode=None, shuffle=False, batch_size=1, image_size=(224, 224))
      chunk_size = len(record_ds.file_paths)
      record_ds = record_ds.unbatch()

      with tf.io.TFRecordWriter('landmark-recognition-2021-part'+idx+'-'+str(chunk_size)+'.tfrec') as file_writer:
        for img in tqdm(record_ds.as_numpy_iterator(), total=chunk_size):
          img = tf.cast(tf.image.resize(img, [224, 224], method='nearest'), 'uint8')
          img_jpeg = tf.io.encode_jpeg(img, quality=70, optimize_size=True).numpy()
          record_bytes = tf.train.Example(features=tf.train.Features(feature={
            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_jpeg]))
          })).SerializeToString()
          file_writer.write(record_bytes)
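
    A small verification sketch (not part of the original snippet): each shard's record count can be checked against the chunk size encoded in its filename.

    for fname in sorted(tf.io.gfile.glob('landmark-recognition-2021-part*.tfrec')):
      expected = int(fname.rsplit('-', 1)[-1].split('.')[0])
      actual = sum(1 for _ in tf.data.TFRecordDataset(fname))
      print(fname, ':', actual, 'records, expected', expected)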
    

    Load the dataset

    The images in the dataset correspond one-to-one with the labels in the CSV file.

    import numpy as np
    import pandas as pd

    def decode_tfrecord(record_bytes):
      features = tf.io.parse_single_example(record_bytes, {
        'image': tf.io.FixedLenFeature([], tf.string),
      })
      img = tf.io.decode_jpeg(features['image'])
      img = tf.reshape(img, [224, 224, 3])
      return img

    FNAMES_TRAIN_TFRECORDS = np.sort(tf.io.gfile.glob(BASE_DIR+'/*.tfrec'))
    global_train_ds = tf.data.TFRecordDataset(FNAMES_TRAIN_TFRECORDS, num_parallel_reads=None)
    global_train_ds = global_train_ds.map(decode_tfrecord, num_parallel_calls=AUTO)

    # labels are sorted by id so they line up with the (sorted) TFRecord order
    labels = pd.read_csv(BASE_DIR+'/train.csv', index_col='id')
    labels.sort_index(inplace=True)
    labels['label'] = labels['landmark_id'].astype('category').cat.codes

    labels_ds = tf.data.Dataset.from_tensor_slices(labels['label'].values)
    dev_dataset = tf.data.Dataset.zip((global_train_ds, labels_ds))
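
    A minimal sketch of turning `dev_dataset` into a training input pipeline; `model` is a hypothetical Keras classifier and the shuffle/batch sizes are illustrative, not part of the dataset description:

    NUM_CLASSES = labels['label'].nunique()
    train_ds = dev_dataset.shuffle(10000).batch(64).prefetch(tf.data.AUTOTUNE)
    # `model` is assumed to map a 224x224x3 image to NUM_CLASSES logits
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.fit(train_ds, epochs=1)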
